├── AlexNet
│   └── AlexNet.py
├── README.md
├── MobileNet
│   └── MobileNet.py
├── VGG
│   ├── VGG16.py
│   └── VGG19.py
├── ResNet
│   ├── ResNet34.py
│   └── ResNet50.py
├── SqueezeNet
│   └── Vanilla_SqueezeNet1_0.py
├── ResNeXt
│   └── ResNeXt50-32-4d.py
├── DenseNet
│   └── DenseNet.py
├── ShuffleNet
│   └── ShuffleNet.py
├── ResNet50_with_Stochastic_Depth
│   └── ResNet50_with_Stochastic_Depth.py
├── GoogLeNet
│   └── GoogLeNet.py
└── DeepLearningBasics
    ├── LogisticRegression.py
    └── ShallowNeuralNetwork.py

--------------------------------------------------------------------------------
/AlexNet/AlexNet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


class AlexNet(nn.Module):
    def __init__(self, num_classes):
        super(AlexNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool3(F.relu(self.conv5(x)))
        x = x.view(-1, 256 * 6 * 6)
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax itself,
        # and F.softmax without an explicit dim argument is deprecated.
        x = self.fc3(x)
        return x


net = AlexNet(1000)
print(net)
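
# Quick shape sanity check (my addition, not part of the original file): with
# a 224x224 RGB input, the feature maps flatten to exactly 256 * 6 * 6.
x = torch.randn(1, 3, 224, 224)
print(net(x).size())  # expected: torch.Size([1, 1000])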
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Learning From Scratch
## Introduction
This repository stores a series of reproductions of deep learning milestones in Computer Vision.

For obvious reasons, I'm unable to train the models on ImageNet from scratch. Therefore, for the image classification task, all the models are trained on [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html), whose images are upscaled to the input sizes described in the original papers.

If my model achieves accuracy and speed comparable to the corresponding model-zoo implementation (trained without pre-trained weights, under the same settings) over the first three epochs, I consider it a valid reproduction.

**Stars are welcome!**

I guess no one would read my code line by line. LOL.

## Image Classification
* [AlexNet](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks)
* [VGG](https://arxiv.org/abs/1409.1556)
* [GoogLeNet](https://arxiv.org/abs/1409.4842)
* [ResNet](https://arxiv.org/abs/1512.03385)
* [ResNeXt](https://arxiv.org/abs/1611.05431)
* [SqueezeNet](https://arxiv.org/abs/1602.07360)
* [MobileNet](https://arxiv.org/abs/1704.04861)
* [DenseNet](https://arxiv.org/abs/1608.06993)
* [ShuffleNet](https://arxiv.org/abs/1707.01083)
* [ResNet with Stochastic Depth](https://arxiv.org/abs/1603.09382)

## Model Zoo
My models are compared against the implementations listed in this section.
* [AlexNet](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.alexnet)
* [VGG16](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.vgg16)
* [VGG19](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.vgg19)
* [GoogLeNet](https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/symbols/googlenet.py)
* [ResNet34](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.resnet34)
* [ResNet50](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.resnet50)
* [ResNeXt50-32x4d](https://github.com/prlz77/ResNeXt.pytorch)
* [SqueezeNet 1.0](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.squeezenet1_0)
* [MobileNet](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)
* [DenseNet121](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.densenet121)
* [ShuffleNet](https://github.com/jaxony/ShuffleNet/blob/master/model.py)
* [ResNet with Stochastic Depth](https://github.com/yueatsprograms/Stochastic_Depth)
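
## Data Preprocessing
Upscaling CIFAR-10 to an ImageNet-style input can be done with a torchvision transform along these lines. This is only a sketch of the setting described above; the exact pipeline and augmentation are not recorded in this README.

```python
import torchvision
import torchvision.transforms as transforms

# Resize 32x32 CIFAR-10 images to the 224x224 input most of the papers assume.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
])
trainset = torchvision.datasets.CIFAR10(root="./data", train=True,
                                        download=True, transform=transform)
```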
--------------------------------------------------------------------------------
/MobileNet/MobileNet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


class DepthwiseSeparableConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0):
        super(DepthwiseSeparableConv2d, self).__init__()
        self.layer = nn.Sequential(
            # Depthwise convolution: groups=in_channels gives one filter per channel.
            nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=in_channels),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),

            # Pointwise 1x1 convolution mixes the channels.
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.layer(x)
        return x


class MobileNet(nn.Module):
    def __init__(self, num_classes, alpha=1.0):
        super(MobileNet, self).__init__()
        # alpha is the width multiplier from the paper: it thins every layer uniformly.
        self.conv0 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=int(alpha * 32), kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(int(alpha * 32)),
            nn.ReLU(inplace=True)
        )
        self.entry = nn.Sequential(
            DepthwiseSeparableConv2d(in_channels=int(alpha * 32), out_channels=int(alpha * 64), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 64), out_channels=int(alpha * 128), stride=2, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 128), out_channels=int(alpha * 128), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 128), out_channels=int(alpha * 256), stride=2, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 256), out_channels=int(alpha * 256), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 256), out_channels=int(alpha * 512), stride=2, padding=1)
        )
        self.middle = nn.Sequential(
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1)
        )
        self.exit = nn.Sequential(
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 1024), stride=2, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 1024), out_channels=int(alpha * 1024), stride=1, padding=1)
        )
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=int(alpha * 1024), out_features=num_classes)

    def forward(self, x):
        x = self.conv0(x)
        x = self.entry(x)
        x = self.middle(x)
        x = self.exit(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
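
# Quick shape check (my addition, not part of the original file): the strided
# blocks take 224 down to 7 before the 7x7 average pool.
net = MobileNet(num_classes=10, alpha=1.0)
print(net(torch.randn(2, 3, 224, 224)).size())  # expected: torch.Size([2, 10])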
--------------------------------------------------------------------------------
/VGG/VGG16.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class VGG16(nn.Module):
    def __init__(self, num_classes):
        super(VGG16, self).__init__()
        self.feature = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )
        self._initialize_weights()

    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    # This initialization is the key to competitive performance.
    # Borrowed from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
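
# Quick shape check (my addition, not part of the original file): five 2x2
# max-pools take 224 down to 7, matching the 512 * 7 * 7 classifier input.
net = VGG16(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])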
--------------------------------------------------------------------------------
/ResNet/ResNet34.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class Block(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(Block, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        # The residual branch ends with BatchNorm; ReLU is applied only after
        # the addition, as in the original paper.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels)
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x
        x = self.conv1(x)
        x = self.conv2(x)
        if self.downsample is not None:
            identity = self.downsample(identity)
        x = x + identity
        x = self.relu(x)
        return x


class ResNet34(nn.Module):
    def __init__(self, num_classes):
        super(ResNet34, self).__init__()
        self.head = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
        self.bn = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.group1 = self._make_group(block=Block, in_channels=64, out_channels=64, blocks=3, stride=1)
        self.group2 = self._make_group(block=Block, in_channels=64, out_channels=128, blocks=4, stride=2)
        self.group3 = self._make_group(block=Block, in_channels=128, out_channels=256, blocks=6, stride=2)
        self.group4 = self._make_group(block=Block, in_channels=256, out_channels=512, blocks=3, stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=512, out_features=num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.head(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.group1(x)
        x = self.group2(x)
        x = self.group3(x)
        x = self.group4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_group(self, block, in_channels, out_channels, blocks, stride):
        # Only the first block may downsample; the remaining blocks - 1 run at stride 1.
        layers = [block(in_channels=in_channels, out_channels=out_channels, stride=stride)]
        for i in range(1, blocks):
            layers.append(block(in_channels=out_channels, out_channels=out_channels, stride=1))
        return nn.Sequential(*layers)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
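
# Quick shape check (my addition, not part of the original file): the 3/4/6/3
# block groups take 224 down to 7 before the 7x7 average pool.
net = ResNet34(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])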
--------------------------------------------------------------------------------
/SqueezeNet/Vanilla_SqueezeNet1_0.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class Fire(nn.Module):
    def __init__(self, in_channels, squeeze_channels, expand1x1_channels, expand3x3_channels):
        super(Fire, self).__init__()
        # Squeeze down to a few channels, then expand through parallel 1x1 and 3x3 branches.
        self.squeeze = nn.Conv2d(in_channels=in_channels, out_channels=squeeze_channels, kernel_size=1)
        self.squ_relu = nn.ReLU(inplace=True)
        self.expand1x1 = nn.Conv2d(in_channels=squeeze_channels, out_channels=expand1x1_channels, kernel_size=1)
        self.relu_1x1 = nn.ReLU(inplace=True)
        self.expand3x3 = nn.Conv2d(in_channels=squeeze_channels, out_channels=expand3x3_channels, kernel_size=3, padding=1)
        self.relu_3x3 = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.squeeze(x)
        x = self.squ_relu(x)
        x_1x1 = self.expand1x1(x)
        x_1x1 = self.relu_1x1(x_1x1)
        x_3x3 = self.expand3x3(x)
        x_3x3 = self.relu_3x3(x_3x3)
        return torch.cat([x_1x1, x_3x3], 1)


class SqueezeNet(nn.Module):
    def __init__(self, num_classes):
        super(SqueezeNet, self).__init__()
        self.head = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=7, stride=2, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.fire2 = Fire(in_channels=96, squeeze_channels=16, expand1x1_channels=64, expand3x3_channels=64)
        self.fire3 = Fire(in_channels=128, squeeze_channels=16, expand1x1_channels=64, expand3x3_channels=64)
        self.fire4 = Fire(in_channels=128, squeeze_channels=32, expand1x1_channels=128, expand3x3_channels=128)
        self.pool4 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fire5 = Fire(in_channels=256, squeeze_channels=32, expand1x1_channels=128, expand3x3_channels=128)
        self.fire6 = Fire(in_channels=256, squeeze_channels=48, expand1x1_channels=192, expand3x3_channels=192)
        self.fire7 = Fire(in_channels=384, squeeze_channels=48, expand1x1_channels=192, expand3x3_channels=192)
        self.fire8 = Fire(in_channels=384, squeeze_channels=64, expand1x1_channels=256, expand3x3_channels=256)
        self.pool8 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fire9 = Fire(in_channels=512, squeeze_channels=64, expand1x1_channels=256, expand3x3_channels=256)
        self.conv10 = nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(kernel_size=13, stride=1)
        self._initialize_weights()

    def forward(self, x):
        x = self.head(x)
        x = self.fire2(x)
        x = self.fire3(x)
        x = self.fire4(x)
        x = self.pool4(x)
        x = self.fire5(x)
        x = self.fire6(x)
        x = self.fire7(x)
        x = self.fire8(x)
        x = self.pool8(x)
        x = self.fire9(x)
        x = self.conv10(x)
        x = self.relu(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
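
# Usage sketch (my addition, not part of the original file): SqueezeNet is
# fully convolutional, so the class count comes from conv10, not a Linear layer.
net = SqueezeNet(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])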
--------------------------------------------------------------------------------
/ResNet/ResNet50.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class BottleNeck(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(BottleNeck, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        # The residual branch ends with BatchNorm; ReLU is applied only after
        # the addition, as in the original paper.
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=(out_channels * 4), kernel_size=1),
            nn.BatchNorm2d((out_channels * 4))
        )
        self.relu = nn.ReLU(inplace=True)
        # Note: a projection shortcut is used in every block here; the reference
        # implementation switches to an identity shortcut when shapes match.
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=(out_channels * 4), kernel_size=1, stride=stride),
            nn.BatchNorm2d((out_channels * 4))
        )
        self._initialize_weights()

    def forward(self, x):
        identity = self.downsample(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x + identity
        x = self.relu(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


class ResNet50(nn.Module):
    def __init__(self, num_classes):
        super(ResNet50, self).__init__()
        self.head = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
        self.bn = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.group1 = self._make_group(BottleNeck, in_channels=64, out_channels=64, blocks=3, stride=1)
        self.group2 = self._make_group(BottleNeck, in_channels=256, out_channels=128, blocks=4, stride=2)
        self.group3 = self._make_group(BottleNeck, in_channels=512, out_channels=256, blocks=6, stride=2)
        self.group4 = self._make_group(BottleNeck, in_channels=1024, out_channels=512, blocks=3, stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=2048, out_features=num_classes)

    def forward(self, x):
        x = self.head(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.group1(x)
        x = self.group2(x)
        x = self.group3(x)
        x = self.group4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_group(self, block, in_channels, out_channels, blocks, stride):
        # Only the first block may downsample; the rest run at stride 1 and
        # take the expanded (out_channels * 4) input.
        layers = [block(in_channels=in_channels, out_channels=out_channels, stride=stride)]
        for i in range(1, blocks):
            layers.append(block(in_channels=(out_channels * 4), out_channels=out_channels, stride=1))
        return nn.Sequential(*layers)
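
# Quick shape check (my addition, not part of the original file): the
# bottleneck expansion factor is 4, giving 2048 channels before the classifier.
net = ResNet50(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])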
--------------------------------------------------------------------------------
/ResNeXt/ResNeXt50-32-4d.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class ResNeXtBottleNeck(nn.Module):
    def __init__(self, in_channels, intermediate, out_channels, stride, cardinality=32):
        super(ResNeXtBottleNeck, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=intermediate, kernel_size=1, stride=1),
            nn.BatchNorm2d(intermediate),
            nn.ReLU(inplace=True)
        )
        # The grouped 3x3 convolution carries the `cardinality` parallel paths.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=intermediate, out_channels=intermediate, groups=cardinality, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(intermediate),
            nn.ReLU(inplace=True)
        )
        # The residual branch ends with BatchNorm; ReLU comes after the addition.
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=intermediate, out_channels=out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels)
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride),
            nn.BatchNorm2d(out_channels)
        )

    def forward(self, x):
        identity = self.downsample(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x + identity
        x = self.relu(x)
        return x


class ResNeXt50(nn.Module):
    def __init__(self, num_classes):
        super(ResNeXt50, self).__init__()
        self.head = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
        self.bn = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.group1 = self._make_group(ResNeXtBottleNeck, in_channels=64, intermediate=128, out_channels=256, blocks=3, stride=1)
        self.group2 = self._make_group(ResNeXtBottleNeck, in_channels=256, intermediate=256, out_channels=512, blocks=4, stride=2)
        self.group3 = self._make_group(ResNeXtBottleNeck, in_channels=512, intermediate=512, out_channels=1024, blocks=6, stride=2)
        self.group4 = self._make_group(ResNeXtBottleNeck, in_channels=1024, intermediate=1024, out_channels=2048, blocks=3, stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=2048, out_features=num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.head(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.group1(x)
        x = self.group2(x)
        x = self.group3(x)
        x = self.group4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_group(self, block, in_channels, intermediate, out_channels, blocks, stride):
        # Only the first block may downsample; the remaining blocks - 1 run at stride 1.
        layers = [block(in_channels=in_channels, intermediate=intermediate, out_channels=out_channels, stride=stride)]
        for i in range(1, blocks):
            layers.append(block(in_channels=out_channels, intermediate=intermediate, out_channels=out_channels, stride=1))
        return nn.Sequential(*layers)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
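
# Quick shape check (my addition, not part of the original file): cardinality
# 32 with bottleneck width 4 gives the "32x4d" template of the paper.
net = ResNeXt50(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])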
--------------------------------------------------------------------------------
/VGG/VGG19.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class VGG19(nn.Module):
    def __init__(self, num_classes):
        super(VGG19, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv8 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.conv9 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv10 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv11 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv12 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2)
        self.conv13 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv14 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv15 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv16 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = self.pool3(x)
        x = F.relu(self.conv9(x))
        x = F.relu(self.conv10(x))
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = self.pool4(x)
        x = F.relu(self.conv13(x))
        x = F.relu(self.conv14(x))
        x = F.relu(self.conv15(x))
        x = F.relu(self.conv16(x))
        x = self.pool5(x)
        x = x.view(-1, 512 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, training=self.training)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax itself,
        # and F.softmax without an explicit dim argument is deprecated.
        x = self.fc3(x)
        return x
    # This initialization is the key to competitive performance.
    # Borrowed from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

--------------------------------------------------------------------------------
/DenseNet/DenseNet.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class DenseLayer(nn.Module):
    def __init__(self, in_channels, growth_rate, bottleneck_factor=4, drop_rate=0.5):
        super(DenseLayer, self).__init__()
        self.layer = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=in_channels, out_channels=growth_rate * bottleneck_factor, kernel_size=1, stride=1),

            nn.BatchNorm2d(growth_rate * bottleneck_factor),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=growth_rate * bottleneck_factor, out_channels=growth_rate, kernel_size=3, stride=1, padding=1),

            nn.Dropout(p=drop_rate)
        )

    def forward(self, x):
        # Each layer contributes growth_rate new channels, concatenated onto its input.
        identity = x
        x = self.layer(x)
        total = torch.cat([identity, x], 1)
        return total


class Transition(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Transition, self).__init__()
        self.layer = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )

    def forward(self, x):
        x = self.layer(x)
        return x


class DenseNet(nn.Module):
    def __init__(self, num_classes, growth_rate, compression_factor, blocks):
        super(DenseNet, self).__init__()
        num_features = growth_rate * 2
        self.entry = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=num_features, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(num_features=num_features),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.DenseBlock1 = self._make_block(DenseLayer, blocks[0], in_channels=num_features, growth_rate=growth_rate)
        num_features = num_features + blocks[0] * growth_rate
        compressed_features = int(num_features * compression_factor)
        self.Transition1 = Transition(in_channels=num_features, out_channels=compressed_features)

        self.DenseBlock2 = self._make_block(DenseLayer, blocks[1], in_channels=compressed_features, growth_rate=growth_rate)
        num_features = compressed_features + blocks[1] * growth_rate
        compressed_features = int(num_features * compression_factor)
        self.Transition2 = Transition(in_channels=num_features, out_channels=compressed_features)

        self.DenseBlock3 = self._make_block(DenseLayer, blocks[2], in_channels=compressed_features, growth_rate=growth_rate)
        num_features = compressed_features + blocks[2] * growth_rate
        compressed_features = int(num_features * compression_factor)
        self.Transition3 = Transition(in_channels=num_features, out_channels=compressed_features)

        self.DenseBlock4 = self._make_block(DenseLayer, blocks[3], in_channels=compressed_features, growth_rate=growth_rate)
        num_features = compressed_features + blocks[3] * growth_rate
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=num_features, out_features=num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.entry(x)
        x = self.DenseBlock1(x)
        x = self.Transition1(x)
        x = self.DenseBlock2(x)
        x = self.Transition2(x)
        x = self.DenseBlock3(x)
        x = self.Transition3(x)
        x = self.DenseBlock4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_block(self, layer, num_layers, in_channels, growth_rate, bottleneck_factor=4, drop_rate=0.5):
        # The i-th layer sees the block input plus i * growth_rate concatenated channels.
        block = []
        for i in range(num_layers):
            block.append(layer(in_channels=in_channels + i * growth_rate, growth_rate=growth_rate, bottleneck_factor=bottleneck_factor, drop_rate=drop_rate))
        return nn.Sequential(*block)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
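
# Usage sketch (my addition, not part of the original file): these
# hyperparameters give DenseNet-121 (growth rate 32, compression 0.5,
# block sizes 6/12/24/16; the final feature count works out to 1024).
net = DenseNet(num_classes=10, growth_rate=32, compression_factor=0.5, blocks=[6, 12, 24, 16])
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])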
--------------------------------------------------------------------------------
/ShuffleNet/ShuffleNet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


def channel_shuffle(x, groups):
    # Reshape to (batch, groups, channels_per_group, h, w), swap the two
    # channel axes, and flatten back, so information mixes across groups.
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = num_channels // groups
    x = x.view(batchsize, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()
    x = x.view(batchsize, -1, height, width)
    return x


class Conv_1x1(nn.Module):
    def __init__(self, in_channels, out_channels, groups):
        super(Conv_1x1, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, groups=groups),
            nn.BatchNorm2d(num_features=out_channels)
        )

    def forward(self, x):
        x = self.layer(x)
        return x


class DWConv_3x3(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(DWConv_3x3, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels),
            nn.BatchNorm2d(num_features=out_channels)
        )

    def forward(self, x):
        x = self.layer(x)
        return x


class ShuffleUnit(nn.Module):
    def __init__(self, in_channels, out_channels, groups, mode):
        super(ShuffleUnit, self).__init__()
        self.mode = mode
        self.groups = groups
        # "add" units keep the resolution and use a residual sum; "cat" units
        # halve the resolution and concatenate with the pooled shortcut.
        if mode == "add":
            stride = 1
        elif mode == "cat":
            stride = 2
        else:
            raise ValueError("mode must be 'add' or 'cat'")
        self.gconv_1x1_head = Conv_1x1(in_channels=in_channels, out_channels=(out_channels // 4), groups=groups)
        self.relu = nn.ReLU(inplace=True)
        self.dwconv_3x3 = DWConv_3x3(in_channels=(out_channels // 4), out_channels=(out_channels // 4), stride=stride)
        self.gconv_1x1_cat_tail = Conv_1x1(in_channels=(out_channels // 4), out_channels=(out_channels - in_channels), groups=groups)
        self.gconv_1x1_add_tail = Conv_1x1(in_channels=(out_channels // 4), out_channels=out_channels, groups=groups)
        self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        if self.mode == "add":
            identity = x
            x = self.gconv_1x1_head(x)
            x = channel_shuffle(x, self.groups)
            x = self.dwconv_3x3(x)
            x = self.gconv_1x1_add_tail(x)
            x = x + identity
            x = self.relu(x)
            return x
        if self.mode == "cat":
            identity = self.avgpool(x)
            x = self.gconv_1x1_head(x)
            x = channel_shuffle(x, self.groups)
            x = self.dwconv_3x3(x)
            x = self.gconv_1x1_cat_tail(x)
            x = torch.cat([x, identity], 1)
            x = self.relu(x)
            return x


class Stage(nn.Module):
    def __init__(self, in_channels, out_channels, repeats, groups):
        super(Stage, self).__init__()
        self.head = ShuffleUnit(in_channels=in_channels, out_channels=out_channels, groups=groups, mode="cat")
        self.body = self._make_stage(in_channels=out_channels, out_channels=out_channels, groups=groups, repeats=repeats)

    def forward(self, x):
        x = self.head(x)
        x = self.body(x)
        return x

    def _make_stage(self, in_channels, out_channels, groups, repeats):
        layers = []
        for i in range(repeats):
            layers.append(ShuffleUnit(in_channels=in_channels, out_channels=out_channels, groups=groups, mode="add"))
        return nn.Sequential(*layers)


class ShuffleNet(nn.Module):
    def __init__(self, num_classes, groups=3):
        super(ShuffleNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=24, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(num_features=24),
            nn.ReLU(inplace=True)
        )
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.stage1 = Stage(in_channels=24, out_channels=240, repeats=3, groups=groups)
        self.stage2 = Stage(in_channels=240, out_channels=480, repeats=7, groups=groups)
        self.stage3 = Stage(in_channels=480, out_channels=960, repeats=3, groups=groups)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=960, out_features=num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
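
# Quick shape check (my addition, not part of the original file): with
# groups=3 the stage widths are 240/480/960, matching the g=3 row of the paper.
net = ShuffleNet(num_classes=10, groups=3)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])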
--------------------------------------------------------------------------------
/ResNet50_with_Stochastic_Depth/ResNet50_with_Stochastic_Depth.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class BottleNeck(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(BottleNeck, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        # The residual branch ends with BatchNorm; ReLU comes after the addition.
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=(out_channels * 4), kernel_size=1),
            nn.BatchNorm2d((out_channels * 4))
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=(out_channels * 4), kernel_size=1, stride=stride),
            nn.BatchNorm2d((out_channels * 4))
        )
        self._initialize_weights()

    def forward(self, x, active, prob):
        if self.training:
            if active == 1:
                # Block survives: compute the residual branch as usual.
                identity = self.downsample(x)
                x = self.conv1(x)
                x = self.conv2(x)
                x = self.conv3(x)
                x = x + identity
                x = self.relu(x)
                return x
            else:
                # Block is dropped: only the (projected) shortcut remains.
                x = self.downsample(x)
                x = self.relu(x)
                return x
        else:
            # At test time every block runs, scaled by its survival probability.
            identity = self.downsample(x)
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.conv3(x)
            x = prob * x + identity
            x = self.relu(x)
            return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


class Group(nn.Module):
    def __init__(self, num_blocks, in_channels, out_channels, stride):
        super(Group, self).__init__()
        self.num_blocks = num_blocks
        self.head_layer = BottleNeck(in_channels=in_channels, out_channels=out_channels, stride=stride)
        # Each tail position needs its own BottleNeck instance; reusing a
        # single module for every position would share weights across blocks.
        self.tail_layers = nn.ModuleList(
            [BottleNeck(in_channels=(out_channels * 4), out_channels=out_channels, stride=1)
             for _ in range(num_blocks - 1)]
        )

    def forward(self, x, active, probs):
        x = self.head_layer(x, active[0], probs[0])
        for i, layer in enumerate(self.tail_layers, start=1):
            x = layer(x, active[i], probs[i])
        return x


class ResNet50_Stochastic_Depth(nn.Module):
    def __init__(self, num_classes, pL=0.5):
        super(ResNet50_Stochastic_Depth, self).__init__()
        self.num_classes = num_classes
        # Survival probabilities decay linearly from 1 to pL over the 16 blocks.
        self.probabilities = torch.linspace(start=1, end=pL, steps=16)
        self.actives = torch.bernoulli(self.probabilities)

        self.head = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
        self.bn = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.group1 = Group(num_blocks=3, in_channels=64, out_channels=64, stride=1)
        self.group2 = Group(num_blocks=4, in_channels=256, out_channels=128, stride=2)
        self.group3 = Group(num_blocks=6, in_channels=512, out_channels=256, stride=2)
        self.group4 = Group(num_blocks=3, in_channels=1024, out_channels=512, stride=2)

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=2048, out_features=num_classes)

    def forward(self, x):
        # Resample which blocks are active on every forward pass; the sampled
        # values are only consulted in training mode.
        self.actives = torch.bernoulli(self.probabilities)
        x = self.head(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.group1(x, self.actives[:3], self.probabilities[:3])
        x = self.group2(x, self.actives[3:7], self.probabilities[3:7])
        x = self.group3(x, self.actives[7:13], self.probabilities[7:13])
        x = self.group4(x, self.actives[13:], self.probabilities[13:])
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
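
# Usage sketch (my addition, not part of the original file): blocks are
# dropped at random during training; at test time every block runs, scaled
# by its survival probability.
net = ResNet50_Stochastic_Depth(num_classes=10, pL=0.5)
net.train()
print(net(torch.randn(1, 3, 224, 224)).size())  # torch.Size([1, 10])
net.eval()
print(net(torch.randn(1, 3, 224, 224)).size())  # torch.Size([1, 10])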
--------------------------------------------------------------------------------
/GoogLeNet/GoogLeNet.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class Inception(nn.Module):
    def __init__(self, in_channels, k_1x1, k_3x3red, k_3x3, k_5x5red, k_5x5, pool_proj):
        super(Inception, self).__init__()
        self.b1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=k_1x1, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.b2 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=k_3x3red, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=k_3x3red, out_channels=k_3x3, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        # The padded 1x1 reduction plus a 5x5 with padding=1 produces the same
        # output size as the usual 5x5 with padding=2 would.
        self.b3 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=k_5x5red, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=k_5x5red, out_channels=k_5x5, kernel_size=5, padding=1),
            nn.ReLU(inplace=True)
        )
        # The stride-1 max-pool shrinks the map by 2; the padded projection restores it.
        self.b4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1),
            nn.Conv2d(in_channels=in_channels, out_channels=pool_proj, kernel_size=1, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        y1 = self.b1(x)
        y2 = self.b2(x)
        y3 = self.b3(x)
        y4 = self.b4(x)
        return torch.cat([y1, y2, y3, y4], 1)


class AuxClassifier(nn.Module):
    def __init__(self, num_classes, in_channels):
        super(AuxClassifier, self).__init__()
        self.pool1 = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=128, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=4 * 4 * 128, out_features=1024),
            nn.ReLU(inplace=True)
        )
        self.drop = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(in_features=1024, out_features=num_classes)

    def forward(self, x):
        x = self.pool1(x)
        x = self.conv1(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.drop(x)
        x = self.fc2(x)
        return x


class GoogLeNet(nn.Module):
    def __init__(self, num_classes, aux_classifier=True):
        super(GoogLeNet, self).__init__()
        self.aux_classifier = aux_classifier
        self.head = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )

        self.block3a = Inception(in_channels=192, k_1x1=64, k_3x3red=96, k_3x3=128, k_5x5red=16, k_5x5=32, pool_proj=32)
        self.block3b = Inception(in_channels=256, k_1x1=128, k_3x3red=128, k_3x3=192, k_5x5red=32, k_5x5=96, pool_proj=64)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.block4a = Inception(in_channels=480, k_1x1=192, k_3x3red=96, k_3x3=208, k_5x5red=16, k_5x5=48, pool_proj=64)
        if aux_classifier:
            self.aux0 = AuxClassifier(num_classes=num_classes, in_channels=512)
        self.block4b = Inception(in_channels=512, k_1x1=160, k_3x3red=112, k_3x3=224, k_5x5red=24, k_5x5=64, pool_proj=64)
        self.block4c = Inception(in_channels=512, k_1x1=128, k_3x3red=128, k_3x3=256, k_5x5red=24, k_5x5=64, pool_proj=64)
        self.block4d = Inception(in_channels=512, k_1x1=112, k_3x3red=144, k_3x3=288, k_5x5red=32, k_5x5=64, pool_proj=64)
        if aux_classifier:
            self.aux1 = AuxClassifier(num_classes=num_classes, in_channels=528)
        self.block4e = Inception(in_channels=528, k_1x1=256, k_3x3red=160, k_3x3=320, k_5x5red=32, k_5x5=128, pool_proj=128)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.block5a = Inception(in_channels=832, k_1x1=256, k_3x3red=160, k_3x3=320, k_5x5red=32, k_5x5=128, pool_proj=128)
        self.block5b = Inception(in_channels=832, k_1x1=384, k_3x3red=192, k_3x3=384, k_5x5red=48, k_5x5=128, pool_proj=128)
        self.pool3 = nn.AvgPool2d(kernel_size=7, stride=1)
        self.drop = nn.Dropout(p=0.4)
        self.classifier = nn.Linear(in_features=1024, out_features=num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.head(x)
        x = self.block3a(x)
        x = self.block3b(x)
        x = self.pool1(x)
        x = self.block4a(x)
        if self.training and self.aux_classifier:
            output0 = self.aux0(x)
        x = self.block4b(x)
        x = self.block4c(x)
        x = self.block4d(x)
        if self.training and self.aux_classifier:
            output1 = self.aux1(x)
        x = self.block4e(x)
        x = self.pool2(x)
        x = self.block5a(x)
        x = self.block5b(x)
        x = self.pool3(x)
        x = x.view(x.size(0), -1)
        x = self.drop(x)
        output2 = self.classifier(x)
        # The auxiliary outputs only exist in training mode with the auxiliary
        # classifiers enabled, so return them conditionally.
        if self.training and self.aux_classifier:
            return output0, output1, output2
        return output2

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
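
# Usage sketch (my addition, not part of the original file): in training mode
# the network returns the two auxiliary logits plus the main logits; in eval
# mode only the main logits.
net = GoogLeNet(num_classes=10)
net.train()
out0, out1, out2 = net(torch.randn(1, 3, 224, 224))
net.eval()
out = net(torch.randn(1, 3, 224, 224))
print(out.size())  # expected: torch.Size([1, 10])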
--------------------------------------------------------------------------------
/DeepLearningBasics/LogisticRegression.py:
--------------------------------------------------------------------------------
import numpy as np


def sigmoid(z):
    """
    Compute the sigmoid of z

    Arguments:
    z -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(z)
    """

    ### START CODE HERE ### (≈ 1 line of code)
    s = 1 / (1 + np.exp(-z))
    ### END CODE HERE ###

    return s


def initialize_with_zeros(dim):
    """
    This function creates a vector of zeros of shape (dim, 1) for w and initializes b to 0.

    Argument:
    dim -- size of the w vector we want (or number of parameters in this case)

    Returns:
    w -- initialized vector of shape (dim, 1)
    b -- initialized scalar (corresponds to the bias)
    """

    ### START CODE HERE ### (≈ 1 line of code)
    w = np.zeros((dim, 1))
    b = 0
    ### END CODE HERE ###

    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))

    return w, b

def propagate(w, b, X, Y):
    """
    Implement the cost function and its gradient for the propagation explained above

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat) of size (1, number of examples)

    Return:
    cost -- negative log-likelihood cost for logistic regression
    dw -- gradient of the loss with respect to w, thus same shape as w
    db -- gradient of the loss with respect to b, thus same shape as b

    Tips:
    - Write your code step by step for the propagation. np.log(), np.dot()
    """

    m = X.shape[1]

    # FORWARD PROPAGATION (FROM X TO COST)
    ### START CODE HERE ### (≈ 2 lines of code)
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    ### END CODE HERE ###

    # BACKWARD PROPAGATION (TO FIND GRAD)
    ### START CODE HERE ### (≈ 2 lines of code)
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)
    ### END CODE HERE ###

    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())

    grads = {"dw": dw,
             "db": db}

    return grads, cost


def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """
    This function optimizes w and b by running a gradient descent algorithm

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    print_cost -- True to print the loss every 100 steps

    Returns:
    params -- dictionary containing the weights w and bias b
    grads -- dictionary containing the gradients of the weights and bias with respect to the cost function
    costs -- list of all the costs computed during the optimization, this will be used to plot the learning curve.

    Tips:
    You basically need to write down two steps and iterate through them:
        1) Calculate the cost and the gradient for the current parameters. Use propagate().
        2) Update the parameters using the gradient descent rule for w and b.
    """
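
# NOTE (my addition, not part of the assignment): a tiny smoke test with two
# toy examples of 3 features each; gradient descent should separate them easily.
# It belongs at the bottom of this file, after model() is defined:
#
#     if __name__ == "__main__":
#         X_toy = np.array([[1.0, -1.0], [2.0, -2.0], [0.5, -0.5]])
#         Y_toy = np.array([[1, 0]])
#         model(X_toy, Y_toy, X_toy, Y_toy, num_iterations=1000, learning_rate=0.1)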

    costs = []

    for i in range(num_iterations):

        # Cost and gradient calculation (≈ 1-4 lines of code)
        ### START CODE HERE ###
        grads, cost = propagate(w, b, X, Y)
        ### END CODE HERE ###

        # Retrieve derivatives from grads
        dw = grads["dw"]
        db = grads["db"]

        # update rule (≈ 2 lines of code)
        ### START CODE HERE ###
        w = w - learning_rate * dw
        b = b - learning_rate * db
        ### END CODE HERE ###

        # Record the costs
        if i % 100 == 0:
            costs.append(cost)

        # Print the cost every 100 training iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs


def predict(w, b, X):
    '''
    Predict whether the label is 0 or 1 using learned logistic regression parameters (w, b)

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, number of examples)

    Returns:
    Y_prediction -- a numpy array (vector) containing all predictions (0/1) for the examples in X
    '''

    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)

    # Compute vector "A" predicting the probabilities of a cat being present in the picture
    ### START CODE HERE ### (≈ 1 line of code)
    A = sigmoid(np.dot(w.T, X) + b)
    ### END CODE HERE ###

    for i in range(A.shape[1]):

        # Convert probabilities A[0,i] to actual predictions p[0,i]
        ### START CODE HERE ### (≈ 4 lines of code)
        if A[0, i] > 0.5:
            Y_prediction[0, i] = 1
        ### END CODE HERE ###

    assert(Y_prediction.shape == (1, m))

    return Y_prediction


def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """
    Builds the logistic regression model by calling the function you've implemented previously

    Arguments:
    X_train -- training set represented by a numpy array of shape (num_px * num_px * 3, m_train)
    Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
    X_test -- test set represented by a numpy array of shape (num_px * num_px * 3, m_test)
    Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
    num_iterations -- hyperparameter representing the number of iterations to optimize the parameters
    learning_rate -- hyperparameter representing the learning rate used in the update rule of optimize()
    print_cost -- Set to true to print the cost every 100 iterations

    Returns:
    d -- dictionary containing information about the model.
    """

    ### START CODE HERE ###

    # initialize parameters with zeros (≈ 1 line of code)
    w, b = initialize_with_zeros(X_train.shape[0])

    # Gradient descent (≈ 1 line of code)
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)

    # Retrieve parameters w and b from dictionary "parameters"
    w = parameters["w"]
    b = parameters["b"]

    # Predict test/train set examples (≈ 2 lines of code)
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    ### END CODE HERE ###

    # Print train/test Errors
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}

    return d

--------------------------------------------------------------------------------
/DeepLearningBasics/ShallowNeuralNetwork.py:
--------------------------------------------------------------------------------
import numpy as np


def sigmoid(z):
    """Compute the sigmoid of z (used by forward_propagation below)."""
    return 1 / (1 + np.exp(-z))


def layer_sizes(X, Y):
    """
    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)

    Returns:
    n_x -- the size of the input layer
    n_h -- the size of the hidden layer
    n_y -- the size of the output layer
    """
    ### START CODE HERE ### (≈ 3 lines of code)
    n_x = X.shape[0]  # size of input layer
    n_h = 4
    n_y = Y.shape[0]  # size of output layer
    ### END CODE HERE ###
    return (n_x, n_h, n_y)


def initialize_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    params -- python dictionary containing your parameters:
        W1 -- weight matrix of shape (n_h, n_x)
        b1 -- bias vector of shape (n_h, 1)
        W2 -- weight matrix of shape (n_y, n_h)
        b2 -- bias vector of shape (n_y, 1)
    """

    np.random.seed(2)  # we set up a seed so that your output matches ours although the initialization is random.


def forward_propagation(X, parameters):
    """
    Arguments:
    X -- input data of size (n_x, m)
    parameters -- python dictionary containing your parameters (output of the initialization function)

    Returns:
    A2 -- the sigmoid output of the second activation
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    # Retrieve each parameter from the dictionary "parameters"
    ### START CODE HERE ### (≈ 4 lines of code)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    ### END CODE HERE ###

    # Implement forward propagation to calculate A2 (probabilities)
    ### START CODE HERE ### (≈ 4 lines of code)
    Z1 = np.dot(W1, X) + b1
    A1 = np.maximum(Z1, 0)  # ReLU; the original assignment used np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    ### END CODE HERE ###

    assert(A2.shape == (1, X.shape[1]))

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}

    return A2, cache


def compute_cost(A2, Y, parameters):
    """
    Computes the cross-entropy cost

    Arguments:
    A2 -- the sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    parameters -- python dictionary containing your parameters W1, b1, W2 and b2

    Returns:
    cost -- the cross-entropy cost
    """

    # Compute the cross-entropy cost
    ### START CODE HERE ### (≈ 2 lines of code)
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(np.log(1 - A2), (1 - Y))
    cost = -np.mean(logprobs)
    ### END CODE HERE ###

    cost = float(np.squeeze(cost))  # makes sure cost is a plain scalar, e.g. turns [[17]] into 17.0
    assert(isinstance(cost, float))

    return cost
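

# A hedged aside (not in the original file): np.log(A2) returns -inf when the
# sigmoid saturates and A2 reaches exactly 0.0 or 1.0 in floating point, which
# can poison the cost with NaN/inf. A common remedy is to clip the probabilities
# first; this variant is a sketch of that idea, with an arbitrary epsilon.
def compute_cost_clipped(A2, Y, eps=1e-12):
    A2 = np.clip(A2, eps, 1 - eps)  # keep probabilities strictly inside (0, 1)
    logprobs = Y * np.log(A2) + (1 - Y) * np.log(1 - A2)
    return float(-np.mean(logprobs))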


def backward_propagation(parameters, cache, X, Y):
    """
    Implement the backward propagation.

    Arguments:
    parameters -- python dictionary containing our parameters
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    X -- input data of shape (n_x, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    grads -- python dictionary containing your gradients with respect to the different parameters
    """
    m = X.shape[1]

    # First, retrieve W1 and W2 from the dictionary "parameters".
    ### START CODE HERE ### (≈ 2 lines of code)
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    ### END CODE HERE ###

    # Retrieve also A1, A2 and Z1 from the dictionary "cache".
    ### START CODE HERE ### (≈ 3 lines of code)
    A1 = cache["A1"]
    A2 = cache["A2"]
    Z1 = cache["Z1"]
    ### END CODE HERE ###

    # Backward propagation: calculate dW1, db1, dW2, db2.
    ### START CODE HERE ### (≈ 6 lines of code)
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = np.dot(W2.T, dZ2) * (Z1 > 0)  # ReLU derivative; for tanh use np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m
    ### END CODE HERE ###

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads
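

# A finite-difference gradient check (not in the original file): compares the
# analytic dW2 from backward_propagation against a central-difference estimate
# on tiny random data. Checking dW2 avoids the ReLU kink, since perturbing W2
# leaves Z1 untouched. The epsilon and the toy sizes below are arbitrary.
def _check_dW2(eps=1e-6, seed=0):
    rng = np.random.RandomState(seed)
    X = rng.randn(3, 5)
    Y = (rng.rand(1, 5) > 0.5).astype(float)
    params = initialize_parameters(3, 4, 1)
    _, cache = forward_propagation(X, params)
    dW2 = backward_propagation(params, cache, X, Y)["dW2"]

    numeric = np.zeros_like(dW2)
    for idx in np.ndindex(*dW2.shape):
        costs = []
        for delta in (eps, -eps):
            params["W2"][idx] += delta
            A2, _ = forward_propagation(X, params)
            costs.append(compute_cost(A2, Y, params))
            params["W2"][idx] -= delta  # restore the original weight
        numeric[idx] = (costs[0] - costs[1]) / (2 * eps)
    return np.max(np.abs(numeric - dW2))  # should be on the order of 1e-8 or smaller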


def update_parameters(parameters, grads, learning_rate=1.2):
    """
    Updates parameters using the gradient descent update rule

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients
    learning_rate -- step size of the gradient descent update

    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    # Retrieve each parameter from the dictionary "parameters"
    ### START CODE HERE ### (≈ 4 lines of code)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    ### END CODE HERE ###

    # Retrieve each gradient from the dictionary "grads"
    ### START CODE HERE ### (≈ 4 lines of code)
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    ### END CODE HERE ###

    # Update rule for each parameter
    ### START CODE HERE ### (≈ 4 lines of code)
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    ### END CODE HERE ###

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters


def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False, eta=0.01):
    """
    Arguments:
    X -- dataset of shape (n_x, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- number of iterations in the gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    eta -- learning rate passed to update_parameters()

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    # Initialize parameters. Inputs: "n_x, n_h, n_y". Outputs: "parameters".
    ### START CODE HERE ### (≈ 1 line of code)
    parameters = initialize_parameters(n_x, n_h, n_y)
    ### END CODE HERE ###

    # Loop (gradient descent)
    for i in range(num_iterations):

        ### START CODE HERE ### (≈ 4 lines of code)
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate=eta)
        ### END CODE HERE ###

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters


def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns:
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """

    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold
    ### START CODE HERE ### (≈ 2 lines of code)
    A2, cache = forward_propagation(X, parameters)
    predictions = A2 > 0.5
    ### END CODE HERE ###

    return predictions
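

# A minimal usage sketch (not part of the original file): trains the network on
# a synthetic 2-D binary problem and reports training accuracy. The XOR-like
# data below is made up for illustration; it stands in for the planar dataset
# the original assignment trained on.
if __name__ == "__main__":
    np.random.seed(1)
    m = 400
    X = np.random.randn(2, m)
    Y = (X[0, :] * X[1, :] > 0).astype(float).reshape(1, m)  # label = sign agreement of the two features

    parameters = nn_model(X, Y, n_h=4, num_iterations=5000, print_cost=True, eta=0.5)
    predictions = predict(parameters, X)
    print("train accuracy: {:.1f} %".format(100 * np.mean(predictions == Y)))
--------------------------------------------------------------------------------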