├── AlexNet
│   └── AlexNet.py
├── README.md
├── MobileNet
│   └── MobileNet.py
├── VGG
│   ├── VGG16.py
│   └── VGG19.py
├── ResNet
│   ├── ResNet34.py
│   └── ResNet50.py
├── SqueezeNet
│   └── Vanilla_SqueezeNet1_0.py
├── ResNeXt
│   └── ResNeXt50-32-4d.py
├── DenseNet
│   └── DenseNet.py
├── ShuffleNet
│   └── ShuffleNet.py
├── ResNet50_with_Stochastic_Depth
│   └── ResNet50_with_Stochastic_Depth.py
├── GoogLeNet
│   └── GoogLeNet.py
└── DeepLearningBasics
    ├── LogisticRegression.py
    └── ShallowNeuralNetwork.py

--------------------------------------------------------------------------------
/AlexNet/AlexNet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


class AlexNet(nn.Module):
    def __init__(self, num_classes):
        super(AlexNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool3(F.relu(self.conv5(x)))
        x = x.view(-1, 256 * 6 * 6)
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax itself,
        # and F.softmax without an explicit dim argument is deprecated.
        x = self.fc3(x)
        return x


net = AlexNet(1000)
print(net)
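
# Quick shape sanity check (my addition, not part of the original file): with
# a 224x224 RGB input, the feature maps flatten to exactly 256 * 6 * 6.
x = torch.randn(1, 3, 224, 224)
print(net(x).size())  # expected: torch.Size([1, 1000])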
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Learning From Scratch
## Introduction
This repository stores a series of reproductions of deep learning milestones in Computer Vision.

For obvious reasons, I'm unable to train the models on ImageNet from scratch. Therefore, for the image classification task, all the models are trained on [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html), whose images are upscaled to the input sizes described in the original papers.

If my model achieves accuracy and speed comparable to the corresponding model-zoo implementation (trained without pre-trained weights, under the same settings) over the first three epochs, I consider it a valid reproduction.

**Stars are welcome!**

I guess no one would read my code line by line. LOL.

## Image Classification
* [AlexNet](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks)
* [VGG](https://arxiv.org/abs/1409.1556)
* [GoogLeNet](https://arxiv.org/abs/1409.4842)
* [ResNet](https://arxiv.org/abs/1512.03385)
* [ResNeXt](https://arxiv.org/abs/1611.05431)
* [SqueezeNet](https://arxiv.org/abs/1602.07360)
* [MobileNet](https://arxiv.org/abs/1704.04861)
* [DenseNet](https://arxiv.org/abs/1608.06993)
* [ShuffleNet](https://arxiv.org/abs/1707.01083)
* [ResNet with Stochastic Depth](https://arxiv.org/abs/1603.09382)

## Model Zoo
My models are compared against the implementations listed in this section.
* [AlexNet](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.alexnet)
* [VGG16](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.vgg16)
* [VGG19](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.vgg19)
* [GoogLeNet](https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/symbols/googlenet.py)
* [ResNet34](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.resnet34)
* [ResNet50](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.resnet50)
* [ResNeXt50-32x4d](https://github.com/prlz77/ResNeXt.pytorch)
* [SqueezeNet 1.0](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.squeezenet1_0)
* [MobileNet](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)
* [DenseNet121](http://pytorch.org/docs/master/torchvision/models.html#torchvision.models.densenet121)
* [ShuffleNet](https://github.com/jaxony/ShuffleNet/blob/master/model.py)
* [ResNet with Stochastic Depth](https://github.com/yueatsprograms/Stochastic_Depth)
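
## Data Preprocessing
Upscaling CIFAR-10 to an ImageNet-style input can be done with a torchvision transform along these lines. This is only a sketch of the setting described above; the exact pipeline and augmentation are not recorded in this README.

```python
import torchvision
import torchvision.transforms as transforms

# Resize 32x32 CIFAR-10 images to the 224x224 input most of the papers assume.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
])
trainset = torchvision.datasets.CIFAR10(root="./data", train=True,
                                        download=True, transform=transform)
```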
--------------------------------------------------------------------------------
/MobileNet/MobileNet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


class DepthwiseSeparableConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0):
        super(DepthwiseSeparableConv2d, self).__init__()
        self.layer = nn.Sequential(
            # Depthwise convolution: groups=in_channels gives one filter per channel.
            nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=in_channels),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),

            # Pointwise 1x1 convolution mixes the channels.
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.layer(x)
        return x


class MobileNet(nn.Module):
    def __init__(self, num_classes, alpha=1.0):
        super(MobileNet, self).__init__()
        # alpha is the width multiplier from the paper: it thins every layer uniformly.
        self.conv0 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=int(alpha * 32), kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(int(alpha * 32)),
            nn.ReLU(inplace=True)
        )
        self.entry = nn.Sequential(
            DepthwiseSeparableConv2d(in_channels=int(alpha * 32), out_channels=int(alpha * 64), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 64), out_channels=int(alpha * 128), stride=2, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 128), out_channels=int(alpha * 128), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 128), out_channels=int(alpha * 256), stride=2, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 256), out_channels=int(alpha * 256), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 256), out_channels=int(alpha * 512), stride=2, padding=1)
        )
        self.middle = nn.Sequential(
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 512), stride=1, padding=1)
        )
        self.exit = nn.Sequential(
            DepthwiseSeparableConv2d(in_channels=int(alpha * 512), out_channels=int(alpha * 1024), stride=2, padding=1),
            DepthwiseSeparableConv2d(in_channels=int(alpha * 1024), out_channels=int(alpha * 1024), stride=1, padding=1)
        )
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=int(alpha * 1024), out_features=num_classes)

    def forward(self, x):
        x = self.conv0(x)
        x = self.entry(x)
        x = self.middle(x)
        x = self.exit(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
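
# Quick shape check (my addition, not part of the original file): the strided
# blocks take 224 down to 7 before the 7x7 average pool.
net = MobileNet(num_classes=10, alpha=1.0)
print(net(torch.randn(2, 3, 224, 224)).size())  # expected: torch.Size([2, 10])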
--------------------------------------------------------------------------------
/VGG/VGG16.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class VGG16(nn.Module):
    def __init__(self, num_classes):
        super(VGG16, self).__init__()
        self.feature = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )
        self._initialize_weights()

    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    # This initialization is the key to competitive performance.
    # Borrowed from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
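
# Quick shape check (my addition, not part of the original file): five 2x2
# max-pools take 224 down to 7, matching the 512 * 7 * 7 classifier input.
net = VGG16(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])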
--------------------------------------------------------------------------------
/ResNet/ResNet34.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class Block(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(Block, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        # The residual branch ends with BatchNorm; ReLU is applied only after
        # the addition, as in the original paper.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels)
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x
        x = self.conv1(x)
        x = self.conv2(x)
        if self.downsample is not None:
            identity = self.downsample(identity)
        x = x + identity
        x = self.relu(x)
        return x


class ResNet34(nn.Module):
    def __init__(self, num_classes):
        super(ResNet34, self).__init__()
        self.head = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
        self.bn = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.group1 = self._make_group(block=Block, in_channels=64, out_channels=64, blocks=3, stride=1)
        self.group2 = self._make_group(block=Block, in_channels=64, out_channels=128, blocks=4, stride=2)
        self.group3 = self._make_group(block=Block, in_channels=128, out_channels=256, blocks=6, stride=2)
        self.group4 = self._make_group(block=Block, in_channels=256, out_channels=512, blocks=3, stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=512, out_features=num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.head(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.group1(x)
        x = self.group2(x)
        x = self.group3(x)
        x = self.group4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_group(self, block, in_channels, out_channels, blocks, stride):
        # Only the first block may downsample; the remaining blocks - 1 run at stride 1.
        layers = [block(in_channels=in_channels, out_channels=out_channels, stride=stride)]
        for i in range(1, blocks):
            layers.append(block(in_channels=out_channels, out_channels=out_channels, stride=1))
        return nn.Sequential(*layers)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
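
# Quick shape check (my addition, not part of the original file): the 3/4/6/3
# block groups take 224 down to 7 before the 7x7 average pool.
net = ResNet34(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])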
--------------------------------------------------------------------------------
/SqueezeNet/Vanilla_SqueezeNet1_0.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class Fire(nn.Module):
    def __init__(self, in_channels, squeeze_channels, expand1x1_channels, expand3x3_channels):
        super(Fire, self).__init__()
        # Squeeze down to a few channels, then expand through parallel 1x1 and 3x3 branches.
        self.squeeze = nn.Conv2d(in_channels=in_channels, out_channels=squeeze_channels, kernel_size=1)
        self.squ_relu = nn.ReLU(inplace=True)
        self.expand1x1 = nn.Conv2d(in_channels=squeeze_channels, out_channels=expand1x1_channels, kernel_size=1)
        self.relu_1x1 = nn.ReLU(inplace=True)
        self.expand3x3 = nn.Conv2d(in_channels=squeeze_channels, out_channels=expand3x3_channels, kernel_size=3, padding=1)
        self.relu_3x3 = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.squeeze(x)
        x = self.squ_relu(x)
        x_1x1 = self.expand1x1(x)
        x_1x1 = self.relu_1x1(x_1x1)
        x_3x3 = self.expand3x3(x)
        x_3x3 = self.relu_3x3(x_3x3)
        return torch.cat([x_1x1, x_3x3], 1)


class SqueezeNet(nn.Module):
    def __init__(self, num_classes):
        super(SqueezeNet, self).__init__()
        self.head = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=7, stride=2, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.fire2 = Fire(in_channels=96, squeeze_channels=16, expand1x1_channels=64, expand3x3_channels=64)
        self.fire3 = Fire(in_channels=128, squeeze_channels=16, expand1x1_channels=64, expand3x3_channels=64)
        self.fire4 = Fire(in_channels=128, squeeze_channels=32, expand1x1_channels=128, expand3x3_channels=128)
        self.pool4 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fire5 = Fire(in_channels=256, squeeze_channels=32, expand1x1_channels=128, expand3x3_channels=128)
        self.fire6 = Fire(in_channels=256, squeeze_channels=48, expand1x1_channels=192, expand3x3_channels=192)
        self.fire7 = Fire(in_channels=384, squeeze_channels=48, expand1x1_channels=192, expand3x3_channels=192)
        self.fire8 = Fire(in_channels=384, squeeze_channels=64, expand1x1_channels=256, expand3x3_channels=256)
        self.pool8 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.fire9 = Fire(in_channels=512, squeeze_channels=64, expand1x1_channels=256, expand3x3_channels=256)
        self.conv10 = nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(kernel_size=13, stride=1)
        self._initialize_weights()

    def forward(self, x):
        x = self.head(x)
        x = self.fire2(x)
        x = self.fire3(x)
        x = self.fire4(x)
        x = self.pool4(x)
        x = self.fire5(x)
        x = self.fire6(x)
        x = self.fire7(x)
        x = self.fire8(x)
        x = self.pool8(x)
        x = self.fire9(x)
        x = self.conv10(x)
        x = self.relu(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
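
# Usage sketch (my addition, not part of the original file): SqueezeNet is
# fully convolutional, so the class count comes from conv10, not a Linear layer.
net = SqueezeNet(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])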
--------------------------------------------------------------------------------
/ResNet/ResNet50.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class BottleNeck(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(BottleNeck, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        # The residual branch ends with BatchNorm; ReLU is applied only after
        # the addition, as in the original paper.
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=(out_channels * 4), kernel_size=1),
            nn.BatchNorm2d((out_channels * 4))
        )
        self.relu = nn.ReLU(inplace=True)
        # Note: a projection shortcut is used in every block here; the reference
        # implementation switches to an identity shortcut when shapes match.
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=(out_channels * 4), kernel_size=1, stride=stride),
            nn.BatchNorm2d((out_channels * 4))
        )
        self._initialize_weights()

    def forward(self, x):
        identity = self.downsample(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x + identity
        x = self.relu(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


class ResNet50(nn.Module):
    def __init__(self, num_classes):
        super(ResNet50, self).__init__()
        self.head = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
        self.bn = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.group1 = self._make_group(BottleNeck, in_channels=64, out_channels=64, blocks=3, stride=1)
        self.group2 = self._make_group(BottleNeck, in_channels=256, out_channels=128, blocks=4, stride=2)
        self.group3 = self._make_group(BottleNeck, in_channels=512, out_channels=256, blocks=6, stride=2)
        self.group4 = self._make_group(BottleNeck, in_channels=1024, out_channels=512, blocks=3, stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=2048, out_features=num_classes)

    def forward(self, x):
        x = self.head(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.group1(x)
        x = self.group2(x)
        x = self.group3(x)
        x = self.group4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_group(self, block, in_channels, out_channels, blocks, stride):
        # Only the first block may downsample; the rest run at stride 1 and
        # take the expanded (out_channels * 4) input.
        layers = [block(in_channels=in_channels, out_channels=out_channels, stride=stride)]
        for i in range(1, blocks):
            layers.append(block(in_channels=(out_channels * 4), out_channels=out_channels, stride=1))
        return nn.Sequential(*layers)
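
# Quick shape check (my addition, not part of the original file): the
# bottleneck expansion factor is 4, giving 2048 channels before the classifier.
net = ResNet50(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])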
--------------------------------------------------------------------------------
/ResNeXt/ResNeXt50-32-4d.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class ResNeXtBottleNeck(nn.Module):
    def __init__(self, in_channels, intermediate, out_channels, stride, cardinality=32):
        super(ResNeXtBottleNeck, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=intermediate, kernel_size=1, stride=1),
            nn.BatchNorm2d(intermediate),
            nn.ReLU(inplace=True)
        )
        # The grouped 3x3 convolution carries the `cardinality` parallel paths.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=intermediate, out_channels=intermediate, groups=cardinality, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(intermediate),
            nn.ReLU(inplace=True)
        )
        # The residual branch ends with BatchNorm; ReLU comes after the addition.
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=intermediate, out_channels=out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels)
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride),
            nn.BatchNorm2d(out_channels)
        )

    def forward(self, x):
        identity = self.downsample(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x + identity
        x = self.relu(x)
        return x


class ResNeXt50(nn.Module):
    def __init__(self, num_classes):
        super(ResNeXt50, self).__init__()
        self.head = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
        self.bn = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.group1 = self._make_group(ResNeXtBottleNeck, in_channels=64, intermediate=128, out_channels=256, blocks=3, stride=1)
        self.group2 = self._make_group(ResNeXtBottleNeck, in_channels=256, intermediate=256, out_channels=512, blocks=4, stride=2)
        self.group3 = self._make_group(ResNeXtBottleNeck, in_channels=512, intermediate=512, out_channels=1024, blocks=6, stride=2)
        self.group4 = self._make_group(ResNeXtBottleNeck, in_channels=1024, intermediate=1024, out_channels=2048, blocks=3, stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=2048, out_features=num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.head(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.group1(x)
        x = self.group2(x)
        x = self.group3(x)
        x = self.group4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_group(self, block, in_channels, intermediate, out_channels, blocks, stride):
        # Only the first block may downsample; the remaining blocks - 1 run at stride 1.
        layers = [block(in_channels=in_channels, intermediate=intermediate, out_channels=out_channels, stride=stride)]
        for i in range(1, blocks):
            layers.append(block(in_channels=out_channels, intermediate=intermediate, out_channels=out_channels, stride=1))
        return nn.Sequential(*layers)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
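
# Quick shape check (my addition, not part of the original file): cardinality
# 32 with bottleneck width 4 gives the "32x4d" template of the paper.
net = ResNeXt50(num_classes=10)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])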
--------------------------------------------------------------------------------
/VGG/VGG19.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class VGG19(nn.Module):
    def __init__(self, num_classes):
        super(VGG19, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv8 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.conv9 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv10 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv11 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv12 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2)
        self.conv13 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv14 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv15 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv16 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = self.pool3(x)
        x = F.relu(self.conv9(x))
        x = F.relu(self.conv10(x))
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = self.pool4(x)
        x = F.relu(self.conv13(x))
        x = F.relu(self.conv14(x))
        x = F.relu(self.conv15(x))
        x = F.relu(self.conv16(x))
        x = self.pool5(x)
        x = x.view(-1, 512 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, training=self.training)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax itself,
        # and F.softmax without an explicit dim argument is deprecated.
        x = self.fc3(x)
        return x
    # This initialization is the key to competitive performance.
    # Borrowed from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

--------------------------------------------------------------------------------
/DenseNet/DenseNet.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class DenseLayer(nn.Module):
    def __init__(self, in_channels, growth_rate, bottleneck_factor=4, drop_rate=0.5):
        super(DenseLayer, self).__init__()
        self.layer = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=in_channels, out_channels=growth_rate * bottleneck_factor, kernel_size=1, stride=1),

            nn.BatchNorm2d(growth_rate * bottleneck_factor),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=growth_rate * bottleneck_factor, out_channels=growth_rate, kernel_size=3, stride=1, padding=1),

            nn.Dropout(p=drop_rate)
        )

    def forward(self, x):
        # Each layer contributes growth_rate new channels, concatenated onto its input.
        identity = x
        x = self.layer(x)
        total = torch.cat([identity, x], 1)
        return total


class Transition(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Transition, self).__init__()
        self.layer = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )

    def forward(self, x):
        x = self.layer(x)
        return x


class DenseNet(nn.Module):
    def __init__(self, num_classes, growth_rate, compression_factor, blocks):
        super(DenseNet, self).__init__()
        num_features = growth_rate * 2
        self.entry = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=num_features, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(num_features=num_features),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.DenseBlock1 = self._make_block(DenseLayer, blocks[0], in_channels=num_features, growth_rate=growth_rate)
        num_features = num_features + blocks[0] * growth_rate
        compressed_features = int(num_features * compression_factor)
        self.Transition1 = Transition(in_channels=num_features, out_channels=compressed_features)

        self.DenseBlock2 = self._make_block(DenseLayer, blocks[1], in_channels=compressed_features, growth_rate=growth_rate)
        num_features = compressed_features + blocks[1] * growth_rate
        compressed_features = int(num_features * compression_factor)
        self.Transition2 = Transition(in_channels=num_features, out_channels=compressed_features)

        self.DenseBlock3 = self._make_block(DenseLayer, blocks[2], in_channels=compressed_features, growth_rate=growth_rate)
        num_features = compressed_features + blocks[2] * growth_rate
        compressed_features = int(num_features * compression_factor)
        self.Transition3 = Transition(in_channels=num_features, out_channels=compressed_features)

        self.DenseBlock4 = self._make_block(DenseLayer, blocks[3], in_channels=compressed_features, growth_rate=growth_rate)
        num_features = compressed_features + blocks[3] * growth_rate
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=num_features, out_features=num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.entry(x)
        x = self.DenseBlock1(x)
        x = self.Transition1(x)
        x = self.DenseBlock2(x)
        x = self.Transition2(x)
        x = self.DenseBlock3(x)
        x = self.Transition3(x)
        x = self.DenseBlock4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_block(self, layer, num_layers, in_channels, growth_rate, bottleneck_factor=4, drop_rate=0.5):
        # The i-th layer sees the block input plus i * growth_rate concatenated channels.
        block = []
        for i in range(num_layers):
            block.append(layer(in_channels=in_channels + i * growth_rate, growth_rate=growth_rate, bottleneck_factor=bottleneck_factor, drop_rate=drop_rate))
        return nn.Sequential(*block)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
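
# Usage sketch (my addition, not part of the original file): these
# hyperparameters give DenseNet-121 (growth rate 32, compression 0.5,
# block sizes 6/12/24/16; the final feature count works out to 1024).
net = DenseNet(num_classes=10, growth_rate=32, compression_factor=0.5, blocks=[6, 12, 24, 16])
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])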
--------------------------------------------------------------------------------
/ShuffleNet/ShuffleNet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


def channel_shuffle(x, groups):
    # Reshape to (batch, groups, channels_per_group, h, w), swap the two
    # channel axes, and flatten back, so information mixes across groups.
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = num_channels // groups
    x = x.view(batchsize, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()
    x = x.view(batchsize, -1, height, width)
    return x


class Conv_1x1(nn.Module):
    def __init__(self, in_channels, out_channels, groups):
        super(Conv_1x1, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, groups=groups),
            nn.BatchNorm2d(num_features=out_channels)
        )

    def forward(self, x):
        x = self.layer(x)
        return x


class DWConv_3x3(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(DWConv_3x3, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels),
            nn.BatchNorm2d(num_features=out_channels)
        )

    def forward(self, x):
        x = self.layer(x)
        return x


class ShuffleUnit(nn.Module):
    def __init__(self, in_channels, out_channels, groups, mode):
        super(ShuffleUnit, self).__init__()
        self.mode = mode
        self.groups = groups
        # "add" units keep the resolution and use a residual sum; "cat" units
        # halve the resolution and concatenate with the pooled shortcut.
        if mode == "add":
            stride = 1
        elif mode == "cat":
            stride = 2
        else:
            raise ValueError("mode must be 'add' or 'cat'")
        self.gconv_1x1_head = Conv_1x1(in_channels=in_channels, out_channels=(out_channels // 4), groups=groups)
        self.relu = nn.ReLU(inplace=True)
        self.dwconv_3x3 = DWConv_3x3(in_channels=(out_channels // 4), out_channels=(out_channels // 4), stride=stride)
        self.gconv_1x1_cat_tail = Conv_1x1(in_channels=(out_channels // 4), out_channels=(out_channels - in_channels), groups=groups)
        self.gconv_1x1_add_tail = Conv_1x1(in_channels=(out_channels // 4), out_channels=out_channels, groups=groups)
        self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        if self.mode == "add":
            identity = x
            x = self.gconv_1x1_head(x)
            x = channel_shuffle(x, self.groups)
            x = self.dwconv_3x3(x)
            x = self.gconv_1x1_add_tail(x)
            x = x + identity
            x = self.relu(x)
            return x
        if self.mode == "cat":
            identity = self.avgpool(x)
            x = self.gconv_1x1_head(x)
            x = channel_shuffle(x, self.groups)
            x = self.dwconv_3x3(x)
            x = self.gconv_1x1_cat_tail(x)
            x = torch.cat([x, identity], 1)
            x = self.relu(x)
            return x


class Stage(nn.Module):
    def __init__(self, in_channels, out_channels, repeats, groups):
        super(Stage, self).__init__()
        self.head = ShuffleUnit(in_channels=in_channels, out_channels=out_channels, groups=groups, mode="cat")
        self.body = self._make_stage(in_channels=out_channels, out_channels=out_channels, groups=groups, repeats=repeats)

    def forward(self, x):
        x = self.head(x)
        x = self.body(x)
        return x

    def _make_stage(self, in_channels, out_channels, groups, repeats):
        layers = []
        for i in range(repeats):
            layers.append(ShuffleUnit(in_channels=in_channels, out_channels=out_channels, groups=groups, mode="add"))
        return nn.Sequential(*layers)


class ShuffleNet(nn.Module):
    def __init__(self, num_classes, groups=3):
        super(ShuffleNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=24, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(num_features=24),
            nn.ReLU(inplace=True)
        )
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.stage1 = Stage(in_channels=24, out_channels=240, repeats=3, groups=groups)
        self.stage2 = Stage(in_channels=240, out_channels=480, repeats=7, groups=groups)
        self.stage3 = Stage(in_channels=480, out_channels=960, repeats=3, groups=groups)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=960, out_features=num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
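
# Quick shape check (my addition, not part of the original file): with
# groups=3 the stage widths are 240/480/960, matching the g=3 row of the paper.
net = ShuffleNet(num_classes=10, groups=3)
print(net(torch.randn(1, 3, 224, 224)).size())  # expected: torch.Size([1, 10])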
--------------------------------------------------------------------------------
/ResNet50_with_Stochastic_Depth/ResNet50_with_Stochastic_Depth.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class BottleNeck(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(BottleNeck, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        # The residual branch ends with BatchNorm; ReLU comes after the addition.
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=(out_channels * 4), kernel_size=1),
            nn.BatchNorm2d((out_channels * 4))
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=(out_channels * 4), kernel_size=1, stride=stride),
            nn.BatchNorm2d((out_channels * 4))
        )
        self._initialize_weights()

    def forward(self, x, active, prob):
        if self.training:
            if active == 1:
                # Block survives: compute the residual branch as usual.
                identity = self.downsample(x)
                x = self.conv1(x)
                x = self.conv2(x)
                x = self.conv3(x)
                x = x + identity
                x = self.relu(x)
                return x
            else:
                # Block is dropped: only the (projected) shortcut remains.
                x = self.downsample(x)
                x = self.relu(x)
                return x
        else:
            # At test time every block runs, scaled by its survival probability.
            identity = self.downsample(x)
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.conv3(x)
            x = prob * x + identity
            x = self.relu(x)
            return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


class Group(nn.Module):
    def __init__(self, num_blocks, in_channels, out_channels, stride):
        super(Group, self).__init__()
        self.num_blocks = num_blocks
        self.head_layer = BottleNeck(in_channels=in_channels, out_channels=out_channels, stride=stride)
        # Each tail position needs its own BottleNeck instance; reusing a
        # single module for every position would share weights across blocks.
        self.tail_layers = nn.ModuleList(
            [BottleNeck(in_channels=(out_channels * 4), out_channels=out_channels, stride=1)
             for _ in range(num_blocks - 1)]
        )

    def forward(self, x, active, probs):
        x = self.head_layer(x, active[0], probs[0])
        for i, layer in enumerate(self.tail_layers, start=1):
            x = layer(x, active[i], probs[i])
        return x


class ResNet50_Stochastic_Depth(nn.Module):
    def __init__(self, num_classes, pL=0.5):
        super(ResNet50_Stochastic_Depth, self).__init__()
        self.num_classes = num_classes
        # Survival probabilities decay linearly from 1 to pL over the 16 blocks.
        self.probabilities = torch.linspace(start=1, end=pL, steps=16)
        self.actives = torch.bernoulli(self.probabilities)

        self.head = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
        self.bn = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.group1 = Group(num_blocks=3, in_channels=64, out_channels=64, stride=1)
        self.group2 = Group(num_blocks=4, in_channels=256, out_channels=128, stride=2)
        self.group3 = Group(num_blocks=6, in_channels=512, out_channels=256, stride=2)
        self.group4 = Group(num_blocks=3, in_channels=1024, out_channels=512, stride=2)

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.classifier = nn.Linear(in_features=2048, out_features=num_classes)

    def forward(self, x):
        # Resample which blocks are active on every forward pass; the sampled
        # values are only consulted in training mode.
        self.actives = torch.bernoulli(self.probabilities)
        x = self.head(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.group1(x, self.actives[:3], self.probabilities[:3])
        x = self.group2(x, self.actives[3:7], self.probabilities[3:7])
        x = self.group3(x, self.actives[7:13], self.probabilities[7:13])
        x = self.group4(x, self.actives[13:], self.probabilities[13:])
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
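
# Usage sketch (my addition, not part of the original file): blocks are
# dropped at random during training; at test time every block runs, scaled
# by its survival probability.
net = ResNet50_Stochastic_Depth(num_classes=10, pL=0.5)
net.train()
print(net(torch.randn(1, 3, 224, 224)).size())  # torch.Size([1, 10])
net.eval()
print(net(torch.randn(1, 3, 224, 224)).size())  # torch.Size([1, 10])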
--------------------------------------------------------------------------------
/GoogLeNet/GoogLeNet.py:
--------------------------------------------------------------------------------
import math

import torch
import torch.nn as nn


class Inception(nn.Module):
    def __init__(self, in_channels, k_1x1, k_3x3red, k_3x3, k_5x5red, k_5x5, pool_proj):
        super(Inception, self).__init__()
        self.b1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=k_1x1, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.b2 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=k_3x3red, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=k_3x3red, out_channels=k_3x3, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        # The padded 1x1 reduction plus a 5x5 with padding=1 produces the same
        # output size as the usual 5x5 with padding=2 would.
        self.b3 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=k_5x5red, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=k_5x5red, out_channels=k_5x5, kernel_size=5, padding=1),
            nn.ReLU(inplace=True)
        )
        # The stride-1 max-pool shrinks the map by 2; the padded projection restores it.
        self.b4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1),
            nn.Conv2d(in_channels=in_channels, out_channels=pool_proj, kernel_size=1, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        y1 = self.b1(x)
        y2 = self.b2(x)
        y3 = self.b3(x)
        y4 = self.b4(x)
        return torch.cat([y1, y2, y3, y4], 1)


class AuxClassifier(nn.Module):
    def __init__(self, num_classes, in_channels):
        super(AuxClassifier, self).__init__()
        self.pool1 = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=128, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=4 * 4 * 128, out_features=1024),
            nn.ReLU(inplace=True)
        )
        self.drop = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(in_features=1024, out_features=num_classes)

    def forward(self, x):
        x = self.pool1(x)
        x = self.conv1(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.drop(x)
        x = self.fc2(x)
        return x


class GoogLeNet(nn.Module):
    def __init__(self, num_classes, aux_classifier=True):
        super(GoogLeNet, self).__init__()
        self.aux_classifier = aux_classifier
        self.head = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )

        self.block3a = Inception(in_channels=192, k_1x1=64, k_3x3red=96, k_3x3=128, k_5x5red=16, k_5x5=32, pool_proj=32)
        self.block3b = Inception(in_channels=256, k_1x1=128, k_3x3red=128, k_3x3=192, k_5x5red=32, k_5x5=96, pool_proj=64)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.block4a = Inception(in_channels=480, k_1x1=192, k_3x3red=96, k_3x3=208, k_5x5red=16, k_5x5=48, pool_proj=64)
        if aux_classifier:
            self.aux0 = AuxClassifier(num_classes=num_classes, in_channels=512)
        self.block4b = Inception(in_channels=512, k_1x1=160, k_3x3red=112, k_3x3=224, k_5x5red=24, k_5x5=64, pool_proj=64)
        self.block4c = Inception(in_channels=512, k_1x1=128, k_3x3red=128, k_3x3=256, k_5x5red=24, k_5x5=64, pool_proj=64)
        self.block4d = Inception(in_channels=512, k_1x1=112, k_3x3red=144, k_3x3=288, k_5x5red=32, k_5x5=64, pool_proj=64)
        if aux_classifier:
            self.aux1 = AuxClassifier(num_classes=num_classes, in_channels=528)
        self.block4e = Inception(in_channels=528, k_1x1=256, k_3x3red=160, k_3x3=320, k_5x5red=32, k_5x5=128, pool_proj=128)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.block5a = Inception(in_channels=832, k_1x1=256, k_3x3red=160, k_3x3=320, k_5x5red=32, k_5x5=128, pool_proj=128)
        self.block5b = Inception(in_channels=832, k_1x1=384, k_3x3red=192, k_3x3=384, k_5x5red=48, k_5x5=128, pool_proj=128)
        self.pool3 = nn.AvgPool2d(kernel_size=7, stride=1)
        self.drop = nn.Dropout(p=0.4)
        self.classifier = nn.Linear(in_features=1024, out_features=num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.head(x)
        x = self.block3a(x)
        x = self.block3b(x)
        x = self.pool1(x)
        x = self.block4a(x)
        if self.training and self.aux_classifier:
            output0 = self.aux0(x)
        x = self.block4b(x)
        x = self.block4c(x)
        x = self.block4d(x)
        if self.training and self.aux_classifier:
            output1 = self.aux1(x)
        x = self.block4e(x)
        x = self.pool2(x)
        x = self.block5a(x)
        x = self.block5b(x)
        x = self.pool3(x)
        x = x.view(x.size(0), -1)
        x = self.drop(x)
        output2 = self.classifier(x)
        # The auxiliary outputs only exist in training mode with the auxiliary
        # classifiers enabled, so return them conditionally.
        if self.training and self.aux_classifier:
            return output0, output1, output2
        return output2

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
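
# Usage sketch (my addition, not part of the original file): in training mode
# the network returns the two auxiliary logits plus the main logits; in eval
# mode only the main logits.
net = GoogLeNet(num_classes=10)
net.train()
out0, out1, out2 = net(torch.randn(1, 3, 224, 224))
net.eval()
out = net(torch.randn(1, 3, 224, 224))
print(out.size())  # expected: torch.Size([1, 10])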
--------------------------------------------------------------------------------
/DeepLearningBasics/LogisticRegression.py:
--------------------------------------------------------------------------------
import numpy as np


def sigmoid(z):
    """
    Compute the sigmoid of z

    Arguments:
    z -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(z)
    """

    ### START CODE HERE ### (≈ 1 line of code)
    s = 1 / (1 + np.exp(-z))
    ### END CODE HERE ###

    return s


def initialize_with_zeros(dim):
    """
    This function creates a vector of zeros of shape (dim, 1) for w and initializes b to 0.

    Argument:
    dim -- size of the w vector we want (or number of parameters in this case)

    Returns:
    w -- initialized vector of shape (dim, 1)
    b -- initialized scalar (corresponds to the bias)
    """

    ### START CODE HERE ### (≈ 1 line of code)
    w = np.zeros((dim, 1))
    b = 0
    ### END CODE HERE ###

    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))

    return w, b

def propagate(w, b, X, Y):
    """
    Implement the cost function and its gradient for the propagation explained above

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat) of size (1, number of examples)

    Return:
    cost -- negative log-likelihood cost for logistic regression
    dw -- gradient of the loss with respect to w, thus same shape as w
    db -- gradient of the loss with respect to b, thus same shape as b

    Tips:
    - Write your code step by step for the propagation. np.log(), np.dot()
    """

    m = X.shape[1]

    # FORWARD PROPAGATION (FROM X TO COST)
    ### START CODE HERE ### (≈ 2 lines of code)
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    ### END CODE HERE ###

    # BACKWARD PROPAGATION (TO FIND GRAD)
    ### START CODE HERE ### (≈ 2 lines of code)
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)
    ### END CODE HERE ###

    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())

    grads = {"dw": dw,
             "db": db}

    return grads, cost


def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """
    This function optimizes w and b by running a gradient descent algorithm

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    print_cost -- True to print the loss every 100 steps

    Returns:
    params -- dictionary containing the weights w and bias b
    grads -- dictionary containing the gradients of the weights and bias with respect to the cost function
    costs -- list of all the costs computed during the optimization, this will be used to plot the learning curve.

    Tips:
    You basically need to write down two steps and iterate through them:
        1) Calculate the cost and the gradient for the current parameters. Use propagate().
        2) Update the parameters using the gradient descent rule for w and b.
    """
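
# NOTE (my addition, not part of the assignment): a tiny smoke test with two
# toy examples of 3 features each; gradient descent should separate them easily.
# It belongs at the bottom of this file, after model() is defined:
#
#     if __name__ == "__main__":
#         X_toy = np.array([[1.0, -1.0], [2.0, -2.0], [0.5, -0.5]])
#         Y_toy = np.array([[1, 0]])
#         model(X_toy, Y_toy, X_toy, Y_toy, num_iterations=1000, learning_rate=0.1)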

    costs = []

    for i in range(num_iterations):

        # Cost and gradient calculation (≈ 1-4 lines of code)
        ### START CODE HERE ###
        grads, cost = propagate(w, b, X, Y)
        ### END CODE HERE ###

        # Retrieve derivatives from grads
        dw = grads["dw"]
        db = grads["db"]

        # update rule (≈ 2 lines of code)
        ### START CODE HERE ###
        w = w - learning_rate * dw
        b = b - learning_rate * db
        ### END CODE HERE ###

        # Record the costs
        if i % 100 == 0:
            costs.append(cost)

        # Print the cost every 100 training iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs


def predict(w, b, X):
    '''
    Predict whether the label is 0 or 1 using learned logistic regression parameters (w, b)

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, number of examples)

    Returns:
    Y_prediction -- a numpy array (vector) containing all predictions (0/1) for the examples in X
    '''

    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)

    # Compute vector "A" predicting the probabilities of a cat being present in the picture
    ### START CODE HERE ### (≈ 1 line of code)
    A = sigmoid(np.dot(w.T, X) + b)
    ### END CODE HERE ###

    for i in range(A.shape[1]):

        # Convert probabilities A[0,i] to actual predictions p[0,i]
        ### START CODE HERE ### (≈ 4 lines of code)
        if A[0, i] > 0.5:
            Y_prediction[0, i] = 1
        ### END CODE HERE ###

    assert(Y_prediction.shape == (1, m))

    return Y_prediction


def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """
    Builds the logistic regression model by calling the function you've implemented previously

    Arguments:
    X_train -- training set represented by a numpy array of shape (num_px * num_px * 3, m_train)
    Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
    X_test -- test set represented by a numpy array of shape (num_px * num_px * 3, m_test)
    Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
    num_iterations -- hyperparameter representing the number of iterations to optimize the parameters
    learning_rate -- hyperparameter representing the learning rate used in the update rule of optimize()
    print_cost -- Set to true to print the cost every 100 iterations

    Returns:
    d -- dictionary containing information about the model.
    """

    ### START CODE HERE ###

    # initialize parameters with zeros (≈ 1 line of code)
    w, b = initialize_with_zeros(X_train.shape[0])

    # Gradient descent (≈ 1 line of code)
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)

    # Retrieve parameters w and b from dictionary "parameters"
    w = parameters["w"]
    b = parameters["b"]

    # Predict test/train set examples (≈ 2 lines of code)
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    ### END CODE HERE ###

    # Print train/test Errors
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}

    return d

--------------------------------------------------------------------------------
/DeepLearningBasics/ShallowNeuralNetwork.py:
--------------------------------------------------------------------------------
import numpy as np


def sigmoid(z):
    """Compute the sigmoid of z (used by forward_propagation below)."""
    return 1 / (1 + np.exp(-z))


def layer_sizes(X, Y):
    """
    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)

    Returns:
    n_x -- the size of the input layer
    n_h -- the size of the hidden layer
    n_y -- the size of the output layer
    """
    ### START CODE HERE ### (≈ 3 lines of code)
    n_x = X.shape[0]  # size of input layer
    n_h = 4
    n_y = Y.shape[0]  # size of output layer
    ### END CODE HERE ###
    return (n_x, n_h, n_y)


def initialize_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    params -- python dictionary containing your parameters:
        W1 -- weight matrix of shape (n_h, n_x)
        b1 -- bias vector of shape (n_h, 1)
        W2 -- weight matrix of shape (n_y, n_h)
        b2 -- bias vector of shape (n_y, 1)
    """

    np.random.seed(2)  # we set up a seed so that your output matches ours although the initialization is random.


def forward_propagation(X, parameters):
    """
    Arguments:
    X -- input data of size (n_x, m)
    parameters -- python dictionary containing your parameters (output of the initialization function)

    Returns:
    A2 -- the sigmoid output of the second activation
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    # Retrieve each parameter from the dictionary "parameters"
    ### START CODE HERE ### (≈ 4 lines of code)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    ### END CODE HERE ###

    # Implement forward propagation to calculate A2 (probabilities)
    ### START CODE HERE ### (≈ 4 lines of code)
    Z1 = np.dot(W1, X) + b1
    A1 = np.maximum(Z1, 0)  # ReLU; the original assignment used np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    ### END CODE HERE ###

    assert(A2.shape == (1, X.shape[1]))

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}

    return A2, cache


def compute_cost(A2, Y, parameters):
    """
    Computes the cross-entropy cost

    Arguments:
    A2 -- the sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    parameters -- python dictionary containing your parameters W1, b1, W2 and b2

    Returns:
    cost -- the cross-entropy cost
    """

    # Compute the cross-entropy cost
    ### START CODE HERE ### (≈ 2 lines of code)
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(np.log(1 - A2), (1 - Y))
    cost = -np.mean(logprobs)
    ### END CODE HERE ###

    cost = float(np.squeeze(cost))  # makes sure cost is a plain scalar, e.g. turns [[17]] into 17.0
    assert(isinstance(cost, float))

    return cost
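

# A hedged aside (not in the original file): np.log(A2) returns -inf when the
# sigmoid saturates and A2 reaches exactly 0.0 or 1.0 in floating point, which
# can poison the cost with NaN/inf. A common remedy is to clip the probabilities
# first; this variant is a sketch of that idea, with an arbitrary epsilon.
def compute_cost_clipped(A2, Y, eps=1e-12):
    A2 = np.clip(A2, eps, 1 - eps)  # keep probabilities strictly inside (0, 1)
    logprobs = Y * np.log(A2) + (1 - Y) * np.log(1 - A2)
    return float(-np.mean(logprobs))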


def backward_propagation(parameters, cache, X, Y):
    """
    Implement the backward propagation.

    Arguments:
    parameters -- python dictionary containing our parameters
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    X -- input data of shape (n_x, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    grads -- python dictionary containing your gradients with respect to the different parameters
    """
    m = X.shape[1]

    # First, retrieve W1 and W2 from the dictionary "parameters".
    ### START CODE HERE ### (≈ 2 lines of code)
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    ### END CODE HERE ###

    # Retrieve also A1, A2 and Z1 from the dictionary "cache".
    ### START CODE HERE ### (≈ 3 lines of code)
    A1 = cache["A1"]
    A2 = cache["A2"]
    Z1 = cache["Z1"]
    ### END CODE HERE ###

    # Backward propagation: calculate dW1, db1, dW2, db2.
    ### START CODE HERE ### (≈ 6 lines of code)
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = np.dot(W2.T, dZ2) * (Z1 > 0)  # ReLU derivative; for tanh use np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m
    ### END CODE HERE ###

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads
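

# A finite-difference gradient check (not in the original file): compares the
# analytic dW2 from backward_propagation against a central-difference estimate
# on tiny random data. Checking dW2 avoids the ReLU kink, since perturbing W2
# leaves Z1 untouched. The epsilon and the toy sizes below are arbitrary.
def _check_dW2(eps=1e-6, seed=0):
    rng = np.random.RandomState(seed)
    X = rng.randn(3, 5)
    Y = (rng.rand(1, 5) > 0.5).astype(float)
    params = initialize_parameters(3, 4, 1)
    _, cache = forward_propagation(X, params)
    dW2 = backward_propagation(params, cache, X, Y)["dW2"]

    numeric = np.zeros_like(dW2)
    for idx in np.ndindex(*dW2.shape):
        costs = []
        for delta in (eps, -eps):
            params["W2"][idx] += delta
            A2, _ = forward_propagation(X, params)
            costs.append(compute_cost(A2, Y, params))
            params["W2"][idx] -= delta  # restore the original weight
        numeric[idx] = (costs[0] - costs[1]) / (2 * eps)
    return np.max(np.abs(numeric - dW2))  # should be on the order of 1e-8 or smaller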


def update_parameters(parameters, grads, learning_rate=1.2):
    """
    Updates parameters using the gradient descent update rule

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients
    learning_rate -- step size of the gradient descent update

    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    # Retrieve each parameter from the dictionary "parameters"
    ### START CODE HERE ### (≈ 4 lines of code)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    ### END CODE HERE ###

    # Retrieve each gradient from the dictionary "grads"
    ### START CODE HERE ### (≈ 4 lines of code)
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    ### END CODE HERE ###

    # Update rule for each parameter
    ### START CODE HERE ### (≈ 4 lines of code)
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    ### END CODE HERE ###

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters


def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False, eta=0.01):
    """
    Arguments:
    X -- dataset of shape (n_x, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- number of iterations in the gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    eta -- learning rate passed to update_parameters()

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    # Initialize parameters. Inputs: "n_x, n_h, n_y". Outputs: "parameters".
    ### START CODE HERE ### (≈ 1 line of code)
    parameters = initialize_parameters(n_x, n_h, n_y)
    ### END CODE HERE ###

    # Loop (gradient descent)
    for i in range(num_iterations):

        ### START CODE HERE ### (≈ 4 lines of code)
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate=eta)
        ### END CODE HERE ###

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters


def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns:
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """

    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold
    ### START CODE HERE ### (≈ 2 lines of code)
    A2, cache = forward_propagation(X, parameters)
    predictions = A2 > 0.5
    ### END CODE HERE ###

    return predictions
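

# A minimal usage sketch (not part of the original file): trains the network on
# a synthetic 2-D binary problem and reports training accuracy. The XOR-like
# data below is made up for illustration; it stands in for the planar dataset
# the original assignment trained on.
if __name__ == "__main__":
    np.random.seed(1)
    m = 400
    X = np.random.randn(2, m)
    Y = (X[0, :] * X[1, :] > 0).astype(float).reshape(1, m)  # label = sign agreement of the two features

    parameters = nn_model(X, Y, n_h=4, num_iterations=5000, print_cost=True, eta=0.5)
    predictions = predict(parameters, X)
    print("train accuracy: {:.1f} %".format(100 * np.mean(predictions == Y)))
--------------------------------------------------------------------------------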