├── README.md
├── optim.py
├── activations.py
├── utils.py
├── lossfunctions.py
├── linear.py
├── datasets.py
├── convolution.py
└── net-test-mnist.ipynb

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural networks implemented in numpy

## Introduction

- Forward and backward passes implemented for Linear, CNN, Maxpool, BatchNorm, Dropout, SoftmaxLoss, and more.
- The code carries extensive comments, so newcomers to deep learning can use it as a reference and as a toy to practice on.
- The code is tested and works: a simple AlexNet-like convolutional network trained on the MNIST dataset reaches 98% test accuracy after 3 epochs.
- See [net-test-mnist.ipynb](https://github.com/heyxhh/nnet-numpy/blob/master/net-test-mnist.ipynb) for the experiments.

--------------------------------------------------------------------------------
/optim.py:
--------------------------------------------------------------------------------
import numpy as np

class SGD():
    """
    Stochastic gradient descent.
    parameters: the trainable parameters of the model
    lr: float, learning rate
    momentum: float, momentum factor; defaults to None, i.e. gradient descent without momentum
    """
    def __init__(self, parameters, lr, momentum=None):
        self.parameters = parameters
        self.lr = lr
        self.momentum = momentum

        if momentum is not None:
            self.velocity = self.velocity_initial()

    def update_parameters(self, grads):
        """
        grads: the grads returned by the network's backward method.
        """
        if self.momentum is None:
            for param, grad in zip(self.parameters, grads):
                param -= self.lr * grad
        else:
            for i in range(len(self.parameters)):
                self.velocity[i] = self.momentum * self.velocity[i] - self.lr * grads[i]
                self.parameters[i] += self.velocity[i]

    def velocity_initial(self):
        """
        Initialize the velocities to zero, one array per parameter, in the same order as parameters.
        """
        velocity = []
        for param in self.parameters:
            velocity.append(np.zeros_like(param))
        return velocity

--------------------------------------------------------------------------------
/activations.py:
--------------------------------------------------------------------------------
import numpy as np

# Relu layer
class Relu(object):
    def __init__(self):
        self.X = None

    def __call__(self, X):
        self.X = X
        return self.forward(self.X)

    def forward(self, X):
        return np.maximum(0, X)

    def backward(self, grad_output):
        """
        grad_output: gradient of the loss w.r.t. the relu output
        return: gradient of the loss w.r.t. the relu input input_z
        """
        grad_relu = self.X > 0  # the local gradient is 1 where input_z > 0, and 0 elsewhere
        return grad_relu * grad_output  # in numpy, * is element-wise multiplication


# Tanh layer
class Tanh():
    def __init__(self):
        self.X = None

    def __call__(self, X):
        self.X = X
        return self.forward(self.X)

    def forward(self, X):
        return np.tanh(X)

    def backward(self, grad_output):
        grad_tanh = 1 - (np.tanh(self.X)) ** 2
        return grad_output * grad_tanh

# Sigmoid layer
class Sigmoid():
    def __init__(self):
        self.X = None

    def __call__(self, X):
        self.X = X
        return self.forward(self.X)

    def forward(self, X):
        return self._sigmoid(X)

    def backward(self, grad_output):
        sigmoid_grad = self._sigmoid(self.X) * (1 - self._sigmoid(self.X))
        return grad_output * sigmoid_grad

    def _sigmoid(self, X):
        return 1.0 / (1 + np.exp(-X))
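
# A minimal finite-difference check of the backward passes above (an added
# sketch, not part of the original module; _fd_check is a hypothetical helper):
def _fd_check(layer, x, eps=1e-6):
    """Compare layer.backward against a numerical gradient of sum(layer(x))."""
    out = layer(x)
    analytic = layer.backward(np.ones_like(out))  # d sum(out) / d x
    numeric = np.zeros_like(x)
    for idx in np.ndindex(*x.shape):
        x_plus, x_minus = x.copy(), x.copy()
        x_plus[idx] += eps
        x_minus[idx] -= eps
        numeric[idx] = (layer(x_plus).sum() - layer(x_minus).sum()) / (2 * eps)
    return np.allclose(analytic, numeric, atol=1e-4)

# Example usage: _fd_check(Tanh(), np.random.randn(3, 4)) should return True.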
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import numpy as np

# conversion between integer labels and one-hot
def label_encoder(label, num_class):
    """
    Encode the classes 0, 1, ..., n-1 as one-hot vectors.
    label: original labels, with values 0, 1, 2, ..., num_class-1
    num_class: number of label classes
    return: one row per sample's one-hot label, shape=(m, num_class)
    """
    tmp = np.eye(num_class)
    return tmp[label]

# one-hot back to integers
def label_decoder(one_hot):
    """
    Convert one-hot labels back to integer values.
    one_hot: labels in one-hot form; array of shape=(m, num_class)
    return: integer labels, shape=(m,)
    """
    return np.argmax(one_hot, axis=1)

# shuffle the data
def shuffle_data(datas, labels):
    """
    Randomly shuffle the order of the data.
    parameters:
        datas, labels
    """
    n = labels.shape[0]  # total number of samples
    # shuffle the indices
    shuffled_idx = np.arange(n)
    np.random.shuffle(shuffled_idx)
    shuffled_datas, shuffled_labels = datas[shuffled_idx], labels[shuffled_idx]
    return shuffled_datas, shuffled_labels


def softmax(X):
    """
    Compute the softmax of X.
    The code uses the identity softmax(x) = softmax(x + c).
    return: softmax
    """
    batch_size = X.shape[0]
    # axis=1 takes the maximum along each row of the 2-d array
    max_value = X.max(axis=1)
    # subtract each row's own maximum to avoid inf after exponentiation,
    # using the identity softmax(x) = softmax(x + c)
    tmp = X - max_value.reshape(batch_size, 1)
    # exponentiate every entry
    exp_input = np.exp(tmp)  # shape=(m, n)
    # sum over each row
    exp_sum = exp_input.sum(axis=1).reshape(batch_size, 1)  # shape=(m, 1)
    return exp_input / exp_sum

--------------------------------------------------------------------------------
/lossfunctions.py:
--------------------------------------------------------------------------------
import numpy as np

# cross-entropy loss
class CrossEntropyLoss():
    """
    Compute the cross-entropy loss on the outputs of the last layer.
    """
    def __init__(self):
        self.X = None
        self.labels = None

    def __call__(self, X, labels):
        """
        parameters:
            X: output of the model's last fc layer
            labels: one-hot labels, shape=(batch_size, num_class)
        """
        self.X = X
        self.labels = labels

        return self.forward(self.X)

    def forward(self, X):
        """
        Compute the cross-entropy loss.
        parameters:
            X: output of the last layer, shape=(batch_size, C)
            labels: one-hot labels, shape=(batch_size, C)
        return:
            the cross-entropy loss
        """
        self.softmax_x = self.softmax(X)
        log_softmax = self.log_softmax(self.softmax_x)
        cross_entropy_loss = np.sum(-(self.labels * log_softmax), axis=1).mean()
        return cross_entropy_loss

    def backward(self):
        grad_x = (self.softmax_x - self.labels)  # the returned gradient must be divided by batch_size
        return grad_x / self.X.shape[0]

    def log_softmax(self, softmax_x):
        """
        parameters:
            softmax_x: X that has already been passed through softmax
        return:
            the log_softmax result, shape=(m, C)
        """
        return np.log(softmax_x + 1e-5)

    def softmax(self, X):
        """
        Compute the softmax of the input.
        The code uses the identity softmax(x) = softmax(x + c).
        """
        batch_size = X.shape[0]
        # axis=1 takes the maximum along each row of the 2-d array
        max_value = X.max(axis=1)
        # Subtract each row's own maximum to avoid inf after exponentiation (softmax(x) = softmax(x + c)).
        # A new variable must be assigned here instead of using -=, which would modify the input X in place;
        # softmax is used several times while computing the loss, so the input must stay unchanged.
        tmp = X - max_value.reshape(batch_size, 1)
        # exponentiate every entry
        exp_input = np.exp(tmp)  # shape=(m, n)
        # sum over each row
        exp_sum = exp_input.sum(axis=1, keepdims=True)  # shape=(m, 1)
        return exp_input / exp_sum
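
# A small usage sketch (added; not part of the original module). backward()
# above returns (softmax(X) - labels) / batch_size, so every row of the
# gradient sums to ~0, since both a softmax row and a one-hot row sum to 1:
def _cross_entropy_demo(seed=0):
    rng = np.random.RandomState(seed)
    loss_fc = CrossEntropyLoss()
    X = rng.randn(4, 10)                        # fake logits for a batch of 4
    labels = np.eye(10)[rng.randint(0, 10, 4)]  # random one-hot targets
    loss = loss_fc(X, labels)                   # scalar loss
    grad = loss_fc.backward()                   # shape=(4, 10)
    return loss, np.abs(grad.sum(axis=1)).max()  # the second value is ~0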
--------------------------------------------------------------------------------
/linear.py:
--------------------------------------------------------------------------------
import numpy as np

# linear (fully connected) layer
class Linear():
    """
    Fully connected linear layer.
    """
    def __init__(self, dim_in, dim_out):
        """
        parameters:
            dim_in: input dimension
            dim_out: output dimension
        """
        # initialize the parameters (He-style scaling)
        scale = np.sqrt(dim_in / 2)
        self.weight = np.random.standard_normal((dim_in, dim_out)) / scale
        self.bias = np.random.standard_normal(dim_out) / scale
        # self.weight = np.random.randn(dim_in, dim_out)
        # self.bias = np.zeros(dim_out)

        self.params = [self.weight, self.bias]

    def __call__(self, X):
        """
        parameters:
            X: input of this layer, shape=(batch_size, dim_in)
        return:
            xw + b
        """
        self.X = X
        return self.forward()

    def forward(self):
        return np.dot(self.X, self.weight) + self.bias

    def backward(self, d_out):
        """
        parameters:
            d_out: gradient of the output, shape=(batch_size, dim_out)
        return:
            gradient of the loss w.r.t. the input X (i.e. the activations of the previous layer l-1)
        """
        # Compute the gradients.
        # The gradient w.r.t. the input keeps the batch dimension; the parameter gradients
        # are summed over the batch, since d_out already carries the 1/batch_size factor
        # applied in CrossEntropyLoss.backward.
        d_x = np.dot(d_out, self.weight.T)  # gradient of the input, i.e. of the previous layer's activations
        d_w = np.dot(self.X.T, d_out)       # gradient of weight
        d_b = np.sum(d_out, axis=0)         # gradient of bias, summed over the batch like d_w

        return d_x, [d_w, d_b]


# dropout, usable on both 1D and 2D features
class Dropout():
    """
    Randomly zero out part of the features during training.
    """
    def __init__(self, p):
        """
        parameters:
            p: probability of keeping a unit
        """
        self.p = p

    def __call__(self, X, mode):
        """
        mode: whether we are in the training or the test phase; 'train' or 'test'
        """
        return self.forward(X, mode)

    def forward(self, X, mode):
        if mode == 'train':
            self.mask = np.random.binomial(1, self.p, X.shape) / self.p
            out = self.mask * X
        else:
            out = X

        return out

    def backward(self, d_out):
        """
        d_out: gradient of the loss w.r.t. the dropout output
        """
        return d_out * self.mask
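
# Illustrative sketch (added; not part of the original module): this is
# "inverted" dropout -- surviving units are rescaled by 1/p during training,
# so the expected activation matches the untouched test-time forward pass:
def _dropout_expectation_demo(p=0.5, n=200000, seed=0):
    np.random.seed(seed)
    drop = Dropout(p)
    x = np.ones(n)
    train_mean = drop(x, 'train').mean()  # ~1.0: p * (1/p) + (1-p) * 0
    test_mean = drop(x, 'test').mean()    # exactly 1.0
    return train_mean, test_mean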
--------------------------------------------------------------------------------
/datasets.py:
--------------------------------------------------------------------------------
import numpy as np
import struct

class Mnist():
    """
    Parse the original MNIST binary files and read the data.
    Source: https://blog.csdn.net/panrenlong/article/details/81736754
    """
    def __init__(self, train_image_path, train_label_path, test_image_path, test_label_path):
        self.train_image_path = train_image_path
        self.train_label_path = train_label_path
        self.test_image_path = test_image_path
        self.test_label_path = test_label_path

    def decode_idx3_ubyte(self, idx3_ubyte_file):
        """
        Generic parser for idx3 files.
        :param idx3_ubyte_file: path of the idx3 file
        :return: the data set
        """
        # read the binary data
        bin_data = open(idx3_ubyte_file, 'rb').read()

        # Parse the header: magic number, number of images, image height, image width.
        offset = 0
        fmt_header = '>iiii'
        # The first 4 fields are all 32-bit integers, hence format 'i'; we read 4 of them, hence 4 i's.
        # We will see below that the label files use only two ('>ii').
        magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, offset)
        print('magic number: %d, number of images: %d, image size: %d*%d' % (magic_number, num_images, num_rows, num_cols))

        # parse the data set
        image_size = num_rows * num_cols
        offset += struct.calcsize(fmt_header)
        # After reading the 4 header fields, the pointer (i.e. the offset) points at position 0016.
        print(offset)
        fmt_image = '>' + str(image_size) + 'B'
        # Pixel values are of type unsigned char, which corresponds to format 'B'.
        # The image size 784 must be prepended so that 784 'B' values are read at once;
        # without it only a single value (one pixel of an image) would be read.
        print(fmt_image, offset, struct.calcsize(fmt_image))
        images = np.empty((num_images, num_rows, num_cols))
        #plt.figure()
        for i in range(num_images):
            if (i + 1) % 10000 == 0:
                print('parsed %d images' % (i + 1))
                print(offset)
            images[i] = np.array(struct.unpack_from(fmt_image, bin_data, offset)).reshape((num_rows, num_cols))
            #print(images[i])
            offset += struct.calcsize(fmt_image)
        return images

    def decode_idx1_ubyte(self, idx1_ubyte_file):
        """
        Generic parser for idx1 files.
        :param idx1_ubyte_file: path of the idx1 file
        :return: the data set
        """
        # read the binary data
        bin_data = open(idx1_ubyte_file, 'rb').read()

        # parse the header: magic number and number of labels
        offset = 0
        fmt_header = '>ii'
        magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)
        print('magic number: %d, number of labels: %d' % (magic_number, num_images))

        # parse the data set
        offset += struct.calcsize(fmt_header)
        fmt_image = '>B'
        labels = np.empty(num_images)
        for i in range(num_images):
            if (i + 1) % 10000 == 0:
                print('parsed %d labels' % (i + 1))
            labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]
            offset += struct.calcsize(fmt_image)
        return labels

    def load_train_images(self):
        """
        TRAINING SET IMAGE FILE (train-images-idx3-ubyte):
        [offset] [type]          [value]          [description]
        0000     32 bit integer  0x00000803(2051) magic number
        0004     32 bit integer  60000            number of images
        0008     32 bit integer  28               number of rows
        0012     32 bit integer  28               number of columns
        0016     unsigned byte   ??               pixel
        0017     unsigned byte   ??               pixel
        ........
        xxxx     unsigned byte   ??               pixel
        Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

        :return: np.array of shape n*row*col, where n is the number of images
        """
        return self.decode_idx3_ubyte(self.train_image_path)

    def load_train_labels(self):
        """
        TRAINING SET LABEL FILE (train-labels-idx1-ubyte):
        [offset] [type]          [value]          [description]
        0000     32 bit integer  0x00000801(2049) magic number (MSB first)
        0004     32 bit integer  60000            number of items
        0008     unsigned byte   ??               label
        0009     unsigned byte   ??               label
        ........
        xxxx     unsigned byte   ??               label
        The labels values are 0 to 9.

        :return: np.array of shape n*1, where n is the number of images
        """
        return self.decode_idx1_ubyte(self.train_label_path)

    def load_test_images(self):
        """
        TEST SET IMAGE FILE (t10k-images-idx3-ubyte):
        [offset] [type]          [value]          [description]
        0000     32 bit integer  0x00000803(2051) magic number
        0004     32 bit integer  10000            number of images
        0008     32 bit integer  28               number of rows
        0012     32 bit integer  28               number of columns
        0016     unsigned byte   ??               pixel
        0017     unsigned byte   ??               pixel
        ........
        xxxx     unsigned byte   ??               pixel
        Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

        :return: np.array of shape n*row*col, where n is the number of images
        """
        return self.decode_idx3_ubyte(self.test_image_path)

    def load_test_labels(self):
        """
        TEST SET LABEL FILE (t10k-labels-idx1-ubyte):
        [offset] [type]          [value]          [description]
        0000     32 bit integer  0x00000801(2049) magic number (MSB first)
        0004     32 bit integer  10000            number of items
        0008     unsigned byte   ??               label
        0009     unsigned byte   ??               label
        ........
        xxxx     unsigned byte   ??               label
        The labels values are 0 to 9.

        :return: np.array of shape n*1, where n is the number of images
        """
        return self.decode_idx1_ubyte(self.test_label_path)
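
# Aside (added sketch; decode_idx3_fast is a hypothetical helper, not part of
# the original class): the per-image struct.unpack_from loop above is easy to
# follow but slow; the same idx3 payload can be read in one shot with
# np.frombuffer, skipping the 16-byte header described in the docstrings:
def decode_idx3_fast(idx3_ubyte_file):
    bin_data = open(idx3_ubyte_file, 'rb').read()
    _, num_images, num_rows, num_cols = struct.unpack_from('>iiii', bin_data, 0)
    pixels = np.frombuffer(bin_data, dtype=np.uint8, offset=16,
                           count=num_images * num_rows * num_cols)
    # match decode_idx3_ubyte, which returns a float array
    return pixels.reshape(num_images, num_rows, num_cols).astype(np.float64)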
--------------------------------------------------------------------------------
/convolution.py:
--------------------------------------------------------------------------------
import numpy as np

# conv2d layer
class Conv2d():
    def __init__(self, in_channels, n_filter, filter_size, padding, stride):
        """
        parameters:
            in_channels: number of channels of the input feature
            n_filter: number of convolution filters
            filter_size: size of each filter, (h_filter, w_filter)
            padding: amount of zero padding
            stride: stride of the sliding filter
        """
        self.in_channels = in_channels
        self.n_filter = n_filter
        self.h_filter, self.w_filter = filter_size
        self.padding = padding
        self.stride = stride

        # Initialize the parameters; the parameter sizes of a conv layer do not depend on the input size.
        self.W = np.random.randn(n_filter, self.in_channels, self.h_filter, self.w_filter) / np.sqrt(n_filter / 2.)
        self.b = np.zeros((n_filter, 1))

        self.params = [self.W, self.b]

    def __call__(self, X):
        # compute the output feature size
        self.n_x, _, self.h_x, self.w_x = X.shape
        self.h_out = (self.h_x + 2 * self.padding - self.h_filter) / self.stride + 1
        self.w_out = (self.w_x + 2 * self.padding - self.w_filter) / self.stride + 1
        if not self.h_out.is_integer() or not self.w_out.is_integer():
            raise Exception("Invalid dimensions!")
        self.h_out, self.w_out = int(self.h_out), int(self.w_out)

        # create the Img2colIndices instance
        self.img2col_indices = Img2colIndices((self.h_filter, self.w_filter), self.padding, self.stride)

        return self.forward(X)

    def forward(self, X):
        # convert X into col form
        self.x_col = self.img2col_indices.img2col(X)

        # reshape the parameter W so that it can be multiplied with the col form of x
        self.w_row = self.W.reshape(self.n_filter, -1)

        # forward pass
        out = self.w_row @ self.x_col + self.b  # in numpy, @ is matrix multiplication, equivalent to numpy.matmul()
        out = out.reshape(self.n_filter, self.h_out, self.w_out, self.n_x)
        out = out.transpose(3, 0, 1, 2)

        return out

    def backward(self, d_out):
        """
        parameters:
            d_out: gradient of the loss w.r.t. the convolution output
        """
        # reshape d_out
        d_out_col = d_out.transpose(1, 2, 3, 0)
        d_out_col = d_out_col.reshape(self.n_filter, -1)

        d_w = d_out_col @ self.x_col.T
        d_w = d_w.reshape(self.W.shape)  # shape=(n_filter, in_channels, h_filter, w_filter)
        d_b = d_out_col.sum(axis=1).reshape(self.n_filter, 1)

        d_x = self.w_row.T @ d_out_col
        # convert d_x from col form back to image form
        d_x = self.img2col_indices.col2img(d_x)

        return d_x, [d_w, d_b]
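
# Verification sketch (added; not part of the original file): the im2col-based
# forward pass above should match a direct nested-loop convolution.
def _conv2d_naive(X, W, b, padding, stride):
    n, c, h, w = X.shape
    f, _, hf, wf = W.shape
    h_out = (h + 2 * padding - hf) // stride + 1
    w_out = (w + 2 * padding - wf) // stride + 1
    Xp = np.pad(X, ((0, 0), (0, 0), (padding, padding), (padding, padding)), mode='constant')
    out = np.zeros((n, f, h_out, w_out))
    for i in range(n):          # each sample
        for j in range(f):      # each filter
            for r in range(h_out):
                for s in range(w_out):
                    patch = Xp[i, :, r * stride:r * stride + hf, s * stride:s * stride + wf]
                    out[i, j, r, s] = np.sum(patch * W[j]) + b[j, 0]
    return out

# e.g. with conv = Conv2d(2, 3, (3, 3), padding=1, stride=1) and x of shape
# (4, 2, 5, 5), np.allclose(conv(x), _conv2d_naive(x, conv.W, conv.b, 1, 1))
# should hold.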
# maxpool layer
class Maxpool():
    def __init__(self, size, stride):
        """
        parameters:
            size: size of the pooling window, int
            stride: stride of the pooling window; usually chosen equal to size
        """
        self.size = size  # size of the pooling window
        self.stride = stride

    def __call__(self, X):
        """
        parameters:
            X: input feature, shape=(batch_size, channels, height, width)
        """
        self.n_x, self.c_x, self.h_x, self.w_x = X.shape
        # compute the maxpool output size
        self.h_out = (self.h_x - self.size) / self.stride + 1
        self.w_out = (self.w_x - self.size) / self.stride + 1
        if not self.h_out.is_integer() or not self.w_out.is_integer():
            raise Exception("Invalid dimensions!")
        self.h_out, self.w_out = int(self.h_out), int(self.w_out)

        # create the Img2colIndices instance
        self.img2col_indices = Img2colIndices((self.size, self.size), padding=0, stride=self.stride)  # maxpool needs no padding

        return self.forward(X)

    def forward(self, X):
        """
        parameters:
            X: input feature, shape=(batch_size, channels, height, width)
        """
        x_reshaped = X.reshape(self.n_x * self.c_x, 1, self.h_x, self.w_x)
        self.x_col = self.img2col_indices.img2col(x_reshaped)
        self.max_indices = np.argmax(self.x_col, axis=0)

        out = self.x_col[self.max_indices, range(self.max_indices.size)]
        out = out.reshape(self.h_out, self.w_out, self.n_x, self.c_x).transpose(2, 3, 0, 1)
        return out

    def backward(self, d_out):
        """
        parameters:
            d_out: gradient of the loss w.r.t. the maxpool output, shape=(batch_size, channels, h_out, w_out)
        """
        d_x_col = np.zeros_like(self.x_col)  # shape=(size*size, h_out*w_out*batch*C)
        d_out_flat = d_out.transpose(2, 3, 0, 1).ravel()

        d_x_col[self.max_indices, range(self.max_indices.size)] = d_out_flat
        # convert d_x from col form back to img form
        d_x = self.img2col_indices.col2img(d_x_col)
        d_x = d_x.reshape(self.n_x, self.c_x, self.h_x, self.w_x)

        return d_x
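
# Sanity sketch (added; not part of the original file): maxpool must route the
# incoming gradient only to the arg-max position of each window, so with
# non-overlapping 2x2 windows the per-window sums of d_x recover d_out.
def _maxpool_routing_check(seed=0):
    rng = np.random.RandomState(seed)
    pool = Maxpool(size=2, stride=2)
    x = rng.randn(2, 3, 4, 4)
    d_out = rng.randn(2, 3, 2, 2)
    pool(x)
    d_x = pool.backward(d_out)
    # split h and w into (window, within-window) axes and sum within windows
    window_sums = d_x.reshape(2, 3, 2, 2, 2, 2).sum(axis=(3, 5))
    return np.allclose(window_sums, d_out)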
# helper class for the conversion between img and col in convolutional networks
class Img2colIndices():
    """
    The sliding-window computation of a convolutional network is in fact carried out as a matrix multiplication.
    Before the convolution forward pass, the feature map is converted into col form, with each window position becoming one column.
    During the backward pass, the col-form gradient has to be converted back to the shape of the input feature map.
    This helper class implements both feature map --> cols and cols --> feature map.

    Convolution, maxpool, and average pool may all use this class for the conversion.
    """
    def __init__(self, filter_size, padding, stride):
        """
        parameters:
            filter_size: size of the filter, (h_filter, w_filter)
            padding: number of zeros padded around the feature edges
            stride: stride of the sliding filter
        """
        self.h_filter, self.w_filter = filter_size
        self.padding = padding
        self.stride = stride

    def get_img2col_indices(self, h_out, w_out):
        """
        Build the indices used to convert an image into col form; the returned indices refer to the padded feature map.

        They give, for every window position, the coordinates of the elements the filter touches; img2col then gathers the elements via these indices:
        row r of i holds the row coordinates of the element that the r-th filter entry (in left-to-right, top-to-bottom order) multiplies at each window position;
        row r of j holds the corresponding column coordinates;
        column c of i and j together lists the k*k elements (left-to-right, top-to-bottom) covered by the c-th window position.
        Each column has length filter_height*filter_width*C; with C channels the pattern repeats C times, and k records which channel each element is taken from.

        parameters:
            h_out: height of the conv layer's output feature
            w_out: width of the conv layer's output feature; computed on each call to img2col
        return:
            k: shape=(filter_height*filter_width*C, 1); each consecutive run of filter_height*filter_width entries shares one value, the channel to read from
            i: shape=(filter_height*filter_width*C, out_height*out_width); row indices of the elements to gather
            j: shape=(filter_height*filter_width*C, out_height*out_width); column indices of the elements to gather
        """
        i0 = np.repeat(np.arange(self.h_filter), self.w_filter)
        i1 = np.repeat(np.arange(h_out), w_out) * self.stride
        i = i0.reshape(-1, 1) + i1
        i = np.tile(i, [self.c_x, 1])

        j0 = np.tile(np.arange(self.w_filter), self.h_filter)
        j1 = np.tile(np.arange(w_out), h_out) * self.stride
        j = j0.reshape(-1, 1) + j1
        j = np.tile(j, [self.c_x, 1])

        k = np.repeat(np.arange(self.c_x), self.h_filter * self.w_filter).reshape(-1, 1)

        return k, i, j

    def img2col(self, X):
        """
        img2col implemented by gathering elements via fancy indexing.
        parameters:
            X: input feature map, shape=(batch_size, channels, height, width)
        return:
            the img2col result, shape=(h_filter*w_filter*channels, h_out*w_out*batch_size)
        """
        self.n_x, self.c_x, self.h_x, self.w_x = X.shape

        # First compute the output feature size and make sure it is an integer.
        h_out = (self.h_x + 2 * self.padding - self.h_filter) / self.stride + 1
        w_out = (self.w_x + 2 * self.padding - self.w_filter) / self.stride + 1
        if not h_out.is_integer() or not w_out.is_integer():
            raise Exception("Invalid dimensions!")
        else:
            h_out, w_out = int(h_out), int(w_out)  # the division above returns floats

        # zero-pad the input feature map
        x_padded = None
        if self.padding > 0:
            x_padded = np.pad(X, ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding)), mode='constant')
        else:
            x_padded = X

        # With the output size known and X padded, build the img2col indices.
        # They are stored on the instance so that col2img can reuse them without recomputation.
        self.img2col_indices = self.get_img2col_indices(h_out, w_out)
        k, i, j = self.img2col_indices

        # gather the col form that takes part in the convolution
        cols = x_padded[:, k, i, j]  # shape=(batch_size, h_filter*w_filter*n_channel, h_out*w_out)
        cols = cols.transpose(1, 2, 0).reshape(self.h_filter * self.w_filter * self.c_x, -1)  # reshape

        return cols

    def col2img(self, cols):
        """
        The inverse of img2col.
        In a conv net the gradient d_x comes out in col form, shape=(filter_height*filter_width*channels, batch_size*out_height*out_width).
        Convert d_x from col form back to the original feature map size: get_img2col_indices provides the indices for that size, and np.add.at scatters the values back into img form.
        parameters:
            cols: d_x in col form, shape=(h_filter*w_filter*n_channels, batch_size*h_out*w_out)
        """
        # restore the col to the shape that img2col produced
        cols = cols.reshape(self.h_filter * self.w_filter * self.c_x, -1, self.n_x)
        cols = cols.transpose(2, 0, 1)

        h_padded, w_padded = self.h_x + 2 * self.padding, self.w_x + 2 * self.padding
        x_padded = np.zeros((self.n_x, self.c_x, h_padded, w_padded))

        k, i, j = self.img2col_indices

        np.add.at(x_padded, (slice(None), k, i, j), cols)

        if self.padding == 0:
            return x_padded
        else:
            return x_padded[:, :, self.padding : -self.padding, self.padding : -self.padding]
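
# A tiny worked example (added; not part of the original file): a 2x2 filter
# sliding with stride 1 over a 1x1x3x3 map has four window positions, and each
# one becomes a column of the col matrix.
def _img2col_example():
    im2col = Img2colIndices((2, 2), padding=0, stride=1)
    x = np.arange(9).reshape(1, 1, 3, 3)
    cols = im2col.img2col(x)
    # cols == [[0, 1, 3, 4],
    #          [1, 2, 4, 5],
    #          [3, 4, 6, 7],
    #          [4, 5, 7, 8]]
    return cols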
# BatchNorm2d layer
class BatchNorm2d():
    """
    For a convolutional layer, batch normalization happens after the convolution and before the activation function.
    If the convolution outputs several channels, each channel is normalized separately, and every channel has its own scale and shift parameters, both scalars.
    With m samples in the mini-batch and a convolution output of height p and width q, each channel normalizes its m*p*q elements jointly,
    standardizing them with a shared mean and variance, namely the mean and variance of that channel's m*p*q elements.

    When a trained model is used for prediction, the output should be deterministic for any input,
    so a single sample's output must not depend on the mean and variance of the random mini-batch that batch normalization needs.
    The usual approach is to estimate the mean and variance of the whole training set with moving averages, and use those at prediction time to obtain a deterministic output.
    """
    def __init__(self, n_channel, momentum):
        """
        parameters:
            n_channel: number of channels of the input feature
            momentum: coefficient for updating moving_mean/moving_var
        """
        self.n_channel = n_channel
        self.momentum = momentum

        # the scale and shift parameters that take part in gradient computation; initialized to 1 and 0
        self.gamma = np.ones((1, n_channel, 1, 1))
        self.beta = np.zeros((1, n_channel, 1, 1))

        # parameters used at test time; initialized to 0 and updated dynamically during training
        self.moving_mean = np.zeros((1, n_channel, 1, 1))
        self.moving_var = np.zeros((1, n_channel, 1, 1))

        self.params = [self.gamma, self.beta]

    def __call__(self, X, mode):
        """
        X: shape = (N, C, H, W)
        mode: training or test phase, 'train' or 'test'; must be passed by the caller
        """
        self.X = X  # used when computing the gradient of gamma
        return self.forward(X, mode)

    def forward(self, X, mode):
        """
        X: shape = (N, C, H, W)
        mode: training or test phase, 'train' or 'test'; must be passed by the caller
        """
        if mode != 'train':
            # in prediction mode, use the moving-average mean and variance directly
            self.x_norm = (X - self.moving_mean) / np.sqrt(self.moving_var + 1e-5)
        else:
            # For a 2-d convolutional layer, compute the per-channel mean and variance over axes (0, 2, 3).
            # keepdims=True preserves X's number of dimensions so that broadcasting works below.
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            self.var = X.var(axis=(0, 2, 3), keepdims=True)  # stored on self because backward uses it

            # In training mode, standardize with the current mean and variance.
            # Stored as an instance attribute because backward uses it.
            self.x_norm = (X - mean) / (np.sqrt(self.var + 1e-5))

            # update the moving averages of the mean and variance
            self.moving_mean = self.momentum * self.moving_mean + (1 - self.momentum) * mean
            self.moving_var = self.momentum * self.moving_var + (1 - self.momentum) * self.var
        # scale and shift
        out = self.x_norm * self.gamma + self.beta
        return out

    def backward(self, d_out):
        """
        d_out has the same shape as the input.
        """
        d_gamma = (d_out * self.x_norm).sum(axis=(0, 2, 3), keepdims=True)
        d_beta = d_out.sum(axis=(0, 2, 3), keepdims=True)

        # Simplified gradient: mean and var are treated as constants here (their own
        # dependence on X is ignored), an approximation of the full batch-norm backward pass.
        d_x = (d_out * self.gamma) / np.sqrt(self.var + 1e-5)

        return d_x, [d_gamma, d_beta]


# Flatten layer, between the conv layers and the fully connected layers
class Flatten():
    """
    The feature produced by the last conv layer must be flattened before it can feed a fully connected layer.
    It is a separate module to make the backward pass of the gradient straightforward.
    """
    def __init__(self):
        pass

    def __call__(self, X):
        self.x_shape = X.shape  # (batch_size, channels, height, width)

        return self.forward(X)

    def forward(self, X):
        out = X.reshape(self.x_shape[0], -1)
        return out

    def backward(self, d_out):
        d_x = d_out.reshape(self.x_shape)
        return d_x
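
# End-to-end shape sketch (added; mirrors how the notebook wires these layers):
# push random data through conv -> pool -> flatten and back, checking shapes.
def _shape_smoke_test():
    x = np.random.randn(2, 1, 28, 28)
    conv = Conv2d(in_channels=1, n_filter=4, filter_size=(3, 3), padding=1, stride=1)
    pool = Maxpool(size=2, stride=2)
    flat = Flatten()
    out = flat(pool(conv(x)))  # (2, 4*14*14)
    assert out.shape == (2, 4 * 14 * 14)
    d_x, _ = conv.backward(pool.backward(flat.backward(np.ones_like(out))))
    assert d_x.shape == x.shape
    return out.shape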
--------------------------------------------------------------------------------
/net-test-mnist.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pdb\n",
    "\n",
    "from activations import Relu\n",
    "from convolution import Conv2d, Maxpool, BatchNorm2d, Flatten\n",
    "from lossfunctions import CrossEntropyLoss\n",
    "from linear import Linear, Dropout\n",
    "from datasets import Mnist\n",
    "from optim import SGD\n",
    "from utils import label_encoder, shuffle_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define the network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MyNet():\n",
    "    def __init__(self):\n",
    "        self.conv1 = Conv2d(in_channels=1, n_filter=30, filter_size=(3, 3), padding=1, stride=1)\n",
    "        self.relu1 = Relu()\n",
    "        self.bn1 = BatchNorm2d(n_channel=30, momentum=0.1)\n",
    "        self.maxpool1 = Maxpool(size=2, stride=2) # for MNIST the output feature size is 14x14 here\n",
    "        \n",
    "        self.conv2 = Conv2d(in_channels=30, n_filter=20, filter_size=(3, 3), padding=1, stride=1)\n",
    "        self.relu2 = Relu()\n",
    "        self.bn2 = BatchNorm2d(n_channel=20, momentum=0.1)\n",
    "        self.maxpool2 = Maxpool(size=2, stride=2) # identical to the maxpool1 layer; maxpool1 could simply be reused instead\n",
    "        # for MNIST the output feature size is 7x7 here\n",
    "        \n",
    "        self.conv3 = Conv2d(in_channels=20, n_filter=10, filter_size=(3, 3), padding=0, stride=1)\n",
    "        # for MNIST the output feature size is 5x5 here\n",
    "        self.relu3 = Relu()\n",
    "        self.bn3 = BatchNorm2d(n_channel=10, momentum=0.1)\n",
    "        \n",
    "        self.flatten = Flatten()\n",
    "        \n",
    "        # fully connected layers\n",
    "        self.fc1 = Linear(dim_in=250, dim_out=100)\n",
    "        self.dropout1 = Dropout(p=0.5)\n",
    "        self.fc2 = Linear(dim_in=100, dim_out=10)\n",
    "        \n",
    "        self.parameters = self.conv1.params + self.bn1.params + self.conv2.params + self.bn2.params + \\\n",
    "            self.conv3.params + self.bn3.params + self.fc1.params + self.fc2.params\n",
    "    \n",
    "    def __call__(self, X, mode='train'):\n",
    "        \"\"\"\n",
    "        mode: whether we are in the training or the test phase. 'train' or 'test'\n",
    "        \"\"\"\n",
    "        return self.forward(X, mode)\n",
    "    \n",
    "    def forward(self, X, mode):\n",
    "        conv1_out = self.conv1(X)\n",
    "        relu1_out = self.relu1(conv1_out)\n",
    "        bn1_out = self.bn1(relu1_out, mode)\n",
    "        pool1_out = self.maxpool1(bn1_out)\n",
    "        \n",
    "        conv2_out = self.conv2(pool1_out)\n",
    "        relu2_out = self.relu2(conv2_out)\n",
    "        bn2_out = self.bn2(relu2_out, mode)\n",
    "        pool2_out = self.maxpool2(bn2_out)\n",
    "        \n",
    "        conv3_out = self.conv3(pool2_out)\n",
    "        relu3_out = self.relu3(conv3_out)\n",
    "        bn3_out = self.bn3(relu3_out, mode)\n",
    "        \n",
    "        flat_out = self.flatten(bn3_out)\n",
    "        \n",
    "        fc1_out = self.fc1(flat_out)\n",
    "        drop1_out = self.dropout1(fc1_out, mode)\n",
    "        fc2_out = self.fc2(drop1_out)\n",
    "        \n",
    "        return fc2_out\n",
    "    \n",
    "    def backward(self, d_out):\n",
    "        d_fc2_x, d_fc2_params = self.fc2.backward(d_out)\n",
    "        d_drop1_x = self.dropout1.backward(d_fc2_x)\n",
    "        d_fc1_x, d_fc1_params = self.fc1.backward(d_drop1_x)\n",
    "        \n",
    "        d_flat_x = self.flatten.backward(d_fc1_x)\n",
    "        \n",
    "        d_bn3_x, d_bn3_params = self.bn3.backward(d_flat_x)\n",
    "        d_relu3_x = self.relu3.backward(d_bn3_x)\n",
    "        d_conv3_x, d_conv3_params = self.conv3.backward(d_relu3_x)\n",
    "        \n",
    "        d_pool2_x = self.maxpool2.backward(d_conv3_x)\n",
    "        d_bn2_x, d_bn2_params = self.bn2.backward(d_pool2_x)\n",
    "        d_relu2_x = self.relu2.backward(d_bn2_x)\n",
    "        d_conv2_x, d_conv2_params = self.conv2.backward(d_relu2_x)\n",
    "        \n",
    "        d_pool1_x = self.maxpool1.backward(d_conv2_x)\n",
    "        d_bn1_x, d_bn1_params = self.bn1.backward(d_pool1_x)\n",
    "        d_relu1_x = self.relu1.backward(d_bn1_x)\n",
    "        _, d_conv1_params = self.conv1.backward(d_relu1_x)\n",
    "        \n",
    "        grads = d_conv1_params + d_bn1_params + d_conv2_params + d_bn2_params + d_conv3_params + \\\n",
    "            d_bn3_params + d_fc1_params + d_fc2_params\n",
    "        \n",
    "        return grads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define the training procedure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(train_datas, train_labels, test_datas, test_labels, network, \n",
    "          loss_fc, optim, epochs, batch_size):\n",
    "    for epoch in range(1, epochs + 1):\n",
    "        print(\"**\" * 20, \"epoch: %d\" % epoch, \"**\" * 20)\n",
    "        \n",
    "        # shuffle the data\n",
    "        shuffled_datas, shuffled_labels = shuffle_data(train_datas, train_labels)\n",
    "        \n",
    "        n_correct = 0 # number of correct predictions\n",
    "        n_data = train_datas.shape[0] # total number of samples\n",
    "        loss_sum = 0.0\n",
    "        \n",
    "        num_trained_batchs = 0 # number of batches trained in the current epoch\n",
    "        for i in range(0, n_data, batch_size):\n",
    "            batch_datas, batch_labels = shuffled_datas[i : i+batch_size], shuffled_labels[i : i+batch_size]\n",
    "            \n",
    "            net_out = network(batch_datas, mode='train') # model output\n",
    "            \n",
    "            batch_loss = loss_fc(net_out, batch_labels) # compute the loss\n",
    "            loss_sum += batch_loss\n",
    "            \n",
    "            grad_out = loss_fc.backward() # gradient of the loss w.r.t. the model output\n",
    "            # compute the gradients of all model parameters, then update the parameters\n",
    "            grads = network.backward(grad_out)\n",
    "            optim.update_parameters(grads)\n",
    "            \n",
    "            # pdb.set_trace()\n",
    "            \n",
    "            # count the correct predictions\n",
    "            predict = np.argmax(net_out, axis=1)\n",
    "            n_correct += np.sum(predict == np.argmax(batch_labels, axis=1))\n",
    "            \n",
    "            num_trained_batchs += 1\n",
    "            if num_trained_batchs % 100 == 0:\n",
    "                ave_loss = loss_sum / num_trained_batchs\n",
    "                print(\"*\" * 20, \"%d th batch, loss: %f\" % (num_trained_batchs, ave_loss), \"*\" * 20)\n",
    "        \n",
    "        train_acc = n_correct / n_data\n",
    "        \n",
    "        test_acc = test(test_datas, test_labels, network, batch_size)\n",
    "        \n",
    "        print(\"epoch %d, train accuracy: %f test accuracy: %f\" % (epoch, train_acc, test_acc))\n",
    "    \n",
    "\n",
    "def test(test_datas, test_labels, network, batch_size):\n",
    "    \"\"\"\n",
    "    Feed the test data in batches, otherwise memory runs out.\n",
    "    test_labels: one hot\n",
    "    return: test accuracy\n",
    "    \"\"\"\n",
    "    n_data = test_labels.shape[0]\n",
    "    n_correct = 0\n",
    "    for i in range(0, n_data, batch_size):\n",
    "        batch_datas = test_datas[i : i + batch_size]\n",
    "        batch_labels = test_labels[i : i + batch_size]\n",
    "        net_out = network(batch_datas, mode='test')\n",
    "        predict = np.argmax(net_out, axis=1)\n",
    "        n_correct += np.sum(predict == np.argmax(batch_labels, axis=1))\n",
    "    \n",
    "    test_acc = n_correct / n_data\n",
    "    return test_acc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the MNIST data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "magic number: 2051, number of images: 60000, image size: 28*28\n",
      "16\n",
      ">784B 16 784\n",
      "parsed 10000 images\n",
      "7839232\n",
      "parsed 20000 images\n",
      "15679232\n",
      "parsed 30000 images\n",
      "23519232\n",
      "parsed 40000 images\n",
      "31359232\n",
      "parsed 50000 images\n",
      "39199232\n",
      "parsed 60000 images\n",
      "47039232\n",
      "magic number: 2049, number of labels: 60000\n",
      "parsed 10000 labels\n",
      "parsed 20000 labels\n",
      "parsed 30000 labels\n",
      "parsed 40000 labels\n",
      "parsed 50000 labels\n",
      "parsed 60000 labels\n",
      "magic number: 2051, number of images: 10000, image size: 28*28\n",
      "16\n",
      ">784B 16 784\n",
      "parsed 10000 images\n",
      "7839232\n",
      "magic number: 2049, number of labels: 10000\n",
      "parsed 10000 labels\n"
     ]
    }
   ],
   "source": [
    "train_image_path = r'D:\\datas\\mnist\\train-images.idx3-ubyte'\n",
    "train_label_path = r'D:\\datas\\mnist\\train-labels.idx1-ubyte'\n",
    "test_image_path = r'D:\\datas\\mnist\\t10k-images.idx3-ubyte'\n",
    "test_label_path = r'D:\\datas\\mnist\\t10k-labels.idx1-ubyte'\n",
    "# training samples: 60000 in total\n",
    "# test samples: 10000 in total\n",
    "\n",
    "# create the Mnist data instance\n",
    "mnist = Mnist(train_image_path, train_label_path, test_image_path, test_label_path)\n",
    "\n",
    "# load the training data\n",
    "train_images = mnist.load_train_images()\n",
    "train_labels = mnist.load_train_labels()\n",
    "train_images = train_images[:, np.newaxis, :, :] # add a channel dimension: the conv net expects 4-d input [N, C, H, W], but MNIST loads as 3-d [N, H, W]\n",
    "train_labels = np.array(train_labels, np.int32) # the loaded labels are floats; convert to int, otherwise the one-hot conversion goes wrong\n",
    "train_labels = label_encoder(train_labels, 10)\n",
    "\n",
    "# load the test data\n",
    "test_images = mnist.load_test_images()\n",
    "test_labels = mnist.load_test_labels()\n",
    "test_images = test_images[:, np.newaxis, :, :]\n",
    "test_labels = np.array(test_labels, np.int32)\n",
    "test_labels = label_encoder(test_labels, 10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Start training"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train with SGD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "**************************************** epoch: 1 ****************************************\n",
      "******************** 100 th batch, loss: 2.294114 ********************\n",
      "******************** 200 th batch, loss: 1.674324 ********************\n",
      "******************** 300 th batch, loss: 1.360098 ********************\n",
      "******************** 400 th batch, loss: 1.172868 ********************\n",
      "******************** 500 th batch, loss: 1.037376 ********************\n",
      "******************** 600 th batch, loss: 0.939488 ********************\n",
      "******************** 700 th batch, loss: 0.866106 ********************\n",
      "******************** 800 th batch, loss: 0.806912 ********************\n",
      "******************** 900 th batch, loss: 0.756997 ********************\n",
      "******************** 1000 th batch, loss: 0.714366 ********************\n",
      "******************** 1100 th batch, loss: 0.675957 ********************\n",
      "******************** 1200 th batch, loss: 0.647047 ********************\n",
      "******************** 1300 th batch, loss: 0.616233 ********************\n",
      "******************** 1400 th batch, loss: 0.594029 ********************\n",
      "******************** 1500 th batch, loss: 0.571482 ********************\n",
      "******************** 1600 th batch, loss: 0.552422 ********************\n",
      "******************** 1700 th batch, loss: 0.535268 ********************\n",
      "******************** 1800 th batch, loss: 0.518393 ********************\n",
      "******************** 1900 th batch, loss: 0.503792 ********************\n",
      "******************** 2000 th batch, loss: 0.490532 ********************\n",
      "epoch 1, train accuracy: 0.849500 test accuracy: 0.955400\n",
      "**************************************** epoch: 2 ****************************************\n",
      "******************** 100 th batch, loss: 0.232883 ********************\n",
      "******************** 200 th batch, loss: 0.230598 ********************\n",
      "******************** 300 th batch, loss: 0.219935 ********************\n",
      "******************** 400 th batch, loss: 0.216815 ********************\n",
      "******************** 500 th batch, loss: 0.218382 ********************\n",
      "******************** 600 th batch, loss: 0.213029 ********************\n",
      "******************** 700 th batch, loss: 0.209409 ********************\n",
      "******************** 800 th batch, loss: 0.207112 ********************\n",
      "******************** 900 th batch, loss: 0.204232 ********************\n",
      "******************** 1000 th batch, loss: 0.205093 ********************\n",
      "******************** 1100 th batch, loss: 0.205172 ********************\n",
      "******************** 1200 th batch, loss: 0.202066 ********************\n",
"******************** 1300 th batch, loss: 0.198654 ********************\n", 334 | "******************** 1400 th batch, loss: 0.198053 ********************\n", 335 | "******************** 1500 th batch, loss: 0.195828 ********************\n", 336 | "******************** 1600 th batch, loss: 0.194917 ********************\n", 337 | "******************** 1700 th batch, loss: 0.193736 ********************\n", 338 | "******************** 1800 th batch, loss: 0.192274 ********************\n", 339 | "******************** 1900 th batch, loss: 0.191177 ********************\n", 340 | "******************** 2000 th batch, loss: 0.189747 ********************\n", 341 | "epoch 2, train accuracy: 0.940500 test accuracy: 0.967900\n", 342 | "**************************************** epoch: 3 ****************************************\n", 343 | "******************** 100 th batch, loss: 0.157776 ********************\n", 344 | "******************** 200 th batch, loss: 0.160244 ********************\n", 345 | "******************** 300 th batch, loss: 0.158695 ********************\n", 346 | "******************** 400 th batch, loss: 0.161886 ********************\n", 347 | "******************** 500 th batch, loss: 0.158724 ********************\n", 348 | "******************** 600 th batch, loss: 0.156167 ********************\n", 349 | "******************** 700 th batch, loss: 0.157858 ********************\n", 350 | "******************** 800 th batch, loss: 0.159318 ********************\n", 351 | "******************** 900 th batch, loss: 0.157177 ********************\n", 352 | "******************** 1000 th batch, loss: 0.158159 ********************\n", 353 | "******************** 1100 th batch, loss: 0.158121 ********************\n", 354 | "******************** 1200 th batch, loss: 0.157814 ********************\n", 355 | "******************** 1300 th batch, loss: 0.156787 ********************\n", 356 | "******************** 1400 th batch, loss: 0.154653 ********************\n", 357 | "******************** 1500 th batch, loss: 0.155393 ********************\n", 358 | "******************** 1600 th batch, loss: 0.156707 ********************\n", 359 | "******************** 1700 th batch, loss: 0.155441 ********************\n", 360 | "******************** 1800 th batch, loss: 0.154288 ********************\n", 361 | "******************** 1900 th batch, loss: 0.153360 ********************\n", 362 | "******************** 2000 th batch, loss: 0.152392 ********************\n", 363 | "epoch 3, train accuracy: 0.952967 test accuracy: 0.973100\n" 364 | ] 365 | } 366 | ], 367 | "source": [ 368 | "# 初始化网络\n", 369 | "mynet = MyNet()\n", 370 | "\n", 371 | "# 声明损失函数\n", 372 | "cross_entropy = CrossEntropyLoss()\n", 373 | "\n", 374 | "sgd = SGD(mynet.parameters, lr=0.01)\n", 375 | "\n", 376 | "train(train_images, train_labels, test_images, test_labels, network=mynet,\n", 377 | " loss_fc=cross_entropy, optim=sgd, epochs=3, batch_size=30)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "### 使用Momentum SGD训练" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 6, 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "**************************************** epoch: 1 ****************************************\n", 397 | "******************** 100 th batch, loss: 1.201727 ********************\n", 398 | "******************** 200 th batch, loss: 0.800054 ********************\n", 399 | 
"******************** 300 th batch, loss: 0.627133 ********************\n", 400 | "******************** 400 th batch, loss: 0.533434 ********************\n", 401 | "******************** 500 th batch, loss: 0.474872 ********************\n", 402 | "******************** 600 th batch, loss: 0.432316 ********************\n", 403 | "******************** 700 th batch, loss: 0.400252 ********************\n", 404 | "******************** 800 th batch, loss: 0.374126 ********************\n", 405 | "******************** 900 th batch, loss: 0.352210 ********************\n", 406 | "******************** 1000 th batch, loss: 0.333141 ********************\n", 407 | "******************** 1100 th batch, loss: 0.319562 ********************\n", 408 | "******************** 1200 th batch, loss: 0.307324 ********************\n", 409 | "******************** 1300 th batch, loss: 0.295901 ********************\n", 410 | "******************** 1400 th batch, loss: 0.285395 ********************\n", 411 | "******************** 1500 th batch, loss: 0.275471 ********************\n", 412 | "******************** 1600 th batch, loss: 0.266668 ********************\n", 413 | "******************** 1700 th batch, loss: 0.259145 ********************\n", 414 | "******************** 1800 th batch, loss: 0.251230 ********************\n", 415 | "******************** 1900 th batch, loss: 0.245891 ********************\n", 416 | "******************** 2000 th batch, loss: 0.240104 ********************\n", 417 | "epoch 1, train accuracy: 0.927817 test accuracy: 0.975000\n", 418 | "**************************************** epoch: 2 ****************************************\n", 419 | "******************** 100 th batch, loss: 0.115397 ********************\n", 420 | "******************** 200 th batch, loss: 0.117541 ********************\n", 421 | "******************** 300 th batch, loss: 0.110366 ********************\n", 422 | "******************** 400 th batch, loss: 0.117021 ********************\n", 423 | "******************** 500 th batch, loss: 0.116808 ********************\n", 424 | "******************** 600 th batch, loss: 0.117222 ********************\n", 425 | "******************** 700 th batch, loss: 0.116315 ********************\n", 426 | "******************** 800 th batch, loss: 0.116487 ********************\n", 427 | "******************** 900 th batch, loss: 0.116482 ********************\n", 428 | "******************** 1000 th batch, loss: 0.117153 ********************\n", 429 | "******************** 1100 th batch, loss: 0.116400 ********************\n", 430 | "******************** 1200 th batch, loss: 0.116582 ********************\n", 431 | "******************** 1300 th batch, loss: 0.116381 ********************\n", 432 | "******************** 1400 th batch, loss: 0.115446 ********************\n", 433 | "******************** 1500 th batch, loss: 0.114026 ********************\n", 434 | "******************** 1600 th batch, loss: 0.113181 ********************\n", 435 | "******************** 1700 th batch, loss: 0.112160 ********************\n", 436 | "******************** 1800 th batch, loss: 0.112151 ********************\n", 437 | "******************** 1900 th batch, loss: 0.112404 ********************\n", 438 | "******************** 2000 th batch, loss: 0.112656 ********************\n", 439 | "epoch 2, train accuracy: 0.966067 test accuracy: 0.978900\n", 440 | "**************************************** epoch: 3 ****************************************\n", 441 | "******************** 100 th batch, loss: 0.097347 
      "******************** 200 th batch, loss: 0.092374 ********************\n",
      "******************** 300 th batch, loss: 0.095974 ********************\n",
      "******************** 400 th batch, loss: 0.093282 ********************\n",
      "******************** 500 th batch, loss: 0.094074 ********************\n",
      "******************** 600 th batch, loss: 0.096557 ********************\n",
      "******************** 700 th batch, loss: 0.096027 ********************\n",
      "******************** 800 th batch, loss: 0.096519 ********************\n",
      "******************** 900 th batch, loss: 0.097390 ********************\n",
      "******************** 1000 th batch, loss: 0.097327 ********************\n",
      "******************** 1100 th batch, loss: 0.094425 ********************\n",
      "******************** 1200 th batch, loss: 0.094232 ********************\n",
      "******************** 1300 th batch, loss: 0.096179 ********************\n",
      "******************** 1400 th batch, loss: 0.095138 ********************\n",
      "******************** 1500 th batch, loss: 0.095496 ********************\n",
      "******************** 1600 th batch, loss: 0.095212 ********************\n",
      "******************** 1700 th batch, loss: 0.095224 ********************\n",
      "******************** 1800 th batch, loss: 0.094293 ********************\n",
      "******************** 1900 th batch, loss: 0.093824 ********************\n",
      "******************** 2000 th batch, loss: 0.093302 ********************\n",
      "epoch 3, train accuracy: 0.971933 test accuracy: 0.980100\n"
     ]
    }
   ],
   "source": [
    "# initialize the network\n",
    "mynet = MyNet()\n",
    "\n",
    "# create the loss function\n",
    "cross_entropy = CrossEntropyLoss()\n",
    "\n",
    "sgd = SGD(mynet.parameters, lr=0.01, momentum=0.9)\n",
    "\n",
    "train(train_images, train_labels, test_images, test_labels, network=mynet,\n",
    "      loss_fc=cross_entropy, optim=sgd, epochs=3, batch_size=30)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Pytorch",
   "language": "python",
   "name": "pytorch"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------