├── README.md
├── .gitignore
├── fetch_MNIST.py
└── LeNet.py
/README.md:
--------------------------------------------------------------------------------
# LeNet-python
### A tiny implementation of LeNet (without a deep learning framework)

- - -
## Instructions
The main functional code is in LeNet.py.
To run the code, do the following:
1. Download the MNIST dataset and extract it to "your/path/to/data" (see the sketch below).
2. Modify the data path in fetch_MNIST.py to point to "your/path/to/data".
3. Execute "python LeNet.py".

Note: the code targets Python 2.7. If something goes wrong, it is most likely due to an incompatible version of numpy.
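
For example, a minimal download-and-extract sketch in Python (not part of the repository; it assumes "./data" as the data directory, matching the default paths in fetch_MNIST.py, and that the yann.lecun.com mirror is reachable):

```python
# download_mnist.py -- illustrative helper, not included in this repo
import gzip
import os
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve           # Python 2

BASE = 'http://yann.lecun.com/exdb/mnist/'
FILES = ['train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
         't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte']

if not os.path.isdir('./data'):
    os.makedirs('./data')
for name in FILES:
    gz_path = os.path.join('./data', name + '.gz')
    urlretrieve(BASE + name + '.gz', gz_path)
    # decompress to the raw idx file expected by fetch_MNIST.py
    with gzip.open(gz_path, 'rb') as f_in, open(os.path.join('./data', name), 'wb') as f_out:
        f_out.write(f_in.read())
```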

## Results
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/fetch_MNIST.py:
--------------------------------------------------------------------------------
# encoding: utf-8
"""
@author: monitor1379
@contact: yy4f5da2@hotmail.com
@site: www.monitor1379.com

@version: 1.0
@license: Apache Licence
@file: mnist_decoder.py
@time: 2016/8/16 20:03

Converts the MNIST handwritten-digit data files into bmp image format.
The dataset can be downloaded from http://yann.lecun.com/exdb/mnist.
See the official site and the code comments for the format conversion details.

========================
Parsing rules for the IDX file format:
========================
THE IDX FILE FORMAT

The IDX file format is a simple format for vectors and multidimensional matrices of various numerical types.
The basic format is

magic number
size in dimension 0
size in dimension 1
size in dimension 2
.....
size in dimension N
data

The magic number is an integer (MSB first). The first 2 bytes are always 0.

The third byte codes the type of the data:
0x08: unsigned byte
0x09: signed byte
0x0B: short (2 bytes)
0x0C: int (4 bytes)
0x0D: float (4 bytes)
0x0E: double (8 bytes)

The fourth byte codes the number of dimensions of the vector/matrix: 1 for vectors, 2 for matrices....

The sizes in each dimension are 4-byte integers (MSB first, high endian, like in most non-Intel processors).

The data is stored like in a C array, i.e. the index in the last dimension changes the fastest.
"""

import numpy as np
import struct
import matplotlib.pyplot as plt

#data_path = '/Users/didi/Desktop/python_workspace/Neural Network/data/'
# training set image file
train_images_idx3_ubyte_file = './data/train-images-idx3-ubyte'
# training set label file
train_labels_idx1_ubyte_file = './data/train-labels-idx1-ubyte'

# test set image file
test_images_idx3_ubyte_file = './data/t10k-images-idx3-ubyte'
# test set label file
test_labels_idx1_ubyte_file = './data/t10k-labels-idx1-ubyte'


def decode_idx3_ubyte(idx3_ubyte_file):
    """
    Generic parser for idx3 (image) files.
    :param idx3_ubyte_file: path to the idx3 file
    :return: the decoded image data
    """
    # read the binary data
    bin_data = open(idx3_ubyte_file, 'rb').read()

    # parse the header: magic number, number of images, rows and columns per image
    offset = 0
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, offset)
    print 'magic number: %d, number of images: %d, image size: %d*%d' % (magic_number, num_images, num_rows, num_cols)

    # parse the image data
    image_size = num_rows * num_cols
    offset += struct.calcsize(fmt_header)
    fmt_image = '>' + str(image_size) + 'B'
    images = np.empty((num_images, num_rows, num_cols))
    for i in range(num_images):
        if (i + 1) % 10000 == 0:
            print 'parsed %d images' % (i + 1)
        images[i] = np.array(struct.unpack_from(fmt_image, bin_data, offset)).reshape((num_rows, num_cols))
        offset += struct.calcsize(fmt_image)
    return images

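
def decode_idx3_ubyte_fast(idx3_ubyte_file):
    """
    Illustrative alternative (not part of the original code): because the pixel
    data is one flat block of unsigned bytes after a 16-byte header, the whole
    file can be decoded with a single np.frombuffer call instead of a per-image
    struct.unpack_from loop.
    """
    bin_data = open(idx3_ubyte_file, 'rb').read()
    magic_number, num_images, num_rows, num_cols = struct.unpack_from('>iiii', bin_data, 0)
    images = np.frombuffer(bin_data, dtype=np.uint8, offset=16).astype(np.float64)
    return images.reshape(num_images, num_rows, num_cols)
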

def decode_idx1_ubyte(idx1_ubyte_file):
    """
    Generic parser for idx1 (label) files.
    :param idx1_ubyte_file: path to the idx1 file
    :return: the decoded label data
    """
    # read the binary data
    bin_data = open(idx1_ubyte_file, 'rb').read()

    # parse the header: magic number and number of labels
    offset = 0
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)
    print 'magic number: %d, number of labels: %d' % (magic_number, num_images)

    # parse the label data
    offset += struct.calcsize(fmt_header)
    fmt_image = '>B'
    labels = np.empty(num_images)
    for i in range(num_images):
        if (i + 1) % 10000 == 0:
            print 'parsed %d labels' % (i + 1)
        labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]
        offset += struct.calcsize(fmt_image)
    return labels


def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    """
    TRAINING SET IMAGE FILE (train-images-idx3-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000803(2051) magic number
    0004     32 bit integer  60000            number of images
    0008     32 bit integer  28               number of rows
    0012     32 bit integer  28               number of columns
    0016     unsigned byte   ??               pixel
    0017     unsigned byte   ??               pixel
    ........
    xxxx     unsigned byte   ??               pixel
    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: path to the idx file
    :return: n*row*col np.array, where n is the number of images
    """
    return decode_idx3_ubyte(idx_ubyte_file)


def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    """
    TRAINING SET LABEL FILE (train-labels-idx1-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    0004     32 bit integer  60000            number of items
    0008     unsigned byte   ??               label
    0009     unsigned byte   ??               label
    ........
    xxxx     unsigned byte   ??               label
    The label values are 0 to 9.

    :param idx_ubyte_file: path to the idx file
    :return: n*1 np.array, where n is the number of labels
    """
    return decode_idx1_ubyte(idx_ubyte_file)


def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    """
    TEST SET IMAGE FILE (t10k-images-idx3-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000803(2051) magic number
    0004     32 bit integer  10000            number of images
    0008     32 bit integer  28               number of rows
    0012     32 bit integer  28               number of columns
    0016     unsigned byte   ??               pixel
    0017     unsigned byte   ??               pixel
    ........
    xxxx     unsigned byte   ??               pixel
    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: path to the idx file
    :return: n*row*col np.array, where n is the number of images
    """
    return decode_idx3_ubyte(idx_ubyte_file)


def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    """
    TEST SET LABEL FILE (t10k-labels-idx1-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    0004     32 bit integer  10000            number of items
    0008     unsigned byte   ??               label
    0009     unsigned byte   ??               label
    ........
    xxxx     unsigned byte   ??               label
    The label values are 0 to 9.

    :param idx_ubyte_file: path to the idx file
    :return: n*1 np.array, where n is the number of labels
    """
    return decode_idx1_ubyte(idx_ubyte_file)


def run():
    train_images = load_train_images()  # (60000, 28, 28), values 0~255
    train_labels = load_train_labels()  # (60000,), values 0~9
    # test_images = load_test_images()
    # test_labels = load_test_labels()
    print type(train_images), train_images.shape
    print type(train_labels), train_labels.shape

    # inspect the first ten samples and their labels to verify the data was read correctly
    for i in range(10):
        print train_labels[i]
        print np.max(train_images), np.min(train_images)
        plt.imshow(train_images[i], cmap='gray')
        plt.show()
    print 'done'

if __name__ == '__main__':
    run()

'''
Author: monitor1379
Link: https://www.jianshu.com/p/84f72791806f
Source: Jianshu
The copyright belongs to the author. For commercial reuse, please contact the author for authorization; for non-commercial reuse, please credit the source.
'''
--------------------------------------------------------------------------------
/LeNet.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
'''
Author: Site Li
Website: http://blog.csdn.net/site1997
'''
import numpy as np
from scipy.signal import convolve2d
from skimage.measure import block_reduce
import fetch_MNIST


class LeNet(object):
    # The network is:
    #   l0 -conv1-> l1 -pool1-> l2 -conv2-> l3 -pool2-> l4 -fc1-> l5 -relu-> l6 -fc2-> l7 -relu-> l8 -softmax-> l9
    def __init__(self, lr=0.1):
        self.lr = lr
        # 6 convolution kernels, each of size 1 * 5 * 5
        self.conv1 = xavier_init(6, 1, 5, 5)
        # mean-pooling window of size 2 * 2, stride = 2
        self.pool1 = [2, 2]
        # 16 convolution kernels, each of size 6 * 5 * 5
        self.conv2 = xavier_init(16, 6, 5, 5)
        # mean-pooling window of size 2 * 2, stride = 2
        self.pool2 = [2, 2]
        # fully connected layer 256 -> 200
        self.fc1 = xavier_init(256, 200, fc=True)
        # fully connected layer 200 -> 10
        self.fc2 = xavier_init(200, 10, fc=True)

    def forward_prop(self, input_data):
        self.l0 = np.expand_dims(input_data, axis=1) / 255  # (batch_sz, 1, 28, 28)
        self.l1 = self.convolution(self.l0, self.conv1)     # (batch_sz, 6, 24, 24)
        self.l2 = self.mean_pool(self.l1, self.pool1)       # (batch_sz, 6, 12, 12)
        self.l3 = self.convolution(self.l2, self.conv2)     # (batch_sz, 16, 8, 8)
        self.l4 = self.mean_pool(self.l3, self.pool2)       # (batch_sz, 16, 4, 4)
        self.l5 = self.fully_connect(self.l4, self.fc1)     # (batch_sz, 200)
        self.l6 = self.relu(self.l5)                        # (batch_sz, 200)
        self.l7 = self.fully_connect(self.l6, self.fc2)     # (batch_sz, 10)
        self.l8 = self.relu(self.l7)                        # (batch_sz, 10)
        self.l9 = self.softmax(self.l8)                     # (batch_sz, 10)
        return self.l9

    def backward_prop(self, softmax_output, output_label):
        # (label - output) / batch_sz is the negative gradient of the cross-entropy
        # loss w.r.t. the softmax input, so every update below adds lr * gradient
        l8_delta = (output_label - softmax_output) / softmax_output.shape[0]
        l7_delta = self.relu(self.l8, l8_delta, deriv=True)                                # (batch_sz, 10)
        l6_delta, self.fc2 = self.fully_connect(self.l6, self.fc2, l7_delta, deriv=True)   # (batch_sz, 200)
        l5_delta = self.relu(self.l6, l6_delta, deriv=True)                                # (batch_sz, 200)
        l4_delta, self.fc1 = self.fully_connect(self.l4, self.fc1, l5_delta, deriv=True)   # (batch_sz, 16, 4, 4)
        l3_delta = self.mean_pool(self.l3, self.pool2, l4_delta, deriv=True)               # (batch_sz, 16, 8, 8)
        l2_delta, self.conv2 = self.convolution(self.l2, self.conv2, l3_delta, deriv=True) # (batch_sz, 6, 12, 12)
        l1_delta = self.mean_pool(self.l1, self.pool1, l2_delta, deriv=True)               # (batch_sz, 6, 24, 24)
        l0_delta, self.conv1 = self.convolution(self.l0, self.conv1, l1_delta, deriv=True) # (batch_sz, 1, 28, 28)

    def convolution(self, input_map, kernal, front_delta=None, deriv=False):
        N, C, W, H = input_map.shape
        K_NUM, K_C, K_W, K_H = kernal.shape
        if not deriv:
            feature_map = np.zeros((N, K_NUM, W-K_W+1, H-K_H+1))
            for imgId in range(N):
                for kId in range(K_NUM):
                    for cId in range(C):
                        feature_map[imgId][kId] += \
                            convolve2d(input_map[imgId][cId], kernal[kId,cId,:,:], mode='valid')
            return feature_map
        else:
            # front->back (propagate loss)
            back_delta = np.zeros((N, C, W, H))
            kernal_gradient = np.zeros((K_NUM, K_C, K_W, K_H))
            # full-convolution padding: pad the incoming delta by (kernel size - 1) on each side
            padded_front_delta = \
                np.pad(front_delta, [(0,0), (0,0), (K_W-1, K_W-1), (K_H-1, K_H-1)], mode='constant', constant_values=0)
            for imgId in range(N):
                for cId in range(C):
                    for kId in range(K_NUM):
                        back_delta[imgId][cId] += \
                            convolve2d(padded_front_delta[imgId][kId], kernal[kId,cId,::-1,::-1], mode='valid')
                        kernal_gradient[kId][cId] += \
                            convolve2d(front_delta[imgId][kId], input_map[imgId,cId,::-1,::-1], mode='valid')
            # update weights (kernal_gradient already points along the negative loss gradient)
            kernal += self.lr * kernal_gradient
            return back_delta, kernal

    def mean_pool(self, input_map, pool, front_delta=None, deriv=False):
        N, C, W, H = input_map.shape
        P_W, P_H = tuple(pool)
        if not deriv:
            # downsample by averaging each non-overlapping P_W * P_H block
            feature_map = block_reduce(input_map, (1, 1, P_W, P_H), func=np.mean)
            return feature_map
        else:
            # front->back (propagate loss): spread each delta evenly over its pooling window
            back_delta = front_delta.repeat(P_W, axis = 2).repeat(P_H, axis = 3)
            back_delta /= (P_W * P_H)
            return back_delta

    def fully_connect(self, input_data, fc, front_delta=None, deriv=False):
        N = input_data.shape[0]
        if not deriv:
            output_data = np.dot(input_data.reshape(N, -1), fc)
            return output_data
        else:
            # front->back (propagate loss)
            back_delta = np.dot(front_delta, fc.T).reshape(input_data.shape)
            # update weights
            fc += self.lr * np.dot(input_data.reshape(N, -1).T, front_delta)
            return back_delta, fc

    def relu(self, x, front_delta=None, deriv=False):
        if not deriv:
            return x * (x > 0)
        else:
            # propagate loss through the ReLU mask
            back_delta = front_delta * 1. * (x > 0)
            return back_delta

    def softmax(self, x):
        y = list()
        for t in x:
            # subtract the row maximum for numerical stability before exponentiating
            e_t = np.exp(t - np.max(t))
            y.append(e_t / e_t.sum())
        return np.array(y)

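
# A finite-difference gradient check is a common way to validate hand-written
# backprop like the layers above. The function below is an illustrative sketch,
# not part of the original code. It exploits the fact that backward_prop applies
# W += lr * g with g the negative loss gradient, so the analytic gradient of an
# fc2 entry can be recovered as -(fc2_after - fc2_before) / lr.
def gradient_check(eps=1e-5):
    np.random.seed(0)
    net = LeNet(lr=0.1)
    data = np.random.rand(2, 28, 28) * 255  # two fake 28x28 "images"
    label = np.zeros((2, 10))
    label[0, 3] = 1
    label[1, 7] = 1

    def loss():
        p = net.forward_prop(data)
        return -np.sum(label * np.log(p + 1e-12)) / data.shape[0]

    # recover the analytic gradient of one fc2 entry from the weight update
    before = {'c1': net.conv1.copy(), 'c2': net.conv2.copy(),
              'f1': net.fc1.copy(), 'f2': net.fc2.copy()}
    net.backward_prop(net.forward_prop(data), label)
    analytic = -(net.fc2[0, 0] - before['f2'][0, 0]) / net.lr
    # restore all weights so the numerical check runs at the original point
    net.conv1, net.conv2, net.fc1, net.fc2 = before['c1'], before['c2'], before['f1'], before['f2']
    # central-difference estimate of the same partial derivative
    net.fc2[0, 0] += eps
    loss_plus = loss()
    net.fc2[0, 0] -= 2 * eps
    loss_minus = loss()
    net.fc2[0, 0] += eps
    numerical = (loss_plus - loss_minus) / (2 * eps)
    print 'analytic: %e, numerical: %e' % (analytic, numerical)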

def xavier_init(c1, c2, w=1, h=1, fc=False):
    # Glorot/Xavier uniform initialization: scale = sqrt(6 / (fan_in + fan_out))
    fan_1 = c2 * w * h
    fan_2 = c1 * w * h
    ratio = np.sqrt(6.0 / (fan_1 + fan_2))
    params = ratio * (2*np.random.random((c1, c2, w, h)) - 1)
    if fc:
        params = params.reshape(c1, c2)
    return params

def convertToOneHot(labels):
    # map integer labels to one-hot rows: oneHotLabels[i, labels[i]] = 1
    oneHotLabels = np.zeros((labels.size, labels.max()+1))
    oneHotLabels[np.arange(labels.size), labels] = 1
    return oneHotLabels

def shuffle_dataset(data, label):
    # apply one random permutation to images and labels together
    N = data.shape[0]
    index = np.random.permutation(N)
    x = data[index, :, :]; y = label[index, :]
    return x, y

if __name__ == '__main__':
    train_imgs = fetch_MNIST.load_train_images()
    train_labs = fetch_MNIST.load_train_labels().astype(int)
    # size of data; batch size
    data_size = train_imgs.shape[0]
    batch_sz = 64
    # learning rate; max iteration; iter % mod (avoid index out of range)
    lr = 0.01
    max_iter = 50000
    iter_mod = int(data_size/batch_sz)
    train_labs = convertToOneHot(train_labs)
    my_CNN = LeNet(lr)
    for iters in range(max_iter):
        # starting index for this batch of input data
        st_idx = (iters % iter_mod) * batch_sz
        # reshuffle the dataset at the start of every epoch
        if st_idx == 0:
            train_imgs, train_labs = shuffle_dataset(train_imgs, train_labs)
        input_data = train_imgs[st_idx : st_idx + batch_sz]
        output_label = train_labs[st_idx : st_idx + batch_sz]
        softmax_output = my_CNN.forward_prop(input_data)
        if iters % 50 == 0:
            # calculate accuracy
            correct_list = [ int(np.argmax(softmax_output[i])==np.argmax(output_label[i])) for i in range(batch_sz) ]
            accuracy = float(np.array(correct_list).sum()) / batch_sz
            # calculate cross-entropy loss (dropping zero probabilities to avoid log(0))
            correct_prob = [ softmax_output[i][np.argmax(output_label[i])] for i in range(batch_sz) ]
            correct_prob = filter(lambda x: x > 0, correct_prob)
            loss = -1.0 * np.sum(np.log(correct_prob))
            print "Iteration %d:" % iters
            print "The accuracy is %f The loss is %f " % (accuracy, loss)
        my_CNN.backward_prop(softmax_output, output_label)

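    # --- illustrative addition, not part of the original script: evaluate the
    # trained network on the held-out MNIST test set, batch by batch ---
    test_imgs = fetch_MNIST.load_test_images()
    test_labs = fetch_MNIST.load_test_labels().astype(int)
    correct = 0
    for st in range(0, test_imgs.shape[0], batch_sz):
        batch_imgs = test_imgs[st : st + batch_sz]
        preds = np.argmax(my_CNN.forward_prop(batch_imgs), axis=1)
        correct += np.sum(preds == test_labs[st : st + batch_sz])
    print 'test accuracy: %f' % (float(correct) / test_imgs.shape[0])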
--------------------------------------------------------------------------------