├── .gitignore ├── LeNet.py ├── README.md └── fetch_MNIST.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LeNet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 
'''
Author: Site Li
Website: http://blog.csdn.net/site1997
'''
import numpy as np
from scipy.signal import convolve2d


class LeNet(object):
    '''LeNet-style CNN for 28x28 single-channel images, trained with SGD.

    The network is:
        conv1 -> pool1 -> conv2 -> pool2 -> fc1 -> relu -> fc2 -> relu -> softmax
    The activations are cached on the instance as l0 (scaled input) up to
    l9 (softmax output) so that backward_prop can reuse them.

    Every layer method has two modes. With deriv=False it computes the
    forward output. With deriv=True it receives the delta coming from the
    layer in front of it and returns the delta for the layer behind it;
    layers that own weights also update them and return them as a second
    value.
    '''

    def __init__(self, lr=0.1):
        '''Create the network with Xavier-initialised weights.

        :param lr: step size used for every weight update
        '''
        self.lr = lr
        # 6 convolution kernels, each of size 1 * 5 * 5
        self.conv1 = xavier_init(6, 1, 5, 5)
        # mean pooling window 2 * 2 (the stride equals the window size)
        self.pool1 = [2, 2]
        # 16 convolution kernels, each of size 6 * 5 * 5
        self.conv2 = xavier_init(16, 6, 5, 5)
        # mean pooling window 2 * 2 (the stride equals the window size)
        self.pool2 = [2, 2]
        # fully connected layer 256 -> 200
        self.fc1 = xavier_init(256, 200, fc=True)
        # fully connected layer 200 -> 10
        self.fc2 = xavier_init(200, 10, fc=True)

    def forward_prop(self, input_data):
        '''Run a batch through the network and cache every activation.

        :param input_data: array of shape (batch_sz, 28, 28), pixels 0..255
        :return: softmax probabilities of shape (batch_sz, 10)
        '''
        # 255.0 (not 255) so integer pixel arrays are not floor-divided on Python 2
        self.l0 = np.expand_dims(input_data, axis=1) / 255.0  # (batch_sz, 1, 28, 28)
        self.l1 = self.convolution(self.l0, self.conv1)       # (batch_sz, 6, 24, 24)
        self.l2 = self.mean_pool(self.l1, self.pool1)         # (batch_sz, 6, 12, 12)
        self.l3 = self.convolution(self.l2, self.conv2)       # (batch_sz, 16, 8, 8)
        self.l4 = self.mean_pool(self.l3, self.pool2)         # (batch_sz, 16, 4, 4)
        self.l5 = self.fully_connect(self.l4, self.fc1)       # (batch_sz, 200)
        self.l6 = self.relu(self.l5)                          # (batch_sz, 200)
        self.l7 = self.fully_connect(self.l6, self.fc2)       # (batch_sz, 10)
        self.l8 = self.relu(self.l7)                          # (batch_sz, 10)
        self.l9 = self.softmax(self.l8)                       # (batch_sz, 10)
        return self.l9

    def backward_prop(self, softmax_output, output_label):
        '''Back-propagate one batch and update all weights.

        Must be called after forward_prop on the same batch, because it
        reads the cached activations l0..l8.

        The starting delta is (label - softmax) / batch_sz, i.e. it points
        in the direction that raises the probability of the correct class.
        That is why the layers add lr * gradient to their weights.

        :param softmax_output: (batch_sz, 10) output of forward_prop
        :param output_label: (batch_sz, 10) one-hot labels
        '''
        l8_delta = (output_label - softmax_output) / softmax_output.shape[0]
        l7_delta = self.relu(self.l8, l8_delta, deriv=True)                                 # (batch_sz, 10)
        l6_delta, self.fc2 = self.fully_connect(self.l6, self.fc2, l7_delta, deriv=True)    # (batch_sz, 200)
        # l6 = relu(l5), so (l6 > 0) selects the same units as (l5 > 0)
        l5_delta = self.relu(self.l6, l6_delta, deriv=True)                                 # (batch_sz, 200)
        l4_delta, self.fc1 = self.fully_connect(self.l4, self.fc1, l5_delta, deriv=True)    # (batch_sz, 16, 4, 4)
        l3_delta = self.mean_pool(self.l3, self.pool2, l4_delta, deriv=True)                # (batch_sz, 16, 8, 8)
        l2_delta, self.conv2 = self.convolution(self.l2, self.conv2, l3_delta, deriv=True)  # (batch_sz, 6, 12, 12)
        l1_delta = self.mean_pool(self.l1, self.pool1, l2_delta, deriv=True)                # (batch_sz, 6, 24, 24)
        # the delta for the input image is not needed; only the kernel update matters
        _, self.conv1 = self.convolution(self.l0, self.conv1, l1_delta, deriv=True)

    def convolution(self, input_map, kernal, front_delta=None, deriv=False):
        '''"Valid" 2-D convolution layer (forward) or its backward pass.

        :param input_map: array of shape (N, C, W, H)
        :param kernal: array of shape (K_NUM, C, K_W, K_H)
        :param front_delta: (N, K_NUM, W-K_W+1, H-K_H+1) delta of the output;
            only used when deriv is True
        :param deriv: False for the forward pass, True for the backward pass
        :return: forward: feature map of shape (N, K_NUM, W-K_W+1, H-K_H+1);
            backward: tuple (delta of input_map, updated kernal). The kernel
            is updated in place and also returned.
        '''
        N, C, W, H = input_map.shape
        K_NUM, K_C, K_W, K_H = kernal.shape
        if not deriv:
            feature_map = np.zeros((N, K_NUM, W - K_W + 1, H - K_H + 1))
            for imgId in range(N):
                for kId in range(K_NUM):
                    for cId in range(C):
                        feature_map[imgId, kId] += convolve2d(
                            input_map[imgId, cId], kernal[kId, cId], mode='valid')
            return feature_map

        # front->back (propagate loss)
        back_delta = np.zeros((N, C, W, H))
        kernal_gradient = np.zeros((K_NUM, K_C, K_W, K_H))
        # Each spatial axis is padded by its own kernel extent minus one on
        # both sides, so that the 'valid' convolution below yields an array
        # of the input's size (this also holds for non-square kernels).
        padded_front_delta = np.pad(
            front_delta,
            [(0, 0), (0, 0), (K_W - 1, K_W - 1), (K_H - 1, K_H - 1)],
            mode='constant', constant_values=0)
        for imgId in range(N):
            for cId in range(C):
                for kId in range(K_NUM):
                    back_delta[imgId, cId] += convolve2d(
                        padded_front_delta[imgId, kId],
                        kernal[kId, cId, ::-1, ::-1], mode='valid')
                    kernal_gradient[kId, cId] += convolve2d(
                        front_delta[imgId, kId],
                        input_map[imgId, cId, ::-1, ::-1], mode='valid')
        # update weights only after every delta was computed with the old kernel
        kernal += self.lr * kernal_gradient
        return back_delta, kernal

    def mean_pool(self, input_map, pool, front_delta=None, deriv=False):
        '''Non-overlapping mean pooling (forward) or its backward pass.

        :param input_map: array of shape (N, C, W, H)
        :param pool: pooling window [P_W, P_H]; W and H must be multiples of it
        :param front_delta: (N, C, W/P_W, H/P_H) delta of the pooled output;
            only used when deriv is True
        :param deriv: False for the forward pass, True for the backward pass
        :return: forward: pooled map of shape (N, C, W/P_W, H/P_H);
            backward: delta of input_map, shape (N, C, W, H)
        :raises ValueError: in the forward pass if W or H is not a multiple
            of the pooling window
        '''
        N, C, W, H = input_map.shape
        P_W, P_H = tuple(pool)
        if not deriv:
            if W % P_W or H % P_H:
                raise ValueError(
                    'input of size %dx%d cannot be split into %dx%d blocks'
                    % (W, H, P_W, P_H))
            # expose every block as its own pair of axes and average over them
            blocks = input_map.reshape(N, C, W // P_W, P_W, H // P_H, P_H)
            return blocks.mean(axis=(3, 5))

        # front->back (propagate loss): every input of a block gets an equal
        # share of that block's delta
        spread = front_delta.repeat(P_W, axis=2).repeat(P_H, axis=3)
        return spread / float(P_W * P_H)

    def fully_connect(self, input_data, fc, front_delta=None, deriv=False):
        '''Bias-free fully connected layer (forward) or its backward pass.

        :param input_data: array whose first axis is the batch; the remaining
            axes are flattened
        :param fc: weight matrix of shape (inputs, outputs)
        :param front_delta: (N, outputs) delta of the output; only used when
            deriv is True
        :param deriv: False for the forward pass, True for the backward pass
        :return: forward: array of shape (N, outputs);
            backward: tuple (delta shaped like input_data, updated fc). The
            weights are updated in place and also returned.
        '''
        N = input_data.shape[0]
        flat_input = input_data.reshape(N, -1)
        if not deriv:
            return np.dot(flat_input, fc)

        # front->back (propagate loss), computed with the weights before the update
        back_delta = np.dot(front_delta, fc.T).reshape(input_data.shape)
        # update weights
        fc += self.lr * np.dot(flat_input.T, front_delta)
        return back_delta, fc

    def relu(self, x, front_delta=None, deriv=False):
        '''ReLU activation (forward) or its backward pass.

        :param x: layer input (or output; both have the same sign pattern)
        :param front_delta: delta of the output; only used when deriv is True
        :param deriv: False for the forward pass, True for the backward pass
        :return: forward: x with non-positive entries zeroed;
            backward: front_delta with the same entries zeroed
        '''
        positive = x > 0
        if not deriv:
            return x * positive
        # propagate loss
        return front_delta * 1. * positive

    def softmax(self, x):
        '''Row-wise softmax of a 2-D array of shape (batch_sz, classes).

        The row maximum is subtracted before exponentiating so that large
        inputs do not overflow.
        '''
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / exp_scores.sum(axis=1, keepdims=True)


def xavier_init(c1, c2, w=1, h=1, fc=False):
    '''Draw weights uniformly from [-r, r) with r = sqrt(6 / (fan_1 + fan_2)).

    :param c1: size of the first axis
    :param c2: size of the second axis
    :param w: kernel width (leave at 1 for fully connected layers)
    :param h: kernel height (leave at 1 for fully connected layers)
    :param fc: if True return a (c1, c2) matrix instead of a (c1, c2, w, h)
        kernel stack
    '''
    fan_1 = c2 * w * h
    fan_2 = c1 * w * h
    ratio = np.sqrt(6.0 / (fan_1 + fan_2))
    params = ratio * (2 * np.random.random((c1, c2, w, h)) - 1)
    if fc:
        params = params.reshape(c1, c2)
    return params


def convertToOneHot(labels, num_classes=None):
    '''Convert an integer label vector into a one-hot matrix.

    :param labels: 1-D integer array
    :param num_classes: number of columns; defaults to labels.max() + 1.
        Pass it explicitly when a label set may not contain the highest class.
    :return: float array of shape (labels.size, num_classes)
    '''
    if num_classes is None:
        num_classes = labels.max() + 1
    oneHotLabels = np.zeros((labels.size, num_classes))
    oneHotLabels[np.arange(labels.size), labels] = 1
    return oneHotLabels


def shuffle_dataset(data, label):
    '''Return data and label reordered by one shared random permutation.'''
    index = np.random.permutation(data.shape[0])
    return data[index], label[index]


def batch_metrics(softmax_output, output_label):
    '''Compute accuracy and summed negative log-likelihood of one batch.

    :param softmax_output: (batch_sz, classes) predicted probabilities
    :param output_label: (batch_sz, classes) one-hot labels
    :return: tuple (accuracy, loss); zero probabilities are skipped in the
        loss so that log(0) never occurs
    '''
    predicted = np.argmax(softmax_output, axis=1)
    expected = np.argmax(output_label, axis=1)
    accuracy = float(np.mean(predicted == expected))
    correct_prob = softmax_output[np.arange(len(expected)), expected]
    correct_prob = correct_prob[correct_prob > 0]
    loss = -1.0 * np.sum(np.log(correct_prob))
    return accuracy, loss


def main():
    '''Train the network on the MNIST training set and print progress.'''
    # Imported here so that using the LeNet class does not require the
    # dataset loader (and the plotting library it depends on).
    import fetch_MNIST

    train_imgs = fetch_MNIST.load_train_images()
    train_labs = fetch_MNIST.load_train_labels().astype(int)
    # size of data; batch size
    data_size = train_imgs.shape[0]
    batch_sz = 64
    # learning rate; max iteration; iter % mod (avoid index out of range)
    lr = 0.01
    max_iter = 50000
    iter_mod = data_size // batch_sz
    train_labs = convertToOneHot(train_labs)
    my_CNN = LeNet(lr)
    for iters in range(max_iter):
        # starting index of this batch
        st_idx = (iters % iter_mod) * batch_sz
        # reshuffle the dataset at the start of every epoch
        if st_idx == 0:
            train_imgs, train_labs = shuffle_dataset(train_imgs, train_labs)
        input_data = train_imgs[st_idx: st_idx + batch_sz]
        output_label = train_labs[st_idx: st_idx + batch_sz]
        softmax_output = my_CNN.forward_prop(input_data)
        if iters % 50 == 0:
            accuracy, loss = batch_metrics(softmax_output, output_label)
            print("The %d iters result:" % iters)
            print("The accuracy is %f The loss is %f " % (accuracy, loss))
        my_CNN.backward_prop(softmax_output, output_label)


if __name__ == '__main__':
    main()
Also, if something goes wrong, it is probably because of an incompatible version of numpy. 13 | 14 | ## Results 15 |

16 | 17 |

# encoding: utf-8
"""
@author: monitor1379
@contact: yy4f5da2@hotmail.com
@site: www.monitor1379.com

@version: 1.0
@license: Apache Licence
@file: mnist_decoder.py
@time: 2016/8/16 20:03

Decode the MNIST handwritten-digit data files (the original script was written
to convert them to bmp images).
The dataset can be downloaded from http://yann.lecun.com/exdb/mnist.
See the official site and the comments in this file for the format details.

=====================================
Parsing rules for the IDX file format
=====================================
THE IDX FILE FORMAT

the IDX file format is a simple format for vectors and multidimensional matrices of various numerical types.
The basic format is

magic number
size in dimension 0
size in dimension 1
size in dimension 2
.....
size in dimension N
data

The magic number is an integer (MSB first). The first 2 bytes are always 0.

The third byte codes the type of the data:
0x08: unsigned byte
0x09: signed byte
0x0B: short (2 bytes)
0x0C: int (4 bytes)
0x0D: float (4 bytes)
0x0E: double (8 bytes)

The 4-th byte codes the number of dimensions of the vector/matrix: 1 for vectors, 2 for matrices....

The sizes in each dimension are 4-byte integers (MSB first, high endian, like in most non-Intel processors).

The data is stored like in a C array, i.e. the index in the last dimension changes the fastest.
"""

import numpy as np
import struct

#data_path = '/Users/didi/Desktop/python_workspace/Neural Network/data/'
# training set images
train_images_idx3_ubyte_file = './data/train-images-idx3-ubyte'
# training set labels
train_labels_idx1_ubyte_file = './data/train-labels-idx1-ubyte'

# test set images
test_images_idx3_ubyte_file = './data/t10k-images-idx3-ubyte'
# test set labels
test_labels_idx1_ubyte_file = './data/t10k-labels-idx1-ubyte'

# magic numbers of unsigned-byte IDX files with 3 and 1 dimensions
IDX3_UBYTE_MAGIC = 2051
IDX1_UBYTE_MAGIC = 2049


def decode_idx3_ubyte(idx3_ubyte_file):
    """
    Generic parser for idx3 (image) files.

    :param idx3_ubyte_file: path of the idx3 file
    :return: float64 np.array of shape (num_images, num_rows, num_cols)
    :raises ValueError: if the magic number is not 2051 or the file holds
        fewer pixels than its header announces
    """
    # read the binary data; the context manager closes the file again
    with open(idx3_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # header: magic number, image count, rows per image, columns per image
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张, 图片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))
    if magic_number != IDX3_UBYTE_MAGIC:
        raise ValueError('%s is not an idx3-ubyte file (magic number %d)'
                         % (idx3_ubyte_file, magic_number))

    # the pixels follow the header as one contiguous run of unsigned bytes
    pixels = np.frombuffer(bin_data, dtype=np.uint8,
                           count=num_images * num_rows * num_cols,
                           offset=struct.calcsize(fmt_header))
    return pixels.reshape((num_images, num_rows, num_cols)).astype(np.float64)


def decode_idx1_ubyte(idx1_ubyte_file):
    """
    Generic parser for idx1 (label) files.

    :param idx1_ubyte_file: path of the idx1 file
    :return: float64 np.array of shape (num_items,)
    :raises ValueError: if the magic number is not 2049 or the file holds
        fewer labels than its header announces
    """
    # read the binary data; the context manager closes the file again
    with open(idx1_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # header: magic number and label count
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张' % (magic_number, num_images))
    if magic_number != IDX1_UBYTE_MAGIC:
        raise ValueError('%s is not an idx1-ubyte file (magic number %d)'
                         % (idx1_ubyte_file, magic_number))

    # the labels follow the header, one unsigned byte each
    labels = np.frombuffer(bin_data, dtype=np.uint8, count=num_images,
                           offset=struct.calcsize(fmt_header))
    return labels.astype(np.float64)


def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    """
    TRAINING SET IMAGE FILE (train-images-idx3-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000803(2051) magic number
    0004     32 bit integer  60000            number of images
    0008     32 bit integer  28               number of rows
    0012     32 bit integer  28               number of columns
    0016     unsigned byte   ??               pixel
    0017     unsigned byte   ??               pixel
    ........
    xxxx     unsigned byte   ??               pixel
    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: path of the idx file
    :return: np.array of shape n*row*col, where n is the number of images
    """
    return decode_idx3_ubyte(idx_ubyte_file)


def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    """
    TRAINING SET LABEL FILE (train-labels-idx1-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    0004     32 bit integer  60000            number of items
    0008     unsigned byte   ??               label
    0009     unsigned byte   ??               label
    ........
    xxxx     unsigned byte   ??               label
    The labels values are 0 to 9.

    :param idx_ubyte_file: path of the idx file
    :return: np.array of shape (n,), where n is the number of images
    """
    return decode_idx1_ubyte(idx_ubyte_file)


def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    """
    TEST SET IMAGE FILE (t10k-images-idx3-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000803(2051) magic number
    0004     32 bit integer  10000            number of images
    0008     32 bit integer  28               number of rows
    0012     32 bit integer  28               number of columns
    0016     unsigned byte   ??               pixel
    0017     unsigned byte   ??               pixel
    ........
    xxxx     unsigned byte   ??               pixel
    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: path of the idx file
    :return: np.array of shape n*row*col, where n is the number of images
    """
    return decode_idx3_ubyte(idx_ubyte_file)


def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    """
    TEST SET LABEL FILE (t10k-labels-idx1-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    0004     32 bit integer  10000            number of items
    0008     unsigned byte   ??               label
    0009     unsigned byte   ??               label
    ........
    xxxx     unsigned byte   ??               label
    The labels values are 0 to 9.

    :param idx_ubyte_file: path of the idx file
    :return: np.array of shape (n,), where n is the number of images
    """
    return decode_idx1_ubyte(idx_ubyte_file)


def run():
    """Load the training set and display its first ten images as a sanity check."""
    # imported here so that the loaders above work without a plotting backend
    import matplotlib.pyplot as plt

    train_images = load_train_images()  # (60000, 28, 28) 0~255
    train_labels = load_train_labels()  # (60000,) 0~9
    # test_images = load_test_images()
    # test_labels = load_test_labels()
    print('%s %s' % (type(train_images), train_images.shape))
    print('%s %s' % (type(train_labels), train_labels.shape))

    # show the first ten samples with their labels to check the decoding
    for i in range(10):
        print(train_labels[i])
        print('%s %s' % (np.max(train_images), np.min(train_images)))
        plt.imshow(train_images[i], cmap='gray')
        plt.show()
    print('done')


if __name__ == '__main__':
    run()

# Author: monitor1379
# Link: https://www.jianshu.com/p/84f72791806f
# Source: Jianshu
# Copyright belongs to the author. For commercial reprints please contact the
# author for authorization; for non-commercial reprints please credit the source.