├── .gitignore
├── LeNet.py
├── README.md
└── fetch_MNIST.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/LeNet.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
'''
Author: Site Li
Website: http://blog.csdn.net/site1997
'''
import numpy as np
from scipy.signal import convolve2d
from skimage.measure import block_reduce
import fetch_MNIST


class LeNet(object):
    # The network is laid out as:
    #   input -> conv1 -> pool1 -> conv2 -> pool2 -> fc1 -> relu -> fc2 -> relu -> softmax
    #    l0       l1       l2       l3       l4      l5     l6      l7     l8       l9
    def __init__(self, lr=0.1):
        self.lr = lr
        # 6 convolution kernels, each of size 1 x 5 x 5
        self.conv1 = xavier_init(6, 1, 5, 5)
        # mean-pooling window of size 2 x 2, stride 2
        self.pool1 = [2, 2]
        # 16 convolution kernels, each of size 6 x 5 x 5
        self.conv2 = xavier_init(16, 6, 5, 5)
        # mean-pooling window of size 2 x 2, stride 2
        self.pool2 = [2, 2]
        # fully connected layer: 256 -> 200
        self.fc1 = xavier_init(256, 200, fc=True)
        # fully connected layer: 200 -> 10
        self.fc2 = xavier_init(200, 10, fc=True)

    def forward_prop(self, input_data):
        self.l0 = np.expand_dims(input_data, axis=1) / 255.0  # (batch_sz, 1, 28, 28)
        self.l1 = self.convolution(self.l0, self.conv1)       # (batch_sz, 6, 24, 24)
        self.l2 = self.mean_pool(self.l1, self.pool1)         # (batch_sz, 6, 12, 12)
        self.l3 = self.convolution(self.l2, self.conv2)       # (batch_sz, 16, 8, 8)
        self.l4 = self.mean_pool(self.l3, self.pool2)         # (batch_sz, 16, 4, 4)
        self.l5 = self.fully_connect(self.l4, self.fc1)       # (batch_sz, 200)
        self.l6 = self.relu(self.l5)                          # (batch_sz, 200)
        self.l7 = self.fully_connect(self.l6, self.fc2)       # (batch_sz, 10)
        self.l8 = self.relu(self.l7)                          # (batch_sz, 10)
        self.l9 = self.softmax(self.l8)                       # (batch_sz, 10)
        return self.l9
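    # A note on the sign convention used in backward_prop below: the usual
    # gradient of the cross-entropy loss w.r.t. the softmax input is
    # (softmax_output - label) / batch_size. This implementation keeps the
    # negated quantity, (label - output) / batch_size, and every layer then
    # *adds* lr * gradient to its weights, so the two sign flips cancel and
    # the update is ordinary gradient descent on the loss.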
    def backward_prop(self, softmax_output, output_label):
        l8_delta = (output_label - softmax_output) / softmax_output.shape[0]
        l7_delta = self.relu(self.l8, l8_delta, deriv=True)                                 # (batch_sz, 10)
        l6_delta, self.fc2 = self.fully_connect(self.l6, self.fc2, l7_delta, deriv=True)    # (batch_sz, 200)
        l5_delta = self.relu(self.l6, l6_delta, deriv=True)                                 # (batch_sz, 200)
        l4_delta, self.fc1 = self.fully_connect(self.l4, self.fc1, l5_delta, deriv=True)    # (batch_sz, 16, 4, 4)
        l3_delta = self.mean_pool(self.l3, self.pool2, l4_delta, deriv=True)                # (batch_sz, 16, 8, 8)
        l2_delta, self.conv2 = self.convolution(self.l2, self.conv2, l3_delta, deriv=True)  # (batch_sz, 6, 12, 12)
        l1_delta = self.mean_pool(self.l1, self.pool1, l2_delta, deriv=True)                # (batch_sz, 6, 24, 24)
        l0_delta, self.conv1 = self.convolution(self.l0, self.conv1, l1_delta, deriv=True)  # (batch_sz, 1, 28, 28)
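    # How the convolution backward pass below works: for a 'valid' forward
    # convolution, the gradient w.r.t. the input is the 'full' correlation of
    # the incoming delta with the kernel. Zero-padding the delta with K-1
    # rows/columns on every side and flipping the kernel (which turns scipy's
    # convolve2d into a correlation) realizes exactly that with another
    # 'valid' convolve2d call.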
    def convolution(self, input_map, kernel, front_delta=None, deriv=False):
        N, C, W, H = input_map.shape
        K_NUM, K_C, K_W, K_H = kernel.shape
        if not deriv:
            feature_map = np.zeros((N, K_NUM, W - K_W + 1, H - K_H + 1))
            for imgId in range(N):
                for kId in range(K_NUM):
                    for cId in range(C):
                        feature_map[imgId][kId] += \
                            convolve2d(input_map[imgId][cId], kernel[kId, cId, :, :], mode='valid')
            return feature_map
        else:
            # front->back (propagate loss)
            back_delta = np.zeros((N, C, W, H))
            kernel_gradient = np.zeros((K_NUM, K_C, K_W, K_H))
            # pad width and height separately so non-square kernels are handled correctly
            padded_front_delta = \
                np.pad(front_delta, [(0, 0), (0, 0), (K_W-1, K_W-1), (K_H-1, K_H-1)],
                       mode='constant', constant_values=0)
            for imgId in range(N):
                for cId in range(C):
                    for kId in range(K_NUM):
                        back_delta[imgId][cId] += \
                            convolve2d(padded_front_delta[imgId][kId], kernel[kId, cId, ::-1, ::-1], mode='valid')
                        kernel_gradient[kId][cId] += \
                            convolve2d(front_delta[imgId][kId], input_map[imgId, cId, ::-1, ::-1], mode='valid')
            # update weights
            kernel += self.lr * kernel_gradient
            return back_delta, kernel

    def mean_pool(self, input_map, pool, front_delta=None, deriv=False):
        N, C, W, H = input_map.shape
        P_W, P_H = tuple(pool)
        if not deriv:
            # output shape is (N, C, W // P_W, H // P_H)
            feature_map = block_reduce(input_map, (1, 1, P_W, P_H), func=np.mean)
            return feature_map
        else:
            # front->back (propagate loss): spread each delta evenly over its pooling window
            back_delta = front_delta.repeat(P_W, axis=2).repeat(P_H, axis=3)
            back_delta /= (P_W * P_H)
            return back_delta

    def fully_connect(self, input_data, fc, front_delta=None, deriv=False):
        N = input_data.shape[0]
        if not deriv:
            output_data = np.dot(input_data.reshape(N, -1), fc)
            return output_data
        else:
            # front->back (propagate loss)
            back_delta = np.dot(front_delta, fc.T).reshape(input_data.shape)
            # update weights
            fc += self.lr * np.dot(input_data.reshape(N, -1).T, front_delta)
            return back_delta, fc

    def relu(self, x, front_delta=None, deriv=False):
        if not deriv:
            return x * (x > 0)
        else:
            # propagate loss only where the activation was positive
            back_delta = front_delta * 1. * (x > 0)
            return back_delta

    def softmax(self, x):
        y = list()
        for t in x:
            # subtract the row maximum for numerical stability
            e_t = np.exp(t - np.max(t))
            y.append(e_t / e_t.sum())
        return np.array(y)


def xavier_init(c1, c2, w=1, h=1, fc=False):
    # Xavier/Glorot uniform initialization: limit = sqrt(6 / (fan_in + fan_out))
    fan_1 = c2 * w * h
    fan_2 = c1 * w * h
    ratio = np.sqrt(6.0 / (fan_1 + fan_2))
    params = ratio * (2 * np.random.random((c1, c2, w, h)) - 1)
    if fc:
        params = params.reshape(c1, c2)
    return params


def convertToOneHot(labels):
    oneHotLabels = np.zeros((labels.size, labels.max() + 1))
    oneHotLabels[np.arange(labels.size), labels] = 1
    return oneHotLabels


def shuffle_dataset(data, label):
    N = data.shape[0]
    index = np.random.permutation(N)
    x = data[index, :, :]
    y = label[index, :]
    return x, y


if __name__ == '__main__':
    train_imgs = fetch_MNIST.load_train_images()
    train_labs = fetch_MNIST.load_train_labels().astype(int)
    # size of data; batch size
    data_size = train_imgs.shape[0]
    batch_sz = 64
    # learning rate; number of iterations; batches per epoch (keeps the batch index in range)
    lr = 0.01
    max_iter = 50000
    iter_mod = data_size // batch_sz
    train_labs = convertToOneHot(train_labs)
    my_CNN = LeNet(lr)
    for iters in range(max_iter):
        # starting index for this batch
        st_idx = (iters % iter_mod) * batch_sz
        # reshuffle the dataset at the start of every epoch
        if st_idx == 0:
            train_imgs, train_labs = shuffle_dataset(train_imgs, train_labs)
        input_data = train_imgs[st_idx : st_idx + batch_sz]
        output_label = train_labs[st_idx : st_idx + batch_sz]
        softmax_output = my_CNN.forward_prop(input_data)
        if iters % 50 == 0:
            # calculate accuracy
            correct_list = [int(np.argmax(softmax_output[i]) == np.argmax(output_label[i])) for i in range(batch_sz)]
            accuracy = float(np.array(correct_list).sum()) / batch_sz
            # calculate cross-entropy loss (skip zero probabilities to avoid log(0))
            correct_prob = [softmax_output[i][np.argmax(output_label[i])] for i in range(batch_sz)]
            correct_prob = [p for p in correct_prob if p > 0]
            loss = -1.0 * np.sum(np.log(correct_prob))
            print("Iteration %d:" % iters)
            print("The accuracy is %f, the loss is %f" % (accuracy, loss))
        my_CNN.backward_prop(softmax_output, output_label)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# LeNet-python
### A tiny implementation of LeNet (without a deep learning framework)

- - -
## Instructions
The main functional code is in LeNet.py.
To run the code, do the following (a minimal command sequence is sketched below):
1. Download the MNIST dataset and extract it to "your/path/to/data"
2. Modify the data path in fetch_MNIST.py to "your/path/to/data"
3. Execute `python LeNet.py`

Note: the code was written for Python 2.7 and should also run under Python 3. If something goes wrong, it is most likely caused by an incompatible NumPy version.
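For reference, here is one possible command sequence for steps 1-3. The download URL and the data directory are placeholders, and the exact archive names expected by fetch_MNIST.py should be checked against your copy of that script (only the training images and labels are used):

```bash
# fetch and extract the standard MNIST training archives
mkdir -p your/path/to/data && cd your/path/to/data
wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
gunzip *.gz
cd -

# after pointing the data path in fetch_MNIST.py at your/path/to/data:
python LeNet.py
```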
## Results