├── .idea
│   └── codeStyleSettings.xml
├── FullConnectedNetwork.py
├── FullConnectedNetwork2.py
├── NeuralNode.py
├── README.md
└── unitTestDoc
    └── 神经网络单元测试.xlsx

/.idea/codeStyleSettings.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/FullConnectedNetwork.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: UTF-8 -*-

# author: wangyao_bupt@hotmail.com
import numpy as np
from NeuralNode import NeuralNode

# Fully connected feed-forward neural network
class FullConnectedNetwork:
    def __init__(self, iStructure):
        # Network structure expressed as a list. For example [N0, N1, N2, N3] describes a 4-layer network:
        # the first layer is the input layer with N0 nodes, the last layer is the output layer with N3 nodes,
        # and the network has 2 hidden layers with N1 and N2 nodes respectively
        self.networkStructure = iStructure
        # nodeList keeps the neurons of every layer, one separate list per layer; for the input layer "" is stored.
        # The list of every other layer holds the neuron objects
        self.nodeList = genNodeList(self.networkStructure)

    # Predict the result for a single input sample
    # The input data vector shape is 1*numberOfInputNodes
    # The return value shape is 1*numberOfOutputNodes
    def predict(self, inputData):
        if inputData.shape[0] != self.networkStructure[0]:
            print "Invalid input data shape: ", inputData.shape
            return
        # To avoid recomputation, keep the forward value of every node in a list;
        # the list contains N_Layer ndarrays, one for the result of each layer
        forwardValueList = [inputData]
        for layerCount in range(1, len(self.networkStructure)):
            forwardValueVec = np.zeros(self.networkStructure[layerCount])
            for indexInCurLayer in range(0, forwardValueVec.shape[0]):
                forwardValueVec[indexInCurLayer] = self.nodeList[layerCount][indexInCurLayer].forward(forwardValueList[layerCount - 1])
            forwardValueList.append(forwardValueVec)
        result = forwardValueList[-1]
        return result

    # Predict the results for a batch of input data
    # The input data shape is batchSize*numberOfInputNodes
    # The return value shape is batchSize*numberOfOutputNodes
    def predict_batch(self, iBatchData):
        batchSize = iBatchData.shape[0]
        result = np.zeros([batchSize, self.networkStructure[-1]])
        for batchIdx in range(0, batchSize):
            result[batchIdx] = self.predict(iBatchData[batchIdx])
        return result

    # Train the model with training data and labels
    # data has shape (batch_size, numberOfInputNodes)
    # label has shape (batch_size, numberOfOutputNodes); only classification is considered here,
    # so label is a one-hot vector
    def train(self, data, label, learningRate):
        # Train with gradient descent
        totalLoss = 0
        for batchIdx in range(0, data.shape[0]):
            # Get the forward prediction vector
            predict_result = self.predict(data[batchIdx])

            # Compute the loss with cross entropy
            loss = cross_entropy(softmax(predict_result), label[batchIdx])
            totalLoss += loss
            # dpredict = dLoss / dpredict
            dpredict = (label[batchIdx] - predict_result)
            # Gradient vector used to train each layer; its shape is 1*numOfNodeInThisLayer and changes from layer to layer
            gradient = dpredict
            # Debug output
            #print 'batchIdx', batchIdx, 'data = ', data[batchIdx], 'predictedResult=', predict_result, 'Softmax(predictedResult)=', softmax(predict_result), ' label = ', label[batchIdx], 'loss = ', loss, ' gradient = ', gradient
            # Train layer by layer from the output layer backwards; layer 0 needs no training
            for layerIdx in range(len(self.networkStructure) - 1, 0, -1):
                for nodeIdxInOneLayer in range(0, self.networkStructure[layerIdx]):
                    # Compute the gradient for each node
                    grad = self.nodeList[layerIdx][nodeIdxInOneLayer].backward(gradient[nodeIdxInOneLayer])
                    # Train each node
                    self.nodeList[layerIdx][nodeIdxInOneLayer].adjustWeightAndBias(learningRate, grad[0], grad[1])
                    #print 'layer', layerIdx, 'node', nodeIdxInOneLayer, 'Weight', self.nodeList[layerIdx][nodeIdxInOneLayer].weight
                # Compute the gradient for the previous layer; when layerIdx == 1 the previous layer needs no training
                if (layerIdx > 1):
                    # The gradient vector shape is 1 * (number of nodes in layer layerIdx-1)
                    weightMatrix = np.zeros([self.networkStructure[layerIdx-1], self.networkStructure[layerIdx]])
                    deltaVec = np.zeros(self.networkStructure[layerIdx])
                    for nodeIdx in range(0, self.networkStructure[layerIdx]):
                        weightMatrix[:, nodeIdx] = self.nodeList[layerIdx][nodeIdx].weight
                        deltaVec[nodeIdx] = self.nodeList[layerIdx][nodeIdx].delta
                    gradient = np.matmul(weightMatrix, deltaVec)
                    #print 'Gradient for layer ', layerIdx-1, ' =', gradient

            # Debug: predict again after training on this sample to see whether the loss decreased
            predict_result = self.predict(data[batchIdx])
            loss = cross_entropy(softmax(predict_result), label[batchIdx])
            #print 'batchIdx', batchIdx, 'After Train Loss = ', loss
            #fcNetwork.debug_print()
        print "Total Loss=", totalLoss

    def debug_print(self):
        for layerIdx in range(1, len(self.networkStructure)):
            for nodeIdx in range(0, self.networkStructure[layerIdx]):
                print 'LayerIdx = ', layerIdx, ' NodeIdx = ', nodeIdx, " ", self.nodeList[layerIdx][nodeIdx].getParam()
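# Added illustration (not part of the original file): a minimal, self-contained sketch of the
# gradient propagation step at the end of each layer loop in train() above, where the gradient
# handed to layer (layerIdx-1) is np.matmul(weightMatrix, deltaVec). The numbers are arbitrary.
def _backpropagated_gradient_example():
    # column j holds the weight vector of node j in the current layer
    weightMatrix = np.array([[0.1, 0.3],
                             [0.2, 0.4]])
    # delta of each node in the current layer
    deltaVec = np.array([0.5, -0.5])
    # gradient seen by the two nodes of the previous layer: [-0.1, -0.1]
    return np.matmul(weightMatrix, deltaVec)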
# Generate the neuron list from the network structure list
def genNodeList(networkStructure):
    nodeList = []
    for layerIdx in range(0, len(networkStructure)):
        if layerIdx == 0:
            nodeList.append("")
        else:
            nodesInThisLayer = []
            for nodeIdx in range(0, networkStructure[layerIdx]):
                nodesInThisLayer.append(NeuralNode(networkStructure[layerIdx-1]))
            nodeList.append(nodesInThisLayer)

    return nodeList

# Compute the cross entropy of two vectors
# Both input vectors have the same shape, 1*N
def cross_entropy(data, label):
    return -1*np.sum(np.nan_to_num(np.dot(label, np.log(data))))

# Compute the softmax of a vector
def softmax(data):
    s = np.sum(np.exp(data))
    return np.exp(data) / s

# Turn a softmax result into a one-hot vector: set the dimension with the largest value to 1 and the others to 0
def argmax(data):
    maxIdx = np.argmax(data)
    result = np.zeros(data.shape)
    result[maxIdx] = 1
    return result

def generateTestDataAndLabel(batchSize):
    testData = np.random.randn(batchSize, 2)
    label = np.zeros([batchSize, 2])
    for batchIdx in range(0, batchSize):
        if testData[batchIdx][0] >= testData[batchIdx][1]:
            label[batchIdx][0] = 1
        else:
            label[batchIdx][1] = 1
    return [testData, label]

def evaluate(fullConnectedNetwork, testData, label):
    if testData.shape[0] != label.shape[0]:
        print 'Batch sizes of data and label do not match, cannot evaluate'
        return
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    predictedValues = fullConnectedNetwork.predict_batch(testData)
    for batchIdx in range(0, testData.shape[0]):
        if (argmax(softmax(predictedValues[batchIdx])) == label[batchIdx]).all():
            if label[batchIdx][0] == 1:
                tp = tp + 1
            else:
                tn = tn + 1
        else:
            if label[batchIdx][0] == 1:
                fn = fn + 1
            else:
                fp = fp + 1
    if (tp+fp == 0):
        precision = 0
    else:
        precision = tp * 1.0 / (tp + fp)
    if (tp+fn == 0):
        recall = 0
    else:
        recall = tp*1.0 / (tp+fn)
    accuracy = (tp+tn)*1.0 / (tp+tn+fp+fn)
    print 'tp = ', tp, ", tn =", tn, " fp = ", fp, ' fn=', fn
    print "precision = ", precision, " recall = ", recall, " acc = ", accuracy
    return [precision, recall, accuracy]
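# Added illustration (not part of the original file): a hand-checkable example of the softmax /
# cross-entropy / argmax helpers used by train() and evaluate() above. The function name and the
# numbers are illustrative only.
def _softmax_cross_entropy_example():
    scores = np.array([2.0, 1.0])
    probs = softmax(scores)                      # approximately [0.7311, 0.2689]
    one_hot_label = np.array([1.0, 0.0])
    loss = cross_entropy(probs, one_hot_label)   # -ln(0.7311), approximately 0.313
    prediction = argmax(probs)                   # [1., 0.], i.e. class 0
    return probs, loss, prediction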
if __name__ == '__main__':
    fcNetwork = FullConnectedNetwork([2, 2, 2])
    fcNetwork.debug_print()

    batchSize = 10
    learningRate = 0.03

    [testData, label] = generateTestDataAndLabel(batchSize)
    for index in range(0, label.shape[0]):
        print "testData[", index, ']=', testData[index], " label[", index, ']=', label[index]

    print "before train: "
    evaluate(fcNetwork, testData, label)

    for trainIndex in range(0, 10):
        fcNetwork.train(testData, label, learningRate)
        print "result after train loop ", trainIndex
        evaluate(fcNetwork, testData, label)
    fcNetwork.debug_print()
--------------------------------------------------------------------------------
/FullConnectedNetwork2.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: UTF-8 -*-

# author: wangyao_bupt@hotmail.com
import numpy as np

# FullConnectedNetwork2 is a fully connected neural network, using sigmoid as the activation function
# The difference between FullConnectedNetwork2 and FullConnectedNetwork is:
# in FullConnectedNetwork2 the weights and biases are stored in matrix format. The NeuralNode class is NOT used in FullConnectedNetwork2
class FullConnectedNetwork2:
    __debugMode__ = True

    # @param iStructure is a list containing L elements, representing the L layers of a fully connected network
    #        The 1st element (aka iStructure[0]) defines the number of input nodes
    #        The i-th element (aka iStructure[i-1]) defines the number of neural nodes in the i-th layer
    # @param ibatchSize defines the number of samples within a batch
    def __init__(self, iStructure, ibatchSize):
        self.networkStructure = iStructure
        self.numberOfLayers = len(self.networkStructure)
        self.batchSize = ibatchSize
        # weightMatrixList is a container for the weight matrix of each layer
        self.weightMatrixList = np.ndarray((self.numberOfLayers - 1), np.object)
        for layerIdx in range(0, self.numberOfLayers - 1):
            ## each element in weightMatrixList is a matrix, its content is as below
            # ROW_0             = [weight(^l)(_00), weight(^l)(_01), weight(^l)(_02), ... weight(^l)(_0S_l), b(^l)(_0)]
            # .....
            # ROW_(s_(l+1) - 1) = [weight(^l)(_(s_(l+1) - 1)0), weight(^l)(_(s_(l+1) - 1)1), weight(^l)(_(s_(l+1) - 1)2), ... weight(^l)(_(s_(l+1) - 1)(S_l)), b(^l)(_(s_(l+1) - 1))]
            # l: layer index
            # weight(^l)(_ji): the weight from the i-th node in layer (l) to the j-th node in layer (l+1)
            # b(^l)(_j): the bias of the j-th node in layer (l+1)
            # s_l: number of nodes in layer (l)
            if (not FullConnectedNetwork2.__debugMode__):
                weightMatrixBetweenCurAndNextLayer = np.random.randn(
                    iStructure[layerIdx+1], iStructure[layerIdx]+1)
            else:
                weightMatrixBetweenCurAndNextLayer = np.zeros((iStructure[layerIdx+1], iStructure[layerIdx]+1))
                if self.__debugMode__:
                    weightMatrixBetweenCurAndNextLayer[0] = [1, -1, 1]
                    weightMatrixBetweenCurAndNextLayer[1] = [2, -2, 1]
            self.weightMatrixList[layerIdx] = weightMatrixBetweenCurAndNextLayer
        # activationValueMatrix holds the activation values of each layer for each sample in a batch.
        # activationValueMatrix's shape is (batchSize, numberOfLayers); each element is a 1-D vector.
        # For example, given batchIdx = b, LayerIndex = l, activationValueMatrix[b][l] represents
        # the value (1-D vector with self.networkStructure[l] dims) calculated from the l-th layer.
        # For the input layer, activationValueMatrix[b][0] = inputVector
        self.activationValueMatrix = np.ndarray((self.batchSize, len(self.networkStructure)), np.object)
        # zMatrix holds the values before activation of each layer for each sample in a batch.
        # The shape of zMatrix is similar to activationValueMatrix.
        # Since the input layer does not contain any weight, self.zMatrix[b][0] is meaningless
        self.zMatrix = np.ndarray((self.batchSize, len(self.networkStructure)), np.object)

    # Calculate the forward value of a batch of input data
    # The input data shape should be (batchSize, numberOfInputNode)
    #   batchSize == self.batchSize
    #   numberOfInputNode == self.networkStructure[0]
    # The return value is a matrix of shape (batchSize, numberOfOutputNode)
    def forward(self, inputData):
        batchSize = inputData.shape[0]
        if batchSize != self.batchSize:
            print "Invalid Batch Size:", batchSize
        result = np.ndarray((batchSize, self.networkStructure[-1]))
        for sampleIdx in range(0, batchSize):
            # a[sampleIdx][0] is always equal to the input vector
            self.activationValueMatrix[sampleIdx][0] = inputData[sampleIdx]
            for layerIdx in range(1, self.numberOfLayers):
                activation_and_one = np.ones(self.activationValueMatrix[sampleIdx][layerIdx - 1].shape[0]+1)
                activation_and_one[:-1] = self.activationValueMatrix[sampleIdx][layerIdx - 1]
                self.zMatrix[sampleIdx][layerIdx] \
                    = np.matmul(self.weightMatrixList[layerIdx - 1], activation_and_one)
                self.activationValueMatrix[sampleIdx][layerIdx] = self.sigmoid(self.zMatrix[sampleIdx][layerIdx])
            result[sampleIdx] = self.activationValueMatrix[sampleIdx][self.numberOfLayers-1]
        return result
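    # Added illustration (not part of the original code): forward() folds the bias into the weight
    # matrix by appending a constant 1 to the activation vector, so z = W.a + b is computed as
    # [W | b] . [a ; 1]. With the debug weights above ([1, -1, 1] and [2, -2, 1]) and an input
    # a = [0.5, 0.25]:
    #   z_0 = 1*0.5 + (-1)*0.25 + 1 = 1.25
    #   z_1 = 2*0.5 + (-2)*0.25 + 1 = 1.50
    # which is exactly np.matmul([[1, -1, 1], [2, -2, 1]], [0.5, 0.25, 1]).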
    # Evaluate the error between the predicted result and the label
    # Both predictedResult's and label's shapes are (batchSize, numberOfOutputNode)
    # The loss function is defined as in http://ufldl.stanford.edu/wiki/index.php/Backpropagation_Algorithm
    # ignoring the regularization term
    def lossEvaluation(self, predictedResult, label):
        loss = 0.0
        for sampleIdx in range(0, self.batchSize):
            loss += 0.5 * np.sum((predictedResult[sampleIdx] - label[sampleIdx])**2)
        return loss / self.batchSize

    # Train the network using labels
    # label should have the same batch size as data, i.e. shape (batchSize, numberOfOutputNode)
    def train(self, data, label, learningRate):
        predictedResult = self.forward(data)
        if self.__debugMode__:
            print 'loss before training: ', self.lossEvaluation(predictedResult, label)
        # for each node i in layer l, we would like to compute an "error term" that measures
        # how much that node was "responsible" for any errors in our output
        # delta represents such an error
        delta = np.ndarray((self.batchSize, self.numberOfLayers-1), np.object)

        ## initialize delta_weightAndBias to all zeros
        delta_weightAndBias = np.ndarray((self.numberOfLayers-1), np.object)
        for layerIdx in range(0, self.numberOfLayers-1):
            delta_weightAndBias[layerIdx] = np.zeros(self.weightMatrixList[layerIdx].shape)

        ## calculate the derivative of each weight/bias at each node in each layer for each sample
        for sampleIdx in range(0, self.batchSize):
            delta[sampleIdx][-1] = \
                -(label[sampleIdx] - self.activationValueMatrix[sampleIdx][self.numberOfLayers-1])\
                * self.dsigmoiddx_usingActivationValue(self.activationValueMatrix[sampleIdx][self.numberOfLayers-1])
            for layerIdx in range(self.numberOfLayers-3, -1, -1):
                # build the vector for f'(z_i)
                dsigmoidVector = np.zeros(self.networkStructure[layerIdx+1])
                for nodeIdx in range(0, self.networkStructure[layerIdx+1]):
                    dsigmoidVector[nodeIdx] = self.dsigmoiddx_usingActivationValue(self.activationValueMatrix[sampleIdx][layerIdx+1][nodeIdx])
                # build the weight matrix, removing the bias column
                weightMatrix = np.zeros((self.weightMatrixList[layerIdx].shape[0], self.weightMatrixList[layerIdx].shape[1]-1))
                for rowIdx in range(0, weightMatrix.shape[0]):
                    for colIdx in range(0, weightMatrix.shape[1]):
                        weightMatrix[rowIdx][colIdx] = self.weightMatrixList[layerIdx][rowIdx][colIdx]
                weightMatrix = np.transpose(weightMatrix)
                # calculate delta
                delta[sampleIdx][layerIdx] = np.matmul(weightMatrix, delta[sampleIdx][layerIdx+1]) * dsigmoidVector

            for layerIdx in range(0, self.numberOfLayers-1):
                ## dWeightAndBias holds the desired partial derivatives
                dWeightAndBias = np.zeros(self.weightMatrixList[layerIdx].shape)
                for i in range(0, self.weightMatrixList[layerIdx].shape[0]):
                    for j in range(0, self.weightMatrixList[layerIdx].shape[1]):
                        if j >= self.networkStructure[layerIdx]:
                            # dbias
                            dWeightAndBias[i][j] = delta[sampleIdx][layerIdx][i]
                        else:
                            dWeightAndBias[i][j] = delta[sampleIdx][layerIdx][i] \
                                * self.activationValueMatrix[sampleIdx][layerIdx][j]
                if self.__debugMode__:
                    print 'Sample Index = ', sampleIdx, " LayerIdx = ", layerIdx, " dWeightAndBias = ", dWeightAndBias
                ## for each layer in each sample, accumulate delta_weightAndBias
                delta_weightAndBias[layerIdx] += dWeightAndBias

        if self.__debugMode__:
            for sampleIdx in range(0, self.batchSize):
                for layerIdx in range(0, self.numberOfLayers-1):
                    print 'Sample Index = ', sampleIdx, " LayerIdx = ", layerIdx, " Delta = ", delta[sampleIdx][layerIdx]

        ## update the weights and biases
        for layerIdx in range(0, self.numberOfLayers-1):
            self.weightMatrixList[layerIdx] = self.weightMatrixList[layerIdx] - \
                learningRate*delta_weightAndBias[layerIdx]/self.batchSize
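    # Added note (not part of the original code): train() implements the batch gradient descent step
    # from the UFLDL page referenced above, with f = sigmoid:
    #   output layer:   delta(nl) = -(y - a(nl)) * f'(z(nl))
    #   hidden layers:  delta(l)  = (W(l)^T . delta(l+1)) * f'(z(l))
    #   derivatives:    dJ/dW(l)_ij = a(l)_j * delta(l+1)_i,   dJ/db(l)_i = delta(l+1)_i
    #   update:         W(l) := W(l) - learningRate * (1/batchSize) * sum of the derivatives over the batch
    # f'(z) is evaluated as a*(1-a) via dsigmoiddx_usingActivationValue, since a = sigmoid(z).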
    def sigmoid(self, x):
        result = 1 / (1 + np.exp(-1*x))
        return result

    def dsigmoiddx(self, x):
        return (1-self.sigmoid(x))*self.sigmoid(x)

    def dsigmoiddx_usingActivationValue(self, activationValue):
        return (1-activationValue)*activationValue
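    # Added note (not part of the original code): the two derivative helpers agree because
    # sigmoid'(x) = sigmoid(x)*(1 - sigmoid(x)). For example sigmoid(0) = 0.5, so
    # dsigmoiddx(0) = 0.25 and dsigmoiddx_usingActivationValue(0.5) = 0.25 as well.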
    def debugPrintWeights(self):
        for layerIdx in range(0, self.numberOfLayers-1):
            for nodeIdx in range(0, self.networkStructure[layerIdx+1]):
                print 'Layer ', (layerIdx+1), ' Node ', nodeIdx, " Weight = ", self.weightMatrixList[layerIdx][nodeIdx][:-1], " bias = ", self.weightMatrixList[layerIdx][nodeIdx][-1]

def generateTestDataAndLabel(batchSize):
    testData = np.random.randn(batchSize, 2)
    label = np.zeros([batchSize, 2])
    for batchIdx in range(0, batchSize):
        if testData[batchIdx][0] >= testData[batchIdx][1]:
            label[batchIdx][0] = 1
        else:
            label[batchIdx][1] = 1
    return [testData, label]

if __name__ == "__main__":
    batchSize = 1000
    learningRate = 0.1

    FullConnectedNetwork2.__debugMode__ = False
    network = FullConnectedNetwork2([2, 2, 2], batchSize)
    network.debugPrintWeights()

    #[testData, label] = generateTestDataAndLabel(batchSize)

    #predictedResult = network.forward(testData)

    #for dataIdx in range(0, batchSize):
        #print 'inputData = ', testData[dataIdx], "Layer 1 actValue=", network.activationValueMatrix[dataIdx][1], " PredictedResult = ", network.activationValueMatrix[dataIdx][2], " Label = ", label[dataIdx]

    for dataBatchIdx in range(0, 10):
        [testData, label] = generateTestDataAndLabel(batchSize)
        for trainLoopIdx in range(0, 100):
            network.train(testData, label, learningRate)
            predictedResult = network.forward(testData)
            print "LoopIdx=", trainLoopIdx, " Loss=", network.lossEvaluation(predictedResult, label)

    network.debugPrintWeights()
--------------------------------------------------------------------------------
/NeuralNode.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: UTF-8 -*-

# author: wangyao_bupt@hotmail.com
import numpy as np

class NeuralNode:

    def __init__(self, inputDim):
        # number of connections between this node and the previous layer
        self.iDims = inputDim
        # weight vector, shape = (iDims, )
        self.weight = np.random.rand(self.iDims)
        #self.weight = np.ones(self.iDims)
        # bias
        self.bias = np.random.rand(1)
        # input of the activation function
        self.z = 1
        # residual (delta) of the current layer
        self.delta = 1
        # input vector from the previous layer, must match iDims, shape = (iDims, )
        self.x = []
        # additional state variables for the Adam algorithm
        self.m_weight = np.zeros(self.iDims)
        self.v_weight = np.zeros(self.iDims)
        self.m_bias = 0
        self.v_bias = 0
        self.t = 0

    # forward: take a 1*iDims input vector and compute the forward result
    def forward(self, ix):
        if (ix.shape != (self.iDims,)):
            print ("Wrong input shape: x.shape = " + str(ix.shape))
            return
        self.x = ix
        self.z = np.dot(self.x, self.weight) + self.bias
        # to avoid numerical overflow, clamp z to a maximum and minimum value
        if self.z > 1000:
            self.z = 1000
        elif self.z < -1000:
            self.z = -1000
        return sigmoid(self.z)

    # backward: take the gradient computed by the following stage and return two arrays
    # first array:  dw, an iDims*1 vector, the gradient of this node with respect to each weight
    # second array: dbias, a 1*1 vector, the gradient of this node with respect to the bias
    def backward(self, gradient):
        try:
            #print 'self.z = ', self.z
            dz = dsigmoiddx(self.z)  # derivative of the sigmoid function
        except RuntimeWarning:
            print 'self.z = ', self.z
        if np.isnan(dz):
            dz = np.nan_to_num(dz)
            print 'dz=', dz

        self.delta = dz*gradient
        dw = self.x * self.delta  # gradient passed back to w
        if np.isnan(dw).any():
            dw = np.nan_to_num(dw)
        dbias = self.delta  # gradient passed back to bias
        if np.isnan(dbias).any():
            dbias = np.nan_to_num(dbias)
        return [dw, dbias]
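    # Added note (not part of the original code): for a node y = sigmoid(w.x + b) that receives an
    # upstream gradient g = dLoss/dy, the chain rule gives
    #   delta      = g * sigmoid'(z)
    #   dLoss/dw_i = delta * x_i      (the dw returned above)
    #   dLoss/db   = delta            (the dbias returned above)
    # which is exactly what backward() computes before adjustWeightAndBias() applies the update.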
    # adjust the parameters with the Adam optimization algorithm
    def adaOptimization(self, learnRate, dw, dbias):
        # constants required by the Adam algorithm
        beta1 = 0.9
        beta2 = 0.999
        eps = 1e-8
        self.t = self.t+1
        self.m_weight = self.m_weight*beta1 + (1-beta1)*dw
        self.v_weight = self.v_weight*beta2 + (1-beta2)*(dw*dw)
        m_w = self.m_weight / (1-beta1**self.t)
        v_w = self.v_weight / (1-beta2**self.t)
        self.weight = self.weight - learnRate*m_w/(np.sqrt(v_w)+eps)

        self.m_bias = self.m_bias*beta1 + (1-beta1)*dbias
        self.v_bias = self.v_bias*beta2 + (1-beta2)*(dbias*dbias)
        m_b = self.m_bias / (1-beta1**self.t)
        v_b = self.v_bias / (1-beta2**self.t)
        self.bias = self.bias - learnRate*m_b/(np.sqrt(v_b)+eps)

    # adjust the weight and bias parameters with the learning rate and the gradients
    def adjustWeightAndBias(self, learnRate, dw, dbias):
        self.weight = self.weight - learnRate*dw
        self.bias = self.bias - learnRate*dbias

    # print the internal parameters of the node
    def printParam(self):
        print "Weight = ", self.weight, " Bias = ", self.bias

    def getParam(self):
        return [self.weight, self.bias]

def sigmoid(x):
    result = 1 / (1 + np.exp(-1*x))
    return result

# derivative of the sigmoid function with respect to x
def dsigmoiddx(x):
    return (1-sigmoid(x))*sigmoid(x)
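# Added example (not part of the original file): a small numerical gradient check for NeuralNode,
# comparing the analytic gradients returned by backward() against central finite differences of a
# squared loss (forward(x) - target)**2. The function name, input values and epsilon are
# illustrative assumptions, not part of the original code.
def unitTest_gradientCheck(eps=1e-5):
    node = NeuralNode(2)
    x = np.array([0.7, -0.3])
    target = 0.2

    def loss():
        out = node.forward(x)
        return float((out - target) * (out - target))

    # analytic gradients from backward()
    out = node.forward(x)
    dLossdvalue = 2 * (out - target)
    [dw, dbias] = node.backward(dLossdvalue)

    # numerical gradient of the loss with respect to each weight
    for i in range(node.iDims):
        originalWeight = node.weight[i]
        node.weight[i] = originalWeight + eps
        lossPlus = loss()
        node.weight[i] = originalWeight - eps
        lossMinus = loss()
        node.weight[i] = originalWeight
        numericGrad = (lossPlus - lossMinus) / (2 * eps)
        print "weight", i, ": analytic =", dw[i], " numeric =", numericGrad
    return [dw, dbias]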
# unit test for neuron training, training the parameters with gradient descent
def unitTest_naiveTrain():
    print "In unitTest_naiveTrain"
    n1 = NeuralNode(2)
    n1.printParam()
    prevWeight = n1.weight

    x = np.ones(2)
    x[0] = 2
    x[1] = 2

    target = 1/(1+np.exp(1))
    counter = 0
    for i in range(1000000):
        counter = i
        print "Round", i
        fowardResult = n1.forward(x)
        #print "Forward Result:",fowardResult
        loss = (fowardResult-target)*(fowardResult-target)
        print "Loss=", loss
        dLossdvalue = 2*(target-fowardResult)
        grad = n1.backward(dLossdvalue)
        #print "grad=",grad
        n1.adjustWeightAndBias(0.001, grad[0], grad[1])
        if np.sum(np.abs(prevWeight - n1.weight)) < 1e-7:
            break
        prevWeight = n1.weight
        n1.printParam()
        print ""
    n1.printParam()
    return [counter, loss, n1.weight, n1.bias]

# unit test for neuron training, using the Adam training algorithm
def unitTest_AdamOptimize():
    print "In unitTest_AdamOptimize"
    n1 = NeuralNode(2)
    n1.printParam()
    prevWeight = n1.weight

    x = np.ones(2)
    x[0] = 2
    x[1] = 2

    target = 1/(1+np.exp(1))

    counter = 0
    for i in range(1000000):
        #print "Round",i
        counter = i
        fowardResult = n1.forward(x)
        #print "Forward Result:",fowardResult
        loss = (fowardResult-target)*(fowardResult-target)
        #print "Loss=",loss
        dLossdvalue = 2*(target-fowardResult)
        grad = n1.backward(dLossdvalue)
        #print "grad=",grad
        n1.adaOptimization(0.001, grad[0], grad[1])
        if np.sum(np.abs(prevWeight - n1.weight)) < 1e-7:
            break
        prevWeight = n1.weight
        #n1.printParam()
        #print ""

    n1.printParam()
    return [counter, loss, n1.weight, n1.bias]

def trainWithLargerDataSet(sizeOfDataSet):
    iDims = 2
    iterationNumber = 1000
    n1 = NeuralNode(iDims)
    trainDataSet = np.random.randn(sizeOfDataSet, iDims)
    np.savetxt('trainData.csv', trainDataSet, delimiter=',')
    prevWeight = n1.weight
    prevBias = n1.bias

    for iterIdx in range(0, iterationNumber):
        loss = 0
        for sampleIdx in range(0, sizeOfDataSet):
            # print "sampleIdx",sampleIdx
            fowardResult = n1.forward(trainDataSet[sampleIdx])
            # print "Forward Result:",fowardResult
            if (trainDataSet[sampleIdx][0] >= trainDataSet[sampleIdx][1]):
                target = 0
            else:
                target = 1
            loss = loss + (fowardResult - target) * (fowardResult - target)
            # print "Loss=",loss
            dLossdvalue = 2 * (target - fowardResult)
            grad = n1.backward(dLossdvalue)
            # print "grad=",grad
            n1.adjustWeightAndBias(0.03, grad[0], grad[1])
        if iterIdx % 10 == 0:
            print "Loss=", loss, " iterIdx=", iterIdx
            print n1.getParam(), " iterIdx=", iterIdx
        if (iterIdx > 0) and (np.sum(np.abs(prevWeight - n1.weight)) + np.abs(prevBias - n1.bias) < 1e-7):
            break
        prevWeight = n1.weight
        prevBias = n1.bias

    return [iterIdx, loss, n1.weight, n1.bias]


if __name__ == '__main__':
    naiveResultStr = ""
    adamResultStr = ""
    for i in range(1):
        # naiveResult = unitTest_naiveTrain()
        # naiveResultStr = naiveResultStr + str(naiveResult) + "\n"
        # adamResult = unitTest_AdamOptimize()
        # adamResultStr = adamResultStr + str(adamResult) + "\n"
        result = trainWithLargerDataSet(1000)
        print result
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SimpleNeuralNetwork
This is a Python implementation of a neural network, with the following purposes:
1) implement fundamental algorithms used in neural networks
2) understand the limits of each algorithm
--------------------------------------------------------------------------------
/unitTestDoc/神经网络单元测试.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyaobupt/SimpleNeuralNetwork/b7e5cda4efb87edc437eba0dee402a40f775b046/unitTestDoc/神经网络单元测试.xlsx
--------------------------------------------------------------------------------