├── README.md
├── data
│   └── data.csv
└── logicRegression.py

/README.md:
--------------------------------------------------------------------------------
Blog post on the theory behind the logistic regression algorithm (including the full mathematical derivation): http://www.jianshu.com/p/631a3fe4542e
--------------------------------------------------------------------------------
/data/data.csv:
--------------------------------------------------------------------------------
-0.017612,14.053064,0
-1.395634,4.662541,1
-0.752157,6.538620,0
-1.322371,7.152853,0
0.423363,11.054677,0
0.406704,7.067335,1
0.667394,12.741452,0
-2.460150,6.866805,1
0.569411,9.548755,0
-0.026632,10.427743,0
0.850433,6.920334,1
1.347183,13.175500,0
1.176813,3.167020,1
-1.781871,9.097953,0
-0.566606,5.749003,1
0.931635,1.589505,1
-0.024205,6.151823,1
-0.036453,2.690988,1
-0.196949,0.444165,1
1.014459,5.754399,1
1.985298,3.230619,1
-1.693453,-0.557540,1
-0.576525,11.778922,0
-0.346811,-1.678730,1
-2.124484,2.672471,1
1.217916,9.597015,0
-0.733928,9.098687,0
-3.642001,-1.618087,1
0.315985,3.523953,1
1.416614,9.619232,0
-0.386323,3.989286,1
0.556921,8.294984,1
1.224863,11.587360,0
-1.347803,-2.406051,1
1.196604,4.951851,1
0.275221,9.543647,0
0.470575,9.332488,0
-1.889567,9.542662,0
-1.527893,12.150579,0
-1.185247,11.309318,0
-0.445678,3.297303,1
1.042222,6.105155,1
-0.618787,10.320986,0
1.152083,0.548467,1
0.828534,2.676045,1
-1.237728,10.549033,0
-0.683565,-2.166125,1
0.229456,5.921938,1
-0.959885,11.555336,0
0.492911,10.993324,0
0.184992,8.721488,0
-0.355715,10.325976,0
-0.397822,8.058397,0
0.824839,13.730343,0
1.507278,5.027866,1
0.099671,6.835839,1
-0.344008,10.717485,0
1.785928,7.718645,1
-0.918801,11.560217,0
-0.364009,4.747300,1
-0.841722,4.119083,1
0.490426,1.960539,1
-0.007194,9.075792,0
0.356107,12.447863,0
0.342578,12.281162,0
-0.810823,-1.466018,1
2.530777,6.476801,1
1.296683,11.607559,0
0.475487,12.040035,0
-0.783277,11.009725,0
0.074798,11.023650,0
-1.337472,0.468339,1
-0.102781,13.763651,0
-0.147324,2.874846,1
0.518389,9.887035,0
1.015399,7.571882,0
-1.658086,-0.027255,1
1.319944,2.171228,1
2.056216,5.019981,1
-0.851633,4.375691,1
-1.510047,6.061992,0
-1.076637,-3.181888,1
1.821096,10.283990,0
3.010150,8.401766,1
-1.099458,1.688274,1
-0.834872,-1.733869,1
-0.846637,3.849075,1
1.400102,12.628781,0
1.752842,5.468166,1
0.078557,0.059736,1
0.089392,-0.715300,1
1.825662,12.693808,0
0.197445,9.744638,0
0.126117,0.922311,1
-0.679797,1.220530,1
0.677983,2.556666,1
0.761349,10.693862,0
-2.168791,0.143632,1
1.388610,9.341997,0
0.317029,14.739025,0
--------------------------------------------------------------------------------
/logicRegression.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


# Batch gradient ascent: one full-batch weight update per iteration
def logicRegression(x, y, alpha, numIter):
    numSamples, numFeatures = np.shape(x)
    weights = np.ones((numFeatures, 1))
    for i in range(numIter):
        fx = x * weights                            # linear scores, one per sample
        hx = sigmoid(fx)                            # predicted probabilities
        weights = weights + alpha * x.T * (y - hx)  # full-batch gradient step
    return weights
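
# Why the update rule above works: for the logistic model the log-likelihood is
#   l(w) = sum_i [ y_i*log(h_i) + (1 - y_i)*log(1 - h_i) ]  with  h = sigmoid(X*w),
# and its gradient is X^T (y - h), so ascending it with step size alpha gives
#   w <- w + alpha * X^T (y - h),
# which is exactly the update line in logicRegression. Below is a minimal
# single-step sketch on plain NumPy arrays; the inputs X, Y, w here are
# hypothetical examples, not part of the original file:
def _gradientStepSketch():
    X = np.array([[1.0, 0.5, 2.0],
                  [1.0, -1.2, 0.3]])             # two samples, bias column first
    Y = np.array([[0.0], [1.0]])                 # their labels
    w = np.ones((3, 1))                          # initial weights
    return w + 0.01 * X.T @ (Y - sigmoid(X @ w)) # one gradient-ascent update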

# Stochastic gradient ascent: update the weights one sample at a time
def stochLogicRegression(x, y, alpha, numIter):
    numSamples, numFeatures = np.shape(x)
    weights = np.ones((numFeatures, 1))
    for i in range(numIter):
        for j in range(numSamples):
            fx = x[j, :] * weights               # score for sample j only
            hx = sigmoid(fx)
            weights = weights + alpha * x[j, :].T * (y[j, :] - hx)
    return weights


# Compute the model's accuracy on the training data under the learned weights
def accLogicRegression(weights, x, y):
    numSamples, numFeatures = np.shape(x)
    accuracy = 0.0
    for i in range(numSamples):
        predict = sigmoid(x[i, :] * weights)[0, 0] > 0.5  # threshold at 0.5
        if predict == bool(y[i, 0]):
            accuracy += 1
    print('Logistic regression model accuracy: {0}%'.format(accuracy / numSamples * 100))


# Visualize the classification result on the 2-D training data
def showLogicRegression(weights, x, y):
    numSamples, numFeatures = np.shape(x)
    for i in range(numSamples):
        if int(y[i, 0]) == 0:
            plt.plot(x[i, 1], x[i, 2], 'om')  # class 0: magenta circles
        elif int(y[i, 0]) == 1:
            plt.plot(x[i, 1], x[i, 2], 'ob')  # class 1: blue circles
    xa1 = min(x[:, 1])[0, 0]
    xb1 = max(x[:, 1])[0, 0]
    # The decision boundary is w0 + w1*x1 + w2*x2 = 0, i.e. x2 = -(w0 + w1*x1) / w2
    xa2 = - ((weights[0] + weights[1] * xa1) / weights[2]).tolist()[0][0]
    xb2 = - ((weights[0] + weights[1] * xb1) / weights[2]).tolist()[0][0]
    plt.plot([xa1, xb1], [xa2, xb2], '#FB4A42')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


def loadData(src):
    x, y = [], []
    lineArr = []
    with open(src) as fileIn:
        for line in fileIn:
            lineArr.append(line.strip().split(','))
    np.random.shuffle(lineArr)  # randomly shuffle the training set
    for line in lineArr:
        x.append([1.0, float(line[0]), float(line[1])])  # prepend the bias term
        y.append(float(line[2]))
    return np.mat(x), np.mat(y).T


if __name__ == '__main__':
    x, y = loadData('./data/data.csv')
    # weights = logicRegression(x, y, alpha=0.01, numIter=500)
    weights = stochLogicRegression(x, y, alpha=0.01, numIter=200)
    accLogicRegression(weights, x, y)
    # showLogicRegression(weights, x, y)
--------------------------------------------------------------------------------
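
Usage note: running python logicRegression.py trains the stochastic variant on data/data.csv and prints the training accuracy. Below is a minimal sketch of driving the same functions from a separate script; it assumes the file is importable as the module logicRegression from the same directory, which is an assumption about your setup rather than something the repo documents:

    # Hypothetical driver script, kept next to logicRegression.py
    from logicRegression import loadData, logicRegression, accLogicRegression, showLogicRegression

    x, y = loadData('./data/data.csv')
    weights = logicRegression(x, y, alpha=0.01, numIter=500)  # batch variant
    accLogicRegression(weights, x, y)                         # print training accuracy
    showLogicRegression(weights, x, y)                        # plot points and decision boundary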