├── README.md
├── data
│   └── data.csv
└── logicRegression.py

/README.md:
--------------------------------------------------------------------------------
Blog post on the theory behind the logistic regression algorithm (including the full mathematical derivation): http://www.jianshu.com/p/631a3fe4542e
--------------------------------------------------------------------------------
/data/data.csv:
--------------------------------------------------------------------------------
-0.017612,14.053064,0
-1.395634,4.662541,1
-0.752157,6.538620,0
-1.322371,7.152853,0
0.423363,11.054677,0
0.406704,7.067335,1
0.667394,12.741452,0
-2.460150,6.866805,1
0.569411,9.548755,0
-0.026632,10.427743,0
0.850433,6.920334,1
1.347183,13.175500,0
1.176813,3.167020,1
-1.781871,9.097953,0
-0.566606,5.749003,1
0.931635,1.589505,1
-0.024205,6.151823,1
-0.036453,2.690988,1
-0.196949,0.444165,1
1.014459,5.754399,1
1.985298,3.230619,1
-1.693453,-0.557540,1
-0.576525,11.778922,0
-0.346811,-1.678730,1
-2.124484,2.672471,1
1.217916,9.597015,0
-0.733928,9.098687,0
-3.642001,-1.618087,1
0.315985,3.523953,1
1.416614,9.619232,0
-0.386323,3.989286,1
0.556921,8.294984,1
1.224863,11.587360,0
-1.347803,-2.406051,1
1.196604,4.951851,1
0.275221,9.543647,0
0.470575,9.332488,0
-1.889567,9.542662,0
-1.527893,12.150579,0
-1.185247,11.309318,0
-0.445678,3.297303,1
1.042222,6.105155,1
-0.618787,10.320986,0
1.152083,0.548467,1
0.828534,2.676045,1
-1.237728,10.549033,0
-0.683565,-2.166125,1
0.229456,5.921938,1
-0.959885,11.555336,0
0.492911,10.993324,0
0.184992,8.721488,0
-0.355715,10.325976,0
-0.397822,8.058397,0
0.824839,13.730343,0
1.507278,5.027866,1
0.099671,6.835839,1
-0.344008,10.717485,0
1.785928,7.718645,1
-0.918801,11.560217,0
-0.364009,4.747300,1
-0.841722,4.119083,1
0.490426,1.960539,1
-0.007194,9.075792,0
0.356107,12.447863,0
0.342578,12.281162,0
-0.810823,-1.466018,1
2.530777,6.476801,1
1.296683,11.607559,0
0.475487,12.040035,0
-0.783277,11.009725,0
0.074798,11.023650,0
-1.337472,0.468339,1
-0.102781,13.763651,0
-0.147324,2.874846,1
0.518389,9.887035,0
1.015399,7.571882,0
-1.658086,-0.027255,1
1.319944,2.171228,1
2.056216,5.019981,1
-0.851633,4.375691,1
-1.510047,6.061992,0
-1.076637,-3.181888,1
1.821096,10.283990,0
3.010150,8.401766,1
-1.099458,1.688274,1
-0.834872,-1.733869,1
-0.846637,3.849075,1
1.400102,12.628781,0
1.752842,5.468166,1
0.078557,0.059736,1
0.089392,-0.715300,1
1.825662,12.693808,0
0.197445,9.744638,0
0.126117,0.922311,1
-0.679797,1.220530,1
0.677983,2.556666,1
0.761349,10.693862,0
-2.168791,0.143632,1
1.388610,9.341997,0
0.317029,14.739025,0
--------------------------------------------------------------------------------
/logicRegression.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


# Batch gradient ascent: one full-batch weight update per iteration
def logicRegression(x, y, alpha, numIter):
    numSamples, numFeatures = np.shape(x)
    weights = np.ones((numFeatures, 1))
    for i in range(numIter):
        fx = x * weights                            # linear scores, one per sample
        hx = sigmoid(fx)                            # predicted probabilities
        weights = weights + alpha * x.T * (y - hx)  # full-batch gradient step
    return weights
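
# Why the update rule above works: for the logistic model the log-likelihood is
#   l(w) = sum_i [ y_i*log(h_i) + (1 - y_i)*log(1 - h_i) ]  with  h = sigmoid(X*w),
# and its gradient is X^T (y - h), so ascending it with step size alpha gives
#   w <- w + alpha * X^T (y - h),
# which is exactly the update line in logicRegression. Below is a minimal
# single-step sketch on plain NumPy arrays; the inputs X, Y, w here are
# hypothetical examples, not part of the original file:
def _gradientStepSketch():
    X = np.array([[1.0, 0.5, 2.0],
                  [1.0, -1.2, 0.3]])             # two samples, bias column first
    Y = np.array([[0.0], [1.0]])                 # their labels
    w = np.ones((3, 1))                          # initial weights
    return w + 0.01 * X.T @ (Y - sigmoid(X @ w)) # one gradient-ascent update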

# Stochastic gradient ascent: update the weights one sample at a time
def stochLogicRegression(x, y, alpha, numIter):
    numSamples, numFeatures = np.shape(x)
    weights = np.ones((numFeatures, 1))
    for i in range(numIter):
        for j in range(numSamples):
            fx = x[j, :] * weights               # score for sample j only
            hx = sigmoid(fx)
            weights = weights + alpha * x[j, :].T * (y[j, :] - hx)
    return weights


# Compute the model's accuracy on the training data under the learned weights
def accLogicRegression(weights, x, y):
    numSamples, numFeatures = np.shape(x)
    accuracy = 0.0
    for i in range(numSamples):
        predict = sigmoid(x[i, :] * weights)[0, 0] > 0.5  # threshold at 0.5
        if predict == bool(y[i, 0]):
            accuracy += 1
    print('Logistic regression model accuracy: {0}%'.format(accuracy / numSamples * 100))


# Visualize the classification result on the 2-D training data
def showLogicRegression(weights, x, y):
    numSamples, numFeatures = np.shape(x)
    for i in range(numSamples):
        if int(y[i, 0]) == 0:
            plt.plot(x[i, 1], x[i, 2], 'om')  # class 0: magenta circles
        elif int(y[i, 0]) == 1:
            plt.plot(x[i, 1], x[i, 2], 'ob')  # class 1: blue circles
    xa1 = min(x[:, 1])[0, 0]
    xb1 = max(x[:, 1])[0, 0]
    # The decision boundary is w0 + w1*x1 + w2*x2 = 0, i.e. x2 = -(w0 + w1*x1) / w2
    xa2 = - ((weights[0] + weights[1] * xa1) / weights[2]).tolist()[0][0]
    xb2 = - ((weights[0] + weights[1] * xb1) / weights[2]).tolist()[0][0]
    plt.plot([xa1, xb1], [xa2, xb2], '#FB4A42')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


def loadData(src):
    x, y = [], []
    lineArr = []
    with open(src) as fileIn:
        for line in fileIn:
            lineArr.append(line.strip().split(','))
    np.random.shuffle(lineArr)  # randomly shuffle the training set
    for line in lineArr:
        x.append([1.0, float(line[0]), float(line[1])])  # prepend the bias term
        y.append(float(line[2]))
    return np.mat(x), np.mat(y).T


if __name__ == '__main__':
    x, y = loadData('./data/data.csv')
    # weights = logicRegression(x, y, alpha=0.01, numIter=500)
    weights = stochLogicRegression(x, y, alpha=0.01, numIter=200)
    accLogicRegression(weights, x, y)
    # showLogicRegression(weights, x, y)
--------------------------------------------------------------------------------
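
Usage note: running python logicRegression.py trains the stochastic variant on data/data.csv and prints the training accuracy. Below is a minimal sketch of driving the same functions from a separate script; it assumes the file is importable as the module logicRegression from the same directory, which is an assumption about your setup rather than something the repo documents:

    # Hypothetical driver script, kept next to logicRegression.py
    from logicRegression import loadData, logicRegression, accLogicRegression, showLogicRegression

    x, y = loadData('./data/data.csv')
    weights = logicRegression(x, y, alpha=0.01, numIter=500)  # batch variant
    accLogicRegression(weights, x, y)                         # print training accuracy
    showLogicRegression(weights, x, y)                        # plot points and decision boundary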