├── README.md
├── wine.data
└── DimensionalityReduction.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # DimensionalityReduction
2 | 使用PCA和LDA进行数据降维
3 | # 使用PCA对数据进行降维,我们使用两种方式:
4 | * 直接按数学推导的方式实现PCA
5 | * 使用sklearn实现PCA
6 | * 利用降维后的特征进行逻辑回归分类
7 | # 使用LDA对数据进行降维,我们使用两种方式:
8 | * 直接按数学推导过程实现LDA
9 | * 使用sklearn实现LDA
10 | * 利用降维后的特征进行逻辑回归分类
11 |
12 |
--------------------------------------------------------------------------------
/wine.data:
--------------------------------------------------------------------------------
1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
179 |
--------------------------------------------------------------------------------
/DimensionalityReduction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": []
7 | },
8 | {
9 | "cell_type": "code",
10 | "execution_count": 1,
11 | "metadata": {},
12 | "outputs": [
13 | {
14 | "name": "stderr",
15 | "output_type": "stream",
16 | "text": [
17 | "/Users/yaoxiaoying/.py3virtualEnv/ai/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192\n",
18 | " return f(*args, **kwds)\n"
19 | ]
20 | }
21 | ],
22 | "source": [
23 | "import numpy as np\n",
24 | "import pandas as pd\n",
25 | "import matplotlib.pyplot as plt\n",
26 | "from sklearn.preprocessing import StandardScaler\n",
27 | "from sklearn.model_selection import train_test_split\n",
28 | "from sklearn.decomposition import PCA\n",
29 | "from matplotlib.colors import ListedColormap\n",
30 | "from sklearn.linear_model import LogisticRegression"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "# 数据获取"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 6,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "# 读取数据()\n",
47 | "df_wine = pd.read_csv('wine.data',header=None)"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "该数据集是UCI的公开数据集,是对意大利同一地区种植的葡萄酒进行分析的结果。数据集共14列:第一列是类标识符,用1/2/3表示,代表葡萄酒的三个分类;其余13列是特征,依次为酒精、苹果酸、灰分、灰分的碱度、镁、总酚、黄酮类化合物、非黄烷类酚类、原花色素、颜色强度、色调、稀释葡萄酒的OD280/OD315值、脯氨酸。"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 7,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "# 设置列索引\n",
64 | "df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',\n",
65 | " 'Alcalinity of ash', 'Magnesium', 'Total phenols',\n",
66 | " 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',\n",
67 | " 'Color intensity', 'Hue',\n",
68 | " 'OD280/OD315 of diluted wines', 'Proline']"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 10,
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": [
79 | "(178, 14)"
80 | ]
81 | },
82 | "execution_count": 10,
83 | "metadata": {},
84 | "output_type": "execute_result"
85 | }
86 | ],
87 | "source": [
88 | "# 数据维度\n",
89 | "df_wine.shape"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 11,
95 | "metadata": {},
96 | "outputs": [
97 | {
98 | "data": {
99 | "text/plain": [
100 | "2 71\n",
101 | "1 59\n",
102 | "3 48\n",
103 | "Name: Class label, dtype: int64"
104 | ]
105 | },
106 | "execution_count": 11,
107 | "metadata": {},
108 | "output_type": "execute_result"
109 | }
110 | ],
111 | "source": [
112 | "# 每一类数据包含的样本个数\n",
113 | "df_wine['Class label'].value_counts()"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 12,
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "data": {
123 | "text/html": [
124 | "<div>\n",
125 | "<style scoped>\n",
126 | "    .dataframe tbody tr th:only-of-type {\n",
127 | "        vertical-align: middle;\n",
128 | "    }\n",
129 | "\n",
130 | "    .dataframe tbody tr th {\n",
131 | "        vertical-align: top;\n",
132 | "    }\n",
133 | "\n",
134 | "    .dataframe thead th {\n",
135 | "        text-align: right;\n",
136 | "    }\n",
137 | "</style>\n",
138 | "<table border=\"1\" class=\"dataframe\">\n",
139 | "  <thead>\n",
140 | "    <tr style=\"text-align: right;\">\n",
141 | "      <th></th>\n",
142 | "      <th>Class label</th>\n",
143 | "      <th>Alcohol</th>\n",
144 | "      <th>Malic acid</th>\n",
145 | "      <th>Ash</th>\n",
146 | "      <th>Alcalinity of ash</th>\n",
147 | "      <th>Magnesium</th>\n",
148 | "      <th>Total phenols</th>\n",
149 | "      <th>Flavanoids</th>\n",
150 | "      <th>Nonflavanoid phenols</th>\n",
151 | "      <th>Proanthocyanins</th>\n",
152 | "      <th>Color intensity</th>\n",
153 | "      <th>Hue</th>\n",
154 | "      <th>OD280/OD315 of diluted wines</th>\n",
155 | "      <th>Proline</th>\n",
156 | "    </tr>\n",
157 | "  </thead>\n",
158 | "  <tbody>\n",
159 | "    <tr>\n",
160 | "      <th>0</th>\n",
161 | "      <td>1</td>\n",
162 | "      <td>14.23</td>\n",
163 | "      <td>1.71</td>\n",
164 | "      <td>2.43</td>\n",
165 | "      <td>15.6</td>\n",
166 | "      <td>127</td>\n",
167 | "      <td>2.80</td>\n",
168 | "      <td>3.06</td>\n",
169 | "      <td>0.28</td>\n",
170 | "      <td>2.29</td>\n",
171 | "      <td>5.64</td>\n",
172 | "      <td>1.04</td>\n",
173 | "      <td>3.92</td>\n",
174 | "      <td>1065</td>\n",
175 | "    </tr>\n",
176 | "    <tr>\n",
177 | "      <th>1</th>\n",
178 | "      <td>1</td>\n",
179 | "      <td>13.20</td>\n",
180 | "      <td>1.78</td>\n",
181 | "      <td>2.14</td>\n",
182 | "      <td>11.2</td>\n",
183 | "      <td>100</td>\n",
184 | "      <td>2.65</td>\n",
185 | "      <td>2.76</td>\n",
186 | "      <td>0.26</td>\n",
187 | "      <td>1.28</td>\n",
188 | "      <td>4.38</td>\n",
189 | "      <td>1.05</td>\n",
190 | "      <td>3.40</td>\n",
191 | "      <td>1050</td>\n",
192 | "    </tr>\n",
193 | "    <tr>\n",
194 | "      <th>2</th>\n",
195 | "      <td>1</td>\n",
196 | "      <td>13.16</td>\n",
197 | "      <td>2.36</td>\n",
198 | "      <td>2.67</td>\n",
199 | "      <td>18.6</td>\n",
200 | "      <td>101</td>\n",
201 | "      <td>2.80</td>\n",
202 | "      <td>3.24</td>\n",
203 | "      <td>0.30</td>\n",
204 | "      <td>2.81</td>\n",
205 | "      <td>5.68</td>\n",
206 | "      <td>1.03</td>\n",
207 | "      <td>3.17</td>\n",
208 | "      <td>1185</td>\n",
209 | "    </tr>\n",
210 | "    <tr>\n",
211 | "      <th>3</th>\n",
212 | "      <td>1</td>\n",
213 | "      <td>14.37</td>\n",
214 | "      <td>1.95</td>\n",
215 | "      <td>2.50</td>\n",
216 | "      <td>16.8</td>\n",
217 | "      <td>113</td>\n",
218 | "      <td>3.85</td>\n",
219 | "      <td>3.49</td>\n",
220 | "      <td>0.24</td>\n",
221 | "      <td>2.18</td>\n",
222 | "      <td>7.80</td>\n",
223 | "      <td>0.86</td>\n",
224 | "      <td>3.45</td>\n",
225 | "      <td>1480</td>\n",
226 | "    </tr>\n",
227 | "    <tr>\n",
228 | "      <th>4</th>\n",
229 | "      <td>1</td>\n",
230 | "      <td>13.24</td>\n",
231 | "      <td>2.59</td>\n",
232 | "      <td>2.87</td>\n",
233 | "      <td>21.0</td>\n",
234 | "      <td>118</td>\n",
235 | "      <td>2.80</td>\n",
236 | "      <td>2.69</td>\n",
237 | "      <td>0.39</td>\n",
238 | "      <td>1.82</td>\n",
239 | "      <td>4.32</td>\n",
240 | "      <td>1.04</td>\n",
241 | "      <td>2.93</td>\n",
242 | "      <td>735</td>\n",
243 | "    </tr>\n",
244 | "  </tbody>\n",
245 | "</table>\n",
246 | "</div>"
247 | ],
248 | "text/plain": [
249 | " Class label Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n",
250 | "0 1 14.23 1.71 2.43 15.6 127 \n",
251 | "1 1 13.20 1.78 2.14 11.2 100 \n",
252 | "2 1 13.16 2.36 2.67 18.6 101 \n",
253 | "3 1 14.37 1.95 2.50 16.8 113 \n",
254 | "4 1 13.24 2.59 2.87 21.0 118 \n",
255 | "\n",
256 | " Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins \\\n",
257 | "0 2.80 3.06 0.28 2.29 \n",
258 | "1 2.65 2.76 0.26 1.28 \n",
259 | "2 2.80 3.24 0.30 2.81 \n",
260 | "3 3.85 3.49 0.24 2.18 \n",
261 | "4 2.80 2.69 0.39 1.82 \n",
262 | "\n",
263 | " Color intensity Hue OD280/OD315 of diluted wines Proline \n",
264 | "0 5.64 1.04 3.92 1065 \n",
265 | "1 4.38 1.05 3.40 1050 \n",
266 | "2 5.68 1.03 3.17 1185 \n",
267 | "3 7.80 0.86 3.45 1480 \n",
268 | "4 4.32 1.04 2.93 735 "
269 | ]
270 | },
271 | "execution_count": 12,
272 | "metadata": {},
273 | "output_type": "execute_result"
274 | }
275 | ],
276 | "source": [
277 | "df_wine.head()"
278 | ]
279 | },
280 | {
281 | "cell_type": "markdown",
282 | "metadata": {},
283 | "source": [
284 | "# 数据集划分"
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": 13,
290 | "metadata": {},
291 | "outputs": [],
292 | "source": [
293 | "# 数据集设置:X为样本特征数据,y为目标数据,即标注结果\n",
294 | "X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": 14,
300 | "metadata": {},
301 | "outputs": [],
302 | "source": [
303 | "# 数据集划分: 将数据集划分为训练集和测试集数据(测试集数据为30%,训练集为70%)\n",
304 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,\n",
305 | " stratify=y,\n",
306 | " random_state=0)"
307 | ]
308 | },
309 | {
310 | "cell_type": "markdown",
311 | "metadata": {},
312 | "source": [
313 | "# 数据标准化"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 15,
319 | "metadata": {},
320 | "outputs": [],
321 | "source": [
322 | "# 实例化\n",
323 | "sc = StandardScaler()"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 16,
329 | "metadata": {},
330 | "outputs": [],
331 | "source": [
332 | "# 对数据集进行标准化(一般情况下我们在训练集中进行均值和方差的计算,直接在测试集中使用)\n",
333 | "X_train_std = sc.fit_transform(X_train)\n",
334 | "X_test_std = sc.transform(X_test)"
335 | ]
336 | },
337 | {
338 | "cell_type": "markdown",
339 | "metadata": {},
340 | "source": [
341 | "# PCA(Principal Component Analysis)"
342 | ]
343 | },
344 | {
345 | "cell_type": "markdown",
346 | "metadata": {},
347 | "source": [
348 | "## PCA实现"
349 | ]
350 | },
351 | {
352 | "cell_type": "markdown",
353 | "metadata": {},
354 | "source": [
355 | "### 特征值计算"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 18,
361 | "metadata": {},
362 | "outputs": [],
363 | "source": [
364 | "# 计算协方差矩阵\n",
365 | "cov_mat = np.cov(X_train_std.T)"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 19,
371 | "metadata": {},
372 | "outputs": [],
373 | "source": [
374 | "# 对协方差矩阵进行特征值分解\n",
375 | "eigen_vals, eigen_vecs = np.linalg.eig(cov_mat)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": 20,
381 | "metadata": {},
382 | "outputs": [
383 | {
384 | "data": {
385 | "text/plain": [
386 | "array([4.84274532, 2.41602459, 1.54845825, 0.96120438, 0.84166161,\n",
387 | " 0.6620634 , 0.51828472, 0.34650377, 0.3131368 , 0.10754642,\n",
388 | " 0.21357215, 0.15362835, 0.1808613 ])"
389 | ]
390 | },
391 | "execution_count": 20,
392 | "metadata": {},
393 | "output_type": "execute_result"
394 | }
395 | ],
396 | "source": [
397 | "# 特征值\n",
398 | "eigen_vals"
399 | ]
400 | },
401 | {
402 | "cell_type": "markdown",
403 | "metadata": {},
404 | "source": [
405 | "### 特征值分布"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 21,
411 | "metadata": {},
412 | "outputs": [],
413 | "source": [
414 | "# 特征值之和\n",
415 | "tot = sum(eigen_vals)"
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": 23,
421 | "metadata": {},
422 | "outputs": [],
423 | "source": [
424 | "# 对特征进行排序,并计算所占的比例\n",
425 | "var_exp = [(i / tot) for i in sorted(eigen_vals, reverse=True)]"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "execution_count": 24,
431 | "metadata": {},
432 | "outputs": [],
433 | "source": [
434 | "# 累计求和\n",
435 | "cum_var_exp = np.cumsum(var_exp)"
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": 25,
441 | "metadata": {},
442 | "outputs": [
443 | {
444 | "data": {
445 | "image/png": "\n",
446 | "text/plain": [
447 | ""
448 | ]
449 | },
450 | "metadata": {
451 | "needs_background": "light"
452 | },
453 | "output_type": "display_data"
454 | }
455 | ],
456 | "source": [
457 | "# 绘制图像\n",
458 | "plt.figure()\n",
459 | "plt.bar(range(1, 14), var_exp, alpha=0.5, align='center',\n",
460 | " label='特征值分布')\n",
461 | "plt.step(range(1, 14), cum_var_exp, where='mid',\n",
462 | " label='累计特征值')\n",
463 | "plt.ylabel('特征值比例')\n",
464 | "plt.xlabel('特征index')\n",
465 | "plt.legend(loc='best')"
466 | ]
467 | },
468 | {
469 | "cell_type": "markdown",
470 | "metadata": {},
471 | "source": [
472 | "### 特征降维"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": 26,
478 | "metadata": {},
479 | "outputs": [],
480 | "source": [
481 | "# 创建列表,由(eigenvalue, eigenvector)元组构成\n",
482 | "eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:, i])\n",
483 | " for i in range(len(eigen_vals))]"
484 | ]
485 | },
486 | {
487 | "cell_type": "code",
488 | "execution_count": 27,
489 | "metadata": {},
490 | "outputs": [],
491 | "source": [
492 | "# 按特征值从大到小对列表(eigenvalue, eigenvector)排序\n",
493 | "eigen_pairs.sort(key=lambda k: k[0], reverse=True)"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": 28,
499 | "metadata": {},
500 | "outputs": [
501 | {
502 | "data": {
503 | "text/plain": [
504 | "[(4.842745315655898,\n",
505 | " array([-0.13724218, 0.24724326, -0.02545159, 0.20694508, -0.15436582,\n",
506 | " -0.39376952, -0.41735106, 0.30572896, -0.30668347, 0.07554066,\n",
507 | " -0.32613263, -0.36861022, -0.29669651])),\n",
508 | " (2.4160245870352255,\n",
509 | " array([ 0.50303478, 0.16487119, 0.24456476, -0.11352904, 0.28974518,\n",
510 | " 0.05080104, -0.02287338, 0.09048885, 0.00835233, 0.54977581,\n",
511 | " -0.20716433, -0.24902536, 0.38022942])),\n",
512 | " (1.5484582488203513,\n",
513 | " array([-0.13774873, 0.09615039, 0.67777567, 0.62504055, 0.19613548,\n",
514 | " 0.14031057, 0.11705386, 0.13121778, 0.0304309 , -0.07992997,\n",
515 | " 0.05305915, 0.13239103, -0.07065022])),\n",
516 | " (0.9612043774977367,\n",
517 | " array([-0.0032961 , 0.56264669, -0.10897711, 0.0338187 , -0.36751107,\n",
518 | " 0.24024513, 0.1870533 , -0.02292622, 0.49626233, 0.10648294,\n",
519 | " -0.36905375, 0.14201609, -0.16768217])),\n",
520 | " (0.8416616104578422,\n",
521 | " array([-0.29062523, 0.08953787, -0.16083499, 0.05158734, 0.67648707,\n",
522 | " -0.11851114, -0.10710035, -0.50758161, 0.20163462, 0.00573607,\n",
523 | " -0.27691422, -0.06662756, -0.12802904])),\n",
524 | " (0.6620634040383039,\n",
525 | " array([ 2.99096847e-01, 6.27036396e-01, 3.89128239e-04, -4.05836452e-02,\n",
526 | " 6.57772614e-02, -5.89776247e-02, -3.01103180e-02, -2.71728086e-01,\n",
527 | " -4.39997519e-01, -4.11743459e-01, 1.41673377e-01, 1.75842384e-01,\n",
528 | " 1.38018388e-01])),\n",
529 | " (0.5182847213561953,\n",
530 | " array([ 0.07905293, -0.27400201, 0.13232805, 0.2239991 , -0.40526897,\n",
531 | " -0.03474194, 0.04178357, -0.63114569, -0.32312277, 0.26908262,\n",
532 | " -0.30264066, 0.13054014, 0.00081134])),\n",
533 | " (0.34650376641286657,\n",
534 | " array([-0.36817641, -0.01257758, 0.17757818, -0.44059211, 0.1166175 ,\n",
535 | " 0.35019213, 0.21871818, 0.19712942, -0.43305587, -0.06684118,\n",
536 | " -0.45976229, 0.11082755, 0.00560817])),\n",
537 | " (0.3131368004720887,\n",
538 | " array([-0.39837702, 0.11045823, 0.38249686, -0.24337385, -0.25898236,\n",
539 | " -0.34231286, -0.03612316, -0.17143688, 0.24437021, -0.15551492,\n",
540 | " 0.02119612, -0.23808956, 0.51727846])),\n",
541 | " (0.2135721466052733,\n",
542 | " array([ 0.37463888, -0.1374056 , 0.46158303, -0.41895399, 0.01004706,\n",
543 | " -0.22125424, -0.04175136, -0.08875695, 0.19992186, -0.22166887,\n",
544 | " -0.09846946, 0.01912058, -0.54253207])),\n",
545 | " (0.18086130479496634,\n",
546 | " array([ 0.26283426, -0.26676921, -0.11554255, 0.19948341, 0.02890188,\n",
547 | " -0.06638686, -0.21334908, 0.18639128, 0.16808299, -0.46636903,\n",
548 | " -0.53248388, 0.23783528, 0.36776336])),\n",
549 | " (0.15362835006711043,\n",
550 | " array([-0.12783451, 0.08064016, 0.01679249, -0.11084566, 0.07938796,\n",
551 | " -0.49145931, -0.0503074 , 0.17532803, -0.00367596, 0.35975654,\n",
552 | " 0.04046698, 0.74222954, 0.03873952])),\n",
553 | " (0.10754642369670996,\n",
554 | " array([-0.09448698, 0.02636524, 0.14274751, -0.13048578, -0.06760808,\n",
555 | " 0.45991766, -0.81458395, -0.09574809, 0.06724689, 0.08733362,\n",
556 | " 0.12906113, 0.18764627, 0.01211126]))]"
557 | ]
558 | },
559 | "execution_count": 28,
560 | "metadata": {},
561 | "output_type": "execute_result"
562 | }
563 | ],
564 | "source": [
565 | "# 特征值与特征向量\n",
566 | "eigen_pairs"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": 29,
572 | "metadata": {},
573 | "outputs": [],
574 | "source": [
575 | "# 取前两个特征值对应的特征向量作为主要成分\n",
576 | "w = np.hstack((eigen_pairs[0][1][:, np.newaxis],\n",
577 | " eigen_pairs[1][1][:, np.newaxis]))"
578 | ]
579 | },
580 | {
581 | "cell_type": "code",
582 | "execution_count": 30,
583 | "metadata": {},
584 | "outputs": [
585 | {
586 | "data": {
587 | "text/plain": [
588 | "array([[-0.13724218, 0.50303478],\n",
589 | " [ 0.24724326, 0.16487119],\n",
590 | " [-0.02545159, 0.24456476],\n",
591 | " [ 0.20694508, -0.11352904],\n",
592 | " [-0.15436582, 0.28974518],\n",
593 | " [-0.39376952, 0.05080104],\n",
594 | " [-0.41735106, -0.02287338],\n",
595 | " [ 0.30572896, 0.09048885],\n",
596 | " [-0.30668347, 0.00835233],\n",
597 | " [ 0.07554066, 0.54977581],\n",
598 | " [-0.32613263, -0.20716433],\n",
599 | " [-0.36861022, -0.24902536],\n",
600 | " [-0.29669651, 0.38022942]])"
601 | ]
602 | },
603 | "execution_count": 30,
604 | "metadata": {},
605 | "output_type": "execute_result"
606 | }
607 | ],
608 | "source": [
609 | "w"
610 | ]
611 | },
612 | {
613 | "cell_type": "code",
614 | "execution_count": 31,
615 | "metadata": {},
616 | "outputs": [
617 | {
618 | "data": {
619 | "text/plain": [
620 | "array([ 0.71225893, 2.22048673, -0.13025864, 0.05962872, -0.50432733,\n",
621 | " -0.52831584, -1.24000033, 0.84118003, -1.05215112, -0.29218864,\n",
622 | " -0.20017028, -0.82164144, -0.62946362])"
623 | ]
624 | },
625 | "execution_count": 31,
626 | "metadata": {},
627 | "output_type": "execute_result"
628 | }
629 | ],
630 | "source": [
631 | "# 原始特征(以第一个样本为例)\n",
632 | "X_train_std[0]"
633 | ]
634 | },
635 | {
636 | "cell_type": "code",
637 | "execution_count": 32,
638 | "metadata": {},
639 | "outputs": [
640 | {
641 | "data": {
642 | "text/plain": [
643 | "array([2.38299011, 0.45458499])"
644 | ]
645 | },
646 | "execution_count": 32,
647 | "metadata": {},
648 | "output_type": "execute_result"
649 | }
650 | ],
651 | "source": [
652 | "# 特征压缩后结果\n",
653 | "X_train_std[0].dot(w)"
654 | ]
655 | },
656 | {
657 | "cell_type": "code",
658 | "execution_count": 33,
659 | "metadata": {},
660 | "outputs": [],
661 | "source": [
662 | "# 全部特征压缩\n",
663 | "X_train_pca = X_train_std.dot(w)"
664 | ]
665 | },
666 | {
667 | "cell_type": "code",
668 | "execution_count": 34,
669 | "metadata": {},
670 | "outputs": [
671 | {
672 | "data": {
673 | "image/png": "\n",
674 | "text/plain": [
675 | ""
676 | ]
677 | },
678 | "metadata": {
679 | "needs_background": "light"
680 | },
681 | "output_type": "display_data"
682 | }
683 | ],
684 | "source": [
685 | "# 特征压缩后结果展示\n",
686 | "colors = ['r', 'b', 'g']\n",
687 | "markers = ['s', 'x', 'o']\n",
688 | "\n",
689 | "for l, c, m in zip(np.unique(y_train), colors, markers):\n",
690 | " # 按照样本的真实值进行展示\n",
691 | " plt.scatter(X_train_pca[y_train == l, 0], \n",
692 | " X_train_pca[y_train == l, 1], \n",
693 | " c=c, label=l, marker=m)\n",
694 | "\n",
695 | "plt.xlabel('PC 1')\n",
696 | "plt.ylabel('PC 2')\n",
697 | "plt.legend(loc='lower left')\n",
698 | "plt.tight_layout()\n",
699 | "plt.show()"
700 | ]
701 | },
702 | {
703 | "cell_type": "markdown",
704 | "metadata": {},
705 | "source": [
706 | "## 使用sklearn实现PCA"
707 | ]
708 | },
709 | {
710 | "cell_type": "markdown",
711 | "metadata": {},
712 | "source": [
713 | "sklearn中提供了进行PCA的API"
714 | ]
715 | },
716 | {
717 | "cell_type": "markdown",
718 | "metadata": {},
719 | "source": [
720 | "### 特征值计算"
721 | ]
722 | },
723 | {
724 | "cell_type": "code",
725 | "execution_count": 35,
726 | "metadata": {},
727 | "outputs": [],
728 | "source": [
729 | "# 实例化pca,保留所有特征\n",
730 | "pca = PCA()"
731 | ]
732 | },
733 | {
734 | "cell_type": "code",
735 | "execution_count": 36,
736 | "metadata": {},
737 | "outputs": [
738 | {
739 | "data": {
740 | "text/plain": [
741 | "array([0.36951469, 0.18434927, 0.11815159, 0.07334252, 0.06422108,\n",
742 | " 0.05051724, 0.03954654, 0.02643918, 0.02389319, 0.01629614,\n",
743 | " 0.01380021, 0.01172226, 0.00820609])"
744 | ]
745 | },
746 | "execution_count": 36,
747 | "metadata": {},
748 | "output_type": "execute_result"
749 | }
750 | ],
751 | "source": [
752 | "# 特征提取\n",
753 | "X_train_pca = pca.fit_transform(X_train_std)\n",
754 | "# 特征值结果\n",
755 | "pca.explained_variance_ratio_"
756 | ]
757 | },
758 | {
759 | "cell_type": "code",
760 | "execution_count": 39,
761 | "metadata": {},
762 | "outputs": [
763 | {
764 | "data": {
765 | "text/plain": [
766 | ""
767 | ]
768 | },
769 | "execution_count": 39,
770 | "metadata": {},
771 | "output_type": "execute_result"
772 | },
773 | {
774 | "data": {
775 | "image/png": "\n",
776 | "text/plain": [
777 | ""
778 | ]
779 | },
780 | "metadata": {
781 | "needs_background": "light"
782 | },
783 | "output_type": "display_data"
784 | }
785 | ],
786 | "source": [
787 | "# Bars: explained-variance ratio per component; steps: its running total\n",
788 | "pc_index = range(1, len(pca.explained_variance_ratio_) + 1)  # 1-based, not a fixed 13\n",
789 | "plt.figure()\n",
790 | "plt.bar(pc_index, pca.explained_variance_ratio_, alpha=0.5, align='center',\n",
791 | "        label='特征值分布')\n",
792 | "plt.step(pc_index, np.cumsum(pca.explained_variance_ratio_), where='mid',\n",
793 | "         label='累计特征值')\n",
794 | "plt.ylabel('特征值比例')\n",
795 | "plt.xlabel('特征index')\n",
796 | "plt.legend(loc='best')"
797 | ]
798 | },
799 | {
800 | "cell_type": "markdown",
801 | "metadata": {},
802 | "source": [
803 | "### 特征降维"
804 | ]
805 | },
806 | {
807 | "cell_type": "code",
808 | "execution_count": 40,
809 | "metadata": {},
810 | "outputs": [],
811 | "source": [
812 | "# 压缩到二维特征\n",
813 | "pca = PCA(n_components=2)"
814 | ]
815 | },
816 | {
817 | "cell_type": "code",
818 | "execution_count": 41,
819 | "metadata": {},
820 | "outputs": [],
821 | "source": [
822 | "# 对训练数据进行处理\n",
823 | "X_train_pca = pca.fit_transform(X_train_std)"
824 | ]
825 | },
826 | {
827 | "cell_type": "code",
828 | "execution_count": 42,
829 | "metadata": {},
830 | "outputs": [
831 | {
832 | "name": "stdout",
833 | "output_type": "stream",
834 | "text": [
835 | "[0.36951469 0.18434927]\n"
836 | ]
837 | }
838 | ],
839 | "source": [
840 | "# 特征值结果(只保留两个特征)\n",
841 | "print(pca.explained_variance_ratio_)"
842 | ]
843 | },
844 | {
845 | "cell_type": "code",
846 | "execution_count": 43,
847 | "metadata": {},
848 | "outputs": [],
849 | "source": [
850 | "# 对测试集数据进行处理\n",
851 | "X_test_pca = pca.transform(X_test_std)"
852 | ]
853 | },
854 | {
855 | "cell_type": "code",
856 | "execution_count": 44,
857 | "metadata": {},
858 | "outputs": [
859 | {
860 | "data": {
861 | "image/png": "\n",
862 | "text/plain": [
863 | ""
864 | ]
865 | },
866 | "metadata": {
867 | "needs_background": "light"
868 | },
869 | "output_type": "display_data"
870 | }
871 | ],
872 | "source": [
873 | "# Scatter the 2-D PCA projection of the training set, one style per class\n",
874 | "colors = ['r', 'b', 'g']\n",
875 | "markers = ['s', 'x', 'o']\n",
876 | "\n",
877 | "for cls, color, marker in zip(np.unique(y_train), colors, markers):\n",
878 | "    # Boolean mask selecting the rows whose true label is cls\n",
879 | "    mask = y_train == cls\n",
880 | "    plt.scatter(X_train_pca[mask, 0], X_train_pca[mask, 1],\n",
881 | "                c=color, label=cls, marker=marker)\n",
882 | "\n",
883 | "plt.xlabel('PC 1')\n",
884 | "plt.ylabel('PC 2')\n",
885 | "plt.legend(loc='lower left')\n",
886 | "plt.tight_layout()\n",
887 | "plt.show()"
888 | ]
889 | },
890 | {
891 | "cell_type": "markdown",
892 | "metadata": {},
893 | "source": [
894 | "## 利用逻辑回归进行分类"
895 | ]
896 | },
897 | {
898 | "cell_type": "markdown",
899 | "metadata": {},
900 | "source": [
901 | "### 绘制函数"
902 | ]
903 | },
904 | {
905 | "cell_type": "code",
906 | "execution_count": 48,
907 | "metadata": {},
908 | "outputs": [],
909 | "source": [
910 | "# Plot a classifier's decision regions together with the labelled samples\n",
911 | "def plot_decision_regions(X, y, classifier, resolution=0.02):\n",
912 | "    \"\"\"Draw the decision regions of a fitted two-feature classifier.\n",
913 | "    X: samples; columns 0 and 1 are used as the plot axes\n",
914 | "    y: class label per row of X, used to colour and mark the points\n",
915 | "    classifier: object whose predict() accepts an (n, 2) array\n",
916 | "    resolution: step of the grid on which predictions are sampled\"\"\"\n",
917 | "    # Marker/colour palette; the colormap is cut to the number of classes\n",
918 | "    markers = ('s', 'x', 'o', '^', 'v')\n",
919 | "    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')\n",
920 | "    classes = np.unique(y)\n",
921 | "    cmap = ListedColormap(colors[:len(classes)])\n",
922 | "    # Grid covering the samples with a margin of 1 on every side\n",
923 | "    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
924 | "    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
925 | "    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),\n",
926 | "                           np.arange(x2_min, x2_max, resolution))\n",
927 | "    # Predict a class for every grid point, then restore the grid shape\n",
928 | "    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)\n",
929 | "    Z = Z.reshape(xx1.shape)\n",
930 | "    # Filled contours of the predicted class form the decision regions\n",
931 | "    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)\n",
932 | "    plt.xlim(xx1.min(), xx1.max())\n",
933 | "    plt.ylim(xx2.min(), xx2.max())\n",
934 | "\n",
935 | "    # Overlay the samples, coloured and marked by their true class\n",
936 | "    for idx, cl in enumerate(classes):\n",
937 | "        # c is a one-row 2-D sequence so the RGBA tuple is not mistaken for values to colormap\n",
938 | "        plt.scatter(x=X[y == cl, 0], \n",
939 | "                    y=X[y == cl, 1],\n",
940 | "                    alpha=0.6, \n",
941 | "                    c=[cmap(idx)],\n",
942 | "                    edgecolor='black',\n",
943 | "                    marker=markers[idx], \n",
944 | "                    label=cl)"
945 | ]
946 | },
947 | {
948 | "cell_type": "markdown",
949 | "metadata": {},
950 | "source": [
951 | "### PCA特征降维"
952 | ]
953 | },
954 | {
955 | "cell_type": "code",
956 | "execution_count": 45,
957 | "metadata": {},
958 | "outputs": [],
959 | "source": [
960 | "# 利用PCA进行特征降维(提取)\n",
961 | "# 保留两维特征\n",
962 | "pca = PCA(n_components=2)\n",
963 | "# 训练集数据处理\n",
964 | "X_train_pca = pca.fit_transform(X_train_std)\n",
965 | "# 测试集数据处理\n",
966 | "X_test_pca = pca.transform(X_test_std)"
967 | ]
968 | },
969 | {
970 | "cell_type": "markdown",
971 | "metadata": {},
972 | "source": [
973 | "### LR分类器"
974 | ]
975 | },
976 | {
977 | "cell_type": "code",
978 | "execution_count": 46,
979 | "metadata": {},
980 | "outputs": [
981 | {
982 | "name": "stderr",
983 | "output_type": "stream",
984 | "text": [
985 | "/Users/yaoxiaoying/.py3virtualEnv/ai/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
986 | " FutureWarning)\n",
987 | "/Users/yaoxiaoying/.py3virtualEnv/ai/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
988 | " \"this warning.\", FutureWarning)\n"
989 | ]
990 | }
991 | ],
992 | "source": [
993 | "# Train a logistic-regression classifier on the two PCA features.\n",
994 | "# fit() returns the estimator, so construction and training are chained.\n",
995 | "lr = LogisticRegression().fit(\n",
996 | "    X_train_pca, y_train)"
997 | ]
998 | },
999 | {
1000 | "cell_type": "markdown",
1001 | "metadata": {},
1002 | "source": [
1003 | "### 训练数据结果"
1004 | ]
1005 | },
1006 | {
1007 | "cell_type": "code",
1008 | "execution_count": 49,
1009 | "metadata": {},
1010 | "outputs": [
1011 | {
1012 | "name": "stderr",
1013 | "output_type": "stream",
1014 | "text": [
1015 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n",
1016 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n",
1017 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n"
1018 | ]
1019 | },
1020 | {
1021 | "data": {
1022 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de5TW1X3v8feGARkEQZBxBHEUIaCswRteseRAbEKO12hdNpo2Rs+CnrSJMT1Nj0nbdVo97UlyuhLWyeVIGkNPNeqymEqKQlRMphBEBxUoN0GaAcEBnMhFh8sM7PPHb34zzzy3eS6/y/79ns9rLRczz8zz/PYM+Ps8e+/v3ttYaxEREXHNoLgbICIiko8CSkREnKSAEhERJymgRETESQooERFxUl3cDch21ogR9vyxY+Nuhkhodh8dzOkToH5QfdxNEXHC5jc2v2+tHZf9uHMBdf7YsbR+4xtxN0MkNA+uH8nVf2eYftr0uJsi4oQZw2a05XtcQ3wiIuIkBZSIiDhJASUiIk5SQIlE7KLnf8OO1oNxN0PEeZEGlDHmQWPMS1FeU8Q18x9SlapIKSILKGNME3BvVNcTEZFki7IHtRB4KMLriYhIgkUSUMaYu4H1wOYCX59vjGk1xrQe+PDDKJokIiKOi6oHdRPwCeAp4ApjzJ9kftFau8haO9NaO3PciBERNUlERFwWyU4S1tq7AYwx5wP/YK39XhTXFRGR5FKZuYiIOCnSvfistb8BbojymiIikkzqQYnE4HjbUJ5bvTruZog4TQElEoOHu7rjboKI8xRQIiLiJAWUiIg4SQElIiJOUkCJiIiTFFAiIuIkBZSIiDhJASUiIk5SQInE6JVdm+JugoizFFAiMWm4/wMOH4m7FSLuinQvPhFx38bXN7Jy+Ur2t++nobGBufPm0nxlc9zNkhqkgBKRXhtf38jS55Zy7WevpXFSI+0721n65FIAhZRETkN8ItJr5fKVXPvZaxk/ZTyDBg9i/JTxXPvZa1m5fGXcTZMapIASkV772/fTOKmx32ONkxrZ374/phZJLVNAiUivhsYG2ne293usfWc7DY0NMbVIapkCSkR6zZ03lzVPrmHv9r2cOnmKvdv3subJNcydNzfupkkNUpGEiPTyCyFWPruSl9tfpqGxgVtuvUUFEhILBZRITObP2sQjB8+Muxk5mq9sViCJEzTEJxKX2bM53jY07laIOCuSgDLG1BljnjHGrDbGPBbFNUVEJNmi6kHdBqy31s4CzjHGXBrRdUVEJKGimoNaDjxvjKkDRgOHI7quiIgkVCQ9KGvth9baTmA1sM9auzPz68aY+caYVmNM64EPP4yiSSIi4rio5qDGGmNOA64DzjTGzMn8urV2kbV2prV25rgRI6JokoiIOC6qOag/Be601p4EOoH6iK4rIiIJFVVAfR+4zxizBugAVkR0XRHn7eneE3cTRJwUSZGEtXYPoL1SRLJM/Jt9tDadYMKsCXE3pSbp7Cu3aScJkRjNf2gsj3Aq7mbUJJ195T7tJCEiNUlnX7lPASUiNUlnX7lPASUiNUlnX7lPc1AxeXDxYjoPHcp5fPioUXzn3nujb5BIjZk7by5Ln+w/B7XmyTXccustZb+Wii3CoYCKSeehQzw6dmzO4ws6OmJojUjtCersKxVbhEcBJSI1K4izrzKLLYC+YotnVyqgqqQ5KBGRKqjYIjwKKJGYHW8bynPr1sXdDKmQii3Co4ASidnD//fRuJsgVZg7by5rnlzD3u17OXXyFHu372XNk2uYO0+b51RLc1AxGT5qVN6CiOGjRsXQGhGpVFDFFpJLARUTlZJLLUl7GXYQxRaSSwElIqFKYhl22gM1KRRQIhKquMuwyw2bJAZqWimg8tAuDyLBKVSG/XL7y6Ffu5KwiTtQpY8CKo+k7fIQR6AqxKVUfhm2f8OH6MqwKwmbOANV+lNApUAcgZq
0EHfalClw7FjcrQhNkHve5VNsCK+SsIkzUKU/BZRI3GbPBk6x6fgmpp82Pe7WBC7MMuyBhvAqCZugA1UFF5VTQIk4oGPJKCbPjLsV4QmrDHugIbxKwibIQFXBRXUUUCKSWAMN4VUaNkEFqgouqhNJQBljDLAYmArsB2631nZHce1KaJcHkWQoZQgvzkW0KrioTlQ9qFlAnbX2GmPML4FPAs9HdO2yJa0KLY5AVYiLC+IswCiFCi6qE1VA7QMW9nx8IqJr1ow4AjVpIS7pFGcBRinCDtC0iySgrLXbAYwxnwGGAisyv26MmQ/MBzhvzJgomiQiCVSoR1NKYJTbG8qcP9reup11L66jfVc7333ku3zlL75S0jW1kWx1IiuSMMbcAjwA3GytPZn5NWvtImARwMymJhtVm0QkOarp0VTyXH/+aHvrdtYsW8NVd13FuAvG0fqzVpY+V3pPShvJVi6S86CMMY3AnwE3WmuPRHFNEUmXzB7NoMGD+irilq8M5bn+/NG6F9dx1V1X0TilkeMfHS/rulKdqA4s/DxwDrDCGLPKGHNfRNcVSYQDT49mR+vBuJvhtGqOVq/kuf5BhO3vtDPugnF0Hupk3zv7mDBxgo50j0hUc1DfBL4ZxbVkYNpHzz2P/+1uHom7EY6rpiKukuf6w3LffeS7tP6slfFTxtPU1MSYhjHs3b438Eo87TiRS0e+1yB/H73s//KFlogrqjlavdLnNl/ZzFf+4it8+PaHjD1jLKPHjg7lSHd/jqz59mbu+d/30Hx7M0ufW8rG1zcGdo0k0k4SIpII1VTExfXcUmnHifwUUFJUpcOBGkaUMFRTEdcbND3DaH6RgwuVeNpxIj8FlAMquZlHFQCVHqvhP+/BbdvoPNG3Nvv1tjYWLFyooJLIubxxq3acyE8B5YBKQiAp5zF1njjBo8OG9X7eAsweO9a5dkr6uTyMph0n8lNA1SDtoye1yOVhNO04kZ8CqgZpaE1qkevDaNpxIpcCSgKRPSe2qa2NBXv3srWzEzKG+KS451av5tZZs+JuRippGC15FFBSVKnDgdlzYr9+/326T5zgy93dtBw71vt43dCh4TU24f5ixyAemXwq7maklobRkkcB5YB8IbB1zx6OAQsWLsz53u/ce29V80jlVABWOhx43dSpAJy2YQNPjB/f72v/2NGh+S6JhYbRkkUB5YB8IbBg4cKiVXrVzCO9sXkzDxuT8/hf7tlT8WsWcumECTz6wAOBv65IVLQFUXwUUDXoZHc3s0eOzH38iDaaF8nk8tqpWqCAEpHEiapX4/LaqVqggJJAaG2VRCXKXo3La6dqgQIqZoUKFt7aswfyzEG5SmurJCpR9mpcXzuVdgqomBXasujje/aE1iPpqqtjQUbpd+bjQdFmsRKWKHs1WjsVLwWUo6aFWP123cUX5w2P68oMv2IhlJS9Al303KZN3Dp9etzNcFaUvRqtnYqXAqoGBdWDUQgFr+H+D9i/ZHTczXBa1L0arZ2KjwJK+snsFW3ds4eT3d2AN/x36YQJgIbpJF7q1dQOBZQAfcG0qa2Nvx0yBIAPPvqIq+vqaBw9mgXHjvX2ltRDkripV1MbIgsoY8wQ4Flr7c1RXTMJ4irPzre56/QhQxjc1dW7iLelsxN7SnvDiURNu1d4IgkoY0w9sBb4WBTXS5K4hsqy549a9u5l9rBhXP/hh4G8vtZFiVRGu1f0iSSgrLVHgRnGmB1RXK/WxFXSXSyENEclUhntXtHHiTkoY8x8YD7AeWPGxNya5Imrmk4hJBI87V7Rx4mAstYuAhYBzGxqsjE3p6Z1GdO7iHcrcKi7mxFHjniLe3sCT8N0IuHR7hV9nAgoiV/d0KG0HDvGkKFDoef8pmlouC4WBw+yp3sPE+omxN0SiYF2r+ijgKpROfNHZ50FwJUKpFjNf2gsf9lWB1fH3RKJi9Z59Yk0oKy1k6O8nhRWKIQeXLw45xRfUE9K0sX1Mm6t8/KoB5UCQZZ
0R11woU1lJWoq404OBVRChXFjf3DxYja1tdGyd2+/x+uGDu0dAgya9vOTqKmMOzkUUAkVxo2989Ahpg8Zwuxhw/o93pLnaA6RpBqojLuc4T/XhwqzJa29CqgUS8Lw2Vt79rAgq8cG8Jbtv9qgkp8lCT+/RK9YGXc5w39JGypMWntBAZVqSRg+G9LdzaM9e/9luv7IkX6fV/KzJOHnl+hllnF/+MGHrFqyirZ/b2PqtKk88/+eYdLvTKLln1v4oP0Dzmw8k/NnnM/K5bnDf0kbKkxae0EBJVmGDx2ac9rupq4urswouFDPRJLMvxk//b2n2b1vN9fcdQ23fOUWTnx4gu/e/106h3Zy/eevp2FSA/t37mftU2s5vONwzuv4Q4XbW7ez7sV1fND+AaMbRvP+lvej/pFKksQdKhRQ0s93pk7NeWxBR0e/4AmyZzK4ri7vHNfgKo6fzzw6JLPgo27oUK7L8/O5qHXtb5gwSwt1w9J8ZTMrl69k7p/M7TfUN3zUcD52w8donOLdyBunNDLjxhm89K2Xcl6jobGB137+Gm+vf5ur7rqKhkkNtK1v45W3X2Hj6xud65UkcYcKBVRChbFbeBw7kE+bMIHZecJuWpXFHo+OHcuCnh3afUkp9ni4q5tH4m5EDcjXoxgxZgRdJ7roPNRJ/Rn1HD18lFMnT3H6iNNznj933ly+/dff5tMPfZqzJ5/N0cNHAZj3X+flHRKsVrUFDkncoUIBlVBhDKVpeE7SoNQbeb4exZjGMXQf7KbjPzo4dvQYw+qHUU89F067MOf5zVc2M3rUaAbZQex4dQfD6ofR1NTE6LGjWff0usB/pmoLHJK4Q4UCKsWScCZTqW2s5GfJnk/b1NXF9I4Op35+CVY5N/J8PYruw9281/oezTObaby0ka2vb2X5D5cz5OQQFj68MCfsLpx2IePOHNcv5PZu3xv4sFlQBQ5J26FCAZViSegRldrGSn6W7Pm0BR0dPPrAA2W/jiRHOTfyfD2KL/zRF3of+5et/8LBQweZ+4dzuermq/KGXVTDZkkscAiCAkrKloSemUTDtYWf5d7IC/Uomq9sZuHDC2m+vblo2EU1bJbEAocgKKASKs5Sb9d7ZgrQaLi48DPIG3mpYRfFsFkSCxyCoIBKKC1CLcz1AE0LFxd+Bnkjd6nXksQChyAUDChjzCDgkz2frrDW23vGGHOvtXZxBG0TkRBYC8YU/rxULs2LZA41Dh40mJXfW4kdZKu6kbvWa0lagUMQivWgngVGASeArxljbrLWdgL3AYsjaJtITXvu1Ve59ZprAn3NXy4byfGjg/jkHYcwxgunXywZxWn1p/hPNx4Z+AUyuNLDyDfU6AdJNTf05iub2bFlB098/QkOHTrEqFGjuOmOm2ouJOJULKDOttZeC2CM+Qzwr8aYG6NpliRBWEd+VPuaUc3PhXWdRX/XQQOwf/FZvLJrEwBzzpte8ev5rIXjRwex9hVv0ekn7zjEL5aMYu0rp3P1nI/K7km50sMIa6hx4+sb2fz2Zu7523v6/XyTX58cW0i5VpQStmIBtc0Y80/AQmvtz4wxJ4EVQGOR50gNCevIj2pfM6r5uUCu09IC27ezaP9tfY81NMD997P1R96n0776FM9t8oJqcs+Z1NNPKz+wjPFCCWDtK6f3BtXVcz7q7VGVw5V5kbCGGl2bY3OxKCVsBQPKWnuvMWYWcLDn86XGmLXA56NqnBSmSrUEa2kBYNHq6cB0mDUfpgCzZ/f7tt5PW3+/92kd121k7DWb2EFfYI0ePBqACXUD793nh5QfTkBF4eRzYV4krKFGl+bYwL3AjELRKj5r7eqsz/cB3wq1RVISVaolSE4vabrXS5o1JSeUivG+tRla+25Gmw4sB+C0cQdp9d5L9oZWvsDy55wy/WLJqKpCKm5hDTW6Msfmcy0wo6Ayc0mNzF3MMw9BHD50aN5d2st5zWwD9VQX/V1m73Y6NMyBh+6vqA3FTGyb533Q5v3
R0gLH/2B5v8ACmDl1NOMHT+g355Q5BwXV9aTiFNZQoytzbD7XAjMKoQeUMWYY8M/ARGAD8Id+ybpIkPw5oZasXcyzz7eq5DWz5Rte3fKmd53d3V1c0Qg89FDF163U7NlA27zewIKeEcWvPgUcZOuB8YxoHsyEm3ZjzPTeOanT6k8lMpx8YQw1ujLH5nMtMKMQRQ/qc8C71tqbjDH/Cvwu8IsIrishK3UerJxqtyDm1uqGDu13tMamri4WBL1J7AcfMPzECf7zzq6+x4YOhXPPhVEXw73Rh1Mhs2fTO4+1tRVOWHjnd57inZ55rAtv8uamNh2vrPgiScpdA+bCHJvPtcCMQhQBNRdY0vPxSmAOWQFljJkPzAc4b8yYCJokQSh1HqycXkgQc2vZhxJOD2KT2PfeA2BL+5kA7O4ewW13/AMXQVnzSHHLLrzY0r2Rjle9hzKLL2ZOLb3wwmWZZdldx8/lY1Pmcf/XLqh6DVhcXArMKBTbSeJ0vNB421q7zBjz34BO4CfW2qNlXGMs4L99PgzkTAZYaxcBiwBmNjVp+E/c8e67bNl1OnAm1NdDPTBtGnS0JSqYCrmoLuNm11N8saV7I6sP7Ok3jzVzav6iC5f1K8u+oJE3Wzp4adES+NYd3P+1C6paAybRKNaDegJv3dPmns9/hdf7eQq4tYxrvI+3IwU9f75fZhtFItO/uAF2nzgbrrwsptbE46K6Zmhr7ld4wVef6ld0ccZI+Nh4t0Mruyz7ijnjgKtZ/vcvs3fXXwGVrwErpNYW0oatWEA1Wmt/6H9irX0deL1nLVQ5Xsbb028J3nDfd8pupUQmzl3Sq1XR/NWPfwzQf6FsZnHD4m38aUcbOUbVznr1zDks8AJr3HUbOXzNpn6l7eDWHFZOWbaBy2aP5bn/1VfhGXQ41dpC2rAVC6gXjTErgeeB3wIjgE8Br5Z5jSeA240xG4D1eIElMShlgjjJu6SXFKAtLT0LZH23waxZcH/+4brb7l0cRNNSJXs9VuY81o5rNvV+X9zzWDll2RbebOngtCF9ZdpBrgGrxYW0YSu2k8RfGmOuxQulC/Hmjx611i4t5wLW2uPATVW1Uqr28/XncbSrjjuv2Nk7QfzMuknUD+nm5kt2hXrtWHe96LdrA3g7N8xKxfyRKwrNY6141Su8yJzHgugCq19Zdu8c1Fo+fsMd3P+1vYGvAavFhbRhK1Yk0QjcjFcY8TVrbTLKXCSHtXC0q46Xt3o3hjuv2Mkz6ybx8tYJfGLantAniCMfGuzXS+rZtaEBuD/4hbKSX29o9QTW7qblrD7gPXTauL5e1q3T+3qzQR0D4ssuy+46fi6zrr6jt4ov6DVgtbiQNmwDFUn8BBgN/AD4g0haJIEzxgslgJe3TugNqk9M29Pbo0q6nJ0b1EtySu+OF9BbfLG7aXnvJrhbW8YzxAzm0k/vZm7T9MBKwLPLsjNDzw+poP791+JC2rAVC6hh1trHAYwxvxdReyQkfkj54QQkO5wGKm4Q503s2fHCWlixBNrbofPoag7/7ib+/cWJvPtWPTPn7ePj9ozA/p1mv06Q//5rcSFt2IoF1FhjzN2AARp6PgbAWvvT0FsmgfLnnDI9s25STki5uku6/VULP/p15nDQbZjrCxc3SHIYA5/7HKxbB1ufn8V7z3uPj56yi5GNR1i6eXfv98ZdeDGQWltIG7ZiAfUk3iEAAE9nfKyFtAnjh5M/55Q5BwX9e1IulZL7w3brOyfTNfI+rhj3H5j/cj/WejezIUPgkjzPC3ouQ8JnDFxxBWzd2vfYLVedh9l1HvTU8JQyjyXpUqyK76+jbIiExxioH9Ldb87Jn5OqH9Ltzs07pwQc7H9/iK51PTeuaXBFTzht3ept6JAdPuvXQ1eXd7PzqxV7wyxfmokT/L+nTOvW9f09Qv55rP0z+w5zPGNk35eDOIFY4qfjNmrEzZfsypkgjn0OKmceKbe4weDdpMALJf8d9rRp/W9e4N3kurr
6vueKK4qHmbjBZr3pyPx7g9y/50wNWQuIIbgTiCV+CqgaEuYEccl+/OOMQCq+SNaXb/gn303L/z4YOMzEHcZ4PdzMvyf/73HIkNL/3ko5gfiMkdBwduEDHcUtCigJV6GdG8ooAS9l+MdXapilQZrm2i65JLcEvNq/t3wnEG/p3sh/0H8BcbETiCVeCigJXL81SQ1zqlokW+7wT6lhlvSbexrn2qLo4WcvIM53AvEZIzWH5QoFlFQtewdwILA1SeUM/5QaZkm/uWuuLTj5TiDOLLyAvuILhVb0FFBSvuzihoaGULcRKnX4p5QwS8PNXXNt4Sq18AJUfBE2Y61by5pmNjXZ1m98I+5mSKacDVfx5pHAye2EBhq+y+xp+ZJ4c7cWnnii7/N77klW+5NoS/fG3o/HOrRze9LNGDZjnbV2Zvbj6kFJfv2q7Xo2XJ01xclAyjbQXEYaCinKKRyR4JR6AvHkyepdBUEBJb36zyXdltq97ZJ+c69m3ZAEr9AJxH5pOyTjBGIXKaBqWb9eUo+UhpIvDTf3oNYNSTjKOYFY5e3FKaBqRUsLbN/eP5Aa5sNDtXVGUlpu7mGsG5JwZK/HammBrXihpfVYxalIIs2yixscLmyImgvroFxog7jBL77IV3gB6S++UJFErUhwcUOU4t72KelrsSRYxU8g7iu+qLWd2xVQCZe7SDa9xQ1pkYa1WBKugU4ghtpYQBx6QBljhgDPWmtvDvtaNSO7uEGBlChaaBufJA+rTszY8SLfAmJ/I9w0lbeHGlDGmHpgLfCxMK+TavkWydZgcUPaBL0WK8k33qikaVg1e+d28OaxDly4hx3j0rOAONSAstYeBWYYY3YU+z5jzHxgPsB5Y8aE2aTE6Bu665lHqmLDVXFPkGux0nTjDUstDKtmr8dKwwnEgQaUMeYHwIyMh1qstV8f6HnW2kXAIvCq+IJsU1LkzCWFvL+dxCfItVi1cOMNQi0Oq6bhBOJAA8pa+8UgXy/VsueRyjwjSZIryLVYpdx4Kx3+S9uwYRq2uKrWQBvhujaPpSq+qOQcb65qu1oW5ELbYjfeSof/0jhsmPQtroKW7wRi8AIr+wRiiCe0FFBhKXSS7ADHm6dNsXfhaXuHXq6g1mIVuvFefnllw39pHDZMwxZXYStUeOGfQOyHVpQ7XkQSUNbayQN/VzrkFDfU8DxSsXfhkI536HGH7EA33ssv9/4sZ94ljfM1adniKmpxn0CsHlQ1cnpJ1Hwo+Yq9C5861Xts27bcryXpHboLw2AD3XgHDaps3iWs+ZpyAz3INwDav7B6UZ9ArIAqR8480nQVNxQw0Ltw/3uS+g7dpWGwYjfeSuddwpivKTXQ/Z8ljDcAcW9xlUZhnkCsgBpIv13Ae+aRpqBQKsFA78KTXFHl2jBYvhtvpfMuYczXFAr01auhsRGam73eXmYIufIGQEqXPY+1pXsjHa96D2XOY5W6gFgBlUf/NUnToWGOdm6oQLF34f7H+b6WlBuPK8NgxdpXybxLGPM1hQK9sRFOnIA33sgNoUrmz8QtpZ5AXIgCinwbrqIS8CoVexfun/CybVu0FVVBFzTEOQxWqkrnXcKYr8kX6Hfe6YVToRBKci9b8sve8cLz2bzfW5sBla+4QYEUqELvwq2FoUO97/HfJUdRURX0jT/KYbBqh7UqnXepZr4m35sByA30N97w/g3kCyGtW5LaCKic02RV3BCF7HfhGzZ4f86Y4T126pR3g/JDIsyeU9A3/iiHwZI2rJXvzUBrK+zaBUeP5gb6O+/0/zvw13D5PSutW6pd6QyonB3Ap8Os+SpuiEHmotyuLm9Yz78RZ96Awpz0DuvGH9UwWJJuxoXeDGzbBvX13hID/+e5/HIvnNrbvfeL2SFUV6d1S7UuVQGlHcDdFXfvIKwbf9Bly0kf1ip1eQF4VXtTp8KFF+YPIa1bkkQHlIobkiXO3kEpN37Xd4VIys25nL/nSy8tHkJat1TbkhVQOQtlUSA
5ZKAbfFy9g1Ju/Bs29J83yZ4fiyKswpjXikO5f88KISnE7YDShquJMVCVXFi9g1J6PQPd+KH/vEldnTdn0tXlPSc7rMKU9GGttPQCxQ1OBpQ2XE2WUqvkyukdlBI85ZSOD3Tjz5w3OXAADh6Eiy/OrSaLqidV7HOXpaUXKG5wLqAOtJ+ESxVKSVJqAUSpvYNSgqeS0vFin2fOm4wb5z124gT89Kf5fxYpLOm9QHGHcwFFY6PCKYFKnRgf6PNSgyfoqsDseZNx47ye1Fln1e4NtpqikezCk0KFKCLFDIq7AZIOhSbG/R0ESuWHwbRpXug88UT/+Yx8vZ5M1YSTf5277/Z6awcPwvvv93293J8lydav7/8z+7+D9eujfQ2pbQooqVr2Df6ee/oCppqQypQveIIMRX/exJ9zOnHCm4OaOxcuuqjynyWJMnux/s/s//12dZX2OwjiNUTcG+KTxAl6YrzUNUtBVotdcolXrTdokNfmiy6Cyy6DwYP7bqa1MskfxPBp3AuzJR0UUBKIoCbGSw2eoEMxszDDD6so9gl0VRCLqpO+bZPET0N8EpggyqMLBc+0abnBkx0c/veWu1Yp33CUX1ruD0fV2k01iOHToIZgpXaF2oMyxhhgMTAV2A/cbq3tDvOaknzl9MYqDcXs13f9cLwot2EKYvhUC3YlCGH3oGYBddbaa4AzgE+GfD1JiTAXq+arLnvjDW8HiUyu3EQHqobL7pGU2kMp9LxyerGFBPEaImHPQe0DFvZ8fKLQNxlj5gPzAcaMOS/kJknSVdObKLTOasuWvoMUfS7sIj7QurC33oLubq8HOGhQX3jV1XkbsRYy0GLoIOYUtWBXqhVoQBljfgDMyHioxVr7dWPMZ4ChwIp8z7PWLgIWATQ1zdQItRRU7cm4+arL/FN+/b33Kh2OCmMYrlg13GWXwZtvwurV3rlKv/d73ud+2BqT/3dSzmLo7LZU0v5qX0NqV6ABZa39YvZjxphbgAeAm621J4O8ntSWoE7Gza4uM8Z7fmbwlVsRGPSR8sXa67fLPy79nXdg0yYvmEaPhnPO8dpSqMBDJeCSFGEXSTQCfwbMs9Z+FOa1JP2CurHmqy7r6vJu9pUMR4VxpHyx9loLq1Z5i4nB69uIFI4AAAvFSURBVDlt2QIdHXDoEIwd663jKtZ+lYBLEoQ9B/V54BxghVfQx2PW2sdCvqakWLU31nKqy8rZd67S4Mw3LJh5bWuhtdU7/sN/vdZW+Ld/gyNHvD9XrYLOTu976+q8PQTvuWfg6yb55F6pDaEGlLX2m8A3w7yG1JZqb6xBL/DNfN1ygzPfsOCzz3pfu/32vufu2gX19X3fN3Om9/hvfuOF1fHj3uOTJnmPHzoEzzwDd97pFU5kUwm4JIUW6kpiBLXnX1ALfPO1LVOxNuVbHNza6oXRrl3ex/5rHj0K52UUt/rtPf/8vs+thQkT4IEHvD0E29u9Oap811cJuCSFtjqSxAiy9xNkdVkpPZJ818g3LHj99d6f27Z5/0HuUKF/vVWrvNNpjPF6TXv3eqHkV/MV+52oBFySQAElieLijXWg4NywoXCFX/awoD9854cT5P/52tq8P6+/3nvOa6/Br3/t9b5mziztd6IScHGdAkoSJ+wbayXrmQoFJ+T2pvzPp071hvIyZX/uPz8zcDZs8D6eNasv0AYNgokTYfJkBY2khwJKJEM165kKBWe+obypU70/s6vzVq3yHvd7RvmGCru6vHkp//XXrfNeZ+pUmJG5TF4k4RRQIj3CWs+Ur8Jv5kyvJ5Q5LDhzpjdE53+90BxbZuAVmqcSSQMFlEiPsHZYKFYa71/X//P223Mfy3fU/eWX9w+8zEXGhdoQ9DZMImFTmblIhsyQ8gURToVK4/Ndf6A5trfe8tY5ZXrmGe/xfAbaDV3EVQookQxBH7IX9JqjU6e8Yb3Nm73n33239+fmzd7jp07l/jzZ6638wPT36hNxlYb4RHqEtcNCkKXxgwZ
5bQNvL76f/tR77Ysv9h7P3jlCG8NKkiUioOrqupg06V2GDz8Wd1Py6uwcxs6d59LdPSTupkgVwtoGyX/tYp+X45JLoLnZCyf/tQpta+R/XRvDShIlIqAmTXqXiRNHMnLk+RjH/q+y1nLkSAfwLm+/fUHczZEqubgQOJt/AnCmN94o3E5tDCtJlYiAGj78mJPhBGCMYeTIsQwffiDupkhAXN5hodgwpLV95en+94I2hpXkSkRAAU6Gk8/ltkm6FBqGbGvr2+YI+i8wDmvYUiRsiQkoEfFkD0OCt9v5tm19Q3fZvSZwe9hSJJ/UBdRffeleju1vz3l8WEMjf/N/Flf12l1dXXzhC7fz+OM/r+p1RMpVbGGtvwuFMaVX6imcJAlSF1DH9rfzg3Obch7/4rttVb3u0aNHmTfvanbufLuq1xEpVyn7A6pST9JIC3VLVF9fz69+tYFzzjk37qZIDSl1oW3QC4xFXJC6HpRImpSy0FZHuEtaqQcl4riB9gfUEe6SVqH2oIwxdcCTwHhgm7X2vjCvJ5JGpSy0TcICY5FyhT3Edxuw3lp7pzHmBWPMpdbaAnsuB2NYQ2PegohhDY1hXlYkFOUM37m8wFikEmEH1HLg+Z6e1GjgcMjXq7qUfCCvvbYj1NcXyRTm/oAirgs0oIwxPwAyD51usdZ+3RizFnjPWruzwPPmA/MBxow5L8gmiSSehu+kVgUaUNbaL2Z+bowZa4w5DbgOWGmMmWOtfSXP8xYBiwCammaqMFYki4bvpBaFXcX3p8Cd1tqTQCdQH/L1RKqSvW5I64hE4hN2QH0fuM8YswboAFaEfD2RiulodBG3hFokYa3dA8wN8xr5r9t/CKTYPmYi0H/HBsjdcFX/hkSil7qdJFasgGPH4JZb+lbZL10Kw4bBpz5V+etaa/nSl+7lnXe2cdZZDfzkJ89SV5e6X1/N0tHoIu5J1U4S1nrh1NLihZIfTi0t3uPVzCesXbua7u5uXnjhVY4cOcwvf/mL4BouThhoxwYRiVaqugDGeD0n8EKppcX7ePbsvh5VpRoazmb+/AcAGDJkaJUtFRfpaHQRt6QqoKAvpPxwgurDCWDSpCkALFv2M7q6TjBnThXjheIcbbgq4p5UDfFB37BeJn+4r1rLly/lRz9ayOOP/5zBgwdX/4LiDG24KuKeVPWgMuec/GE9/3Oorie1b1873//+t3nqqeWcfvrpwTVanKEdG0TckqqAMsar1succ/LnpIYNq+5G8/TT/8i+fe9x113e0N7dd9/H3Xdrc/a00Y4NIu5IVUCBV0qe/S44iDmoL3/5z/nyl/+8+gaKiEhJUjcHBXoXLCKSBqkMKBERST4FlIiIOEkBJSIiTlJAiYiIk1JXxQfwxpuvseylJezdv5vxDRO58YY7uPyyq6p6ze7ubhYs+Czt7XuZPHkqCxc+FlBrRUQkn9T1oN548zWeeGER02+/iD/81r1Mv/0innhhEW+8+VpVr/vCC//C9OmXsGzZavbte4+NG98KqMUiIpJP6gJq2UtLmHXX9Zw7ZSKDBw/m3CkTmXXX9Sx7aUlVrztnzjz+6I++Snd3N4cOHWTkyDMCarGIiOSTuoDau38350wa3++xcyaNZ+/+3VW97ogRIxg+fDg33TSLcePO5vzzJ1X1eiIiUlzqAmp8w0Te27m332Pv7dzL+IaJVb3ub3/bwfHjx1m27NccOvQBq1a9UtXriYhIcakLqBtvuIPVT6/i3e27OXnyJO9u383qp1dx4w13VPW6P/zh37N06TMMHjyY+vrhHDt2NKAWi4hIPqmr4vOr9ZY9u4QX9/+C8Q0TuefT86uu4rvvvj/mj//4D3jsse9z/vkX6jwoEZGQRRZQxpgHgRuttTeEfa3LL7uq6kDKds45E3j22ZWBvqaIiBQWyRCfMaYJuDeKa4mISDpENQe1EHiomhewQRyJGxKX2yYiklSBD/EZY34AzMh4aDzwT8DmIs+ZD8wHGDPmvJyvd3Y
O48iRDkaOHItx7OwMay1HjnTQ2Tks7qaIiKRK4AFlrf1i5ufGmJ8CnwA+BUw1xvyJtfZ7Wc9ZBCwCaGqamdMd2bnzXOBdhg8/EHRzA9HZOaynjSIiEpTQiySstXcDGGPOB/4hO5xK0d09hLffviDglomIiMtStw5KRETSIbIyc2vtb4DQS8xFRCQd1IMSEREnGddKpI0xB4C2uNtRhrOA9+NuhMP0+xmYfkfF6fdTXBp+P03W2nHZDzoXUEljjGm11s6Mux2u0u9nYPodFaffT3Fp/v1oiE9ERJykgBIREScpoKq3KO4GOE6/n4Hpd1Scfj/Fpfb3ozkoERFxknpQIiLiJAWUiIg4SQEVAGPMg8aYl+Juh2uM5x+NMa8aY5YaY1J3gnOljDHDjDH/aoxZb4z5J+PaNv0O0L+f0qT5/qOAqpIOYyxqFlBnrb0GOAP4ZMztccnngHettZcAZwK/G3N7XKR/PwNI+/1HAVW9qg9jTLF9eL8fgBNxNsRBc4EXez5eCcyJsS2u0r+fgaX6/qMucxkqOYyxluT5/bRYa79ujPkMMBRYEU/LnDQWONTz8WFgaoxtcZK1djuA/v3kZ4y5G1hPiu8/CqgyVHIYYy3J/v0AGGNuAR4AbrbWnoy+Vc56HxjV8/Eokr+XWij076eom4DzSPH9R+ugApBxGKOOE8lgjGkEngHmWWs/irs9LjHG3Adcba1dYIxZBnzHWpvKie5K6d9PadJ8/9EclITp88A5wApjzKqem7J4ngAmGGM2AL8FXo65PS7Sv58apx6UiIg4ST0oERFxkgJKREScpIASEREnKaBERMRJCiiRkBlj/ocxZpsx5tfGmFeMMeN7Hn/YGLO2Z5+5kUWeP8QY8/PoWiziBgWUSDQettZeBzwGfMkYcx3wO8A1wHJgfr4nGWPqgXVorz6pQQookWiNBo7irf5/3nrrPJYDb+f7ZmvtUWvtDODd6Joo4gYFlEg0vmGMacHrMS0EzsZboIu1dqe1VkN4Ilm0F59INP6ntfZx/xNjzGFgRM/HVwEft9Z+O67GibhIPSiReKzGG+YD76iNozG2RcRJ6kGJxGMpcIMx5jW8c49+P+b2iDhHe/GJiIiTNMQnIiJOUkCJiIiTFFAiIuIkBZSIiDhJASUiIk5SQImIiJP+P7wwWY/0Ug3WAAAAAElFTkSuQmCC\n",
1023 | "text/plain": [
1024 | ""
1025 | ]
1026 | },
1027 | "metadata": {
1028 | "needs_background": "light"
1029 | },
1030 | "output_type": "display_data"
1031 | }
1032 | ],
1033 | "source": [
1034 | "plot_decision_regions(X=X_train_pca, y=y_train, classifier=lr)\n",
1035 | "plt.xlabel('PC 1')\n",
1036 | "plt.ylabel('PC 2')\n",
1037 | "plt.legend(loc='lower left')\n",
1038 | "plt.tight_layout()\n",
1039 | "plt.show()"
1040 | ]
1041 | },
1042 | {
1043 | "cell_type": "markdown",
1044 | "metadata": {},
1045 | "source": [
1046 | "### 测试数据结果"
1047 | ]
1048 | },
1049 | {
1050 | "cell_type": "code",
1051 | "execution_count": 50,
1052 | "metadata": {},
1053 | "outputs": [
1054 | {
1055 | "name": "stderr",
1056 | "output_type": "stream",
1057 | "text": [
1058 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n",
1059 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n",
1060 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n"
1061 | ]
1062 | },
1063 | {
1064 | "data": {
1065 | "image/png": "\n",
1066 | "text/plain": [
1067 | ""
1068 | ]
1069 | },
1070 | "metadata": {
1071 | "needs_background": "light"
1072 | },
1073 | "output_type": "display_data"
1074 | }
1075 | ],
1076 | "source": [
1077 | "plot_decision_regions(X=X_test_pca, y=y_test, classifier=lr)\n",
1078 | "plt.xlabel('PC 1')\n",
1079 | "plt.ylabel('PC 2')\n",
1080 | "plt.legend(loc='lower left')\n",
1081 | "plt.tight_layout()\n",
1082 | "plt.show()"
1083 | ]
1084 | },
1085 | {
1086 | "cell_type": "markdown",
1087 | "metadata": {},
1088 | "source": [
1089 | "# LDA(Linear discriminant analysis)"
1090 | ]
1091 | },
1092 | {
1093 | "cell_type": "markdown",
1094 | "metadata": {},
1095 | "source": [
1096 | "## LDA实现"
1097 | ]
1098 | },
1099 | {
1100 | "cell_type": "code",
1101 | "execution_count": 51,
1102 | "metadata": {},
1103 | "outputs": [],
1104 | "source": [
1105 | "# 精度设置,浮点数\n",
1106 | "np.set_printoptions(precision=4)"
1107 | ]
1108 | },
1109 | {
1110 | "cell_type": "markdown",
1111 | "metadata": {},
1112 | "source": [
1113 | "### 平均向量"
1114 | ]
1115 | },
1116 | {
1117 | "cell_type": "code",
1118 | "execution_count": 52,
1119 | "metadata": {},
1120 | "outputs": [
1121 | {
1122 | "name": "stdout",
1123 | "output_type": "stream",
1124 | "text": [
1125 | "MV 1: [ 0.9066 -0.3497 0.3201 -0.7189 0.5056 0.8807 0.9589 -0.5516 0.5416\n",
1126 | " 0.2338 0.5897 0.6563 1.2075]\n",
1127 | "\n",
1128 | "MV 2: [-0.8749 -0.2848 -0.3735 0.3157 -0.3848 -0.0433 0.0635 -0.0946 0.0703\n",
1129 | " -0.8286 0.3144 0.3608 -0.7253]\n",
1130 | "\n",
1131 | "MV 3: [ 0.1992 0.866 0.1682 0.4148 -0.0451 -1.0286 -1.2876 0.8287 -0.7795\n",
1132 | " 0.9649 -1.209 -1.3622 -0.4013]\n",
1133 | "\n"
1134 | ]
1135 | }
1136 | ],
1137 | "source": [
1138 | "# Mean feature vector of each class, in sorted-label order\n",
1139 | "mean_vecs = []\n",
1140 | "for label in np.unique(y_train):\n",
1141 | "    mean_vecs.append(np.mean(X_train_std[y_train == label], axis=0))\n",
1142 | "    print('MV %s: %s\\n' % (label, mean_vecs[-1]))"
1143 | ]
1144 | },
1145 | {
1146 | "cell_type": "markdown",
1147 | "metadata": {},
1148 | "source": [
1149 | "### 类内散度矩阵Sw"
1150 | ]
1151 | },
1152 | {
1153 | "cell_type": "code",
1154 | "execution_count": 53,
1155 | "metadata": {},
1156 | "outputs": [],
1157 | "source": [
1158 | "# Feature dimensionality, read from the data rather than hard-coded\n",
1159 | "d = X_train_std.shape[1]\n",
1160 | "S_W = np.zeros((d, d))\n",
1161 | "# Within-class scatter: S_W = sum_c sum_{x in c} (x - m_c)(x - m_c)^T\n",
1162 | "for label, mv in zip(np.unique(y_train), mean_vecs):\n",
1163 | "    # Rows of this class, centred on the class mean vector\n",
1164 | "    centered = X_train_std[y_train == label] - mv.reshape(1, d)\n",
1165 | "    # centered.T.dot(centered) is the sum of the per-sample outer\n",
1166 | "    # products (row - mv)(row - mv)^T, computed without a Python loop\n",
1167 | "    class_scatter = centered.T.dot(centered)\n",
1168 | "    # Accumulate this class's scatter matrix into the total.\n",
1169 | "    # NOTE(review): assumes mean_vecs is in sorted-label order\n",
1170 | "    S_W += class_scatter"
1171 | ]
1172 | },
1173 | {
1174 | "cell_type": "markdown",
1175 | "metadata": {},
1176 | "source": [
1177 | "### 类间散度矩阵SB"
1178 | ]
1179 | },
1180 | {
1181 | "cell_type": "code",
1182 | "execution_count": 54,
1183 | "metadata": {},
1184 | "outputs": [],
1185 | "source": [
1186 | "# Overall mean of the standardized training set, as a (d, 1) column\n",
1187 | "d = X_train_std.shape[1]\n",
1188 | "mean_overall = np.mean(X_train_std, axis=0).reshape(d, 1)\n",
1189 | "# Between-class scatter: S_B = sum_c n_c (m_c - m)(m_c - m)^T\n",
1190 | "S_B = np.zeros((d, d))\n",
1191 | "# NOTE(review): assumes mean_vecs is in sorted-label order\n",
1192 | "for label, mean_vec in zip(np.unique(y_train), mean_vecs):\n",
1193 | "    # Sample count of this class, taken from the label vector itself\n",
1194 | "    n = int(np.sum(y_train == label))\n",
1195 | "    # Class mean as a column vector so the outer product is (d, d)\n",
1196 | "    mean_vec = mean_vec.reshape(d, 1)\n",
1197 | "    diff = mean_vec - mean_overall\n",
1198 | "    S_B += n * diff.dot(diff.T)"
1199 | ]
1200 | },
1201 | {
1202 | "cell_type": "markdown",
1203 | "metadata": {},
1204 | "source": [
1205 | "### 特征值计算"
1206 | ]
1207 | },
1208 | {
1209 | "cell_type": "markdown",
1210 | "metadata": {},
1211 | "source": [
1212 | "求解矩阵 $S_W^{-1}S_B$的特征值和特征向量"
1213 | ]
1214 | },
1215 | {
1216 | "cell_type": "code",
1217 | "execution_count": 55,
1218 | "metadata": {},
1219 | "outputs": [],
1220 | "source": [
1221 | "# Eigenvalues and eigenvectors of S_W^{-1} S_B for LDA\n",
1222 | "eigen_vals, eigen_vecs = np.linalg.eig(np.linalg.inv(S_W) @ S_B)"
1223 | ]
1224 | },
1225 | {
1226 | "cell_type": "code",
1227 | "execution_count": 56,
1228 | "metadata": {},
1229 | "outputs": [
1230 | {
1231 | "data": {
1232 | "text/plain": [
1233 | "array([ 0.0000e+00+0.0000e+00j, 4.2257e+00+0.0000e+00j,\n",
1234 | " 8.2625e+00+0.0000e+00j, -1.2454e-15+0.0000e+00j,\n",
1235 | " 5.3485e-16+2.0059e-16j, 5.3485e-16-2.0059e-16j,\n",
1236 | " 2.3023e-16+2.2646e-16j, 2.3023e-16-2.2646e-16j,\n",
1237 | " -3.0897e-16+2.9887e-16j, -3.0897e-16-2.9887e-16j,\n",
1238 | " -3.4712e-18+0.0000e+00j, -2.7022e-16+0.0000e+00j,\n",
1239 | " -2.0974e-16+0.0000e+00j])"
1240 | ]
1241 | },
1242 | "execution_count": 56,
1243 | "metadata": {},
1244 | "output_type": "execute_result"
1245 | }
1246 | ],
1247 | "source": [
1248 | "eigen_vals"
1249 | ]
1250 | },
1251 | {
1252 | "cell_type": "markdown",
1253 | "metadata": {},
1254 | "source": [
1255 | "### 特征值分布"
1256 | ]
1257 | },
1258 | {
1259 | "cell_type": "code",
1260 | "execution_count": 58,
1261 | "metadata": {},
1262 | "outputs": [
1263 | {
1264 | "name": "stdout",
1265 | "output_type": "stream",
1266 | "text": [
1267 | "8.262493673957486\n",
1268 | "4.225659486916685\n",
1269 | "1.2454365753641987e-15\n",
1270 | "5.71224762018794e-16\n",
1271 | "5.71224762018794e-16\n",
1272 | "4.298731032916801e-16\n",
1273 | "4.298731032916801e-16\n",
1274 | "3.2293605074486046e-16\n",
1275 | "3.2293605074486046e-16\n",
1276 | "2.702165077415812e-16\n",
1277 | "2.0974269802096344e-16\n",
1278 | "3.4711688834518034e-18\n",
1279 | "0.0\n"
1280 | ]
1281 | }
1282 | ],
1283 | "source": [
1284 | "# Pair each eigenvalue's magnitude with its eigenvector (a column of eigen_vecs)\n",
1285 | "eigen_pairs = [(np.abs(val), vec)\n",
1286 | "               for val, vec in zip(eigen_vals, eigen_vecs.T)]\n",
1287 | "\n",
1288 | "# Order the (magnitude, eigenvector) pairs from largest to smallest\n",
1289 | "eigen_pairs.sort(key=lambda pair: pair[0], reverse=True)\n",
1290 | "\n",
1291 | "# Print the sorted eigenvalue magnitudes\n",
1292 | "for pair in eigen_pairs:\n",
1293 | "    print(pair[0])"
1294 | ]
1295 | },
1296 | {
1297 | "cell_type": "code",
1298 | "execution_count": 61,
1299 | "metadata": {},
1300 | "outputs": [
1301 | {
1302 | "data": {
1303 | "image/png": "\n",
1304 | "text/plain": [
1305 | ""
1306 | ]
1307 | },
1308 | "metadata": {
1309 | "needs_background": "light"
1310 | },
1311 | "output_type": "display_data"
1312 | }
1313 | ],
1314 | "source": [
1315 | "# Sum of the real parts of all eigenvalues\n",
1316 | "tot = sum(eigen_vals.real)\n",
1317 | "# Share of each eigenvalue in that sum, largest first\n",
1318 | "discr = [(i / tot) for i in sorted(eigen_vals.real, reverse=True)]\n",
1319 | "# Running total of the shares\n",
1320 | "cum_discr = np.cumsum(discr)\n",
1321 | "ld_index = range(1, len(discr) + 1)  # 1-based x positions, not a fixed 13\n",
1322 | "plt.bar(ld_index, discr, alpha=0.5, align='center',\n",
1323 | "        label='\"区分度\"分布')\n",
1324 | "plt.step(ld_index, cum_discr, where='mid',\n",
1325 | "         label='累计\"区分度\"')\n",
1326 | "plt.ylabel('\"区分度\" 比例')\n",
1327 | "plt.xlabel('特征维度')\n",
1328 | "plt.ylim([-0.1, 1.1])\n",
1329 | "plt.legend(loc='best')\n",
1330 | "plt.show()"
1331 | ]
1332 | },
1333 | {
1334 | "cell_type": "markdown",
1335 | "metadata": {},
1336 | "source": [
1337 | "### 特征降维"
1338 | ]
1339 | },
1340 | {
1341 | "cell_type": "code",
1342 | "execution_count": 62,
1343 | "metadata": {},
1344 | "outputs": [],
1345 | "source": [
1346 | "# Projection matrix: real parts of the two leading eigenvectors as columns\n",
1347 | "w = np.column_stack([eigen_pairs[0][1].real,\n",
1348 | "                     eigen_pairs[1][1].real])"
1349 | ]
1350 | },
1351 | {
1352 | "cell_type": "code",
1353 | "execution_count": 63,
1354 | "metadata": {},
1355 | "outputs": [
1356 | {
1357 | "data": {
1358 | "text/plain": [
1359 | "array([[-0.1586, -0.4077],\n",
1360 | " [ 0.0984, -0.1821],\n",
1361 | " [-0.0156, -0.3473],\n",
1362 | " [ 0.1588, 0.3095],\n",
1363 | " [-0.0207, -0.064 ],\n",
1364 | " [ 0.1884, 0.0733],\n",
1365 | " [-0.7153, 0.3034],\n",
1366 | " [-0.0798, -0.0009],\n",
1367 | " [ 0.0074, 0.0716],\n",
1368 | " [ 0.3448, -0.2808],\n",
1369 | " [-0.0254, 0.244 ],\n",
1370 | " [-0.3192, -0.0459],\n",
1371 | " [-0.4054, -0.5806]])"
1372 | ]
1373 | },
1374 | "execution_count": 63,
1375 | "metadata": {},
1376 | "output_type": "execute_result"
1377 | }
1378 | ],
1379 | "source": [
1380 | "w"
1381 | ]
1382 | },
1383 | {
1384 | "cell_type": "code",
1385 | "execution_count": 64,
1386 | "metadata": {},
1387 | "outputs": [],
1388 | "source": [
1389 | "# Project the standardized training data with the matrix w\n",
1390 | "X_train_lda = X_train_std @ w"
1391 | ]
1392 | },
1393 | {
1394 | "cell_type": "code",
1395 | "execution_count": 66,
1396 | "metadata": {},
1397 | "outputs": [
1398 | {
1399 | "data": {
1400 | "text/plain": [
1401 | "array([ 0.8944, -0.3881, 1.1007, -0.812 , 1.132 , 1.0981, 0.712 ,\n",
1402 | " 0.181 , 0.0663, 0.5129, 0.7963, 0.4483, 1.9059])"
1403 | ]
1404 | },
1405 | "execution_count": 66,
1406 | "metadata": {},
1407 | "output_type": "execute_result"
1408 | }
1409 | ],
1410 | "source": [
1411 |     "# 降维前:查看测试集第一个样本的标准化特征(注意:这是测试集样本 X_test_std[0])\n",
1412 | "X_test_std[0]"
1413 | ]
1414 | },
1415 | {
1416 | "cell_type": "code",
1417 | "execution_count": 67,
1418 | "metadata": {},
1419 | "outputs": [
1420 | {
1421 | "data": {
1422 | "text/plain": [
1423 | "array([ 1.2617, -0.6537])"
1424 | ]
1425 | },
1426 | "execution_count": 67,
1427 | "metadata": {},
1428 | "output_type": "execute_result"
1429 | }
1430 | ],
1431 | "source": [
1432 |     "# 降维后:查看训练集第一个样本投影到二维后的结果(注意:这是训练集样本 X_train_lda[0])\n",
1433 | "X_train_lda[0]"
1434 | ]
1435 | },
1436 | {
1437 | "cell_type": "code",
1438 | "execution_count": 68,
1439 | "metadata": {},
1440 | "outputs": [
1441 | {
1442 | "data": {
1443 | "image/png": "\n",
1444 | "text/plain": [
1445 | ""
1446 | ]
1447 | },
1448 | "metadata": {
1449 | "needs_background": "light"
1450 | },
1451 | "output_type": "display_data"
1452 | }
1453 | ],
1454 | "source": [
1455 | "# 结果绘制\n",
1456 | "colors = ['r', 'b', 'g']\n",
1457 | "markers = ['s', 'x', 'o']\n",
1458 | "\n",
1459 | "for l, c, m in zip(np.unique(y_train), colors, markers):\n",
1460 | " plt.scatter(X_train_lda[y_train == l, 0],\n",
1461 | " X_train_lda[y_train == l, 1] * (-1),\n",
1462 | " c=c, label=l, marker=m)\n",
1463 | "\n",
1464 | "plt.xlabel('LD 1')\n",
1465 | "plt.ylabel('LD 2')\n",
1466 | "plt.legend(loc='lower right')\n",
1467 | "plt.show()"
1468 | ]
1469 | },
1470 | {
1471 | "cell_type": "markdown",
1472 | "metadata": {},
1473 | "source": [
1474 | "## 使用sklearn实现LDA并进行LR分类"
1475 | ]
1476 | },
1477 | {
1478 | "cell_type": "code",
1479 | "execution_count": 69,
1480 | "metadata": {},
1481 | "outputs": [],
1482 | "source": [
1483 | "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA\n",
1484 | "# 实例化\n",
1485 | "lda = LDA(n_components=2)"
1486 | ]
1487 | },
1488 | {
1489 | "cell_type": "code",
1490 | "execution_count": 70,
1491 | "metadata": {},
1492 | "outputs": [],
1493 | "source": [
1494 | "# 对训练数据进行LDA处理\n",
1495 | "X_train_lda = lda.fit_transform(X_train_std, y_train)"
1496 | ]
1497 | },
1498 | {
1499 | "cell_type": "code",
1500 | "execution_count": 73,
1501 | "metadata": {},
1502 | "outputs": [
1503 | {
1504 | "data": {
1505 | "text/plain": [
1506 | "array([2.9646, 1.157 ])"
1507 | ]
1508 | },
1509 | "execution_count": 73,
1510 | "metadata": {},
1511 | "output_type": "execute_result"
1512 | }
1513 | ],
1514 | "source": [
1515 | "X_train_lda[0]"
1516 | ]
1517 | },
1518 | {
1519 | "cell_type": "code",
1520 | "execution_count": 71,
1521 | "metadata": {},
1522 | "outputs": [
1523 | {
1524 | "name": "stderr",
1525 | "output_type": "stream",
1526 | "text": [
1527 | "/Users/yaoxiaoying/.py3virtualEnv/ai/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
1528 | " FutureWarning)\n",
1529 | "/Users/yaoxiaoying/.py3virtualEnv/ai/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
1530 | " \"this warning.\", FutureWarning)\n"
1531 | ]
1532 | }
1533 | ],
1534 | "source": [
1535 | "# 实例化逻辑回归\n",
1536 | "lr = LogisticRegression()\n",
1537 | "# 训练\n",
1538 | "lr = lr.fit(X_train_lda, y_train)"
1539 | ]
1540 | },
1541 | {
1542 | "cell_type": "code",
1543 | "execution_count": 74,
1544 | "metadata": {},
1545 | "outputs": [
1546 | {
1547 | "name": "stderr",
1548 | "output_type": "stream",
1549 | "text": [
1550 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n",
1551 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n",
1552 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n"
1553 | ]
1554 | },
1555 | {
1556 | "data": {
1557 | "image/png": "\n",
1558 | "text/plain": [
1559 | ""
1560 | ]
1561 | },
1562 | "metadata": {
1563 | "needs_background": "light"
1564 | },
1565 | "output_type": "display_data"
1566 | }
1567 | ],
1568 | "source": [
1569 | "# 训练数据结果\n",
1570 | "plot_decision_regions(X_train_lda, y_train, classifier=lr)\n",
1571 | "plt.xlabel('LD 1')\n",
1572 | "plt.ylabel('LD 2')\n",
1573 | "plt.legend(loc='lower left')\n",
1574 | "plt.tight_layout()\n",
1575 | "plt.show()"
1576 | ]
1577 | },
1578 | {
1579 | "cell_type": "code",
1580 | "execution_count": 75,
1581 | "metadata": {},
1582 | "outputs": [
1583 | {
1584 | "name": "stderr",
1585 | "output_type": "stream",
1586 | "text": [
1587 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n",
1588 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n",
1589 | "'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.\n"
1590 | ]
1591 | },
1592 | {
1593 | "data": {
1594 | "image/png": "\n",
1595 | "text/plain": [
1596 | ""
1597 | ]
1598 | },
1599 | "metadata": {
1600 | "needs_background": "light"
1601 | },
1602 | "output_type": "display_data"
1603 | }
1604 | ],
1605 | "source": [
1606 | "# 测试数据结果\n",
1607 | "X_test_lda = lda.transform(X_test_std)\n",
1608 | "\n",
1609 | "plot_decision_regions(X_test_lda, y_test, classifier=lr)\n",
1610 | "plt.xlabel('LD 1')\n",
1611 | "plt.ylabel('LD 2')\n",
1612 | "plt.legend(loc='lower left')\n",
1613 | "plt.tight_layout()\n",
1614 | "plt.show()"
1615 | ]
1616 | },
1617 | {
1618 | "cell_type": "markdown",
1619 | "metadata": {},
1620 | "source": [
1621 |     "# Kernel PCA"
1622 | ]
1623 | },
1624 | {
1625 | "cell_type": "markdown",
1626 | "metadata": {},
1627 | "source": [
1628 |     "对于线性不可分的数据,在降维时可以使用带核函数的PCA(Kernel PCA):通过核函数将数据隐式映射到高维空间,再在该空间中进行PCA,使原本线性不可分的数据在降维后变得线性可分。"
1629 | ]
1630 | },
1631 | {
1632 | "cell_type": "code",
1633 | "execution_count": 80,
1634 | "metadata": {},
1635 | "outputs": [
1636 | {
1637 | "data": {
1638 | "image/png": "\n",
1639 | "text/plain": [
1640 | ""
1641 | ]
1642 | },
1643 | "metadata": {
1644 | "needs_background": "light"
1645 | },
1646 | "output_type": "display_data"
1647 | }
1648 | ],
1649 | "source": [
1650 | "from sklearn.decomposition import KernelPCA\n",
1651 | "from sklearn.datasets import make_moons\n",
1652 | "# 数据生成\n",
1653 | "X, y = make_moons(n_samples=100, random_state=123)\n",
1654 | "# 实例化\n",
1655 | "scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)\n",
1656 | "# 数据处理\n",
1657 | "X_skernpca = scikit_kpca.fit_transform(X)\n",
1658 | "# 结果绘制\n",
1659 | "fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(7, 3))\n",
1660 | "ax[0].scatter(X[:,0],X[:,1],marker='o',c=y) \n",
1661 | "\n",
1662 | "ax[1].scatter(X_skernpca[y == 0, 0], X_skernpca[y == 0, 1],\n",
1663 | " color='red', marker='^', alpha=0.5)\n",
1664 | "ax[1].scatter(X_skernpca[y == 1, 0], X_skernpca[y == 1, 1],\n",
1665 | " color='blue', marker='o', alpha=0.5)\n",
1666 | "\n",
1667 | "plt.xlabel('PC1')\n",
1668 | "plt.ylabel('PC2')\n",
1669 | "plt.show()"
1670 | ]
1671 | },
1672 | {
1673 | "cell_type": "code",
1674 | "execution_count": null,
1675 | "metadata": {},
1676 | "outputs": [],
1677 | "source": []
1678 | }
1679 | ],
1680 | "metadata": {
1681 | "kernelspec": {
1682 | "display_name": "Python 3",
1683 | "language": "python",
1684 | "name": "python3"
1685 | },
1686 | "language_info": {
1687 | "codemirror_mode": {
1688 | "name": "ipython",
1689 | "version": 3
1690 | },
1691 | "file_extension": ".py",
1692 | "mimetype": "text/x-python",
1693 | "name": "python",
1694 | "nbconvert_exporter": "python",
1695 | "pygments_lexer": "ipython3",
1696 | "version": "3.7.4"
1697 | },
1698 | "toc": {
1699 | "base_numbering": 1,
1700 | "nav_menu": {},
1701 | "number_sections": true,
1702 | "sideBar": true,
1703 | "skip_h1_title": false,
1704 | "title_cell": "Table of Contents",
1705 | "title_sidebar": "Contents",
1706 | "toc_cell": false,
1707 | "toc_position": {
1708 | "height": "calc(100% - 180px)",
1709 | "left": "10px",
1710 | "top": "150px",
1711 | "width": "274px"
1712 | },
1713 | "toc_section_display": true,
1714 | "toc_window_display": true
1715 | }
1716 | },
1717 | "nbformat": 4,
1718 | "nbformat_minor": 2
1719 | }
1720 |
--------------------------------------------------------------------------------