├── FSVM-CIL Fuzzy Support Vector Machines.pdf
├── README.md
├── LICENSE
├── haberman.csv
├── modifiedhaberman.csv
├── Preprocessing PIma_indians.ipynb
├── Preprocessing pagebreak.ipynb
├── Preprocessing Haberman.ipynb
├── Preprocessing Abalone.ipynb
├── pima-indians-diabetes.csv
├── FUZZY SVM Haberman.ipynb
├── FUZZY SVM.ipynb
└── FUZZY SVM Pageblock.ipynb
/FSVM-CIL Fuzzy Support Vector Machines.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adityasahugit/Fuzzy-SVM/HEAD/FSVM-CIL Fuzzy Support Vector Machines.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Fuzzy-SVM
2 | 
3 | Based on the research paper “FSVM-CIL: Fuzzy Support Vector Machines for Class Imbalance Learning” by Rukshan Batuwita and Vasile Palade, which applies fuzzy membership values to SVM training.
4 | 
5 | It is used to improve SVM performance on imbalanced datasets, i.e. datasets that do not have a 1:1 ratio of instances between the classes.
6 | 
7 | # Datasets used
8 | 
9 | The Pima Indians diabetes dataset has a 35:65 class ratio.
10 | The Haberman, Abalone and Page Blocks datasets each have a different class ratio.
11 | 
12 | # Notebooks
13 | 
14 | The preprocessing notebooks build a new version of each dataset so that the class ratio stays the same after splitting into training and test sets (see the sketches below).
15 | 
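16 | # Example sketches
17 | 
18 | A minimal sketch of the ratio-preserving split the preprocessing notebooks build by hand: minority and majority rows are chunked separately and then recombined, so every chunk keeps the original class ratio. This is an illustration only, not the notebooks' exact code (for Pima they index fixed-size blocks of roughly 53 minority plus 100 majority rows per chunk).
19 | 
20 | ```python
21 | import numpy as np
22 | 
23 | def ratio_preserving_chunks(data, label_col=-1, n_chunks=5):
24 |     """Split `data` into n_chunks pieces that all keep the minority:majority ratio."""
25 |     data = np.asarray(data, dtype=float)
26 |     minority = data[data[:, label_col] == 1]    # minority class is labelled +1 in the notebooks
27 |     majority = data[data[:, label_col] == -1]   # majority class is labelled -1
28 |     min_parts = np.array_split(minority, n_chunks)
29 |     maj_parts = np.array_split(majority, n_chunks)
30 |     # stack one minority part on top of one majority part per chunk
31 |     return [np.vstack((mn, mj)) for mn, mj in zip(min_parts, maj_parts)]
32 | ```
33 | 
34 | And a minimal sketch of the FSVM-CIL membership idea the FUZZY SVM notebooks implement: each training example gets a membership m = f(x) * r, where f decays with a within-class distance (the notebooks use 2 / (1 + beta * d) with a distance based on the separating hyperplane) and r down-weights the majority class by the imbalance ratio. The memberships then scale each example's misclassification cost, so minority errors stay expensive while majority errors become cheaper. The distance-to-class-centre version below is an illustration, not the notebooks' exact code.
35 | 
36 | ```python
37 | import numpy as np
38 | 
39 | def fsvm_cil_memberships(X, y, beta=0.8):
40 |     """Membership m_i = f(x_i) * r: a decaying within-class weight times a class-imbalance weight."""
41 |     X, y = np.asarray(X, dtype=float), np.asarray(y)
42 |     n_min, n_maj = np.sum(y == 1), np.sum(y == -1)
43 |     m = np.zeros(len(y))
44 |     for label, r in ((1, 1.0), (-1, n_min / n_maj)):  # r = 1 for minority, minority/majority for majority
45 |         idx = y == label
46 |         centre = X[idx].mean(axis=0)
47 |         d = np.linalg.norm(X[idx] - centre, axis=1)   # distance of each point to its class centre
48 |         m[idx] = 2.0 / (1.0 + beta * d) * r           # decaying membership, as in the notebooks
49 |     return m
50 | ```
51 | 
52 | In FSVM-CIL the memberships enter the SVM box constraints: example i gets an upper bound of m_i * C on its Lagrange multiplier instead of C.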
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 ADITYA SAHU
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/haberman.csv:
--------------------------------------------------------------------------------
1 | 30,64,1,1
2 | 30,62,3,1
3 | 30,65,0,1
4 | 31,59,2,1
5 | 31,65,4,1
6 | 33,58,10,1
7 | 33,60,0,1
8 | 34,59,0,2
9 | 34,66,9,2
10 | 34,58,30,1
11 | 34,60,1,1
12 | 34,61,10,1
13 | 34,67,7,1
14 | 34,60,0,1
15 | 35,64,13,1
16 | 35,63,0,1
17 | 36,60,1,1
18 | 36,69,0,1
19 | 37,60,0,1
20 | 37,63,0,1
21 | 37,58,0,1
22 | 37,59,6,1
23 | 37,60,15,1
24 | 37,63,0,1
25 | 38,69,21,2
26 | 38,59,2,1
27 | 38,60,0,1
28 | 38,60,0,1
29 | 38,62,3,1
30 | 38,64,1,1
31 | 38,66,0,1
32 | 38,66,11,1
33 | 38,60,1,1
34 | 38,67,5,1
35 | 39,66,0,2
36 | 39,63,0,1
37 | 39,67,0,1
38 | 39,58,0,1
39 | 39,59,2,1
40 | 39,63,4,1
41 | 40,58,2,1
42 | 40,58,0,1
43 | 40,65,0,1
44 | 41,60,23,2
45 | 41,64,0,2
46 | 41,67,0,2
47 | 41,58,0,1
48 | 41,59,8,1
49 | 41,59,0,1
50 | 41,64,0,1
51 | 41,69,8,1
52 | 41,65,0,1
53 | 41,65,0,1
54 | 42,69,1,2
55 | 42,59,0,2
56 | 42,58,0,1
57 | 42,60,1,1
58 | 42,59,2,1
59 | 42,61,4,1
60 | 42,62,20,1
61 | 42,65,0,1
62 | 42,63,1,1
63 | 43,58,52,2
64 | 43,59,2,2
65 | 43,64,0,2
66 | 43,64,0,2
67 | 43,63,14,1
68 | 43,64,2,1
69 | 43,64,3,1
70 | 43,60,0,1
71 | 43,63,2,1
72 | 43,65,0,1
73 | 43,66,4,1
74 | 44,64,6,2
75 | 44,58,9,2
76 | 44,63,19,2
77 | 44,61,0,1
78 | 44,63,1,1
79 | 44,61,0,1
80 | 44,67,16,1
81 | 45,65,6,2
82 | 45,66,0,2
83 | 45,67,1,2
84 | 45,60,0,1
85 | 45,67,0,1
86 | 45,59,14,1
87 | 45,64,0,1
88 | 45,68,0,1
89 | 45,67,1,1
90 | 46,58,2,2
91 | 46,69,3,2
92 | 46,62,5,2
93 | 46,65,20,2
94 | 46,62,0,1
95 | 46,58,3,1
96 | 46,63,0,1
97 | 47,63,23,2
98 | 47,62,0,2
99 | 47,65,0,2
100 | 47,61,0,1
101 | 47,63,6,1
102 | 47,66,0,1
103 | 47,67,0,1
104 | 47,58,3,1
105 | 47,60,4,1
106 | 47,68,4,1
107 | 47,66,12,1
108 | 48,58,11,2
109 | 48,58,11,2
110 | 48,67,7,2
111 | 48,61,8,1
112 | 48,62,2,1
113 | 48,64,0,1
114 | 48,66,0,1
115 | 49,63,0,2
116 | 49,64,10,2
117 | 49,61,1,1
118 | 49,62,0,1
119 | 49,66,0,1
120 | 49,60,1,1
121 | 49,62,1,1
122 | 49,63,3,1
123 | 49,61,0,1
124 | 49,67,1,1
125 | 50,63,13,2
126 | 50,64,0,2
127 | 50,59,0,1
128 | 50,61,6,1
129 | 50,61,0,1
130 | 50,63,1,1
131 | 50,58,1,1
132 | 50,59,2,1
133 | 50,61,0,1
134 | 50,64,0,1
135 | 50,65,4,1
136 | 50,66,1,1
137 | 51,59,13,2
138 | 51,59,3,2
139 | 51,64,7,1
140 | 51,59,1,1
141 | 51,65,0,1
142 | 51,66,1,1
143 | 52,69,3,2
144 | 52,59,2,2
145 | 52,62,3,2
146 | 52,66,4,2
147 | 52,61,0,1
148 | 52,63,4,1
149 | 52,69,0,1
150 | 52,60,4,1
151 | 52,60,5,1
152 | 52,62,0,1
153 | 52,62,1,1
154 | 52,64,0,1
155 | 52,65,0,1
156 | 52,68,0,1
157 | 53,58,4,2
158 | 53,65,1,2
159 | 53,59,3,2
160 | 53,60,9,2
161 | 53,63,24,2
162 | 53,65,12,2
163 | 53,58,1,1
164 | 53,60,1,1
165 | 53,60,2,1
166 | 53,61,1,1
167 | 53,63,0,1
168 | 54,60,11,2
169 | 54,65,23,2
170 | 54,65,5,2
171 | 54,68,7,2
172 | 54,59,7,1
173 | 54,60,3,1
174 | 54,66,0,1
175 | 54,67,46,1
176 | 54,62,0,1
177 | 54,69,7,1
178 | 54,63,19,1
179 | 54,58,1,1
180 | 54,62,0,1
181 | 55,63,6,2
182 | 55,68,15,2
183 | 55,58,1,1
184 | 55,58,0,1
185 | 55,58,1,1
186 | 55,66,18,1
187 | 55,66,0,1
188 | 55,69,3,1
189 | 55,69,22,1
190 | 55,67,1,1
191 | 56,65,9,2
192 | 56,66,3,2
193 | 56,60,0,1
194 | 56,66,2,1
195 | 56,66,1,1
196 | 56,67,0,1
197 | 56,60,0,1
198 | 57,61,5,2
199 | 57,62,14,2
200 | 57,64,1,2
201 | 57,64,9,1
202 | 57,69,0,1
203 | 57,61,0,1
204 | 57,62,0,1
205 | 57,63,0,1
206 | 57,64,0,1
207 | 57,64,0,1
208 | 57,67,0,1
209 | 58,59,0,1
210 | 58,60,3,1
211 | 58,61,1,1
212 | 58,67,0,1
213 | 58,58,0,1
214 | 58,58,3,1
215 | 58,61,2,1
216 | 59,62,35,2
217 | 59,60,0,1
218 | 59,63,0,1
219 | 59,64,1,1
220 | 59,64,4,1
221 | 59,64,0,1
222 | 59,64,7,1
223 | 59,67,3,1
224 | 60,59,17,2
225 | 60,65,0,2
226 | 60,61,1,1
227 | 60,67,2,1
228 | 60,61,25,1
229 | 60,64,0,1
230 | 61,62,5,2
231 | 61,65,0,2
232 | 61,68,1,2
233 | 61,59,0,1
234 | 61,59,0,1
235 | 61,64,0,1
236 | 61,65,8,1
237 | 61,68,0,1
238 | 61,59,0,1
239 | 62,59,13,2
240 | 62,58,0,2
241 | 62,65,19,2
242 | 62,62,6,1
243 | 62,66,0,1
244 | 62,66,0,1
245 | 62,58,0,1
246 | 63,60,1,2
247 | 63,61,0,1
248 | 63,62,0,1
249 | 63,63,0,1
250 | 63,63,0,1
251 | 63,66,0,1
252 | 63,61,9,1
253 | 63,61,28,1
254 | 64,58,0,1
255 | 64,65,22,1
256 | 64,66,0,1
257 | 64,61,0,1
258 | 64,68,0,1
259 | 65,58,0,2
260 | 65,61,2,2
261 | 65,62,22,2
262 | 65,66,15,2
263 | 65,58,0,1
264 | 65,64,0,1
265 | 65,67,0,1
266 | 65,59,2,1
267 | 65,64,0,1
268 | 65,67,1,1
269 | 66,58,0,2
270 | 66,61,13,2
271 | 66,58,0,1
272 | 66,58,1,1
273 | 66,68,0,1
274 | 67,64,8,2
275 | 67,63,1,2
276 | 67,66,0,1
277 | 67,66,0,1
278 | 67,61,0,1
279 | 67,65,0,1
280 | 68,67,0,1
281 | 68,68,0,1
282 | 69,67,8,2
283 | 69,60,0,1
284 | 69,65,0,1
285 | 69,66,0,1
286 | 70,58,0,2
287 | 70,58,4,2
288 | 70,66,14,1
289 | 70,67,0,1
290 | 70,68,0,1
291 | 70,59,8,1
292 | 70,63,0,1
293 | 71,68,2,1
294 | 72,63,0,2
295 | 72,58,0,1
296 | 72,64,0,1
297 | 72,67,3,1
298 | 73,62,0,1
299 | 73,68,0,1
300 | 74,65,3,2
301 | 74,63,0,1
302 | 75,62,1,1
303 | 76,67,0,1
304 | 77,65,3,1
305 | 78,65,1,2
306 | 83,58,2,2
307 |
--------------------------------------------------------------------------------
/modifiedhaberman.csv:
--------------------------------------------------------------------------------
1 | 34.0,59.0,0.0,1.0
2 | 34.0,66.0,9.0,1.0
3 | 38.0,69.0,21.0,1.0
4 | 39.0,66.0,0.0,1.0
5 | 41.0,60.0,23.0,1.0
6 | 41.0,64.0,0.0,1.0
7 | 41.0,67.0,0.0,1.0
8 | 42.0,69.0,1.0,1.0
9 | 42.0,59.0,0.0,1.0
10 | 43.0,58.0,52.0,1.0
11 | 43.0,59.0,2.0,1.0
12 | 43.0,64.0,0.0,1.0
13 | 43.0,64.0,0.0,1.0
14 | 44.0,64.0,6.0,1.0
15 | 44.0,58.0,9.0,1.0
16 | 44.0,63.0,19.0,1.0
17 | 30.0,64.0,1.0,-1.0
18 | 30.0,62.0,3.0,-1.0
19 | 30.0,65.0,0.0,-1.0
20 | 31.0,59.0,2.0,-1.0
21 | 31.0,65.0,4.0,-1.0
22 | 33.0,58.0,10.0,-1.0
23 | 33.0,60.0,0.0,-1.0
24 | 34.0,58.0,30.0,-1.0
25 | 34.0,60.0,1.0,-1.0
26 | 34.0,61.0,10.0,-1.0
27 | 34.0,67.0,7.0,-1.0
28 | 34.0,60.0,0.0,-1.0
29 | 35.0,64.0,13.0,-1.0
30 | 35.0,63.0,0.0,-1.0
31 | 36.0,60.0,1.0,-1.0
32 | 36.0,69.0,0.0,-1.0
33 | 37.0,60.0,0.0,-1.0
34 | 37.0,63.0,0.0,-1.0
35 | 37.0,58.0,0.0,-1.0
36 | 37.0,59.0,6.0,-1.0
37 | 37.0,60.0,15.0,-1.0
38 | 37.0,63.0,0.0,-1.0
39 | 38.0,59.0,2.0,-1.0
40 | 38.0,60.0,0.0,-1.0
41 | 38.0,60.0,0.0,-1.0
42 | 38.0,62.0,3.0,-1.0
43 | 38.0,64.0,1.0,-1.0
44 | 38.0,66.0,0.0,-1.0
45 | 38.0,66.0,11.0,-1.0
46 | 38.0,60.0,1.0,-1.0
47 | 38.0,67.0,5.0,-1.0
48 | 39.0,63.0,0.0,-1.0
49 | 39.0,67.0,0.0,-1.0
50 | 39.0,58.0,0.0,-1.0
51 | 39.0,59.0,2.0,-1.0
52 | 39.0,63.0,4.0,-1.0
53 | 40.0,58.0,2.0,-1.0
54 | 40.0,58.0,0.0,-1.0
55 | 40.0,65.0,0.0,-1.0
56 | 41.0,58.0,0.0,-1.0
57 | 41.0,59.0,8.0,-1.0
58 | 41.0,59.0,0.0,-1.0
59 | 41.0,64.0,0.0,-1.0
60 | 41.0,69.0,8.0,-1.0
61 | 41.0,65.0,0.0,-1.0
62 | 45.0,65.0,6.0,1.0
63 | 45.0,66.0,0.0,1.0
64 | 45.0,67.0,1.0,1.0
65 | 46.0,58.0,2.0,1.0
66 | 46.0,69.0,3.0,1.0
67 | 46.0,62.0,5.0,1.0
68 | 46.0,65.0,20.0,1.0
69 | 47.0,63.0,23.0,1.0
70 | 47.0,62.0,0.0,1.0
71 | 47.0,65.0,0.0,1.0
72 | 48.0,58.0,11.0,1.0
73 | 48.0,58.0,11.0,1.0
74 | 48.0,67.0,7.0,1.0
75 | 49.0,63.0,0.0,1.0
76 | 49.0,64.0,10.0,1.0
77 | 50.0,63.0,13.0,1.0
78 | 41.0,65.0,0.0,-1.0
79 | 42.0,58.0,0.0,-1.0
80 | 42.0,60.0,1.0,-1.0
81 | 42.0,59.0,2.0,-1.0
82 | 42.0,61.0,4.0,-1.0
83 | 42.0,62.0,20.0,-1.0
84 | 42.0,65.0,0.0,-1.0
85 | 42.0,63.0,1.0,-1.0
86 | 43.0,63.0,14.0,-1.0
87 | 43.0,64.0,2.0,-1.0
88 | 43.0,64.0,3.0,-1.0
89 | 43.0,60.0,0.0,-1.0
90 | 43.0,63.0,2.0,-1.0
91 | 43.0,65.0,0.0,-1.0
92 | 43.0,66.0,4.0,-1.0
93 | 44.0,61.0,0.0,-1.0
94 | 44.0,63.0,1.0,-1.0
95 | 44.0,61.0,0.0,-1.0
96 | 44.0,67.0,16.0,-1.0
97 | 45.0,60.0,0.0,-1.0
98 | 45.0,67.0,0.0,-1.0
99 | 45.0,59.0,14.0,-1.0
100 | 45.0,64.0,0.0,-1.0
101 | 45.0,68.0,0.0,-1.0
102 | 45.0,67.0,1.0,-1.0
103 | 46.0,62.0,0.0,-1.0
104 | 46.0,58.0,3.0,-1.0
105 | 46.0,63.0,0.0,-1.0
106 | 47.0,61.0,0.0,-1.0
107 | 47.0,63.0,6.0,-1.0
108 | 47.0,66.0,0.0,-1.0
109 | 47.0,67.0,0.0,-1.0
110 | 47.0,58.0,3.0,-1.0
111 | 47.0,60.0,4.0,-1.0
112 | 47.0,68.0,4.0,-1.0
113 | 47.0,66.0,12.0,-1.0
114 | 48.0,61.0,8.0,-1.0
115 | 48.0,62.0,2.0,-1.0
116 | 48.0,64.0,0.0,-1.0
117 | 48.0,66.0,0.0,-1.0
118 | 49.0,61.0,1.0,-1.0
119 | 49.0,62.0,0.0,-1.0
120 | 49.0,66.0,0.0,-1.0
121 | 49.0,60.0,1.0,-1.0
122 | 49.0,62.0,1.0,-1.0
123 | 50.0,64.0,0.0,1.0
124 | 51.0,59.0,13.0,1.0
125 | 51.0,59.0,3.0,1.0
126 | 52.0,69.0,3.0,1.0
127 | 52.0,59.0,2.0,1.0
128 | 52.0,62.0,3.0,1.0
129 | 52.0,66.0,4.0,1.0
130 | 53.0,58.0,4.0,1.0
131 | 53.0,65.0,1.0,1.0
132 | 53.0,59.0,3.0,1.0
133 | 53.0,60.0,9.0,1.0
134 | 53.0,63.0,24.0,1.0
135 | 53.0,65.0,12.0,1.0
136 | 54.0,60.0,11.0,1.0
137 | 54.0,65.0,23.0,1.0
138 | 54.0,65.0,5.0,1.0
139 | 49.0,63.0,3.0,-1.0
140 | 49.0,61.0,0.0,-1.0
141 | 49.0,67.0,1.0,-1.0
142 | 50.0,59.0,0.0,-1.0
143 | 50.0,61.0,6.0,-1.0
144 | 50.0,61.0,0.0,-1.0
145 | 50.0,63.0,1.0,-1.0
146 | 50.0,58.0,1.0,-1.0
147 | 50.0,59.0,2.0,-1.0
148 | 50.0,61.0,0.0,-1.0
149 | 50.0,64.0,0.0,-1.0
150 | 50.0,65.0,4.0,-1.0
151 | 50.0,66.0,1.0,-1.0
152 | 51.0,64.0,7.0,-1.0
153 | 51.0,59.0,1.0,-1.0
154 | 51.0,65.0,0.0,-1.0
155 | 51.0,66.0,1.0,-1.0
156 | 52.0,61.0,0.0,-1.0
157 | 52.0,63.0,4.0,-1.0
158 | 52.0,69.0,0.0,-1.0
159 | 52.0,60.0,4.0,-1.0
160 | 52.0,60.0,5.0,-1.0
161 | 52.0,62.0,0.0,-1.0
162 | 52.0,62.0,1.0,-1.0
163 | 52.0,64.0,0.0,-1.0
164 | 52.0,65.0,0.0,-1.0
165 | 52.0,68.0,0.0,-1.0
166 | 53.0,58.0,1.0,-1.0
167 | 53.0,60.0,1.0,-1.0
168 | 53.0,60.0,2.0,-1.0
169 | 53.0,61.0,1.0,-1.0
170 | 53.0,63.0,0.0,-1.0
171 | 54.0,59.0,7.0,-1.0
172 | 54.0,60.0,3.0,-1.0
173 | 54.0,66.0,0.0,-1.0
174 | 54.0,67.0,46.0,-1.0
175 | 54.0,62.0,0.0,-1.0
176 | 54.0,69.0,7.0,-1.0
177 | 54.0,63.0,19.0,-1.0
178 | 54.0,58.0,1.0,-1.0
179 | 54.0,62.0,0.0,-1.0
180 | 55.0,58.0,1.0,-1.0
181 | 55.0,58.0,0.0,-1.0
182 | 55.0,58.0,1.0,-1.0
183 | 55.0,66.0,18.0,-1.0
184 | 54.0,68.0,7.0,1.0
185 | 55.0,63.0,6.0,1.0
186 | 55.0,68.0,15.0,1.0
187 | 56.0,65.0,9.0,1.0
188 | 56.0,66.0,3.0,1.0
189 | 57.0,61.0,5.0,1.0
190 | 57.0,62.0,14.0,1.0
191 | 57.0,64.0,1.0,1.0
192 | 59.0,62.0,35.0,1.0
193 | 60.0,59.0,17.0,1.0
194 | 60.0,65.0,0.0,1.0
195 | 61.0,62.0,5.0,1.0
196 | 61.0,65.0,0.0,1.0
197 | 61.0,68.0,1.0,1.0
198 | 62.0,59.0,13.0,1.0
199 | 62.0,58.0,0.0,1.0
200 | 55.0,66.0,0.0,-1.0
201 | 55.0,69.0,3.0,-1.0
202 | 55.0,69.0,22.0,-1.0
203 | 55.0,67.0,1.0,-1.0
204 | 56.0,60.0,0.0,-1.0
205 | 56.0,66.0,2.0,-1.0
206 | 56.0,66.0,1.0,-1.0
207 | 56.0,67.0,0.0,-1.0
208 | 56.0,60.0,0.0,-1.0
209 | 57.0,64.0,9.0,-1.0
210 | 57.0,69.0,0.0,-1.0
211 | 57.0,61.0,0.0,-1.0
212 | 57.0,62.0,0.0,-1.0
213 | 57.0,63.0,0.0,-1.0
214 | 57.0,64.0,0.0,-1.0
215 | 57.0,64.0,0.0,-1.0
216 | 57.0,67.0,0.0,-1.0
217 | 58.0,59.0,0.0,-1.0
218 | 58.0,60.0,3.0,-1.0
219 | 58.0,61.0,1.0,-1.0
220 | 58.0,67.0,0.0,-1.0
221 | 58.0,58.0,0.0,-1.0
222 | 58.0,58.0,3.0,-1.0
223 | 58.0,61.0,2.0,-1.0
224 | 59.0,60.0,0.0,-1.0
225 | 59.0,63.0,0.0,-1.0
226 | 59.0,64.0,1.0,-1.0
227 | 59.0,64.0,4.0,-1.0
228 | 59.0,64.0,0.0,-1.0
229 | 59.0,64.0,7.0,-1.0
230 | 59.0,67.0,3.0,-1.0
231 | 60.0,61.0,1.0,-1.0
232 | 60.0,67.0,2.0,-1.0
233 | 60.0,61.0,25.0,-1.0
234 | 60.0,64.0,0.0,-1.0
235 | 61.0,59.0,0.0,-1.0
236 | 61.0,59.0,0.0,-1.0
237 | 61.0,64.0,0.0,-1.0
238 | 61.0,65.0,8.0,-1.0
239 | 61.0,68.0,0.0,-1.0
240 | 61.0,59.0,0.0,-1.0
241 | 62.0,62.0,6.0,-1.0
242 | 62.0,66.0,0.0,-1.0
243 | 62.0,66.0,0.0,-1.0
244 | 62.0,58.0,0.0,-1.0
245 | 62.0,65.0,19.0,1.0
246 | 63.0,60.0,1.0,1.0
247 | 65.0,58.0,0.0,1.0
248 | 65.0,61.0,2.0,1.0
249 | 65.0,62.0,22.0,1.0
250 | 65.0,66.0,15.0,1.0
251 | 66.0,58.0,0.0,1.0
252 | 66.0,61.0,13.0,1.0
253 | 67.0,64.0,8.0,1.0
254 | 67.0,63.0,1.0,1.0
255 | 69.0,67.0,8.0,1.0
256 | 70.0,58.0,0.0,1.0
257 | 70.0,58.0,4.0,1.0
258 | 72.0,63.0,0.0,1.0
259 | 74.0,65.0,3.0,1.0
260 | 78.0,65.0,1.0,1.0
261 | 83.0,58.0,2.0,1.0
262 | 63.0,61.0,0.0,-1.0
263 | 63.0,62.0,0.0,-1.0
264 | 63.0,63.0,0.0,-1.0
265 | 63.0,63.0,0.0,-1.0
266 | 63.0,66.0,0.0,-1.0
267 | 63.0,61.0,9.0,-1.0
268 | 63.0,61.0,28.0,-1.0
269 | 64.0,58.0,0.0,-1.0
270 | 64.0,65.0,22.0,-1.0
271 | 64.0,66.0,0.0,-1.0
272 | 64.0,61.0,0.0,-1.0
273 | 64.0,68.0,0.0,-1.0
274 | 65.0,58.0,0.0,-1.0
275 | 65.0,64.0,0.0,-1.0
276 | 65.0,67.0,0.0,-1.0
277 | 65.0,59.0,2.0,-1.0
278 | 65.0,64.0,0.0,-1.0
279 | 65.0,67.0,1.0,-1.0
280 | 66.0,58.0,0.0,-1.0
281 | 66.0,58.0,1.0,-1.0
282 | 66.0,68.0,0.0,-1.0
283 | 67.0,66.0,0.0,-1.0
284 | 67.0,66.0,0.0,-1.0
285 | 67.0,61.0,0.0,-1.0
286 | 67.0,65.0,0.0,-1.0
287 | 68.0,67.0,0.0,-1.0
288 | 68.0,68.0,0.0,-1.0
289 | 69.0,60.0,0.0,-1.0
290 | 69.0,65.0,0.0,-1.0
291 | 69.0,66.0,0.0,-1.0
292 | 70.0,66.0,14.0,-1.0
293 | 70.0,67.0,0.0,-1.0
294 | 70.0,68.0,0.0,-1.0
295 | 70.0,59.0,8.0,-1.0
296 | 70.0,63.0,0.0,-1.0
297 | 71.0,68.0,2.0,-1.0
298 | 72.0,58.0,0.0,-1.0
299 | 72.0,64.0,0.0,-1.0
300 | 72.0,67.0,3.0,-1.0
301 | 73.0,62.0,0.0,-1.0
302 | 73.0,68.0,0.0,-1.0
303 | 74.0,63.0,0.0,-1.0
304 | 75.0,62.0,1.0,-1.0
305 | 76.0,67.0,0.0,-1.0
306 | 77.0,65.0,3.0,-1.0
307 |
--------------------------------------------------------------------------------
/Preprocessing PIma_indians.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ADITYA SAHU"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Pre processing of Pima indians dataset to make imbalance ratio of training and testing same"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import numpy as np\n",
24 | "from numpy import linalg\n",
25 | "import pandas as pd"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "data": {
35 | "text/html": [
36 | "
\n",
37 | "\n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " | \n",
54 | " 1 | \n",
55 | " 2 | \n",
56 | " 3 | \n",
57 | " 4 | \n",
58 | " 5 | \n",
59 | " 6 | \n",
60 | " 7 | \n",
61 | " 8 | \n",
62 | " 9 | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 0 | \n",
68 | " 6 | \n",
69 | " 148 | \n",
70 | " 72 | \n",
71 | " 35 | \n",
72 | " 0 | \n",
73 | " 33.6 | \n",
74 | " 0.627 | \n",
75 | " 50 | \n",
76 | " 1 | \n",
77 | "
\n",
78 | " \n",
79 | " | 1 | \n",
80 | " 1 | \n",
81 | " 85 | \n",
82 | " 66 | \n",
83 | " 29 | \n",
84 | " 0 | \n",
85 | " 26.6 | \n",
86 | " 0.351 | \n",
87 | " 31 | \n",
88 | " 0 | \n",
89 | "
\n",
90 | " \n",
91 | " | 2 | \n",
92 | " 8 | \n",
93 | " 183 | \n",
94 | " 64 | \n",
95 | " 0 | \n",
96 | " 0 | \n",
97 | " 23.3 | \n",
98 | " 0.672 | \n",
99 | " 32 | \n",
100 | " 1 | \n",
101 | "
\n",
102 | " \n",
103 | " | 3 | \n",
104 | " 1 | \n",
105 | " 89 | \n",
106 | " 66 | \n",
107 | " 23 | \n",
108 | " 94 | \n",
109 | " 28.1 | \n",
110 | " 0.167 | \n",
111 | " 21 | \n",
112 | " 0 | \n",
113 | "
\n",
114 | " \n",
115 | " | 4 | \n",
116 | " 0 | \n",
117 | " 137 | \n",
118 | " 40 | \n",
119 | " 35 | \n",
120 | " 168 | \n",
121 | " 43.1 | \n",
122 | " 2.288 | \n",
123 | " 33 | \n",
124 | " 1 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " 1 2 3 4 5 6 7 8 9\n",
132 | "0 6 148 72 35 0 33.6 0.627 50 1\n",
133 | "1 1 85 66 29 0 26.6 0.351 31 0\n",
134 | "2 8 183 64 0 0 23.3 0.672 32 1\n",
135 | "3 1 89 66 23 94 28.1 0.167 21 0\n",
136 | "4 0 137 40 35 168 43.1 2.288 33 1"
137 | ]
138 | },
139 | "execution_count": 2,
140 | "metadata": {},
141 | "output_type": "execute_result"
142 | }
143 | ],
144 | "source": [
145 | "train = pd.read_csv(\"pima-indians-diabetes.csv\")\n",
146 | "train.head()"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 3,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "0 1\n",
159 | "1 -1\n",
160 | "2 1\n",
161 | "3 -1\n",
162 | "4 1\n",
163 | "5 -1\n",
164 | "6 1\n",
165 | "7 -1\n",
166 | "8 1\n",
167 | "9 1\n",
168 | "10 -1\n",
169 | "11 1\n",
170 | "12 -1\n",
171 | "13 1\n",
172 | "14 1\n",
173 | "15 1\n",
174 | "16 1\n",
175 | "17 1\n",
176 | "18 -1\n",
177 | "19 1\n",
178 | "20 -1\n",
179 | "21 -1\n",
180 | "22 1\n",
181 | "23 1\n",
182 | "24 1\n",
183 | "25 1\n",
184 | "26 1\n",
185 | "27 -1\n",
186 | "28 -1\n",
187 | "29 -1\n",
188 | " ..\n",
189 | "738 -1\n",
190 | "739 1\n",
191 | "740 1\n",
192 | "741 -1\n",
193 | "742 -1\n",
194 | "743 1\n",
195 | "744 -1\n",
196 | "745 -1\n",
197 | "746 1\n",
198 | "747 -1\n",
199 | "748 1\n",
200 | "749 1\n",
201 | "750 1\n",
202 | "751 -1\n",
203 | "752 -1\n",
204 | "753 1\n",
205 | "754 1\n",
206 | "755 1\n",
207 | "756 -1\n",
208 | "757 1\n",
209 | "758 -1\n",
210 | "759 1\n",
211 | "760 -1\n",
212 | "761 1\n",
213 | "762 -1\n",
214 | "763 -1\n",
215 | "764 -1\n",
216 | "765 -1\n",
217 | "766 1\n",
218 | "767 -1\n",
219 | "Name: 9, Length: 768, dtype: int64\n"
220 | ]
221 | }
222 | ],
223 | "source": [
224 | "train['9'] = train['9'].map({1: 1, 0: -1})\n",
225 | "print(train['9'])"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 4,
231 | "metadata": {},
232 | "outputs": [],
233 | "source": [
234 | "\n",
235 | "train=np.asarray(train)"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 5,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "min_train=np.zeros((268,9))\n",
245 | "max_train=np.zeros((500,9))\n",
246 | "min_train=np.asarray(min_train)\n",
247 | "max_train=np.asarray(max_train)"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 6,
253 | "metadata": {},
254 | "outputs": [],
255 | "source": [
256 | "\n",
257 | "k=0\n",
258 | "l=0\n",
259 | "for i in range(0,768):\n",
260 | " if(train[i][8]==1):\n",
261 | " for j in range(0,9):\n",
262 | " min_train[k][j]=train[i][j] \n",
263 | " k=k+1\n",
264 | " else :\n",
265 | " for j in range(0,9):\n",
266 | " max_train[l][j]=train[i][j]\n",
267 | " l=l+1"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 7,
273 | "metadata": {},
274 | "outputs": [
275 | {
276 | "name": "stdout",
277 | "output_type": "stream",
278 | "text": [
279 | "[[ 6. 148. 72. ... 0.627 50. 1. ]\n",
280 | " [ 8. 183. 64. ... 0.672 32. 1. ]\n",
281 | " [ 0. 137. 40. ... 2.288 33. 1. ]\n",
282 | " ...\n",
283 | " [ 6. 190. 92. ... 0.278 66. 1. ]\n",
284 | " [ 9. 170. 74. ... 0.403 43. 1. ]\n",
285 | " [ 1. 126. 60. ... 0.349 47. 1. ]]\n"
286 | ]
287 | }
288 | ],
289 | "source": [
290 | "print(min_train)"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "metadata": {},
296 | "source": [
297 | "#768/5 =153.6\n",
298 | "#So we have to make no. of instances in data1,2 as 153 and data3,4,5 as 154 \n",
299 | "#Also min class value will be 35% of 153=53 and max class=100"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": 21,
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "data1=np.zeros((153,9))\n",
309 | "data2=np.zeros((153,9))\n",
310 | "data3=np.zeros((154,9))\n",
311 | "data4=np.zeros((154,9))\n",
312 | "data5=np.zeros((154,9))"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": null,
318 | "metadata": {},
319 | "outputs": [],
320 | "source": []
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 22,
325 | "metadata": {},
326 | "outputs": [
327 | {
328 | "name": "stdout",
329 | "output_type": "stream",
330 | "text": [
331 | "[0. 0. 0. 0. 0. 0. 0. 0. 0.]\n"
332 | ]
333 | }
334 | ],
335 | "source": [
336 | "for i in range(0,268):\n",
337 | " for j in range(0,9):\n",
338 | " if(i<53):\n",
339 | " data1[i][j]=min_train[i][j]\n",
340 | " elif(52\n",
30 | "\n",
43 | "\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " 0 | \n",
48 | " 1 | \n",
49 | " 2 | \n",
50 | " 3 | \n",
51 | " 4 | \n",
52 | " 5 | \n",
53 | " 6 | \n",
54 | " 7 | \n",
55 | " 8 | \n",
56 | " 9 | \n",
57 | " 10 | \n",
58 | "
\n",
59 | " \n",
60 | " \n",
61 | " \n",
62 | " | 0 | \n",
63 | " 5 | \n",
64 | " 7 | \n",
65 | " 35 | \n",
66 | " 1.400 | \n",
67 | " 0.400 | \n",
68 | " 0.657 | \n",
69 | " 2.33 | \n",
70 | " 14 | \n",
71 | " 23 | \n",
72 | " 6 | \n",
73 | " 1 | \n",
74 | "
\n",
75 | " \n",
76 | " | 1 | \n",
77 | " 6 | \n",
78 | " 7 | \n",
79 | " 42 | \n",
80 | " 1.167 | \n",
81 | " 0.429 | \n",
82 | " 0.881 | \n",
83 | " 3.60 | \n",
84 | " 18 | \n",
85 | " 37 | \n",
86 | " 5 | \n",
87 | " 1 | \n",
88 | "
\n",
89 | " \n",
90 | " | 2 | \n",
91 | " 6 | \n",
92 | " 18 | \n",
93 | " 108 | \n",
94 | " 3.000 | \n",
95 | " 0.287 | \n",
96 | " 0.741 | \n",
97 | " 4.43 | \n",
98 | " 31 | \n",
99 | " 80 | \n",
100 | " 7 | \n",
101 | " 1 | \n",
102 | "
\n",
103 | " \n",
104 | " | 3 | \n",
105 | " 5 | \n",
106 | " 7 | \n",
107 | " 35 | \n",
108 | " 1.400 | \n",
109 | " 0.371 | \n",
110 | " 0.743 | \n",
111 | " 4.33 | \n",
112 | " 13 | \n",
113 | " 26 | \n",
114 | " 3 | \n",
115 | " 1 | \n",
116 | "
\n",
117 | " \n",
118 | " | 4 | \n",
119 | " 6 | \n",
120 | " 3 | \n",
121 | " 18 | \n",
122 | " 0.500 | \n",
123 | " 0.500 | \n",
124 | " 0.944 | \n",
125 | " 2.25 | \n",
126 | " 9 | \n",
127 | " 17 | \n",
128 | " 4 | \n",
129 | " 1 | \n",
130 | "
\n",
131 | " \n",
132 | "
\n",
133 | ""
134 | ],
135 | "text/plain": [
136 | " 0 1 2 3 4 5 6 7 8 9 10\n",
137 | "0 5 7 35 1.400 0.400 0.657 2.33 14 23 6 1\n",
138 | "1 6 7 42 1.167 0.429 0.881 3.60 18 37 5 1\n",
139 | "2 6 18 108 3.000 0.287 0.741 4.43 31 80 7 1\n",
140 | "3 5 7 35 1.400 0.371 0.743 4.33 13 26 3 1\n",
141 | "4 6 3 18 0.500 0.500 0.944 2.25 9 17 4 1"
142 | ]
143 | },
144 | "execution_count": 31,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "train = pd.read_table(\"page-blocks.data\", sep=\"\\s+\",header=None)\n",
151 | "train.head()"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 32,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | "0 -1\n",
164 | "1 -1\n",
165 | "2 -1\n",
166 | "3 -1\n",
167 | "4 -1\n",
168 | "5 -1\n",
169 | "6 -1\n",
170 | "7 -1\n",
171 | "8 -1\n",
172 | "9 -1\n",
173 | "10 -1\n",
174 | "11 -1\n",
175 | "12 -1\n",
176 | "13 -1\n",
177 | "14 -1\n",
178 | "15 -1\n",
179 | "16 -1\n",
180 | "17 -1\n",
181 | "18 -1\n",
182 | "19 -1\n",
183 | "20 -1\n",
184 | "21 -1\n",
185 | "22 -1\n",
186 | "23 -1\n",
187 | "24 -1\n",
188 | "25 -1\n",
189 | "26 -1\n",
190 | "27 -1\n",
191 | "28 -1\n",
192 | "29 -1\n",
193 | " ..\n",
194 | "5443 -1\n",
195 | "5444 -1\n",
196 | "5445 -1\n",
197 | "5446 -1\n",
198 | "5447 -1\n",
199 | "5448 -1\n",
200 | "5449 -1\n",
201 | "5450 -1\n",
202 | "5451 -1\n",
203 | "5452 -1\n",
204 | "5453 -1\n",
205 | "5454 -1\n",
206 | "5455 -1\n",
207 | "5456 -1\n",
208 | "5457 -1\n",
209 | "5458 -1\n",
210 | "5459 -1\n",
211 | "5460 -1\n",
212 | "5461 -1\n",
213 | "5462 -1\n",
214 | "5463 -1\n",
215 | "5464 -1\n",
216 | "5465 -1\n",
217 | "5466 -1\n",
218 | "5467 -1\n",
219 | "5468 -1\n",
220 | "5469 -1\n",
221 | "5470 -1\n",
222 | "5471 -1\n",
223 | "5472 -1\n",
224 | "Name: 10, Length: 5473, dtype: int64\n"
225 | ]
226 | }
227 | ],
228 | "source": [
229 | "train[10] = train[10].map({5: 1, 1:-1,2:-1,3:-1,4:-1})\n",
230 | "print(train[10])"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 33,
236 | "metadata": {},
237 | "outputs": [],
238 | "source": [
239 | "train=np.asarray(train)"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": 34,
245 | "metadata": {},
246 | "outputs": [],
247 | "source": [
248 | "min_train=np.zeros((115,11))\n",
249 | "max_train=np.zeros((5358,11))\n",
250 | "min_train=np.asarray(min_train)\n",
251 | "max_train=np.asarray(max_train)"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 62,
257 | "metadata": {},
258 | "outputs": [],
259 | "source": [
260 | "k=0\n",
261 | "l=0\n",
262 | "for i in range(0,5473):\n",
263 | " if(train[i][10]==1):\n",
264 | " for j in range(0,11):\n",
265 | " min_train[k][j]=train[i][j] \n",
266 | " k=k+1\n",
267 | " else :\n",
268 | " for j in range(0,11):\n",
269 | " max_train[l][j]=train[i][j]\n",
270 | " l=l+1"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 63,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "name": "stdout",
280 | "output_type": "stream",
281 | "text": [
282 | "[[ 5. 7. 35. ... 23. 6. -1.]\n",
283 | " [ 6. 7. 42. ... 37. 5. -1.]\n",
284 | " [ 6. 18. 108. ... 80. 7. -1.]\n",
285 | " ...\n",
286 | " [ 6. 95. 570. ... 519. 104. -1.]\n",
287 | " [ 7. 41. 287. ... 230. 45. -1.]\n",
288 | " [ 8. 1. 8. ... 8. 1. -1.]]\n"
289 | ]
290 | }
291 | ],
292 | "source": [
293 | "print(max_train)"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 64,
299 | "metadata": {},
300 | "outputs": [],
301 | "source": [
302 | "data1=np.zeros((1094,11))\n",
303 | "data2=np.zeros((1094,11))\n",
304 | "data3=np.zeros((1095,11))\n",
305 | "data4=np.zeros((1095,11))\n",
306 | "data5=np.zeros((1095,11))"
307 | ]
308 | },
309 | {
310 | "cell_type": "code",
311 | "execution_count": null,
312 | "metadata": {},
313 | "outputs": [],
314 | "source": []
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 65,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "name": "stdout",
323 | "output_type": "stream",
324 | "text": [
325 | "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n"
326 | ]
327 | }
328 | ],
329 | "source": [
330 | "for i in range(0,115):\n",
331 | " for j in range(0,11):\n",
332 | " if(i<23):\n",
333 | " data1[i][j]=min_train[i][j]\n",
334 | " elif(22\n",
30 | "\n",
43 | "\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " 0 | \n",
48 | " 1 | \n",
49 | " 2 | \n",
50 | " 3 | \n",
51 | "
\n",
52 | " \n",
53 | " \n",
54 | " \n",
55 | " | 0 | \n",
56 | " 30 | \n",
57 | " 64 | \n",
58 | " 1 | \n",
59 | " 1 | \n",
60 | "
\n",
61 | " \n",
62 | " | 1 | \n",
63 | " 30 | \n",
64 | " 62 | \n",
65 | " 3 | \n",
66 | " 1 | \n",
67 | "
\n",
68 | " \n",
69 | " | 2 | \n",
70 | " 30 | \n",
71 | " 65 | \n",
72 | " 0 | \n",
73 | " 1 | \n",
74 | "
\n",
75 | " \n",
76 | " | 3 | \n",
77 | " 31 | \n",
78 | " 59 | \n",
79 | " 2 | \n",
80 | " 1 | \n",
81 | "
\n",
82 | " \n",
83 | " | 4 | \n",
84 | " 31 | \n",
85 | " 65 | \n",
86 | " 4 | \n",
87 | " 1 | \n",
88 | "
\n",
89 | " \n",
90 | "
\n",
91 | ""
92 | ],
93 | "text/plain": [
94 | " 0 1 2 3\n",
95 | "0 30 64 1 1\n",
96 | "1 30 62 3 1\n",
97 | "2 30 65 0 1\n",
98 | "3 31 59 2 1\n",
99 | "4 31 65 4 1"
100 | ]
101 | },
102 | "execution_count": 4,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "train = pd.read_csv(\"haberman.csv\",header=None)\n",
109 | "train.head()"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 6,
115 | "metadata": {},
116 | "outputs": [
117 | {
118 | "name": "stdout",
119 | "output_type": "stream",
120 | "text": [
121 | "0 -1\n",
122 | "1 -1\n",
123 | "2 -1\n",
124 | "3 -1\n",
125 | "4 -1\n",
126 | "5 -1\n",
127 | "6 -1\n",
128 | "7 1\n",
129 | "8 1\n",
130 | "9 -1\n",
131 | "10 -1\n",
132 | "11 -1\n",
133 | "12 -1\n",
134 | "13 -1\n",
135 | "14 -1\n",
136 | "15 -1\n",
137 | "16 -1\n",
138 | "17 -1\n",
139 | "18 -1\n",
140 | "19 -1\n",
141 | "20 -1\n",
142 | "21 -1\n",
143 | "22 -1\n",
144 | "23 -1\n",
145 | "24 1\n",
146 | "25 -1\n",
147 | "26 -1\n",
148 | "27 -1\n",
149 | "28 -1\n",
150 | "29 -1\n",
151 | " ..\n",
152 | "276 -1\n",
153 | "277 -1\n",
154 | "278 -1\n",
155 | "279 -1\n",
156 | "280 -1\n",
157 | "281 1\n",
158 | "282 -1\n",
159 | "283 -1\n",
160 | "284 -1\n",
161 | "285 1\n",
162 | "286 1\n",
163 | "287 -1\n",
164 | "288 -1\n",
165 | "289 -1\n",
166 | "290 -1\n",
167 | "291 -1\n",
168 | "292 -1\n",
169 | "293 1\n",
170 | "294 -1\n",
171 | "295 -1\n",
172 | "296 -1\n",
173 | "297 -1\n",
174 | "298 -1\n",
175 | "299 1\n",
176 | "300 -1\n",
177 | "301 -1\n",
178 | "302 -1\n",
179 | "303 -1\n",
180 | "304 1\n",
181 | "305 1\n",
182 | "Name: 3, Length: 306, dtype: int64\n"
183 | ]
184 | }
185 | ],
186 | "source": [
187 | "train[3] = train[3].map({2:1,1:-1})\n",
188 | "print(train[3])"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 7,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "train=np.asarray(train)"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": 19,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": [
206 | "min_train=np.zeros((81,4))\n",
207 | "max_train=np.zeros((225,4))\n",
208 | "min_train=np.asarray(min_train)\n",
209 | "max_train=np.asarray(max_train)"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 20,
215 | "metadata": {},
216 | "outputs": [],
217 | "source": [
218 | "k=0\n",
219 | "l=0\n",
220 | "for i in range(0,306):\n",
221 | " if(train[i][3]==1):\n",
222 | " for j in range(0,4):\n",
223 | " min_train[k][j]=train[i][j] \n",
224 | " k=k+1\n",
225 | " else :\n",
226 | " for j in range(0,4):\n",
227 | " max_train[l][j]=train[i][j]\n",
228 | " l=l+1"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 21,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "name": "stdout",
238 | "output_type": "stream",
239 | "text": [
240 | "[[34. 59. 0. 1.]\n",
241 | " [34. 66. 9. 1.]\n",
242 | " [38. 69. 21. 1.]\n",
243 | " [39. 66. 0. 1.]\n",
244 | " [41. 60. 23. 1.]\n",
245 | " [41. 64. 0. 1.]\n",
246 | " [41. 67. 0. 1.]\n",
247 | " [42. 69. 1. 1.]\n",
248 | " [42. 59. 0. 1.]\n",
249 | " [43. 58. 52. 1.]\n",
250 | " [43. 59. 2. 1.]\n",
251 | " [43. 64. 0. 1.]\n",
252 | " [43. 64. 0. 1.]\n",
253 | " [44. 64. 6. 1.]\n",
254 | " [44. 58. 9. 1.]\n",
255 | " [44. 63. 19. 1.]\n",
256 | " [45. 65. 6. 1.]\n",
257 | " [45. 66. 0. 1.]\n",
258 | " [45. 67. 1. 1.]\n",
259 | " [46. 58. 2. 1.]\n",
260 | " [46. 69. 3. 1.]\n",
261 | " [46. 62. 5. 1.]\n",
262 | " [46. 65. 20. 1.]\n",
263 | " [47. 63. 23. 1.]\n",
264 | " [47. 62. 0. 1.]\n",
265 | " [47. 65. 0. 1.]\n",
266 | " [48. 58. 11. 1.]\n",
267 | " [48. 58. 11. 1.]\n",
268 | " [48. 67. 7. 1.]\n",
269 | " [49. 63. 0. 1.]\n",
270 | " [49. 64. 10. 1.]\n",
271 | " [50. 63. 13. 1.]\n",
272 | " [50. 64. 0. 1.]\n",
273 | " [51. 59. 13. 1.]\n",
274 | " [51. 59. 3. 1.]\n",
275 | " [52. 69. 3. 1.]\n",
276 | " [52. 59. 2. 1.]\n",
277 | " [52. 62. 3. 1.]\n",
278 | " [52. 66. 4. 1.]\n",
279 | " [53. 58. 4. 1.]\n",
280 | " [53. 65. 1. 1.]\n",
281 | " [53. 59. 3. 1.]\n",
282 | " [53. 60. 9. 1.]\n",
283 | " [53. 63. 24. 1.]\n",
284 | " [53. 65. 12. 1.]\n",
285 | " [54. 60. 11. 1.]\n",
286 | " [54. 65. 23. 1.]\n",
287 | " [54. 65. 5. 1.]\n",
288 | " [54. 68. 7. 1.]\n",
289 | " [55. 63. 6. 1.]\n",
290 | " [55. 68. 15. 1.]\n",
291 | " [56. 65. 9. 1.]\n",
292 | " [56. 66. 3. 1.]\n",
293 | " [57. 61. 5. 1.]\n",
294 | " [57. 62. 14. 1.]\n",
295 | " [57. 64. 1. 1.]\n",
296 | " [59. 62. 35. 1.]\n",
297 | " [60. 59. 17. 1.]\n",
298 | " [60. 65. 0. 1.]\n",
299 | " [61. 62. 5. 1.]\n",
300 | " [61. 65. 0. 1.]\n",
301 | " [61. 68. 1. 1.]\n",
302 | " [62. 59. 13. 1.]\n",
303 | " [62. 58. 0. 1.]\n",
304 | " [62. 65. 19. 1.]\n",
305 | " [63. 60. 1. 1.]\n",
306 | " [65. 58. 0. 1.]\n",
307 | " [65. 61. 2. 1.]\n",
308 | " [65. 62. 22. 1.]\n",
309 | " [65. 66. 15. 1.]\n",
310 | " [66. 58. 0. 1.]\n",
311 | " [66. 61. 13. 1.]\n",
312 | " [67. 64. 8. 1.]\n",
313 | " [67. 63. 1. 1.]\n",
314 | " [69. 67. 8. 1.]\n",
315 | " [70. 58. 0. 1.]\n",
316 | " [70. 58. 4. 1.]\n",
317 | " [72. 63. 0. 1.]\n",
318 | " [74. 65. 3. 1.]\n",
319 | " [78. 65. 1. 1.]\n",
320 | " [83. 58. 2. 1.]]\n"
321 | ]
322 | }
323 | ],
324 | "source": [
325 | "print(min_train)"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 23,
331 | "metadata": {},
332 | "outputs": [],
333 | "source": [
334 | "data1=np.zeros((61,4))\n",
335 | "data2=np.zeros((61,4))\n",
336 | "data3=np.zeros((61,4))\n",
337 | "data4=np.zeros((61,4))\n",
338 | "data5=np.zeros((62,4))"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": null,
344 | "metadata": {},
345 | "outputs": [],
346 | "source": []
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": 28,
351 | "metadata": {},
352 | "outputs": [
353 | {
354 | "name": "stdout",
355 | "output_type": "stream",
356 | "text": [
357 | "[0. 0. 0. 0.]\n"
358 | ]
359 | }
360 | ],
361 | "source": [
362 | "for i in range(0,81):\n",
363 | " for j in range(0,4):\n",
364 | " if(i<16):\n",
365 | " data1[i][j]=min_train[i][j]\n",
366 | " elif(15\n",
37 | "\n",
50 | "\n",
51 | " \n",
52 | " \n",
53 | " | \n",
54 | " sex | \n",
55 | " length | \n",
56 | " diameter | \n",
57 | " height | \n",
58 | " weight.w | \n",
59 | " weight.s | \n",
60 | " weight.v | \n",
61 | " weight.sh | \n",
62 | " rings | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 0 | \n",
68 | " M | \n",
69 | " 0.455 | \n",
70 | " 0.365 | \n",
71 | " 0.095 | \n",
72 | " 0.5140 | \n",
73 | " 0.2245 | \n",
74 | " 0.1010 | \n",
75 | " 0.150 | \n",
76 | " 15 | \n",
77 | "
\n",
78 | " \n",
79 | " | 1 | \n",
80 | " M | \n",
81 | " 0.350 | \n",
82 | " 0.265 | \n",
83 | " 0.090 | \n",
84 | " 0.2255 | \n",
85 | " 0.0995 | \n",
86 | " 0.0485 | \n",
87 | " 0.070 | \n",
88 | " 7 | \n",
89 | "
\n",
90 | " \n",
91 | " | 2 | \n",
92 | " F | \n",
93 | " 0.530 | \n",
94 | " 0.420 | \n",
95 | " 0.135 | \n",
96 | " 0.6770 | \n",
97 | " 0.2565 | \n",
98 | " 0.1415 | \n",
99 | " 0.210 | \n",
100 | " 9 | \n",
101 | "
\n",
102 | " \n",
103 | " | 3 | \n",
104 | " M | \n",
105 | " 0.440 | \n",
106 | " 0.365 | \n",
107 | " 0.125 | \n",
108 | " 0.5160 | \n",
109 | " 0.2155 | \n",
110 | " 0.1140 | \n",
111 | " 0.155 | \n",
112 | " 10 | \n",
113 | "
\n",
114 | " \n",
115 | " | 4 | \n",
116 | " I | \n",
117 | " 0.330 | \n",
118 | " 0.255 | \n",
119 | " 0.080 | \n",
120 | " 0.2050 | \n",
121 | " 0.0895 | \n",
122 | " 0.0395 | \n",
123 | " 0.055 | \n",
124 | " 7 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | ""
129 | ],
130 | "text/plain": [
131 | " sex length diameter height weight.w weight.s weight.v weight.sh \\\n",
132 | "0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 0.150 \n",
133 | "1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 \n",
134 | "2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 \n",
135 | "3 M 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 \n",
136 | "4 I 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 \n",
137 | "\n",
138 | " rings \n",
139 | "0 15 \n",
140 | "1 7 \n",
141 | "2 9 \n",
142 | "3 10 \n",
143 | "4 7 "
144 | ]
145 | },
146 | "execution_count": 2,
147 | "metadata": {},
148 | "output_type": "execute_result"
149 | }
150 | ],
151 | "source": [
152 | "train = pd.read_csv(\"Abalone.csv\")\n",
153 | "train.head()"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 3,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "train=train.replace(to_replace=['M', 'F', 'I'], value=[1, 2, 3])"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 4,
168 | "metadata": {},
169 | "outputs": [
170 | {
171 | "name": "stdout",
172 | "output_type": "stream",
173 | "text": [
174 | "0 1\n",
175 | "1 -1\n",
176 | "2 -1\n",
177 | "3 -1\n",
178 | "4 -1\n",
179 | "5 -1\n",
180 | "6 -1\n",
181 | "7 -1\n",
182 | "8 -1\n",
183 | "9 -1\n",
184 | "10 -1\n",
185 | "11 -1\n",
186 | "12 -1\n",
187 | "13 -1\n",
188 | "14 -1\n",
189 | "15 -1\n",
190 | "16 -1\n",
191 | "17 -1\n",
192 | "18 -1\n",
193 | "19 -1\n",
194 | "20 -1\n",
195 | "21 -1\n",
196 | "22 -1\n",
197 | "23 -1\n",
198 | "24 -1\n",
199 | "25 -1\n",
200 | "26 -1\n",
201 | "27 -1\n",
202 | "28 1\n",
203 | "29 -1\n",
204 | " ..\n",
205 | "4147 -1\n",
206 | "4148 -1\n",
207 | "4149 -1\n",
208 | "4150 -1\n",
209 | "4151 -1\n",
210 | "4152 -1\n",
211 | "4153 -1\n",
212 | "4154 -1\n",
213 | "4155 -1\n",
214 | "4156 -1\n",
215 | "4157 -1\n",
216 | "4158 -1\n",
217 | "4159 -1\n",
218 | "4160 -1\n",
219 | "4161 -1\n",
220 | "4162 -1\n",
221 | "4163 -1\n",
222 | "4164 -1\n",
223 | "4165 -1\n",
224 | "4166 -1\n",
225 | "4167 -1\n",
226 | "4168 -1\n",
227 | "4169 -1\n",
228 | "4170 -1\n",
229 | "4171 -1\n",
230 | "4172 -1\n",
231 | "4173 -1\n",
232 | "4174 -1\n",
233 | "4175 -1\n",
234 | "4176 -1\n",
235 | "Name: rings, Length: 4177, dtype: int64\n"
236 | ]
237 | }
238 | ],
239 | "source": [
240 | "train['rings'] = train['rings'].map({15: 1, 1:-1,2:-1,3:-1,4:-1,5:-1,6:-1,7:-1,8:-1,9:-1,10:-1,11:-1,12:-1,13:-1,14:-1,16:-1,17:-1,18:-1,19:-1,20:-1,21:-1,22:-1,23:-1,24:-1,25:-1,26:-1,27:-1,28:-1,29:-1})\n",
241 | "print(train['rings'])"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 5,
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "train=np.asarray(train)"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 6,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "min_train=np.zeros((103,9))\n",
260 | "max_train=np.zeros((4074,9))\n",
261 | "min_train=np.asarray(min_train)\n",
262 | "max_train=np.asarray(max_train)"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 7,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "k=0\n",
272 | "l=0\n",
273 | "for i in range(0,4177):\n",
274 | " if(train[i][8]==1):\n",
275 | " for j in range(0,9):\n",
276 | " min_train[k][j]=train[i][j] \n",
277 | " k=k+1\n",
278 | " else :\n",
279 | " for j in range(0,9):\n",
280 | " max_train[l][j]=train[i][j]\n",
281 | " l=l+1"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 8,
287 | "metadata": {},
288 | "outputs": [
289 | {
290 | "name": "stdout",
291 | "output_type": "stream",
292 | "text": [
293 | "[[1. 0.455 0.365 0.095 0.514 0.2245 0.101 0.15 1. ]\n",
294 | " [1. 0.605 0.475 0.18 0.9365 0.394 0.219 0.295 1. ]\n",
295 | " [2. 0.68 0.56 0.165 1.639 0.6055 0.2805 0.46 1. ]\n",
296 | " [2. 0.6 0.475 0.15 1.0075 0.4425 0.221 0.28 1. ]\n",
297 | " [1. 0.565 0.425 0.135 0.8115 0.341 0.1675 0.255 1. ]\n",
298 | " [1. 0.695 0.56 0.19 1.494 0.588 0.3425 0.485 1. ]\n",
299 | " [1. 0.55 0.435 0.145 0.843 0.328 0.1915 0.255 1. ]\n",
300 | " [1. 0.53 0.435 0.16 0.883 0.316 0.164 0.335 1. ]\n",
301 | " [1. 0.59 0.475 0.145 1.053 0.4415 0.262 0.325 1. ]\n",
302 | " [1. 0.56 0.45 0.16 0.922 0.432 0.178 0.26 1. ]\n",
303 | " [2. 0.53 0.415 0.16 0.783 0.2935 0.158 0.245 1. ]\n",
304 | " [2. 0.575 0.46 0.185 1.094 0.4485 0.217 0.345 1. ]\n",
305 | " [1. 0.6 0.495 0.165 1.2415 0.485 0.2775 0.34 1. ]\n",
306 | " [1. 0.56 0.45 0.175 1.011 0.3835 0.2065 0.37 1. ]\n",
307 | " [2. 0.635 0.505 0.17 1.415 0.605 0.297 0.365 1. ]\n",
308 | " [1. 0.63 0.505 0.225 1.525 0.56 0.3335 0.45 1. ]\n",
309 | " [2. 0.535 0.415 0.185 0.8415 0.314 0.1585 0.3 1. ]\n",
310 | " [1. 0.61 0.475 0.165 1.116 0.428 0.2205 0.315 1. ]\n",
311 | " [2. 0.565 0.45 0.195 1.0035 0.406 0.2505 0.285 1. ]\n",
312 | " [1. 0.565 0.465 0.175 0.995 0.3895 0.183 0.37 1. ]\n",
313 | " [1. 0.605 0.47 0.18 1.1405 0.3755 0.2805 0.385 1. ]\n",
314 | " [1. 0.59 0.5 0.165 1.1045 0.4565 0.2425 0.34 1. ]\n",
315 | " [2. 0.62 0.47 0.14 1.0325 0.3605 0.224 0.36 1. ]\n",
316 | " [2. 0.64 0.54 0.175 1.221 0.51 0.259 0.39 1. ]\n",
317 | " [1. 0.57 0.465 0.125 0.849 0.3785 0.1765 0.24 1. ]\n",
318 | " [2. 0.625 0.515 0.15 1.2415 0.5235 0.3065 0.36 1. ]\n",
319 | " [1. 0.655 0.53 0.175 1.2635 0.486 0.2635 0.415 1. ]\n",
320 | " [2. 0.625 0.5 0.15 0.953 0.3445 0.2235 0.305 1. ]\n",
321 | " [2. 0.62 0.47 0.225 1.115 0.378 0.2145 0.36 1. ]\n",
322 | " [1. 0.6 0.47 0.175 1.105 0.4865 0.247 0.315 1. ]\n",
323 | " [1. 0.585 0.455 0.225 1.055 0.3815 0.221 0.365 1. ]\n",
324 | " [2. 0.5 0.375 0.14 0.604 0.242 0.1415 0.179 1. ]\n",
325 | " [1. 0.42 0.325 0.115 0.2885 0.1 0.057 0.1135 1. ]\n",
326 | " [3. 0.45 0.35 0.145 0.525 0.2085 0.1 0.1655 1. ]\n",
327 | " [3. 0.465 0.36 0.105 0.498 0.214 0.116 0.14 1. ]\n",
328 | " [2. 0.485 0.38 0.15 0.605 0.2155 0.14 0.18 1. ]\n",
329 | " [1. 0.565 0.44 0.185 0.909 0.344 0.2325 0.255 1. ]\n",
330 | " [1. 0.555 0.44 0.15 1.092 0.416 0.212 0.4405 1. ]\n",
331 | " [1. 0.525 0.41 0.13 0.99 0.3865 0.243 0.295 1. ]\n",
332 | " [2. 0.52 0.4 0.12 0.6515 0.261 0.2015 0.165 1. ]\n",
333 | " [1. 0.52 0.4 0.12 0.823 0.298 0.1805 0.265 1. ]\n",
334 | " [1. 0.695 0.515 0.175 1.5165 0.578 0.4105 0.39 1. ]\n",
335 | " [2. 0.605 0.495 0.19 1.437 0.469 0.2655 0.41 1. ]\n",
336 | " [1. 0.57 0.43 0.12 1.0615 0.348 0.167 0.31 1. ]\n",
337 | " [1. 0.585 0.405 0.15 1.2565 0.435 0.202 0.325 1. ]\n",
338 | " [1. 0.505 0.385 0.145 0.6775 0.236 0.179 0.2 1. ]\n",
339 | " [1. 0.465 0.35 0.14 0.5755 0.2015 0.1505 0.19 1. ]\n",
340 | " [2. 0.47 0.36 0.145 0.537 0.1725 0.1375 0.195 1. ]\n",
341 | " [1. 0.55 0.415 0.175 1.042 0.3295 0.2325 0.2905 1. ]\n",
342 | " [1. 0.515 0.405 0.145 0.695 0.215 0.1635 0.234 1. ]\n",
343 | " [2. 0.48 0.4 0.125 0.759 0.2125 0.179 0.24 1. ]\n",
344 | " [1. 0.66 0.53 0.17 1.3905 0.5905 0.212 0.453 1. ]\n",
345 | " [1. 0.64 0.565 0.23 1.521 0.644 0.372 0.406 1. ]\n",
346 | " [2. 0.7 0.535 0.175 1.773 0.6805 0.48 0.512 1. ]\n",
347 | " [1. 0.62 0.495 0.195 1.5145 0.579 0.346 0.5195 1. ]\n",
348 | " [2. 0.675 0.55 0.18 1.6885 0.562 0.3705 0.6 1. ]\n",
349 | " [2. 0.595 0.48 0.2 0.975 0.358 0.2035 0.34 1. ]\n",
350 | " [1. 0.645 0.495 0.185 1.4935 0.5265 0.2785 0.455 1. ]\n",
351 | " [2. 0.56 0.435 0.185 1.106 0.422 0.2435 0.33 1. ]\n",
352 | " [2. 0.61 0.48 0.175 1.0675 0.391 0.216 0.42 1. ]\n",
353 | " [1. 0.635 0.51 0.21 1.598 0.6535 0.2835 0.58 1. ]\n",
354 | " [1. 0.695 0.57 0.2 2.033 0.751 0.4255 0.685 1. ]\n",
355 | " [2. 0.505 0.395 0.145 0.6515 0.2695 0.153 0.205 1. ]\n",
356 | " [2. 0.525 0.425 0.145 0.7995 0.3345 0.209 0.24 1. ]\n",
357 | " [3. 0.48 0.39 0.145 0.5825 0.2315 0.121 0.255 1. ]\n",
358 | " [1. 0.59 0.46 0.155 0.906 0.327 0.1485 0.335 1. ]\n",
359 | " [2. 0.6 0.47 0.2 1.031 0.392 0.2035 0.29 1. ]\n",
360 | " [1. 0.65 0.545 0.16 1.2425 0.487 0.296 0.48 1. ]\n",
361 | " [3. 0.555 0.455 0.17 0.8435 0.309 0.1905 0.3 1. ]\n",
362 | " [3. 0.655 0.515 0.145 1.25 0.5265 0.283 0.315 1. ]\n",
363 | " [3. 0.62 0.485 0.17 1.208 0.4805 0.3045 0.33 1. ]\n",
364 | " [3. 0.52 0.415 0.16 0.595 0.2105 0.142 0.26 1. ]\n",
365 | " [1. 0.49 0.39 0.135 0.592 0.242 0.096 0.1835 1. ]\n",
366 | " [2. 0.52 0.4 0.13 0.6245 0.215 0.2065 0.17 1. ]\n",
367 | " [1. 0.495 0.4 0.14 0.7775 0.2015 0.18 0.25 1. ]\n",
368 | " [1. 0.66 0.535 0.2 1.791 0.733 0.318 0.54 1. ]\n",
369 | " [1. 0.65 0.52 0.195 1.676 0.693 0.44 0.47 1. ]\n",
370 | " [1. 0.64 0.49 0.14 1.194 0.4445 0.238 0.375 1. ]\n",
371 | " [1. 0.605 0.49 0.155 1.153 0.503 0.2505 0.295 1. ]\n",
372 | " [1. 0.605 0.47 0.115 1.114 0.3925 0.291 0.31 1. ]\n",
373 | " [2. 0.505 0.41 0.135 0.657 0.291 0.133 0.195 1. ]\n",
374 | " [2. 0.665 0.53 0.185 1.3955 0.456 0.3205 0.49 1. ]\n",
375 | " [3. 0.48 0.38 0.125 0.523 0.2105 0.1045 0.175 1. ]\n",
376 | " [2. 0.69 0.54 0.185 1.5715 0.6935 0.318 0.47 1. ]\n",
377 | " [1. 0.555 0.435 0.135 0.858 0.377 0.1585 0.29 1. ]\n",
378 | " [1. 0.635 0.48 0.19 1.467 0.5825 0.303 0.42 1. ]\n",
379 | " [2. 0.61 0.495 0.19 1.213 0.464 0.306 0.365 1. ]\n",
380 | " [2. 0.465 0.39 0.14 0.5555 0.213 0.1075 0.215 1. ]\n",
381 | " [2. 0.605 0.475 0.145 1.0185 0.4695 0.225 0.27 1. ]\n",
382 | " [1. 0.535 0.42 0.16 0.72 0.275 0.164 0.225 1. ]\n",
383 | " [2. 0.71 0.575 0.175 1.555 0.6465 0.3705 0.52 1. ]\n",
384 | " [2. 0.48 0.37 0.13 0.5885 0.2475 0.1505 0.1595 1. ]\n",
385 | " [3. 0.66 0.525 0.18 1.6935 0.6025 0.4005 0.42 1. ]\n",
386 | " [2. 0.52 0.405 0.145 0.829 0.3535 0.1685 0.205 1. ]\n",
387 | " [1. 0.495 0.4 0.12 0.6605 0.2605 0.161 0.19 1. ]\n",
388 | " [2. 0.5 0.39 0.13 0.6355 0.2505 0.1635 0.195 1. ]\n",
389 | " [1. 0.545 0.44 0.165 0.744 0.2875 0.204 0.25 1. ]\n",
390 | " [2. 0.645 0.5 0.225 1.626 0.587 0.4055 0.41 1. ]\n",
391 | " [2. 0.61 0.49 0.17 1.1775 0.5655 0.2385 0.295 1. ]\n",
392 | " [2. 0.67 0.545 0.16 1.5415 0.5985 0.2565 0.495 1. ]\n",
393 | " [1. 0.445 0.345 0.14 0.476 0.2055 0.1015 0.1085 1. ]\n",
394 | " [3. 0.52 0.405 0.14 0.6765 0.2865 0.146 0.205 1. ]\n",
395 | " [2. 0.54 0.44 0.16 1.0905 0.391 0.2295 0.355 1. ]]\n"
396 | ]
397 | }
398 | ],
399 | "source": [
400 | "print(min_train)"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": 14,
406 | "metadata": {},
407 | "outputs": [],
408 | "source": [
409 | "data1=np.zeros((834,9))\n",
410 | "data2=np.zeros((835,9))\n",
411 | "data3=np.zeros((836,9))\n",
412 | "data4=np.zeros((836,9))\n",
413 | "data5=np.zeros((836,9))"
414 | ]
415 | },
416 | {
417 | "cell_type": "code",
418 | "execution_count": null,
419 | "metadata": {},
420 | "outputs": [],
421 | "source": []
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": 16,
426 | "metadata": {},
427 | "outputs": [
428 | {
429 | "name": "stdout",
430 | "output_type": "stream",
431 | "text": [
432 | "[2. 0.54 0.44 0.16 1.0905 0.391 0.2295 0.355 1. ]\n"
433 | ]
434 | }
435 | ],
436 | "source": [
437 | "for i in range(0,103):\n",
438 | " for j in range(0,9):\n",
439 | " if(i<20):\n",
440 | " data1[i][j]=min_train[i][j]\n",
441 | " elif(19\n",
41 | "\n",
54 | "\n",
55 | " \n",
56 | " \n",
57 | " | \n",
58 | " 0 | \n",
59 | " 1 | \n",
60 | " 2 | \n",
61 | " 3 | \n",
62 | "
\n",
63 | " \n",
64 | " \n",
65 | " \n",
66 | " | 0 | \n",
67 | " 34.0 | \n",
68 | " 59.0 | \n",
69 | " 0.0 | \n",
70 | " 1.0 | \n",
71 | "
\n",
72 | " \n",
73 | " | 1 | \n",
74 | " 34.0 | \n",
75 | " 66.0 | \n",
76 | " 9.0 | \n",
77 | " 1.0 | \n",
78 | "
\n",
79 | " \n",
80 | " | 2 | \n",
81 | " 38.0 | \n",
82 | " 69.0 | \n",
83 | " 21.0 | \n",
84 | " 1.0 | \n",
85 | "
\n",
86 | " \n",
87 | " | 3 | \n",
88 | " 39.0 | \n",
89 | " 66.0 | \n",
90 | " 0.0 | \n",
91 | " 1.0 | \n",
92 | "
\n",
93 | " \n",
94 | " | 4 | \n",
95 | " 41.0 | \n",
96 | " 60.0 | \n",
97 | " 23.0 | \n",
98 | " 1.0 | \n",
99 | "
\n",
100 | " \n",
101 | "
\n",
102 | ""
103 | ],
104 | "text/plain": [
105 | " 0 1 2 3\n",
106 | "0 34.0 59.0 0.0 1.0\n",
107 | "1 34.0 66.0 9.0 1.0\n",
108 | "2 38.0 69.0 21.0 1.0\n",
109 | "3 39.0 66.0 0.0 1.0\n",
110 | "4 41.0 60.0 23.0 1.0"
111 | ]
112 | },
113 | "execution_count": 2,
114 | "metadata": {},
115 | "output_type": "execute_result"
116 | }
117 | ],
118 | "source": [
119 | "train = pd.read_csv(\"modifiedhaberman.csv\", header=None)\n",
120 | "train.head()"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 3,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "data": {
130 | "text/html": [
131 | "\n",
132 | "\n",
145 | "
\n",
146 | " \n",
147 | " \n",
148 | " | \n",
149 | " 0 | \n",
150 | " 1 | \n",
151 | " 2 | \n",
152 | "
\n",
153 | " \n",
154 | " \n",
155 | " \n",
156 | " | 0 | \n",
157 | " 34.0 | \n",
158 | " 59.0 | \n",
159 | " 0.0 | \n",
160 | "
\n",
161 | " \n",
162 | " | 1 | \n",
163 | " 34.0 | \n",
164 | " 66.0 | \n",
165 | " 9.0 | \n",
166 | "
\n",
167 | " \n",
168 | " | 2 | \n",
169 | " 38.0 | \n",
170 | " 69.0 | \n",
171 | " 21.0 | \n",
172 | "
\n",
173 | " \n",
174 | " | 3 | \n",
175 | " 39.0 | \n",
176 | " 66.0 | \n",
177 | " 0.0 | \n",
178 | "
\n",
179 | " \n",
180 | " | 4 | \n",
181 | " 41.0 | \n",
182 | " 60.0 | \n",
183 | " 23.0 | \n",
184 | "
\n",
185 | " \n",
186 | "
\n",
187 | "
"
188 | ],
189 | "text/plain": [
190 | " 0 1 2\n",
191 | "0 34.0 59.0 0.0\n",
192 | "1 34.0 66.0 9.0\n",
193 | "2 38.0 69.0 21.0\n",
194 | "3 39.0 66.0 0.0\n",
195 | "4 41.0 60.0 23.0"
196 | ]
197 | },
198 | "execution_count": 3,
199 | "metadata": {},
200 | "output_type": "execute_result"
201 | }
202 | ],
203 | "source": [
204 | "features = train.columns[0:3]\n",
205 | "X = train[features]\n",
206 | "y = train[3]\n",
207 | "X.head()"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 4,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=0)\n"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 5,
222 | "metadata": {},
223 | "outputs": [
224 | {
225 | "name": "stdout",
226 | "output_type": "stream",
227 | "text": [
228 | "(244, 3) (62, 3)\n"
229 | ]
230 | }
231 | ],
232 | "source": [
233 | "print(X_train.shape,X_test.shape)"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 6,
239 | "metadata": {},
240 | "outputs": [],
241 | "source": [
242 | "X_train=np.asarray(X_train)\n",
243 | "y_train=np.asarray(y_train)"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 7,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "def linear_kernel(x1, x2):\n",
253 | " return np.dot(x1, x2)\n",
254 | "\n",
255 | "def polynomial_kernel(x, y, p=3):\n",
256 | " return (1 + np.dot(x, y)) ** p\n",
257 | "\n",
258 | "def gaussian_kernel(x, y, sigma=100.0):\n",
259 | " # print(-linalg.norm(x-y)**2)\n",
260 | " x=np.asarray(x)\n",
261 | " y=np.asarray(y)\n",
262 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n",
263 | "\n",
264 | "def gm(y_predict,y_test):\n",
265 | " test_min=0\n",
266 | " test_max=0\n",
267 | " pred_min=0\n",
268 | " pred_max=0\n",
269 | " y_test=np.asarray(y_test)\n",
270 | " for i in range(0,62):\n",
271 | " if(y_test[i]==1):\n",
272 | " test_min=test_min+1\n",
273 | " else:\n",
274 | " test_max=test_max+1\n",
275 | " print(\"y_test min\",test_min) \n",
276 | " print(\"y_test max\",test_max)\n",
277 | " for i in range(0,62):\n",
278 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n",
279 | " pred_min=pred_min+1\n",
280 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n",
281 | " pred_max=pred_max+1\n",
282 | " print(\"y_pred min\",pred_min) \n",
283 | " print(\"y_pred max\",pred_max)\n",
284 | " se=pred_min/test_min\n",
285 | " sp=pred_max/test_max\n",
286 | " print(se,sp)\n",
287 | " gm=math.sqrt(se*sp)\n",
288 | " print(\"GM\",gm)"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "# FSVM using Hyperplane"
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": 14,
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "from cvxopt import matrix\n",
305 | "class HYP_SVM(object):\n",
306 | "\n",
307 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
308 | " self.kernel = kernel\n",
309 | " self.C = C\n",
310 | " if self.C is not None: self.C = float(self.C)\n",
311 | " def m_func(self, X_train,X_test, y):\n",
312 | " n_samples, n_features = X_train.shape \n",
313 | " nt_samples, nt_features= X_test.shape\n",
314 | " self.K = np.zeros((n_samples, n_samples))\n",
315 | " for i in range(n_samples):\n",
316 | " for j in range(n_samples):\n",
317 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
318 | " # print(K[i,j])\n",
319 | " X_train=np.asarray(X_train)\n",
320 | " X_test=np.asarray(X_test)\n",
321 | " K1 = np.zeros((n_samples, n_samples))\n",
322 | " for i in range(n_samples):\n",
323 | " for j in range(n_samples):\n",
324 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
325 | " # print(K[i,j])\n",
326 | " print(K1.shape)\n",
327 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
328 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
329 | " A = cvxopt.matrix(y, (1,n_samples))\n",
330 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
331 | " b = cvxopt.matrix(0.0)\n",
332 | " #print(P,q,A,b)\n",
333 | " if self.C is None:\n",
334 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
335 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
336 | " \n",
337 | " else:\n",
338 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
339 | " tmp2 = np.identity(n_samples)\n",
340 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
341 | " tmp1 = np.zeros(n_samples)\n",
342 | " tmp2 = np.ones(n_samples) * self.C\n",
343 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
344 | " # solve QP problem\n",
345 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
346 | " print(solution['status'])\n",
347 | " # Lagrange multipliers\n",
348 | " a = np.ravel(solution['x'])\n",
349 | " a_org = np.ravel(solution['x'])\n",
350 | " # Support vectors have non zero lagrange multipliers\n",
351 | " sv = a > 1e-5\n",
352 | " #print(sv.shape)\n",
353 | " ind = np.arange(len(a))[sv]\n",
354 | " self.a_org=a\n",
355 | " self.a = a[sv]\n",
356 | " self.sv = X_train[sv]\n",
357 | " self.sv_y = y[sv]\n",
358 | " self.sv_yorg=y\n",
359 | " self.kernel = gaussian_kernel\n",
360 | " X_train=np.asarray(X_train)\n",
361 | " b = 0\n",
362 | " for n in range(len(self.a)):\n",
363 | " b += self.sv_y[n]\n",
364 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
365 | " b /= len(self.a)\n",
366 | " # print(self.a_org[1])\n",
367 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n",
368 | " w_phi=0\n",
369 | " total=0\n",
370 | " for n in range(len(self.a_org)):\n",
371 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n",
372 | " self.d_hyp=np.zeros(n_samples)\n",
373 | " for n in range(len(self.a_org)):\n",
374 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n",
375 | " func=np.zeros((n_samples))\n",
376 | " func=np.asarray(func)\n",
377 | " typ=2\n",
378 | " if(typ==1):\n",
379 | " for i in range(n_samples):\n",
380 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n",
381 | " beta=0.8\n",
382 | " if(typ==2):\n",
383 | " for i in range(n_samples):\n",
384 | " func[i]=2/(1+beta*self.d_hyp[i])\n",
385 | " r_max=26/74\n",
386 | " r_min=1\n",
387 | " self.m=func[0:81]*r_min\n",
388 | " print(self.m.shape)\n",
389 | " self.m=np.append(self.m,func[81:306]*r_max)\n",
390 | " print(self.m.shape)\n",
391 | " \n",
392 | " ##############################################################################\n",
393 | "\n",
394 | "\n",
395 | " def fit(self, X_train,X_test, y):\n",
396 | " self.kernel = gaussian_kernel\n",
397 | " n_samples, n_features = X_train.shape \n",
398 | " nt_samples, nt_features = X_test.shape\n",
399 | " # Gram matrix\n",
400 | "\n",
401 | " print(self.K.shape)\n",
402 | "\n",
403 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
404 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
405 | " A = cvxopt.matrix(y, (1,n_samples))\n",
406 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
407 | " b = cvxopt.matrix(0.0)\n",
408 | " #print(P,q,A,b)\n",
409 | " if self.C is None:\n",
410 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
411 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
412 | " \n",
413 | " else:\n",
414 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
415 | " tmp2 = np.identity(n_samples)\n",
416 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
417 | " tmp1 = np.zeros(n_samples)\n",
418 | " tmp2 = np.ones(n_samples) * self.C\n",
419 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
420 | " # solve QP problem\n",
421 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
422 | " print(solution['status'])\n",
423 | " # Lagrange multipliers\n",
424 | " a = np.ravel(solution['x'])\n",
425 | " a_org = np.ravel(solution['x'])\n",
426 | " # Support vectors have non zero lagrange multipliers\n",
427 | " for i in range(n_samples):\n",
428 | " sv=np.logical_or(self.a_org 1e-5)\n",
429 | " #print(sv.shape)\n",
430 | " ind = np.arange(len(a))[sv]\n",
431 | " self.a = a[sv]\n",
432 | " self.sv = X_train[sv]\n",
433 | " self.sv_y = y[sv]\n",
434 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
435 | "\n",
436 | " # Intercept\n",
437 | " self.b = 0\n",
438 | " for n in range(len(self.a)):\n",
439 | " self.b += self.sv_y[n]\n",
440 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
441 | " self.b /= len(self.a)\n",
442 | " print(self.b)\n",
443 | "\n",
444 | " # Weight vector\n",
445 | " if self.kernel == gaussian_kernel:\n",
446 | " self.w = np.zeros(n_features)\n",
447 | " for n in range(len(self.a)):\n",
448 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
449 | " else :\n",
450 | " self.w = None \n",
451 | " \n",
452 | " def project(self, X):\n",
453 | " if self.w is None:\n",
454 | " return np.dot(X, self.w) + self.b\n",
455 | " else:\n",
456 | " y_predict = np.zeros(len(X))\n",
457 | " X=np.asarray(X)\n",
458 | " for i in range(len(X)):\n",
459 | " s = 0\n",
460 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
461 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
462 | " y_predict[i] = s\n",
463 | " # print(y_predict[i])\n",
464 | " return y_predict + self.b\n",
465 | "\n",
466 | " def predict(self, X):\n",
467 | " return np.sign(self.project(X))"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 15,
473 | "metadata": {},
474 | "outputs": [
475 | {
476 | "name": "stdout",
477 | "output_type": "stream",
478 | "text": [
479 | "(244, 244)\n",
480 | " pcost dcost gap pres dres\n",
481 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n",
482 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n",
483 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n",
484 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n",
485 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n",
486 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n",
487 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n",
488 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n",
489 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n",
490 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n",
491 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n",
492 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n",
493 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n",
494 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n",
495 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n",
496 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n",
497 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n",
498 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n",
499 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n",
500 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n",
501 | "Optimal solution found.\n",
502 | "optimal\n",
503 | "(81,)\n",
504 | "(244,)\n",
505 | "(244, 244)\n",
506 | " pcost dcost gap pres dres\n",
507 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n",
508 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n",
509 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n",
510 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n",
511 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n",
512 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n",
513 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n",
514 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n",
515 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n",
516 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n",
517 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n",
518 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n",
519 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n",
520 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n",
521 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n",
522 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n",
523 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n",
524 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n",
525 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n",
526 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n",
527 | "Optimal solution found.\n",
528 | "optimal\n",
529 | "-0.7094793824855187\n",
530 | "y_test min 21\n",
531 | "y_test max 41\n",
532 | "y_pred min 5\n",
533 | "y_pred max 35\n",
534 | "0.23809523809523808 0.8536585365853658\n",
535 | "GM 0.45083481733371616\n",
536 | "40 out of 62 predictions correct\n",
537 | "Accuracy 0.6451612903225806\n"
538 | ]
539 | }
540 | ],
541 | "source": [
542 | "\n",
543 | "if __name__ == \"__main__\":\n",
544 | " import pylab as pl \n",
545 | " def hyp_svm():\n",
546 | " \n",
547 | " clf = HYP_SVM(C=100.0)\n",
548 | " clf.m_func(X_train,X_test,y_train)\n",
549 | " clf.fit(X_train,X_test, y_train)\n",
550 | " y_predict = clf.predict(X_test)\n",
551 | " gm(y_predict,y_test)\n",
552 | " correct = np.sum(y_predict == y_test)\n",
553 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
554 | " print(\"Accuracy\",correct/len(y_predict))\n",
555 | "\n",
556 | " hyp_svm() "
557 | ]
558 | },
559 | {
560 | "cell_type": "code",
561 | "execution_count": null,
562 | "metadata": {},
563 | "outputs": [],
564 | "source": []
565 | },
566 | {
567 | "cell_type": "code",
568 | "execution_count": null,
569 | "metadata": {},
570 | "outputs": [],
571 | "source": []
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": 13,
576 | "metadata": {},
577 | "outputs": [
578 | {
579 | "name": "stdout",
580 | "output_type": "stream",
581 | "text": [
582 | "Overall RBF KERNEL SVM accuracy: 0.6290322580645161\n"
583 | ]
584 | }
585 | ],
586 | "source": [
587 | "clf_svm = svm.SVC(kernel='rbf', gamma=0.0001, C=100)\n",
588 | "clf_svm.fit(X_train, y_train)\n",
589 | "y_pred_svm = clf_svm.predict(X_test) \n",
590 | "acc_svm = accuracy_score(y_test, y_pred_svm)\n",
591 | "print (\"Overall RBF KERNEL SVM accuracy: \",acc_svm)"
592 | ]
593 | },
594 | {
595 | "cell_type": "code",
596 | "execution_count": null,
597 | "metadata": {},
598 | "outputs": [],
599 | "source": []
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": null,
604 | "metadata": {},
605 | "outputs": [],
606 | "source": []
607 | },
608 | {
609 | "cell_type": "code",
610 | "execution_count": null,
611 | "metadata": {},
612 | "outputs": [],
613 | "source": []
614 | },
615 | {
616 | "cell_type": "code",
617 | "execution_count": null,
618 | "metadata": {},
619 | "outputs": [],
620 | "source": []
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": null,
625 | "metadata": {},
626 | "outputs": [],
627 | "source": []
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": null,
632 | "metadata": {},
633 | "outputs": [],
634 | "source": []
635 | },
636 | {
637 | "cell_type": "code",
638 | "execution_count": null,
639 | "metadata": {},
640 | "outputs": [],
641 | "source": []
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": null,
646 | "metadata": {},
647 | "outputs": [],
648 | "source": []
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": null,
653 | "metadata": {},
654 | "outputs": [],
655 | "source": []
656 | },
657 | {
658 | "cell_type": "code",
659 | "execution_count": null,
660 | "metadata": {},
661 | "outputs": [],
662 | "source": []
663 | },
664 | {
665 | "cell_type": "code",
666 | "execution_count": null,
667 | "metadata": {},
668 | "outputs": [],
669 | "source": []
670 | },
671 | {
672 | "cell_type": "code",
673 | "execution_count": null,
674 | "metadata": {},
675 | "outputs": [],
676 | "source": []
677 | },
678 | {
679 | "cell_type": "markdown",
680 | "metadata": {},
681 | "source": [
682 | "# Normal SVM using CVXOPT"
683 | ]
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": 8,
688 | "metadata": {},
689 | "outputs": [],
690 | "source": [
691 | "from cvxopt import matrix\n",
692 | "class SVM(object):\n",
693 | "\n",
694 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
695 | " self.kernel = kernel\n",
696 | " self.C = C\n",
697 | " if self.C is not None: self.C = float(self.C)\n",
698 | " def fit(self, X, y):\n",
699 | " self.kernel = gaussian_kernel\n",
700 | " n_samples, n_features = X.shape\n",
701 | " # Gram matrix\n",
702 | " K = np.zeros((n_samples, n_samples))\n",
703 | " for i in range(n_samples):\n",
704 | " for j in range(n_samples):\n",
705 | " K[i,j] = gaussian_kernel(X[i], X[j])\n",
706 | " # print(K[i,j])\n",
707 | " print(K.shape)\n",
708 | "\n",
709 | " P = cvxopt.matrix(np.outer(y,y) * K)\n",
710 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
711 | " A = cvxopt.matrix(y, (1,n_samples))\n",
712 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
713 | " b = cvxopt.matrix(0.0)\n",
714 | " #print(P,q,A,b)\n",
715 | " if self.C is None:\n",
716 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
717 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
718 | " \n",
719 | " else:\n",
720 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
721 | " tmp2 = np.identity(n_samples)\n",
722 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
723 | " tmp1 = np.zeros(n_samples)\n",
724 | " tmp2 = np.ones(n_samples) * self.C\n",
725 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
726 | " # solve QP problem\n",
727 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
728 | " print(solution['status'])\n",
729 | " # Lagrange multipliers\n",
730 | " a = np.ravel(solution['x'])\n",
731 | " # print(a)\n",
732 | " # Support vectors have non zero lagrange multipliers\n",
733 | " sv = a > 1e-5\n",
734 | " print(sv.shape)\n",
735 | " ind = np.arange(len(a))[sv]\n",
736 | " self.a = a[sv]\n",
737 | " self.sv = X[sv]\n",
738 | " self.sv_y = y[sv]\n",
739 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
740 | "\n",
741 | " # Intercept\n",
742 | " self.b = 0\n",
743 | " for n in range(len(self.a)):\n",
744 | " self.b += self.sv_y[n]\n",
745 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n",
746 | " self.b /= len(self.a)\n",
747 | "\n",
748 | " # Weight vector\n",
749 | " if self.kernel == gaussian_kernel:\n",
750 | " self.w = np.zeros(n_features)\n",
751 | " for n in range(len(self.a)):\n",
752 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
753 | " #print(self.w)\n",
754 | " else:\n",
755 | " self.w = None\n",
756 | "\n",
757 | " def project(self, X):\n",
758 | " if self.w is None:\n",
759 | " return np.dot(X, self.w) + self.b\n",
760 | " else:\n",
761 | " y_predict = np.zeros(len(X))\n",
762 | " X=np.asarray(X)\n",
763 | " for i in range(len(X)):\n",
764 | " s = 0\n",
765 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
766 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
767 | " y_predict[i] = s\n",
768 | " # print(y_predict[i])\n",
769 | " return y_predict + self.b\n",
770 | "\n",
771 | " def predict(self, X):\n",
772 | " return np.sign(self.project(X))"
773 | ]
774 | },
775 | {
776 | "cell_type": "code",
777 | "execution_count": 9,
778 | "metadata": {},
779 | "outputs": [
780 | {
781 | "name": "stdout",
782 | "output_type": "stream",
783 | "text": [
784 | "(244, 244)\n",
785 | " pcost dcost gap pres dres\n",
786 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n",
787 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n",
788 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n",
789 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n",
790 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n",
791 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n",
792 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n",
793 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n",
794 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n",
795 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n",
796 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n",
797 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n",
798 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n",
799 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n",
800 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n",
801 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n",
802 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n",
803 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n",
804 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n",
805 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n",
806 | "Optimal solution found.\n",
807 | "optimal\n",
808 | "(244,)\n",
809 | "206 support vectors out of 244 points\n",
810 | "y_test min 21\n",
811 | "y_test max 41\n",
812 | "y_pred min 5\n",
813 | "y_pred max 35\n",
814 | "0.23809523809523808 0.8536585365853658\n",
815 | "GM 0.45083481733371616\n",
816 | "40 out of 62 predictions correct\n",
817 | "Accuracy 0.6451612903225806\n"
818 | ]
819 | }
820 | ],
821 | "source": [
822 | "\n",
823 | "if __name__ == \"__main__\":\n",
824 | " import pylab as pl \n",
825 | " def normal_svm():\n",
826 | " \n",
827 | " clf = SVM(C=100.0)\n",
828 | " clf.fit(X_train, y_train)\n",
829 | " y_predict = clf.predict(X_test)\n",
830 | " gm(y_predict,y_test)\n",
831 | " correct = np.sum(y_predict == y_test)\n",
832 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
833 | " print(\"Accuracy\",correct/len(y_predict))\n",
834 | "\n",
835 | " normal_svm() "
836 | ]
837 | },
838 | {
839 | "cell_type": "code",
840 | "execution_count": null,
841 | "metadata": {},
842 | "outputs": [],
843 | "source": []
844 | }
845 | ],
846 | "metadata": {
847 | "kernelspec": {
848 | "display_name": "Python 3",
849 | "language": "python",
850 | "name": "python3"
851 | },
852 | "language_info": {
853 | "codemirror_mode": {
854 | "name": "ipython",
855 | "version": 3
856 | },
857 | "file_extension": ".py",
858 | "mimetype": "text/x-python",
859 | "name": "python",
860 | "nbconvert_exporter": "python",
861 | "pygments_lexer": "ipython3",
862 | "version": "3.6.5"
863 | }
864 | },
865 | "nbformat": 4,
866 | "nbformat_minor": 2
867 | }
868 |
--------------------------------------------------------------------------------
/FUZZY SVM.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "C:\\Users\\HP\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n"
14 | ]
15 | }
16 | ],
17 | "source": [
18 | "import numpy as np\n",
19 | "from numpy import linalg\n",
20 | "import cvxopt\n",
21 | "import cvxopt.solvers\n",
22 | "import pandas as pd\n",
23 | "from sklearn import cross_validation\n",
24 | "from sklearn.metrics import classification_report\n",
25 | "from sklearn.metrics import accuracy_score\n",
26 | "from cvxopt import matrix as cvxopt_matrix\n",
27 | "from cvxopt import solvers as cvxopt_solvers\n",
28 | "from sklearn import svm\n",
29 | "import math "
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "data": {
39 | "text/html": [
40 | "\n",
41 | "\n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " | \n",
58 | " 0 | \n",
59 | " 1 | \n",
60 | " 2 | \n",
61 | " 3 | \n",
62 | " 4 | \n",
63 | " 5 | \n",
64 | " 6 | \n",
65 | " 7 | \n",
66 | " 8 | \n",
67 | "
\n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " | 0 | \n",
72 | " 6.0 | \n",
73 | " 148.0 | \n",
74 | " 72.0 | \n",
75 | " 35.0 | \n",
76 | " 0.0 | \n",
77 | " 33.6 | \n",
78 | " 0.627 | \n",
79 | " 50.0 | \n",
80 | " 1.0 | \n",
81 | "
\n",
82 | " \n",
83 | " | 1 | \n",
84 | " 8.0 | \n",
85 | " 183.0 | \n",
86 | " 64.0 | \n",
87 | " 0.0 | \n",
88 | " 0.0 | \n",
89 | " 23.3 | \n",
90 | " 0.672 | \n",
91 | " 32.0 | \n",
92 | " 1.0 | \n",
93 | "
\n",
94 | " \n",
95 | " | 2 | \n",
96 | " 0.0 | \n",
97 | " 137.0 | \n",
98 | " 40.0 | \n",
99 | " 35.0 | \n",
100 | " 168.0 | \n",
101 | " 43.1 | \n",
102 | " 2.288 | \n",
103 | " 33.0 | \n",
104 | " 1.0 | \n",
105 | "
\n",
106 | " \n",
107 | " | 3 | \n",
108 | " 3.0 | \n",
109 | " 78.0 | \n",
110 | " 50.0 | \n",
111 | " 32.0 | \n",
112 | " 88.0 | \n",
113 | " 31.0 | \n",
114 | " 0.248 | \n",
115 | " 26.0 | \n",
116 | " 1.0 | \n",
117 | "
\n",
118 | " \n",
119 | " | 4 | \n",
120 | " 2.0 | \n",
121 | " 197.0 | \n",
122 | " 70.0 | \n",
123 | " 45.0 | \n",
124 | " 543.0 | \n",
125 | " 30.5 | \n",
126 | " 0.158 | \n",
127 | " 53.0 | \n",
128 | " 1.0 | \n",
129 | "
\n",
130 | " \n",
131 | "
\n",
132 | "
"
133 | ],
134 | "text/plain": [
135 | " 0 1 2 3 4 5 6 7 8\n",
136 | "0 6.0 148.0 72.0 35.0 0.0 33.6 0.627 50.0 1.0\n",
137 | "1 8.0 183.0 64.0 0.0 0.0 23.3 0.672 32.0 1.0\n",
138 | "2 0.0 137.0 40.0 35.0 168.0 43.1 2.288 33.0 1.0\n",
139 | "3 3.0 78.0 50.0 32.0 88.0 31.0 0.248 26.0 1.0\n",
140 | "4 2.0 197.0 70.0 45.0 543.0 30.5 0.158 53.0 1.0"
141 | ]
142 | },
143 | "execution_count": 2,
144 | "metadata": {},
145 | "output_type": "execute_result"
146 | }
147 | ],
148 | "source": [
149 | "train = pd.read_csv(\"modifiedpima.csv\", header=None)\n",
150 | "train.head()"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": 3,
156 | "metadata": {},
157 | "outputs": [
158 | {
159 | "data": {
160 | "text/html": [
161 | "\n",
162 | "\n",
175 | "
\n",
176 | " \n",
177 | " \n",
178 | " | \n",
179 | " 0 | \n",
180 | " 1 | \n",
181 | " 2 | \n",
182 | " 3 | \n",
183 | " 4 | \n",
184 | " 5 | \n",
185 | " 6 | \n",
186 | " 7 | \n",
187 | "
\n",
188 | " \n",
189 | " \n",
190 | " \n",
191 | " | 0 | \n",
192 | " 6.0 | \n",
193 | " 148.0 | \n",
194 | " 72.0 | \n",
195 | " 35.0 | \n",
196 | " 0.0 | \n",
197 | " 33.6 | \n",
198 | " 0.627 | \n",
199 | " 50.0 | \n",
200 | "
\n",
201 | " \n",
202 | " | 1 | \n",
203 | " 8.0 | \n",
204 | " 183.0 | \n",
205 | " 64.0 | \n",
206 | " 0.0 | \n",
207 | " 0.0 | \n",
208 | " 23.3 | \n",
209 | " 0.672 | \n",
210 | " 32.0 | \n",
211 | "
\n",
212 | " \n",
213 | " | 2 | \n",
214 | " 0.0 | \n",
215 | " 137.0 | \n",
216 | " 40.0 | \n",
217 | " 35.0 | \n",
218 | " 168.0 | \n",
219 | " 43.1 | \n",
220 | " 2.288 | \n",
221 | " 33.0 | \n",
222 | "
\n",
223 | " \n",
224 | " | 3 | \n",
225 | " 3.0 | \n",
226 | " 78.0 | \n",
227 | " 50.0 | \n",
228 | " 32.0 | \n",
229 | " 88.0 | \n",
230 | " 31.0 | \n",
231 | " 0.248 | \n",
232 | " 26.0 | \n",
233 | "
\n",
234 | " \n",
235 | " | 4 | \n",
236 | " 2.0 | \n",
237 | " 197.0 | \n",
238 | " 70.0 | \n",
239 | " 45.0 | \n",
240 | " 543.0 | \n",
241 | " 30.5 | \n",
242 | " 0.158 | \n",
243 | " 53.0 | \n",
244 | "
\n",
245 | " \n",
246 | "
\n",
247 | "
"
248 | ],
249 | "text/plain": [
250 | " 0 1 2 3 4 5 6 7\n",
251 | "0 6.0 148.0 72.0 35.0 0.0 33.6 0.627 50.0\n",
252 | "1 8.0 183.0 64.0 0.0 0.0 23.3 0.672 32.0\n",
253 | "2 0.0 137.0 40.0 35.0 168.0 43.1 2.288 33.0\n",
254 | "3 3.0 78.0 50.0 32.0 88.0 31.0 0.248 26.0\n",
255 | "4 2.0 197.0 70.0 45.0 543.0 30.5 0.158 53.0"
256 | ]
257 | },
258 | "execution_count": 3,
259 | "metadata": {},
260 | "output_type": "execute_result"
261 | }
262 | ],
263 | "source": [
264 | "features = train.columns[0:8]\n",
265 | "X = train[features]\n",
266 | "y = train[8]\n",
267 | "X.head()"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 53,
273 | "metadata": {},
274 | "outputs": [],
275 | "source": [
276 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=10)\n"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": 54,
282 | "metadata": {},
283 | "outputs": [
284 | {
285 | "name": "stdout",
286 | "output_type": "stream",
287 | "text": [
288 | "(614, 8) (154, 8)\n"
289 | ]
290 | }
291 | ],
292 | "source": [
293 | "print(X_train.shape,X_test.shape)"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 55,
299 | "metadata": {},
300 | "outputs": [],
301 | "source": [
302 | "X_train=np.asarray(X_train)\n",
303 | "y_train=np.asarray(y_train)"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 56,
309 | "metadata": {},
310 | "outputs": [],
311 | "source": [
312 | "def linear_kernel(x1, x2):\n",
313 | " return np.dot(x1, x2)\n",
314 | "\n",
315 | "def polynomial_kernel(x, y, p=3):\n",
316 | " return (1 + np.dot(x, y)) ** p\n",
317 | "\n",
318 | "def gaussian_kernel(x, y, sigma=100.0):\n",
319 | " # print(-linalg.norm(x-y)**2)\n",
320 | " x=np.asarray(x)\n",
321 | " y=np.asarray(y)\n",
322 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n",
323 | "\n",
324 | "def gm(y_predict,y_test):\n",
325 | " test_min=0\n",
326 | " test_max=0\n",
327 | " pred_min=0\n",
328 | " pred_max=0\n",
329 | " y_test=np.asarray(y_test)\n",
330 | " for i in range(0,154):\n",
331 | " if(y_test[i]==1):\n",
332 | " test_min=test_min+1\n",
333 | " else:\n",
334 | " test_max=test_max+1\n",
335 | " print(\"y_test min\",test_min) \n",
336 | " print(\"y_test max\",test_max)\n",
337 | " for i in range(0,154):\n",
338 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n",
339 | " pred_min=pred_min+1\n",
340 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n",
341 | " pred_max=pred_max+1\n",
342 | " print(\"y_pred min\",pred_min) \n",
343 | " print(\"y_pred max\",pred_max)\n",
344 | " se=pred_min/test_min\n",
345 | " sp=pred_max/test_max\n",
346 | " print(se,sp)\n",
347 | " gm=math.sqrt(se*sp)\n",
348 | " print(\"GM\",gm)"
349 | ]
350 | },
351 | {
352 | "cell_type": "markdown",
353 | "metadata": {},
354 | "source": [
355 | "# FSVM using Hyperplane"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 68,
361 | "metadata": {},
362 | "outputs": [],
363 | "source": [
364 | "from cvxopt import matrix\n",
365 | "class HYP_SVM(object):\n",
366 | "\n",
367 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
368 | " self.kernel = kernel\n",
369 | " self.C = C\n",
370 | " if self.C is not None: self.C = float(self.C)\n",
371 | " def m_func(self, X_train,X_test, y):\n",
372 | " n_samples, n_features = X_train.shape \n",
373 | " nt_samples, nt_features= X_test.shape\n",
374 | " self.K = np.zeros((n_samples, n_samples))\n",
375 | " for i in range(n_samples):\n",
376 | " for j in range(n_samples):\n",
377 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
378 | " # print(K[i,j])\n",
379 | " X_train=np.asarray(X_train)\n",
380 | " X_test=np.asarray(X_test)\n",
381 | " K1 = np.zeros((n_samples, n_samples))\n",
382 | " for i in range(n_samples):\n",
383 | " for j in range(n_samples):\n",
384 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
385 | " # print(K[i,j])\n",
386 | " print(K1.shape)\n",
387 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
388 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
389 | " A = cvxopt.matrix(y, (1,n_samples))\n",
390 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
391 | " b = cvxopt.matrix(0.0)\n",
392 | " #print(P,q,A,b)\n",
393 | " if self.C is None:\n",
394 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
395 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
396 | " \n",
397 | " else:\n",
398 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
399 | " tmp2 = np.identity(n_samples)\n",
400 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
401 | " tmp1 = np.zeros(n_samples)\n",
402 | " tmp2 = np.ones(n_samples) * self.C\n",
403 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
404 | " # solve QP problem\n",
405 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
406 | " print(solution['status'])\n",
407 | " # Lagrange multipliers\n",
408 | " a = np.ravel(solution['x'])\n",
409 | " a_org = np.ravel(solution['x'])\n",
410 | " # Support vectors have non zero lagrange multipliers\n",
411 | " sv = a > 1e-5\n",
412 | " #print(sv.shape)\n",
413 | " ind = np.arange(len(a))[sv]\n",
414 | " self.a_org=a\n",
415 | " self.a = a[sv]\n",
416 | " self.sv = X_train[sv]\n",
417 | " self.sv_y = y[sv]\n",
418 | " self.sv_yorg=y\n",
419 | " self.kernel = gaussian_kernel\n",
420 | " X_train=np.asarray(X_train)\n",
421 | " b = 0\n",
422 | " for n in range(len(self.a)):\n",
423 | " b += self.sv_y[n]\n",
424 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
425 | " b /= len(self.a)\n",
426 | " # print(self.a_org[1])\n",
427 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n",
428 | " w_phi=0\n",
429 | " total=0\n",
430 | " for n in range(len(self.a_org)):\n",
431 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n",
432 | " self.d_hyp=np.zeros(n_samples)\n",
433 | " for n in range(len(self.a_org)):\n",
434 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n",
435 | " func=np.zeros((n_samples))\n",
436 | " func=np.asarray(func)\n",
437 | " typ=2\n",
438 | " if(typ==1):\n",
439 | " for i in range(n_samples):\n",
440 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n",
441 | " beta=0.2\n",
442 | " if(typ==2):\n",
443 | " for i in range(n_samples):\n",
444 | " func[i]=2/(1+beta*self.d_hyp[i])\n",
445 | " r_max=268/500\n",
446 | " r_min=1\n",
447 | " self.m=func[0:268]*r_min\n",
448 | " print(self.m.shape)\n",
449 | " self.m=np.append(self.m,func[268:768]*r_max)\n",
450 | " print(self.m.shape)\n",
451 | " \n",
452 | " ##############################################################################\n",
453 | "\n",
454 | "\n",
455 | " def fit(self, X_train,X_test, y):\n",
456 | " self.kernel = gaussian_kernel\n",
457 | " n_samples, n_features = X_train.shape \n",
458 | " nt_samples, nt_features = X_test.shape\n",
459 | " # Gram matrix\n",
460 | "\n",
461 | " print(self.K.shape)\n",
462 | "\n",
463 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
464 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
465 | " A = cvxopt.matrix(y, (1,n_samples))\n",
466 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
467 | " b = cvxopt.matrix(0.0)\n",
468 | " #print(P,q,A,b)\n",
469 | " if self.C is None:\n",
470 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
471 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
472 | " \n",
473 | " else:\n",
474 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
475 | " tmp2 = np.identity(n_samples)\n",
476 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
477 | " tmp1 = np.zeros(n_samples)\n",
478 | " tmp2 = np.ones(n_samples) * self.C\n",
479 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
480 | " # solve QP problem\n",
481 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
482 | " print(solution['status'])\n",
483 | " # Lagrange multipliers\n",
484 | " a = np.ravel(solution['x'])\n",
485 | " a_org = np.ravel(solution['x'])\n",
486 | " # Support vectors have non zero lagrange multipliers\n",
487 | " for i in range(n_samples):\n",
488 | " sv=np.logical_or(self.a_org 1e-5)\n",
489 | " #print(sv.shape)\n",
490 | " ind = np.arange(len(a))[sv]\n",
491 | " self.a = a[sv]\n",
492 | " self.sv = X_train[sv]\n",
493 | " self.sv_y = y[sv]\n",
494 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
495 | "\n",
496 | " # Intercept\n",
497 | " self.b = 0\n",
498 | " for n in range(len(self.a)):\n",
499 | " self.b += self.sv_y[n]\n",
500 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
501 | " self.b /= len(self.a)\n",
502 | " print(self.b)\n",
503 | "\n",
504 | " # Weight vector\n",
505 | " if self.kernel == gaussian_kernel:\n",
506 | " self.w = np.zeros(n_features)\n",
507 | " for n in range(len(self.a)):\n",
508 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
509 | " else :\n",
510 | " self.w = None \n",
511 | " \n",
512 | " def project(self, X):\n",
513 | " if self.w is None:\n",
514 | " return np.dot(X, self.w) + self.b\n",
515 | " else:\n",
516 | " y_predict = np.zeros(len(X))\n",
517 | " X=np.asarray(X)\n",
518 | " for i in range(len(X)):\n",
519 | " s = 0\n",
520 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
521 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
522 | " y_predict[i] = s\n",
523 | " # print(y_predict[i])\n",
524 | " return y_predict + self.b\n",
525 | "\n",
526 | " def predict(self, X):\n",
527 | " return np.sign(self.project(X))"
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "execution_count": 69,
533 | "metadata": {},
534 | "outputs": [
535 | {
536 | "name": "stdout",
537 | "output_type": "stream",
538 | "text": [
539 | "(614, 614)\n",
540 | " pcost dcost gap pres dres\n",
541 | " 0: 3.1997e+04 -2.6998e+06 3e+06 1e-01 3e-13\n",
542 | " 1: 7.0493e+02 -3.7645e+05 4e+05 1e-02 2e-13\n",
543 | " 2: -1.5080e+04 -8.7294e+04 7e+04 2e-03 2e-13\n",
544 | " 3: -2.0698e+04 -4.2280e+04 2e+04 3e-04 2e-13\n",
545 | " 4: -2.3448e+04 -3.5410e+04 1e+04 2e-04 3e-13\n",
546 | " 5: -2.4919e+04 -3.1494e+04 7e+03 7e-05 3e-13\n",
547 | " 6: -2.5867e+04 -2.9015e+04 3e+03 3e-05 3e-13\n",
548 | " 7: -2.6304e+04 -2.8026e+04 2e+03 1e-05 3e-13\n",
549 | " 8: -2.6625e+04 -2.7314e+04 7e+02 2e-06 4e-13\n",
550 | " 9: -2.6775e+04 -2.7035e+04 3e+02 7e-07 4e-13\n",
551 | "10: -2.6855e+04 -2.6900e+04 4e+01 4e-08 4e-13\n",
552 | "11: -2.6872e+04 -2.6874e+04 2e+00 1e-09 4e-13\n",
553 | "12: -2.6873e+04 -2.6873e+04 4e-02 2e-11 4e-13\n",
554 | "13: -2.6873e+04 -2.6873e+04 6e-04 2e-12 4e-13\n",
555 | "Optimal solution found.\n",
556 | "optimal\n",
557 | "(268,)\n",
558 | "(614,)\n",
559 | "(614, 614)\n",
560 | " pcost dcost gap pres dres\n",
561 | " 0: 3.1997e+04 -2.6998e+06 3e+06 1e-01 3e-13\n",
562 | " 1: 7.0493e+02 -3.7645e+05 4e+05 1e-02 2e-13\n",
563 | " 2: -1.5080e+04 -8.7294e+04 7e+04 2e-03 2e-13\n",
564 | " 3: -2.0698e+04 -4.2280e+04 2e+04 3e-04 2e-13\n",
565 | " 4: -2.3448e+04 -3.5410e+04 1e+04 2e-04 3e-13\n",
566 | " 5: -2.4919e+04 -3.1494e+04 7e+03 7e-05 3e-13\n",
567 | " 6: -2.5867e+04 -2.9015e+04 3e+03 3e-05 3e-13\n",
568 | " 7: -2.6304e+04 -2.8026e+04 2e+03 1e-05 3e-13\n",
569 | " 8: -2.6625e+04 -2.7314e+04 7e+02 2e-06 4e-13\n",
570 | " 9: -2.6775e+04 -2.7035e+04 3e+02 7e-07 4e-13\n",
571 | "10: -2.6855e+04 -2.6900e+04 4e+01 4e-08 4e-13\n",
572 | "11: -2.6872e+04 -2.6874e+04 2e+00 1e-09 4e-13\n",
573 | "12: -2.6873e+04 -2.6873e+04 4e-02 2e-11 4e-13\n",
574 | "13: -2.6873e+04 -2.6873e+04 6e-04 2e-12 4e-13\n",
575 | "Optimal solution found.\n",
576 | "optimal\n",
577 | "-1.0325312375429936\n",
578 | "y_test min 53\n",
579 | "y_test max 101\n",
580 | "y_pred min 30\n",
581 | "y_pred max 89\n",
582 | "0.5660377358490566 0.8811881188118812\n",
583 | "GM 0.7062476390256938\n",
584 | "119 out of 154 predictions correct\n",
585 | "Accuracy 0.7727272727272727\n"
586 | ]
587 | }
588 | ],
589 | "source": [
590 | "\n",
591 | "if __name__ == \"__main__\":\n",
592 | " import pylab as pl \n",
593 | " def hyp_svm():\n",
594 | " \n",
595 | " clf = HYP_SVM(C=100.0)\n",
596 | " typ=2\n",
597 | " clf.m_func(X_train,X_test,y_train)\n",
598 | " clf.fit(X_train,X_test, y_train)\n",
599 | " y_predict = clf.predict(X_test)\n",
600 | " gm(y_predict,y_test)\n",
601 | " correct = np.sum(y_predict == y_test)\n",
602 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
603 | " print(\"Accuracy\",correct/len(y_predict))\n",
604 | "\n",
605 | " hyp_svm() "
606 | ]
607 | },
608 | {
609 | "cell_type": "code",
610 | "execution_count": null,
611 | "metadata": {},
612 | "outputs": [],
613 | "source": []
614 | },
615 | {
616 | "cell_type": "code",
617 | "execution_count": null,
618 | "metadata": {},
619 | "outputs": [],
620 | "source": []
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": null,
625 | "metadata": {},
626 | "outputs": [],
627 | "source": []
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": null,
632 | "metadata": {},
633 | "outputs": [],
634 | "source": []
635 | },
636 | {
637 | "cell_type": "code",
638 | "execution_count": null,
639 | "metadata": {},
640 | "outputs": [],
641 | "source": []
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": null,
646 | "metadata": {},
647 | "outputs": [],
648 | "source": []
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": null,
653 | "metadata": {},
654 | "outputs": [],
655 | "source": []
656 | },
657 | {
658 | "cell_type": "code",
659 | "execution_count": null,
660 | "metadata": {},
661 | "outputs": [],
662 | "source": []
663 | },
664 | {
665 | "cell_type": "code",
666 | "execution_count": null,
667 | "metadata": {},
668 | "outputs": [],
669 | "source": []
670 | },
671 | {
672 | "cell_type": "code",
673 | "execution_count": null,
674 | "metadata": {},
675 | "outputs": [],
676 | "source": []
677 | },
678 | {
679 | "cell_type": "code",
680 | "execution_count": null,
681 | "metadata": {},
682 | "outputs": [],
683 | "source": []
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": null,
688 | "metadata": {},
689 | "outputs": [],
690 | "source": []
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": null,
695 | "metadata": {},
696 | "outputs": [],
697 | "source": []
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": null,
702 | "metadata": {},
703 | "outputs": [],
704 | "source": []
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": null,
709 | "metadata": {},
710 | "outputs": [],
711 | "source": []
712 | },
713 | {
714 | "cell_type": "markdown",
715 | "metadata": {},
716 | "source": [
717 | "# Normal SVM using CVXOPT"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": 145,
723 | "metadata": {},
724 | "outputs": [],
725 | "source": [
726 | "from cvxopt import matrix\n",
727 | "class SVM(object):\n",
728 | "\n",
729 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
730 | " self.kernel = kernel\n",
731 | " self.C = C\n",
732 | " if self.C is not None: self.C = float(self.C)\n",
733 | " def fit(self, X, y):\n",
734 | " self.kernel = gaussian_kernel\n",
735 | " n_samples, n_features = X.shape\n",
736 | " # Gram matrix\n",
737 | " K = np.zeros((n_samples, n_samples))\n",
738 | " for i in range(n_samples):\n",
739 | " for j in range(n_samples):\n",
740 | " K[i,j] = gaussian_kernel(X[i], X[j])\n",
741 | " # print(K[i,j])\n",
742 | " print(K.shape)\n",
743 | "\n",
744 | " P = cvxopt.matrix(np.outer(y,y) * K)\n",
745 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
746 | " A = cvxopt.matrix(y, (1,n_samples))\n",
747 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
748 | " b = cvxopt.matrix(0.0)\n",
749 | " #print(P,q,A,b)\n",
750 | " if self.C is None:\n",
751 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
752 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
753 | " \n",
754 | " else:\n",
755 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
756 | " tmp2 = np.identity(n_samples)\n",
757 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
758 | " tmp1 = np.zeros(n_samples)\n",
759 | " tmp2 = np.ones(n_samples) * self.C\n",
760 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
761 | " # solve QP problem\n",
762 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
763 | " print(solution['status'])\n",
764 | " # Lagrange multipliers\n",
765 | " a = np.ravel(solution['x'])\n",
766 | " # print(a)\n",
767 | " # Support vectors have non zero lagrange multipliers\n",
768 | " sv = a > 1e-5\n",
769 | " print(sv.shape)\n",
770 | " ind = np.arange(len(a))[sv]\n",
771 | " self.a = a[sv]\n",
772 | " self.sv = X[sv]\n",
773 | " self.sv_y = y[sv]\n",
774 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
775 | "\n",
776 | " # Intercept\n",
777 | " self.b = 0\n",
778 | " for n in range(len(self.a)):\n",
779 | " self.b += self.sv_y[n]\n",
780 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n",
781 | " self.b /= len(self.a)\n",
782 | "\n",
783 | " # Weight vector\n",
784 | " if self.kernel == gaussian_kernel:\n",
785 | " self.w = np.zeros(n_features)\n",
786 | " for n in range(len(self.a)):\n",
787 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
788 | " #print(self.w)\n",
789 | " else:\n",
790 | " self.w = None\n",
791 | "\n",
792 | " def project(self, X):\n",
793 | " if self.w is None:\n",
794 | " return np.dot(X, self.w) + self.b\n",
795 | " else:\n",
796 | " y_predict = np.zeros(len(X))\n",
797 | " X=np.asarray(X)\n",
798 | " for i in range(len(X)):\n",
799 | " s = 0\n",
800 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
801 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
802 | " y_predict[i] = s\n",
803 | " # print(y_predict[i])\n",
804 | " return y_predict + self.b\n",
805 | "\n",
806 | " def predict(self, X):\n",
807 | " return np.sign(self.project(X))"
808 | ]
809 | },
810 | {
811 | "cell_type": "code",
812 | "execution_count": 146,
813 | "metadata": {},
814 | "outputs": [
815 | {
816 | "name": "stdout",
817 | "output_type": "stream",
818 | "text": [
819 | "(614, 614)\n",
820 | " pcost dcost gap pres dres\n",
821 | " 0: 4.0124e+04 -2.8615e+06 4e+06 2e-01 3e-13\n",
822 | " 1: 1.0960e+04 -3.9278e+05 4e+05 1e-02 3e-13\n",
823 | " 2: -1.1678e+04 -1.0938e+05 1e+05 2e-03 2e-13\n",
824 | " 3: -1.8013e+04 -4.6040e+04 3e+04 4e-04 3e-13\n",
825 | " 4: -2.0703e+04 -3.7635e+04 2e+04 2e-04 3e-13\n",
826 | " 5: -2.2896e+04 -3.0055e+04 7e+03 7e-05 3e-13\n",
827 | " 6: -2.3582e+04 -2.8515e+04 5e+03 4e-05 3e-13\n",
828 | " 7: -2.4334e+04 -2.6705e+04 2e+03 2e-05 3e-13\n",
829 | " 8: -2.4847e+04 -2.5598e+04 8e+02 3e-06 4e-13\n",
830 | " 9: -2.5003e+04 -2.5317e+04 3e+02 1e-06 3e-13\n",
831 | "10: -2.5063e+04 -2.5207e+04 1e+02 2e-12 4e-13\n",
832 | "11: -2.5108e+04 -2.5145e+04 4e+01 1e-12 4e-13\n",
833 | "12: -2.5120e+04 -2.5128e+04 8e+00 7e-13 4e-13\n",
834 | "13: -2.5124e+04 -2.5124e+04 2e-01 5e-13 4e-13\n",
835 | "14: -2.5124e+04 -2.5124e+04 5e-03 5e-13 4e-13\n",
836 | "Optimal solution found.\n",
837 | "optimal\n",
838 | "(614,)\n",
839 | "404 support vectors out of 614 points\n",
840 | "y_test min 54\n",
841 | "y_test max 100\n",
842 | "y_pred min 29\n",
843 | "y_pred max 78\n",
844 | "0.5370370370370371 0.78\n",
845 | "GM 0.6472162612982533\n",
846 | "107 out of 154 predictions correct\n",
847 | "Accuracy 0.6948051948051948\n"
848 | ]
849 | }
850 | ],
851 | "source": [
852 | "\n",
853 | "if __name__ == \"__main__\":\n",
854 | " import pylab as pl \n",
855 | " def normal_svm():\n",
856 | " \n",
857 | " clf = SVM(C=100.0)\n",
858 | " clf.fit(X_train, y_train)\n",
859 | " y_predict = clf.predict(X_test)\n",
860 | " gm(y_predict,y_test)\n",
861 | " correct = np.sum(y_predict == y_test)\n",
862 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
863 | " print(\"Accuracy\",correct/len(y_predict))\n",
864 | "\n",
865 | " normal_svm() "
866 | ]
867 | },
868 | {
869 | "cell_type": "code",
870 | "execution_count": null,
871 | "metadata": {},
872 | "outputs": [],
873 | "source": []
874 | }
875 | ],
876 | "metadata": {
877 | "kernelspec": {
878 | "display_name": "Python 3",
879 | "language": "python",
880 | "name": "python3"
881 | },
882 | "language_info": {
883 | "codemirror_mode": {
884 | "name": "ipython",
885 | "version": 3
886 | },
887 | "file_extension": ".py",
888 | "mimetype": "text/x-python",
889 | "name": "python",
890 | "nbconvert_exporter": "python",
891 | "pygments_lexer": "ipython3",
892 | "version": "3.6.5"
893 | }
894 | },
895 | "nbformat": 4,
896 | "nbformat_minor": 2
897 | }
898 |
--------------------------------------------------------------------------------
/FUZZY SVM Pageblock.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "C:\\Users\\HP\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n"
14 | ]
15 | }
16 | ],
17 | "source": [
18 | "import numpy as np\n",
19 | "from numpy import linalg\n",
20 | "import cvxopt\n",
21 | "import cvxopt.solvers\n",
22 | "import pandas as pd\n",
23 | "from sklearn import cross_validation\n",
24 | "from sklearn.metrics import classification_report\n",
25 | "from sklearn.metrics import accuracy_score\n",
26 | "from cvxopt import matrix as cvxopt_matrix\n",
27 | "from cvxopt import solvers as cvxopt_solvers\n",
28 | "from sklearn import svm\n",
29 | "import math "
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "data": {
39 | "text/html": [
40 | "\n",
41 | "\n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " | \n",
58 | " 0 | \n",
59 | " 1 | \n",
60 | " 2 | \n",
61 | " 3 | \n",
62 | " 4 | \n",
63 | " 5 | \n",
64 | " 6 | \n",
65 | " 7 | \n",
66 | " 8 | \n",
67 | " 9 | \n",
68 | " 10 | \n",
69 | "
\n",
70 | " \n",
71 | " \n",
72 | " \n",
73 | " | 0 | \n",
74 | " 48.0 | \n",
75 | " 402.0 | \n",
76 | " 19296.0 | \n",
77 | " 8.375 | \n",
78 | " 0.055 | \n",
79 | " 0.094 | \n",
80 | " 4.13 | \n",
81 | " 1058.0 | \n",
82 | " 1814.0 | \n",
83 | " 256.0 | \n",
84 | " 1.0 | \n",
85 | "
\n",
86 | " \n",
87 | " | 1 | \n",
88 | " 304.0 | \n",
89 | " 463.0 | \n",
90 | " 140752.0 | \n",
91 | " 1.523 | \n",
92 | " 0.063 | \n",
93 | " 0.121 | \n",
94 | " 3.91 | \n",
95 | " 8898.0 | \n",
96 | " 17081.0 | \n",
97 | " 2273.0 | \n",
98 | " 1.0 | \n",
99 | "
\n",
100 | " \n",
101 | " | 2 | \n",
102 | " 306.0 | \n",
103 | " 465.0 | \n",
104 | " 142290.0 | \n",
105 | " 1.520 | \n",
106 | " 0.055 | \n",
107 | " 0.123 | \n",
108 | " 2.69 | \n",
109 | " 7861.0 | \n",
110 | " 17452.0 | \n",
111 | " 2925.0 | \n",
112 | " 1.0 | \n",
113 | "
\n",
114 | " \n",
115 | " | 3 | \n",
116 | " 45.0 | \n",
117 | " 79.0 | \n",
118 | " 3555.0 | \n",
119 | " 1.756 | \n",
120 | " 0.087 | \n",
121 | " 0.195 | \n",
122 | " 4.81 | \n",
123 | " 308.0 | \n",
124 | " 693.0 | \n",
125 | " 64.0 | \n",
126 | " 1.0 | \n",
127 | "
\n",
128 | " \n",
129 | " | 4 | \n",
130 | " 311.0 | \n",
131 | " 463.0 | \n",
132 | " 143993.0 | \n",
133 | " 1.489 | \n",
134 | " 0.088 | \n",
135 | " 0.160 | \n",
136 | " 3.93 | \n",
137 | " 12631.0 | \n",
138 | " 23092.0 | \n",
139 | " 3212.0 | \n",
140 | " 1.0 | \n",
141 | "
\n",
142 | " \n",
143 | "
\n",
144 | "
"
145 | ],
146 | "text/plain": [
147 | " 0 1 2 3 4 5 6 7 8 \\\n",
148 | "0 48.0 402.0 19296.0 8.375 0.055 0.094 4.13 1058.0 1814.0 \n",
149 | "1 304.0 463.0 140752.0 1.523 0.063 0.121 3.91 8898.0 17081.0 \n",
150 | "2 306.0 465.0 142290.0 1.520 0.055 0.123 2.69 7861.0 17452.0 \n",
151 | "3 45.0 79.0 3555.0 1.756 0.087 0.195 4.81 308.0 693.0 \n",
152 | "4 311.0 463.0 143993.0 1.489 0.088 0.160 3.93 12631.0 23092.0 \n",
153 | "\n",
154 | " 9 10 \n",
155 | "0 256.0 1.0 \n",
156 | "1 2273.0 1.0 \n",
157 | "2 2925.0 1.0 \n",
158 | "3 64.0 1.0 \n",
159 | "4 3212.0 1.0 "
160 | ]
161 | },
162 | "execution_count": 2,
163 | "metadata": {},
164 | "output_type": "execute_result"
165 | }
166 | ],
167 | "source": [
168 | "train = pd.read_csv(\"modifiedpage.csv\", header=None)\n",
169 | "train.head()"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 3,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/html": [
180 | "\n",
181 | "\n",
194 | "
\n",
195 | " \n",
196 | " \n",
197 | " | \n",
198 | " 0 | \n",
199 | " 1 | \n",
200 | " 2 | \n",
201 | " 3 | \n",
202 | " 4 | \n",
203 | " 5 | \n",
204 | " 6 | \n",
205 | " 7 | \n",
206 | " 8 | \n",
207 | " 9 | \n",
208 | "
\n",
209 | " \n",
210 | " \n",
211 | " \n",
212 | " | 0 | \n",
213 | " 48.0 | \n",
214 | " 402.0 | \n",
215 | " 19296.0 | \n",
216 | " 8.375 | \n",
217 | " 0.055 | \n",
218 | " 0.094 | \n",
219 | " 4.13 | \n",
220 | " 1058.0 | \n",
221 | " 1814.0 | \n",
222 | " 256.0 | \n",
223 | "
\n",
224 | " \n",
225 | " | 1 | \n",
226 | " 304.0 | \n",
227 | " 463.0 | \n",
228 | " 140752.0 | \n",
229 | " 1.523 | \n",
230 | " 0.063 | \n",
231 | " 0.121 | \n",
232 | " 3.91 | \n",
233 | " 8898.0 | \n",
234 | " 17081.0 | \n",
235 | " 2273.0 | \n",
236 | "
\n",
237 | " \n",
238 | " | 2 | \n",
239 | " 306.0 | \n",
240 | " 465.0 | \n",
241 | " 142290.0 | \n",
242 | " 1.520 | \n",
243 | " 0.055 | \n",
244 | " 0.123 | \n",
245 | " 2.69 | \n",
246 | " 7861.0 | \n",
247 | " 17452.0 | \n",
248 | " 2925.0 | \n",
249 | "
\n",
250 | " \n",
251 | " | 3 | \n",
252 | " 45.0 | \n",
253 | " 79.0 | \n",
254 | " 3555.0 | \n",
255 | " 1.756 | \n",
256 | " 0.087 | \n",
257 | " 0.195 | \n",
258 | " 4.81 | \n",
259 | " 308.0 | \n",
260 | " 693.0 | \n",
261 | " 64.0 | \n",
262 | "
\n",
263 | " \n",
264 | " | 4 | \n",
265 | " 311.0 | \n",
266 | " 463.0 | \n",
267 | " 143993.0 | \n",
268 | " 1.489 | \n",
269 | " 0.088 | \n",
270 | " 0.160 | \n",
271 | " 3.93 | \n",
272 | " 12631.0 | \n",
273 | " 23092.0 | \n",
274 | " 3212.0 | \n",
275 | "
\n",
276 | " \n",
277 | "
\n",
278 | "
"
279 | ],
280 | "text/plain": [
281 | " 0 1 2 3 4 5 6 7 8 9\n",
282 | "0 48.0 402.0 19296.0 8.375 0.055 0.094 4.13 1058.0 1814.0 256.0\n",
283 | "1 304.0 463.0 140752.0 1.523 0.063 0.121 3.91 8898.0 17081.0 2273.0\n",
284 | "2 306.0 465.0 142290.0 1.520 0.055 0.123 2.69 7861.0 17452.0 2925.0\n",
285 | "3 45.0 79.0 3555.0 1.756 0.087 0.195 4.81 308.0 693.0 64.0\n",
286 | "4 311.0 463.0 143993.0 1.489 0.088 0.160 3.93 12631.0 23092.0 3212.0"
287 | ]
288 | },
289 | "execution_count": 3,
290 | "metadata": {},
291 | "output_type": "execute_result"
292 | }
293 | ],
294 | "source": [
295 | "features = train.columns[0:10]\n",
296 | "X = train[features]\n",
297 | "y = train[10]\n",
298 | "X.head()"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": 4,
304 | "metadata": {},
305 | "outputs": [],
306 | "source": [
307 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=40)\n"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": 5,
313 | "metadata": {},
314 | "outputs": [
315 | {
316 | "name": "stdout",
317 | "output_type": "stream",
318 | "text": [
319 | "(4378, 10) (1095, 10)\n"
320 | ]
321 | }
322 | ],
323 | "source": [
324 | "print(X_train.shape,X_test.shape)"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 6,
330 | "metadata": {},
331 | "outputs": [],
332 | "source": [
333 | "X_train=np.asarray(X_train)\n",
334 | "y_train=np.asarray(y_train)"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": 17,
340 | "metadata": {},
341 | "outputs": [],
342 | "source": [
343 | "def linear_kernel(x1, x2):\n",
344 | " return np.dot(x1, x2)\n",
345 | "\n",
346 | "def polynomial_kernel(x, y, p=3):\n",
347 | " return (1 + np.dot(x, y)) ** p\n",
348 | "\n",
349 | "def gaussian_kernel(x, y, sigma=90.0):\n",
350 | " # print(-linalg.norm(x-y)**2)\n",
351 | " x=np.asarray(x)\n",
352 | " y=np.asarray(y)\n",
353 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n",
354 | "\n",
355 | "def gm(y_predict,y_test):\n",
356 | " test_min=0\n",
357 | " test_max=0\n",
358 | " pred_min=0\n",
359 | " pred_max=0\n",
360 | " y_test=np.asarray(y_test)\n",
361 | " for i in range(0,1095):\n",
362 | " if(y_test[i]==1):\n",
363 | " test_min=test_min+1\n",
364 | " else:\n",
365 | " test_max=test_max+1\n",
366 | " print(\"y_test min\",test_min) \n",
367 | " print(\"y_test max\",test_max)\n",
368 | " for i in range(0,1095):\n",
369 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n",
370 | " pred_min=pred_min+1\n",
371 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n",
372 | " pred_max=pred_max+1\n",
373 | " print(\"y_pred min\",pred_min) \n",
374 | " print(\"y_pred max\",pred_max)\n",
375 | " se=pred_min/test_min\n",
376 | " sp=pred_max/test_max\n",
377 | " print(se,sp)\n",
378 | " gm=math.sqrt(se*sp)\n",
379 | " print(\"GM\",gm)"
380 | ]
381 | },
382 | {
383 | "cell_type": "markdown",
384 | "metadata": {},
385 | "source": [
386 | "# FSVM using Hyperplane"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 18,
392 | "metadata": {},
393 | "outputs": [],
394 | "source": [
395 | "from cvxopt import matrix\n",
396 | "class HYP_SVM(object):\n",
397 | "\n",
398 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
399 | " self.kernel = kernel\n",
400 | " self.C = C\n",
401 | " if self.C is not None: self.C = float(self.C)\n",
402 | " def m_func(self, X_train,X_test, y):\n",
403 | " n_samples, n_features = X_train.shape \n",
404 | " nt_samples, nt_features= X_test.shape\n",
405 | " self.K = np.zeros((n_samples, n_samples))\n",
406 | " for i in range(n_samples):\n",
407 | " for j in range(n_samples):\n",
408 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
409 | " # print(K[i,j])\n",
410 | " X_train=np.asarray(X_train)\n",
411 | " X_test=np.asarray(X_test)\n",
412 | " K1 = np.zeros((n_samples, n_samples))\n",
413 | " for i in range(n_samples):\n",
414 | " for j in range(n_samples):\n",
415 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
416 | " # print(K[i,j])\n",
417 | " print(K1.shape)\n",
418 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
419 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
420 | " A = cvxopt.matrix(y, (1,n_samples))\n",
421 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
422 | " b = cvxopt.matrix(0.0)\n",
423 | " #print(P,q,A,b)\n",
424 | " if self.C is None:\n",
425 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
426 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
427 | " \n",
428 | " else:\n",
429 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
430 | " tmp2 = np.identity(n_samples)\n",
431 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
432 | " tmp1 = np.zeros(n_samples)\n",
433 | " tmp2 = np.ones(n_samples) * self.C\n",
434 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
435 | " # solve QP problem\n",
436 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
437 | " print(solution['status'])\n",
438 | " # Lagrange multipliers\n",
439 | " a = np.ravel(solution['x'])\n",
440 | " a_org = np.ravel(solution['x'])\n",
441 | " # Support vectors have non zero lagrange multipliers\n",
442 | " sv = a > 1e-5\n",
443 | " #print(sv.shape)\n",
444 | " ind = np.arange(len(a))[sv]\n",
445 | " self.a_org=a\n",
446 | " self.a = a[sv]\n",
447 | " self.sv = X_train[sv]\n",
448 | " self.sv_y = y[sv]\n",
449 | " self.sv_yorg=y\n",
450 | " self.kernel = gaussian_kernel\n",
451 | " X_train=np.asarray(X_train)\n",
452 | " b = 0\n",
453 | " for n in range(len(self.a)):\n",
454 | " b += self.sv_y[n]\n",
455 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
456 | " b /= len(self.a)\n",
457 | " # print(self.a_org[1])\n",
458 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n",
459 | " w_phi=0\n",
460 | " total=0\n",
461 | " for n in range(len(self.a_org)):\n",
462 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n",
463 | " self.d_hyp=np.zeros(n_samples)\n",
464 | " for n in range(len(self.a_org)):\n",
465 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n",
466 | " func=np.zeros((n_samples))\n",
467 | " func=np.asarray(func)\n",
468 | " typ=1\n",
469 | " if(typ==1):\n",
470 | " for i in range(n_samples):\n",
471 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n",
472 | " beta=0.8\n",
473 | " if(typ==2):\n",
474 | " for i in range(n_samples):\n",
475 | " func[i]=2/(1+beta*self.d_hyp[i])\n",
476 | " r_max=103/4074\n",
477 | " r_min=1\n",
478 | " self.m=func[0:115]*r_min\n",
479 | " print(self.m.shape)\n",
480 | " self.m=np.append(self.m,func[115:5473]*r_max)\n",
481 | " print(self.m.shape)\n",
482 | " \n",
483 | " ##############################################################################\n",
484 | "\n",
485 | "\n",
486 | " def fit(self, X_train,X_test, y):\n",
487 | " self.kernel = gaussian_kernel\n",
488 | " n_samples, n_features = X_train.shape \n",
489 | " nt_samples, nt_features = X_test.shape\n",
490 | " # Gram matrix\n",
491 | "\n",
492 | " print(self.K.shape)\n",
493 | "\n",
494 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
495 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
496 | " A = cvxopt.matrix(y, (1,n_samples))\n",
497 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
498 | " b = cvxopt.matrix(0.0)\n",
499 | " #print(P,q,A,b)\n",
500 | " if self.C is None:\n",
501 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
502 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
503 | " \n",
504 | " else:\n",
505 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
506 | " tmp2 = np.identity(n_samples)\n",
507 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
508 | " tmp1 = np.zeros(n_samples)\n",
509 | " tmp2 = np.ones(n_samples) * self.C\n",
510 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
511 | " # solve QP problem\n",
512 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
513 | " print(solution['status'])\n",
514 | " # Lagrange multipliers\n",
515 | " a = np.ravel(solution['x'])\n",
516 | " a_org = np.ravel(solution['x'])\n",
517 | " # Support vectors have non zero lagrange multipliers\n",
518 | " for i in range(n_samples):\n",
519 | " sv=np.logical_or(self.a_org 1e-5)\n",
520 | " #print(sv.shape)\n",
521 | " ind = np.arange(len(a))[sv]\n",
522 | " self.a = a[sv]\n",
523 | " self.sv = X_train[sv]\n",
524 | " self.sv_y = y[sv]\n",
525 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
526 | "\n",
527 | " # Intercept\n",
528 | " self.b = 0\n",
529 | " for n in range(len(self.a)):\n",
530 | " self.b += self.sv_y[n]\n",
531 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
532 | " self.b /= len(self.a)\n",
533 | " print(self.b)\n",
534 | "\n",
535 | " # Weight vector\n",
536 | " if self.kernel == gaussian_kernel:\n",
537 | " self.w = np.zeros(n_features)\n",
538 | " for n in range(len(self.a)):\n",
539 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
540 | " else :\n",
541 | " self.w = None \n",
542 | " \n",
543 | " def project(self, X):\n",
544 | " if self.w is None:\n",
545 | " return np.dot(X, self.w) + self.b\n",
546 | " else:\n",
547 | " y_predict = np.zeros(len(X))\n",
548 | " X=np.asarray(X)\n",
549 | " for i in range(len(X)):\n",
550 | " s = 0\n",
551 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
552 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
553 | " y_predict[i] = s\n",
554 | " # print(y_predict[i])\n",
555 | " return y_predict + self.b\n",
556 | "\n",
557 | " def predict(self, X):\n",
558 | " return np.sign(self.project(X))"
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": null,
564 | "metadata": {},
565 | "outputs": [],
566 | "source": [
567 | "\n",
568 | "if __name__ == \"__main__\":\n",
569 | " import pylab as pl \n",
570 | " def hyp_svm():\n",
571 | " \n",
572 | " clf = HYP_SVM(C=100.0)\n",
573 | " typ=2\n",
574 | " clf.m_func(X_train,X_test,y_train)\n",
575 | " clf.fit(X_train,X_test, y_train)\n",
576 | " y_predict = clf.predict(X_test)\n",
577 | " gm(y_predict,y_test)\n",
578 | " correct = np.sum(y_predict == y_test)\n",
579 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
580 | " print(\"Accuracy\",correct/len(y_predict))\n",
581 | "\n",
582 | " hyp_svm() "
583 | ]
584 | },
585 | {
586 | "cell_type": "code",
587 | "execution_count": null,
588 | "metadata": {},
589 | "outputs": [],
590 | "source": []
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": null,
595 | "metadata": {},
596 | "outputs": [],
597 | "source": []
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": 10,
602 | "metadata": {},
603 | "outputs": [
604 | {
605 | "name": "stdout",
606 | "output_type": "stream",
607 | "text": [
608 | "Overall RBF KERNEL SVM accuracy: 0.9808219178082191\n"
609 | ]
610 | }
611 | ],
612 | "source": [
613 | "clf_svm = svm.SVC(kernel='rbf', gamma=0.001, C=100)\n",
614 | "clf_svm.fit(X_train, y_train)\n",
615 | "y_pred_svm = clf_svm.predict(X_test) \n",
616 | "acc_svm = accuracy_score(y_test, y_pred_svm)\n",
617 | "print (\"Overall RBF KERNEL SVM accuracy: \",acc_svm)"
618 | ]
619 | },
704 | {
705 | "cell_type": "markdown",
706 | "metadata": {},
707 | "source": [
708 | "# Normal SVM using CVXOPT"
709 | ]
710 | },
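 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
   "The `fit` method below passes the soft-margin SVM dual,\n",
   "$\\max_{\\alpha} \\; \\sum_i \\alpha_i - \\tfrac{1}{2}\\sum_{i,j} \\alpha_i \\alpha_j y_i y_j K(x_i, x_j)$ subject to $0 \\le \\alpha_i \\le C$ and $\\sum_i \\alpha_i y_i = 0$,\n",
   "to `cvxopt.solvers.qp`, which minimises $\\tfrac{1}{2} x^T P x + q^T x$ subject to $G x \\le h$ and $A x = b$.\n",
   "Hence $P_{ij} = y_i y_j K(x_i, x_j)$, $q = -\\mathbf{1}$, $A = y^T$, $b = 0$, and the box constraints are stacked into $G = [-I;\\, I]$, $h = [\\mathbf{0};\\, C\\,\\mathbf{1}]$."
  ]
 },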
711 | {
712 | "cell_type": "code",
713 | "execution_count": 21,
714 | "metadata": {},
715 | "outputs": [],
716 | "source": [
717 | "from cvxopt import matrix\n",
718 | "class SVM(object):\n",
719 | "\n",
720 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
721 | " self.kernel = kernel\n",
722 | " self.C = C\n",
723 | " if self.C is not None: self.C = float(self.C)\n",
724 | " def fit(self, X, y):\n",
725 | " self.kernel = gaussian_kernel\n",
726 | " n_samples, n_features = X.shape\n",
727 | " # Gram matrix\n",
728 | " K = np.zeros((n_samples, n_samples))\n",
729 | " for i in range(n_samples):\n",
730 | " for j in range(n_samples):\n",
731 | " K[i,j] = gaussian_kernel(X[i], X[j])\n",
732 | " # print(K[i,j])\n",
733 | " print(K.shape)\n",
734 | "\n",
735 | " P = cvxopt.matrix(np.outer(y,y) * K)\n",
736 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
737 | " A = cvxopt.matrix(y, (1,n_samples))\n",
738 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
739 | " b = cvxopt.matrix(0.0)\n",
740 | " #print(P,q,A,b)\n",
741 | " if self.C is None:\n",
742 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
743 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
744 | " \n",
745 | " else:\n",
746 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
747 | " tmp2 = np.identity(n_samples)\n",
748 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
749 | " tmp1 = np.zeros(n_samples)\n",
750 | " tmp2 = np.ones(n_samples) * self.C\n",
751 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
752 | " # solve QP problem\n",
753 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
754 | " print(solution['status'])\n",
755 | " # Lagrange multipliers\n",
756 | " a = np.ravel(solution['x'])\n",
757 | " # print(a)\n",
758 | " # Support vectors have non zero lagrange multipliers\n",
759 | " sv = a > 1e-5\n",
760 | " print(sv.shape)\n",
761 | " ind = np.arange(len(a))[sv]\n",
762 | " self.a = a[sv]\n",
763 | " self.sv = X[sv]\n",
764 | " self.sv_y = y[sv]\n",
765 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
766 | "\n",
767 | " # Intercept\n",
768 | " self.b = 0\n",
769 | " for n in range(len(self.a)):\n",
770 | " self.b += self.sv_y[n]\n",
771 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n",
772 | " self.b /= len(self.a)\n",
773 | "\n",
774 | " # Weight vector\n",
775 | " if self.kernel == gaussian_kernel:\n",
776 | " self.w = np.zeros(n_features)\n",
777 | " for n in range(len(self.a)):\n",
778 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
779 | " #print(self.w)\n",
780 | " else:\n",
781 | " self.w = None\n",
782 | "\n",
783 | " def project(self, X):\n",
784 | " if self.w is None:\n",
785 | " return np.dot(X, self.w) + self.b\n",
786 | " else:\n",
787 | " y_predict = np.zeros(len(X))\n",
788 | " X=np.asarray(X)\n",
789 | " for i in range(len(X)):\n",
790 | " s = 0\n",
791 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
792 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
793 | " y_predict[i] = s\n",
794 | " # print(y_predict[i])\n",
795 | " return y_predict + self.b\n",
796 | "\n",
797 | " def predict(self, X):\n",
798 | " return np.sign(self.project(X))"
799 | ]
800 | },
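 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
   "The nested loops that build the Gram matrix in `fit` above cost $O(n^2)$ Python-level kernel calls. A vectorised sketch, assuming `gaussian_kernel` is the standard $\\exp(-\\lVert x - z\\rVert^2 / (2\\sigma^2))$ with some bandwidth $\\sigma$ (adjust to match the definition used earlier in this notebook):"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
   "from scipy.spatial.distance import cdist\n",
   "import numpy as np\n",
   "\n",
   "def gram_matrix_rbf(X, sigma=5.0):\n",
   "    # vectorised RBF Gram matrix; sigma is an assumed bandwidth, not taken from this notebook\n",
   "    sq_dists = cdist(X, X, 'sqeuclidean')\n",
   "    return np.exp(-sq_dists / (2.0 * sigma ** 2))"
  ]
 },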
801 | {
802 | "cell_type": "code",
803 | "execution_count": 22,
804 | "metadata": {},
805 | "outputs": [
806 | {
807 | "name": "stdout",
808 | "output_type": "stream",
809 | "text": [
810 | "(3341, 3341)\n",
811 | " pcost dcost gap pres dres\n",
812 | " 0: -1.4904e+04 -2.0036e+06 2e+06 2e-02 9e-13\n",
813 | " 1: -1.3268e+04 -1.4210e+05 1e+05 4e-04 8e-13\n",
814 | " 2: -1.5305e+04 -5.5521e+04 4e+04 3e-05 8e-13\n",
815 | " 3: -1.5484e+04 -5.3758e+04 4e+04 3e-05 8e-13\n",
816 | " 4: -1.6200e+04 -3.0338e+04 1e+04 5e-06 8e-13\n",
817 | " 5: -1.6293e+04 -2.9163e+04 1e+04 3e-06 7e-13\n",
818 | " 6: -1.6519e+04 -2.4440e+04 8e+03 1e-06 7e-13\n",
819 | " 7: -1.6658e+04 -2.0918e+04 4e+03 6e-07 7e-13\n",
820 | " 8: -1.6753e+04 -1.8317e+04 2e+03 2e-07 7e-13\n",
821 | " 9: -1.6785e+04 -1.7377e+04 6e+02 5e-08 7e-13\n",
822 | "10: -1.6797e+04 -1.6906e+04 1e+02 5e-09 8e-13\n",
823 | "11: -1.6799e+04 -1.6829e+04 3e+01 8e-11 8e-13\n",
824 | "12: -1.6799e+04 -1.6820e+04 2e+01 4e-11 7e-13\n",
825 | "13: -1.6800e+04 -1.6806e+04 6e+00 9e-12 8e-13\n",
826 | "14: -1.6800e+04 -1.6805e+04 6e+00 7e-12 8e-13\n",
827 | "15: -1.6800e+04 -1.6803e+04 4e+00 1e-12 9e-13\n",
828 | "16: -1.6800e+04 -1.6802e+04 2e+00 2e-13 8e-13\n",
829 | "17: -1.6800e+04 -1.6801e+04 1e+00 9e-14 7e-13\n",
830 | "18: -1.6800e+04 -1.6801e+04 1e+00 6e-13 7e-13\n",
831 | "19: -1.6800e+04 -1.6801e+04 9e-01 1e-12 7e-13\n",
832 | "20: -1.6800e+04 -1.6801e+04 8e-01 1e-12 7e-13\n",
833 | "21: -1.6800e+04 -1.6800e+04 3e-01 3e-13 7e-13\n",
834 | "22: -1.6800e+04 -1.6800e+04 1e-01 7e-13 8e-13\n",
835 | "23: -1.6800e+04 -1.6800e+04 8e-02 1e-12 7e-13\n",
836 | "24: -1.6800e+04 -1.6800e+04 2e-02 1e-12 7e-13\n",
837 | "25: -1.6800e+04 -1.6800e+04 1e-02 7e-13 8e-13\n",
838 | "Optimal solution found.\n",
839 | "optimal\n",
840 | "(3341,)\n",
841 | "3341 support vectors out of 3341 points\n",
842 | "y_test min 19\n",
843 | "y_test max 817\n",
844 | "y_pred min 0\n",
845 | "y_pred max 817\n",
846 | "0.0 1.0\n",
847 | "GM 0.0\n",
848 | "817 out of 836 predictions correct\n",
849 | "Accuracy 0.9772727272727273\n"
850 | ]
851 | }
852 | ],
853 | "source": [
854 | "\n",
855 | "if __name__ == \"__main__\":\n",
856 | " import pylab as pl \n",
857 | " def normal_svm():\n",
858 | " \n",
859 | " clf = SVM(C=100.0)\n",
860 | " clf.fit(X_train, y_train)\n",
861 | " y_predict = clf.predict(X_test)\n",
862 | " gm(y_predict,y_test)\n",
863 | " correct = np.sum(y_predict == y_test)\n",
864 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
865 | " print(\"Accuracy\",correct/len(y_predict))\n",
866 | "\n",
867 | " normal_svm() "
868 | ]
869 | },
870 | {
871 | "cell_type": "code",
872 | "execution_count": null,
873 | "metadata": {},
874 | "outputs": [],
875 | "source": []
876 | }
877 | ],
878 | "metadata": {
879 | "kernelspec": {
880 | "display_name": "Python 3",
881 | "language": "python",
882 | "name": "python3"
883 | },
884 | "language_info": {
885 | "codemirror_mode": {
886 | "name": "ipython",
887 | "version": 3
888 | },
889 | "file_extension": ".py",
890 | "mimetype": "text/x-python",
891 | "name": "python",
892 | "nbconvert_exporter": "python",
893 | "pygments_lexer": "ipython3",
894 | "version": "3.6.5"
895 | }
896 | },
897 | "nbformat": 4,
898 | "nbformat_minor": 2
899 | }
900 |
--------------------------------------------------------------------------------