├── FSVM-CIL Fuzzy Support Vector Machines.pdf
├── README.md
├── LICENSE
├── haberman.csv
├── modifiedhaberman.csv
├── Preprocessing PIma_indians.ipynb
├── Preprocessing pagebreak.ipynb
├── Preprocessing Haberman.ipynb
├── Preprocessing Abalone.ipynb
├── pima-indians-diabetes.csv
├── FUZZY SVM Haberman.ipynb
├── FUZZY SVM.ipynb
└── FUZZY SVM Pageblock.ipynb
/FSVM-CIL Fuzzy Support Vector Machines.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adityasahugit/Fuzzy-SVM/HEAD/FSVM-CIL Fuzzy Support Vector Machines.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Fuzzy-SVM
2 | 
3 | Based on the research paper “FSVM-CIL: Fuzzy Support Vector Machines for Class Imbalance Learning” by Rukshan Batuwita and Vasile Palade, which applies fuzzy membership values to SVM training.
4 | 
5 | It is used to improve SVM performance on imbalanced datasets, i.e. datasets that do not have a 1:1 ratio of instances between the classes.
6 | 
7 | # Datasets used
8 | 
9 | The Pima Indians diabetes dataset has a 35:65 class ratio.
10 | The Haberman, Abalone and Page Blocks datasets each have a different class ratio.
11 | 
12 | # Notebooks
13 | 
14 | The preprocessing notebooks build a new version of each dataset so that the class ratio stays the same after splitting into training and test sets (see the sketches below).
15 | 
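16 | # Example sketches
17 | 
18 | A minimal sketch of the ratio-preserving split the preprocessing notebooks build by hand: minority and majority rows are chunked separately and then recombined, so every chunk keeps the original class ratio. This is an illustration only, not the notebooks' exact code (for Pima they index fixed-size blocks of roughly 53 minority plus 100 majority rows per chunk).
19 | 
20 | ```python
21 | import numpy as np
22 | 
23 | def ratio_preserving_chunks(data, label_col=-1, n_chunks=5):
24 |     """Split `data` into n_chunks pieces that all keep the minority:majority ratio."""
25 |     data = np.asarray(data, dtype=float)
26 |     minority = data[data[:, label_col] == 1]    # minority class is labelled +1 in the notebooks
27 |     majority = data[data[:, label_col] == -1]   # majority class is labelled -1
28 |     min_parts = np.array_split(minority, n_chunks)
29 |     maj_parts = np.array_split(majority, n_chunks)
30 |     # stack one minority part on top of one majority part per chunk
31 |     return [np.vstack((mn, mj)) for mn, mj in zip(min_parts, maj_parts)]
32 | ```
33 | 
34 | And a minimal sketch of the FSVM-CIL membership idea the FUZZY SVM notebooks implement: each training example gets a membership m = f(x) * r, where f decays with a within-class distance (the notebooks use 2 / (1 + beta * d) with a distance based on the separating hyperplane) and r down-weights the majority class by the imbalance ratio. The memberships then scale each example's misclassification cost, so minority errors stay expensive while majority errors become cheaper. The distance-to-class-centre version below is an illustration, not the notebooks' exact code.
35 | 
36 | ```python
37 | import numpy as np
38 | 
39 | def fsvm_cil_memberships(X, y, beta=0.8):
40 |     """Membership m_i = f(x_i) * r: a decaying within-class weight times a class-imbalance weight."""
41 |     X, y = np.asarray(X, dtype=float), np.asarray(y)
42 |     n_min, n_maj = np.sum(y == 1), np.sum(y == -1)
43 |     m = np.zeros(len(y))
44 |     for label, r in ((1, 1.0), (-1, n_min / n_maj)):  # r = 1 for minority, minority/majority for majority
45 |         idx = y == label
46 |         centre = X[idx].mean(axis=0)
47 |         d = np.linalg.norm(X[idx] - centre, axis=1)   # distance of each point to its class centre
48 |         m[idx] = 2.0 / (1.0 + beta * d) * r           # decaying membership, as in the notebooks
49 |     return m
50 | ```
51 | 
52 | In FSVM-CIL the memberships enter the SVM box constraints: example i gets an upper bound of m_i * C on its Lagrange multiplier instead of C.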
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 ADITYA SAHU
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/haberman.csv:
--------------------------------------------------------------------------------
1 | 30,64,1,1
2 | 30,62,3,1
3 | 30,65,0,1
4 | 31,59,2,1
5 | 31,65,4,1
6 | 33,58,10,1
7 | 33,60,0,1
8 | 34,59,0,2
9 | 34,66,9,2
10 | 34,58,30,1
11 | 34,60,1,1
12 | 34,61,10,1
13 | 34,67,7,1
14 | 34,60,0,1
15 | 35,64,13,1
16 | 35,63,0,1
17 | 36,60,1,1
18 | 36,69,0,1
19 | 37,60,0,1
20 | 37,63,0,1
21 | 37,58,0,1
22 | 37,59,6,1
23 | 37,60,15,1
24 | 37,63,0,1
25 | 38,69,21,2
26 | 38,59,2,1
27 | 38,60,0,1
28 | 38,60,0,1
29 | 38,62,3,1
30 | 38,64,1,1
31 | 38,66,0,1
32 | 38,66,11,1
33 | 38,60,1,1
34 | 38,67,5,1
35 | 39,66,0,2
36 | 39,63,0,1
37 | 39,67,0,1
38 | 39,58,0,1
39 | 39,59,2,1
40 | 39,63,4,1
41 | 40,58,2,1
42 | 40,58,0,1
43 | 40,65,0,1
44 | 41,60,23,2
45 | 41,64,0,2
46 | 41,67,0,2
47 | 41,58,0,1
48 | 41,59,8,1
49 | 41,59,0,1
50 | 41,64,0,1
51 | 41,69,8,1
52 | 41,65,0,1
53 | 41,65,0,1
54 | 42,69,1,2
55 | 42,59,0,2
56 | 42,58,0,1
57 | 42,60,1,1
58 | 42,59,2,1
59 | 42,61,4,1
60 | 42,62,20,1
61 | 42,65,0,1
62 | 42,63,1,1
63 | 43,58,52,2
64 | 43,59,2,2
65 | 43,64,0,2
66 | 43,64,0,2
67 | 43,63,14,1
68 | 43,64,2,1
69 | 43,64,3,1
70 | 43,60,0,1
71 | 43,63,2,1
72 | 43,65,0,1
73 | 43,66,4,1
74 | 44,64,6,2
75 | 44,58,9,2
76 | 44,63,19,2
77 | 44,61,0,1
78 | 44,63,1,1
79 | 44,61,0,1
80 | 44,67,16,1
81 | 45,65,6,2
82 | 45,66,0,2
83 | 45,67,1,2
84 | 45,60,0,1
85 | 45,67,0,1
86 | 45,59,14,1
87 | 45,64,0,1
88 | 45,68,0,1
89 | 45,67,1,1
90 | 46,58,2,2
91 | 46,69,3,2
92 | 46,62,5,2
93 | 46,65,20,2
94 | 46,62,0,1
95 | 46,58,3,1
96 | 46,63,0,1
97 | 47,63,23,2
98 | 47,62,0,2
99 | 47,65,0,2
100 | 47,61,0,1
101 | 47,63,6,1
102 | 47,66,0,1
103 | 47,67,0,1
104 | 47,58,3,1
105 | 47,60,4,1
106 | 47,68,4,1
107 | 47,66,12,1
108 | 48,58,11,2
109 | 48,58,11,2
110 | 48,67,7,2
111 | 48,61,8,1
112 | 48,62,2,1
113 | 48,64,0,1
114 | 48,66,0,1
115 | 49,63,0,2
116 | 49,64,10,2
117 | 49,61,1,1
118 | 49,62,0,1
119 | 49,66,0,1
120 | 49,60,1,1
121 | 49,62,1,1
122 | 49,63,3,1
123 | 49,61,0,1
124 | 49,67,1,1
125 | 50,63,13,2
126 | 50,64,0,2
127 | 50,59,0,1
128 | 50,61,6,1
129 | 50,61,0,1
130 | 50,63,1,1
131 | 50,58,1,1
132 | 50,59,2,1
133 | 50,61,0,1
134 | 50,64,0,1
135 | 50,65,4,1
136 | 50,66,1,1
137 | 51,59,13,2
138 | 51,59,3,2
139 | 51,64,7,1
140 | 51,59,1,1
141 | 51,65,0,1
142 | 51,66,1,1
143 | 52,69,3,2
144 | 52,59,2,2
145 | 52,62,3,2
146 | 52,66,4,2
147 | 52,61,0,1
148 | 52,63,4,1
149 | 52,69,0,1
150 | 52,60,4,1
151 | 52,60,5,1
152 | 52,62,0,1
153 | 52,62,1,1
154 | 52,64,0,1
155 | 52,65,0,1
156 | 52,68,0,1
157 | 53,58,4,2
158 | 53,65,1,2
159 | 53,59,3,2
160 | 53,60,9,2
161 | 53,63,24,2
162 | 53,65,12,2
163 | 53,58,1,1
164 | 53,60,1,1
165 | 53,60,2,1
166 | 53,61,1,1
167 | 53,63,0,1
168 | 54,60,11,2
169 | 54,65,23,2
170 | 54,65,5,2
171 | 54,68,7,2
172 | 54,59,7,1
173 | 54,60,3,1
174 | 54,66,0,1
175 | 54,67,46,1
176 | 54,62,0,1
177 | 54,69,7,1
178 | 54,63,19,1
179 | 54,58,1,1
180 | 54,62,0,1
181 | 55,63,6,2
182 | 55,68,15,2
183 | 55,58,1,1
184 | 55,58,0,1
185 | 55,58,1,1
186 | 55,66,18,1
187 | 55,66,0,1
188 | 55,69,3,1
189 | 55,69,22,1
190 | 55,67,1,1
191 | 56,65,9,2
192 | 56,66,3,2
193 | 56,60,0,1
194 | 56,66,2,1
195 | 56,66,1,1
196 | 56,67,0,1
197 | 56,60,0,1
198 | 57,61,5,2
199 | 57,62,14,2
200 | 57,64,1,2
201 | 57,64,9,1
202 | 57,69,0,1
203 | 57,61,0,1
204 | 57,62,0,1
205 | 57,63,0,1
206 | 57,64,0,1
207 | 57,64,0,1
208 | 57,67,0,1
209 | 58,59,0,1
210 | 58,60,3,1
211 | 58,61,1,1
212 | 58,67,0,1
213 | 58,58,0,1
214 | 58,58,3,1
215 | 58,61,2,1
216 | 59,62,35,2
217 | 59,60,0,1
218 | 59,63,0,1
219 | 59,64,1,1
220 | 59,64,4,1
221 | 59,64,0,1
222 | 59,64,7,1
223 | 59,67,3,1
224 | 60,59,17,2
225 | 60,65,0,2
226 | 60,61,1,1
227 | 60,67,2,1
228 | 60,61,25,1
229 | 60,64,0,1
230 | 61,62,5,2
231 | 61,65,0,2
232 | 61,68,1,2
233 | 61,59,0,1
234 | 61,59,0,1
235 | 61,64,0,1
236 | 61,65,8,1
237 | 61,68,0,1
238 | 61,59,0,1
239 | 62,59,13,2
240 | 62,58,0,2
241 | 62,65,19,2
242 | 62,62,6,1
243 | 62,66,0,1
244 | 62,66,0,1
245 | 62,58,0,1
246 | 63,60,1,2
247 | 63,61,0,1
248 | 63,62,0,1
249 | 63,63,0,1
250 | 63,63,0,1
251 | 63,66,0,1
252 | 63,61,9,1
253 | 63,61,28,1
254 | 64,58,0,1
255 | 64,65,22,1
256 | 64,66,0,1
257 | 64,61,0,1
258 | 64,68,0,1
259 | 65,58,0,2
260 | 65,61,2,2
261 | 65,62,22,2
262 | 65,66,15,2
263 | 65,58,0,1
264 | 65,64,0,1
265 | 65,67,0,1
266 | 65,59,2,1
267 | 65,64,0,1
268 | 65,67,1,1
269 | 66,58,0,2
270 | 66,61,13,2
271 | 66,58,0,1
272 | 66,58,1,1
273 | 66,68,0,1
274 | 67,64,8,2
275 | 67,63,1,2
276 | 67,66,0,1
277 | 67,66,0,1
278 | 67,61,0,1
279 | 67,65,0,1
280 | 68,67,0,1
281 | 68,68,0,1
282 | 69,67,8,2
283 | 69,60,0,1
284 | 69,65,0,1
285 | 69,66,0,1
286 | 70,58,0,2
287 | 70,58,4,2
288 | 70,66,14,1
289 | 70,67,0,1
290 | 70,68,0,1
291 | 70,59,8,1
292 | 70,63,0,1
293 | 71,68,2,1
294 | 72,63,0,2
295 | 72,58,0,1
296 | 72,64,0,1
297 | 72,67,3,1
298 | 73,62,0,1
299 | 73,68,0,1
300 | 74,65,3,2
301 | 74,63,0,1
302 | 75,62,1,1
303 | 76,67,0,1
304 | 77,65,3,1
305 | 78,65,1,2
306 | 83,58,2,2
307 |
--------------------------------------------------------------------------------
/modifiedhaberman.csv:
--------------------------------------------------------------------------------
1 | 34.0,59.0,0.0,1.0
2 | 34.0,66.0,9.0,1.0
3 | 38.0,69.0,21.0,1.0
4 | 39.0,66.0,0.0,1.0
5 | 41.0,60.0,23.0,1.0
6 | 41.0,64.0,0.0,1.0
7 | 41.0,67.0,0.0,1.0
8 | 42.0,69.0,1.0,1.0
9 | 42.0,59.0,0.0,1.0
10 | 43.0,58.0,52.0,1.0
11 | 43.0,59.0,2.0,1.0
12 | 43.0,64.0,0.0,1.0
13 | 43.0,64.0,0.0,1.0
14 | 44.0,64.0,6.0,1.0
15 | 44.0,58.0,9.0,1.0
16 | 44.0,63.0,19.0,1.0
17 | 30.0,64.0,1.0,-1.0
18 | 30.0,62.0,3.0,-1.0
19 | 30.0,65.0,0.0,-1.0
20 | 31.0,59.0,2.0,-1.0
21 | 31.0,65.0,4.0,-1.0
22 | 33.0,58.0,10.0,-1.0
23 | 33.0,60.0,0.0,-1.0
24 | 34.0,58.0,30.0,-1.0
25 | 34.0,60.0,1.0,-1.0
26 | 34.0,61.0,10.0,-1.0
27 | 34.0,67.0,7.0,-1.0
28 | 34.0,60.0,0.0,-1.0
29 | 35.0,64.0,13.0,-1.0
30 | 35.0,63.0,0.0,-1.0
31 | 36.0,60.0,1.0,-1.0
32 | 36.0,69.0,0.0,-1.0
33 | 37.0,60.0,0.0,-1.0
34 | 37.0,63.0,0.0,-1.0
35 | 37.0,58.0,0.0,-1.0
36 | 37.0,59.0,6.0,-1.0
37 | 37.0,60.0,15.0,-1.0
38 | 37.0,63.0,0.0,-1.0
39 | 38.0,59.0,2.0,-1.0
40 | 38.0,60.0,0.0,-1.0
41 | 38.0,60.0,0.0,-1.0
42 | 38.0,62.0,3.0,-1.0
43 | 38.0,64.0,1.0,-1.0
44 | 38.0,66.0,0.0,-1.0
45 | 38.0,66.0,11.0,-1.0
46 | 38.0,60.0,1.0,-1.0
47 | 38.0,67.0,5.0,-1.0
48 | 39.0,63.0,0.0,-1.0
49 | 39.0,67.0,0.0,-1.0
50 | 39.0,58.0,0.0,-1.0
51 | 39.0,59.0,2.0,-1.0
52 | 39.0,63.0,4.0,-1.0
53 | 40.0,58.0,2.0,-1.0
54 | 40.0,58.0,0.0,-1.0
55 | 40.0,65.0,0.0,-1.0
56 | 41.0,58.0,0.0,-1.0
57 | 41.0,59.0,8.0,-1.0
58 | 41.0,59.0,0.0,-1.0
59 | 41.0,64.0,0.0,-1.0
60 | 41.0,69.0,8.0,-1.0
61 | 41.0,65.0,0.0,-1.0
62 | 45.0,65.0,6.0,1.0
63 | 45.0,66.0,0.0,1.0
64 | 45.0,67.0,1.0,1.0
65 | 46.0,58.0,2.0,1.0
66 | 46.0,69.0,3.0,1.0
67 | 46.0,62.0,5.0,1.0
68 | 46.0,65.0,20.0,1.0
69 | 47.0,63.0,23.0,1.0
70 | 47.0,62.0,0.0,1.0
71 | 47.0,65.0,0.0,1.0
72 | 48.0,58.0,11.0,1.0
73 | 48.0,58.0,11.0,1.0
74 | 48.0,67.0,7.0,1.0
75 | 49.0,63.0,0.0,1.0
76 | 49.0,64.0,10.0,1.0
77 | 50.0,63.0,13.0,1.0
78 | 41.0,65.0,0.0,-1.0
79 | 42.0,58.0,0.0,-1.0
80 | 42.0,60.0,1.0,-1.0
81 | 42.0,59.0,2.0,-1.0
82 | 42.0,61.0,4.0,-1.0
83 | 42.0,62.0,20.0,-1.0
84 | 42.0,65.0,0.0,-1.0
85 | 42.0,63.0,1.0,-1.0
86 | 43.0,63.0,14.0,-1.0
87 | 43.0,64.0,2.0,-1.0
88 | 43.0,64.0,3.0,-1.0
89 | 43.0,60.0,0.0,-1.0
90 | 43.0,63.0,2.0,-1.0
91 | 43.0,65.0,0.0,-1.0
92 | 43.0,66.0,4.0,-1.0
93 | 44.0,61.0,0.0,-1.0
94 | 44.0,63.0,1.0,-1.0
95 | 44.0,61.0,0.0,-1.0
96 | 44.0,67.0,16.0,-1.0
97 | 45.0,60.0,0.0,-1.0
98 | 45.0,67.0,0.0,-1.0
99 | 45.0,59.0,14.0,-1.0
100 | 45.0,64.0,0.0,-1.0
101 | 45.0,68.0,0.0,-1.0
102 | 45.0,67.0,1.0,-1.0
103 | 46.0,62.0,0.0,-1.0
104 | 46.0,58.0,3.0,-1.0
105 | 46.0,63.0,0.0,-1.0
106 | 47.0,61.0,0.0,-1.0
107 | 47.0,63.0,6.0,-1.0
108 | 47.0,66.0,0.0,-1.0
109 | 47.0,67.0,0.0,-1.0
110 | 47.0,58.0,3.0,-1.0
111 | 47.0,60.0,4.0,-1.0
112 | 47.0,68.0,4.0,-1.0
113 | 47.0,66.0,12.0,-1.0
114 | 48.0,61.0,8.0,-1.0
115 | 48.0,62.0,2.0,-1.0
116 | 48.0,64.0,0.0,-1.0
117 | 48.0,66.0,0.0,-1.0
118 | 49.0,61.0,1.0,-1.0
119 | 49.0,62.0,0.0,-1.0
120 | 49.0,66.0,0.0,-1.0
121 | 49.0,60.0,1.0,-1.0
122 | 49.0,62.0,1.0,-1.0
123 | 50.0,64.0,0.0,1.0
124 | 51.0,59.0,13.0,1.0
125 | 51.0,59.0,3.0,1.0
126 | 52.0,69.0,3.0,1.0
127 | 52.0,59.0,2.0,1.0
128 | 52.0,62.0,3.0,1.0
129 | 52.0,66.0,4.0,1.0
130 | 53.0,58.0,4.0,1.0
131 | 53.0,65.0,1.0,1.0
132 | 53.0,59.0,3.0,1.0
133 | 53.0,60.0,9.0,1.0
134 | 53.0,63.0,24.0,1.0
135 | 53.0,65.0,12.0,1.0
136 | 54.0,60.0,11.0,1.0
137 | 54.0,65.0,23.0,1.0
138 | 54.0,65.0,5.0,1.0
139 | 49.0,63.0,3.0,-1.0
140 | 49.0,61.0,0.0,-1.0
141 | 49.0,67.0,1.0,-1.0
142 | 50.0,59.0,0.0,-1.0
143 | 50.0,61.0,6.0,-1.0
144 | 50.0,61.0,0.0,-1.0
145 | 50.0,63.0,1.0,-1.0
146 | 50.0,58.0,1.0,-1.0
147 | 50.0,59.0,2.0,-1.0
148 | 50.0,61.0,0.0,-1.0
149 | 50.0,64.0,0.0,-1.0
150 | 50.0,65.0,4.0,-1.0
151 | 50.0,66.0,1.0,-1.0
152 | 51.0,64.0,7.0,-1.0
153 | 51.0,59.0,1.0,-1.0
154 | 51.0,65.0,0.0,-1.0
155 | 51.0,66.0,1.0,-1.0
156 | 52.0,61.0,0.0,-1.0
157 | 52.0,63.0,4.0,-1.0
158 | 52.0,69.0,0.0,-1.0
159 | 52.0,60.0,4.0,-1.0
160 | 52.0,60.0,5.0,-1.0
161 | 52.0,62.0,0.0,-1.0
162 | 52.0,62.0,1.0,-1.0
163 | 52.0,64.0,0.0,-1.0
164 | 52.0,65.0,0.0,-1.0
165 | 52.0,68.0,0.0,-1.0
166 | 53.0,58.0,1.0,-1.0
167 | 53.0,60.0,1.0,-1.0
168 | 53.0,60.0,2.0,-1.0
169 | 53.0,61.0,1.0,-1.0
170 | 53.0,63.0,0.0,-1.0
171 | 54.0,59.0,7.0,-1.0
172 | 54.0,60.0,3.0,-1.0
173 | 54.0,66.0,0.0,-1.0
174 | 54.0,67.0,46.0,-1.0
175 | 54.0,62.0,0.0,-1.0
176 | 54.0,69.0,7.0,-1.0
177 | 54.0,63.0,19.0,-1.0
178 | 54.0,58.0,1.0,-1.0
179 | 54.0,62.0,0.0,-1.0
180 | 55.0,58.0,1.0,-1.0
181 | 55.0,58.0,0.0,-1.0
182 | 55.0,58.0,1.0,-1.0
183 | 55.0,66.0,18.0,-1.0
184 | 54.0,68.0,7.0,1.0
185 | 55.0,63.0,6.0,1.0
186 | 55.0,68.0,15.0,1.0
187 | 56.0,65.0,9.0,1.0
188 | 56.0,66.0,3.0,1.0
189 | 57.0,61.0,5.0,1.0
190 | 57.0,62.0,14.0,1.0
191 | 57.0,64.0,1.0,1.0
192 | 59.0,62.0,35.0,1.0
193 | 60.0,59.0,17.0,1.0
194 | 60.0,65.0,0.0,1.0
195 | 61.0,62.0,5.0,1.0
196 | 61.0,65.0,0.0,1.0
197 | 61.0,68.0,1.0,1.0
198 | 62.0,59.0,13.0,1.0
199 | 62.0,58.0,0.0,1.0
200 | 55.0,66.0,0.0,-1.0
201 | 55.0,69.0,3.0,-1.0
202 | 55.0,69.0,22.0,-1.0
203 | 55.0,67.0,1.0,-1.0
204 | 56.0,60.0,0.0,-1.0
205 | 56.0,66.0,2.0,-1.0
206 | 56.0,66.0,1.0,-1.0
207 | 56.0,67.0,0.0,-1.0
208 | 56.0,60.0,0.0,-1.0
209 | 57.0,64.0,9.0,-1.0
210 | 57.0,69.0,0.0,-1.0
211 | 57.0,61.0,0.0,-1.0
212 | 57.0,62.0,0.0,-1.0
213 | 57.0,63.0,0.0,-1.0
214 | 57.0,64.0,0.0,-1.0
215 | 57.0,64.0,0.0,-1.0
216 | 57.0,67.0,0.0,-1.0
217 | 58.0,59.0,0.0,-1.0
218 | 58.0,60.0,3.0,-1.0
219 | 58.0,61.0,1.0,-1.0
220 | 58.0,67.0,0.0,-1.0
221 | 58.0,58.0,0.0,-1.0
222 | 58.0,58.0,3.0,-1.0
223 | 58.0,61.0,2.0,-1.0
224 | 59.0,60.0,0.0,-1.0
225 | 59.0,63.0,0.0,-1.0
226 | 59.0,64.0,1.0,-1.0
227 | 59.0,64.0,4.0,-1.0
228 | 59.0,64.0,0.0,-1.0
229 | 59.0,64.0,7.0,-1.0
230 | 59.0,67.0,3.0,-1.0
231 | 60.0,61.0,1.0,-1.0
232 | 60.0,67.0,2.0,-1.0
233 | 60.0,61.0,25.0,-1.0
234 | 60.0,64.0,0.0,-1.0
235 | 61.0,59.0,0.0,-1.0
236 | 61.0,59.0,0.0,-1.0
237 | 61.0,64.0,0.0,-1.0
238 | 61.0,65.0,8.0,-1.0
239 | 61.0,68.0,0.0,-1.0
240 | 61.0,59.0,0.0,-1.0
241 | 62.0,62.0,6.0,-1.0
242 | 62.0,66.0,0.0,-1.0
243 | 62.0,66.0,0.0,-1.0
244 | 62.0,58.0,0.0,-1.0
245 | 62.0,65.0,19.0,1.0
246 | 63.0,60.0,1.0,1.0
247 | 65.0,58.0,0.0,1.0
248 | 65.0,61.0,2.0,1.0
249 | 65.0,62.0,22.0,1.0
250 | 65.0,66.0,15.0,1.0
251 | 66.0,58.0,0.0,1.0
252 | 66.0,61.0,13.0,1.0
253 | 67.0,64.0,8.0,1.0
254 | 67.0,63.0,1.0,1.0
255 | 69.0,67.0,8.0,1.0
256 | 70.0,58.0,0.0,1.0
257 | 70.0,58.0,4.0,1.0
258 | 72.0,63.0,0.0,1.0
259 | 74.0,65.0,3.0,1.0
260 | 78.0,65.0,1.0,1.0
261 | 83.0,58.0,2.0,1.0
262 | 63.0,61.0,0.0,-1.0
263 | 63.0,62.0,0.0,-1.0
264 | 63.0,63.0,0.0,-1.0
265 | 63.0,63.0,0.0,-1.0
266 | 63.0,66.0,0.0,-1.0
267 | 63.0,61.0,9.0,-1.0
268 | 63.0,61.0,28.0,-1.0
269 | 64.0,58.0,0.0,-1.0
270 | 64.0,65.0,22.0,-1.0
271 | 64.0,66.0,0.0,-1.0
272 | 64.0,61.0,0.0,-1.0
273 | 64.0,68.0,0.0,-1.0
274 | 65.0,58.0,0.0,-1.0
275 | 65.0,64.0,0.0,-1.0
276 | 65.0,67.0,0.0,-1.0
277 | 65.0,59.0,2.0,-1.0
278 | 65.0,64.0,0.0,-1.0
279 | 65.0,67.0,1.0,-1.0
280 | 66.0,58.0,0.0,-1.0
281 | 66.0,58.0,1.0,-1.0
282 | 66.0,68.0,0.0,-1.0
283 | 67.0,66.0,0.0,-1.0
284 | 67.0,66.0,0.0,-1.0
285 | 67.0,61.0,0.0,-1.0
286 | 67.0,65.0,0.0,-1.0
287 | 68.0,67.0,0.0,-1.0
288 | 68.0,68.0,0.0,-1.0
289 | 69.0,60.0,0.0,-1.0
290 | 69.0,65.0,0.0,-1.0
291 | 69.0,66.0,0.0,-1.0
292 | 70.0,66.0,14.0,-1.0
293 | 70.0,67.0,0.0,-1.0
294 | 70.0,68.0,0.0,-1.0
295 | 70.0,59.0,8.0,-1.0
296 | 70.0,63.0,0.0,-1.0
297 | 71.0,68.0,2.0,-1.0
298 | 72.0,58.0,0.0,-1.0
299 | 72.0,64.0,0.0,-1.0
300 | 72.0,67.0,3.0,-1.0
301 | 73.0,62.0,0.0,-1.0
302 | 73.0,68.0,0.0,-1.0
303 | 74.0,63.0,0.0,-1.0
304 | 75.0,62.0,1.0,-1.0
305 | 76.0,67.0,0.0,-1.0
306 | 77.0,65.0,3.0,-1.0
307 |
--------------------------------------------------------------------------------
/Preprocessing PIma_indians.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ADITYA SAHU"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Pre processing of Pima indians dataset to make imbalance ratio of training and testing same"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import numpy as np\n",
24 | "from numpy import linalg\n",
25 | "import pandas as pd"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "data": {
35 | "text/html": [
36 | "
\n",
37 | "\n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " | \n",
54 | " 1 | \n",
55 | " 2 | \n",
56 | " 3 | \n",
57 | " 4 | \n",
58 | " 5 | \n",
59 | " 6 | \n",
60 | " 7 | \n",
61 | " 8 | \n",
62 | " 9 | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 0 | \n",
68 | " 6 | \n",
69 | " 148 | \n",
70 | " 72 | \n",
71 | " 35 | \n",
72 | " 0 | \n",
73 | " 33.6 | \n",
74 | " 0.627 | \n",
75 | " 50 | \n",
76 | " 1 | \n",
77 | "
\n",
78 | " \n",
79 | " | 1 | \n",
80 | " 1 | \n",
81 | " 85 | \n",
82 | " 66 | \n",
83 | " 29 | \n",
84 | " 0 | \n",
85 | " 26.6 | \n",
86 | " 0.351 | \n",
87 | " 31 | \n",
88 | " 0 | \n",
89 | "
\n",
90 | " \n",
91 | " | 2 | \n",
92 | " 8 | \n",
93 | " 183 | \n",
94 | " 64 | \n",
95 | " 0 | \n",
96 | " 0 | \n",
97 | " 23.3 | \n",
98 | " 0.672 | \n",
99 | " 32 | \n",
100 | " 1 | \n",
101 | "
\n",
102 | " \n",
103 | " | 3 | \n",
104 | " 1 | \n",
105 | " 89 | \n",
106 | " 66 | \n",
107 | " 23 | \n",
108 | " 94 | \n",
109 | " 28.1 | \n",
110 | " 0.167 | \n",
111 | " 21 | \n",
112 | " 0 | \n",
113 | "
\n",
114 | " \n",
115 | " | 4 | \n",
116 | " 0 | \n",
117 | " 137 | \n",
118 | " 40 | \n",
119 | " 35 | \n",
120 | " 168 | \n",
121 | " 43.1 | \n",
122 | " 2.288 | \n",
123 | " 33 | \n",
124 | " 1 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " 1 2 3 4 5 6 7 8 9\n",
132 | "0 6 148 72 35 0 33.6 0.627 50 1\n",
133 | "1 1 85 66 29 0 26.6 0.351 31 0\n",
134 | "2 8 183 64 0 0 23.3 0.672 32 1\n",
135 | "3 1 89 66 23 94 28.1 0.167 21 0\n",
136 | "4 0 137 40 35 168 43.1 2.288 33 1"
137 | ]
138 | },
139 | "execution_count": 2,
140 | "metadata": {},
141 | "output_type": "execute_result"
142 | }
143 | ],
144 | "source": [
145 | "train = pd.read_csv(\"pima-indians-diabetes.csv\")\n",
146 | "train.head()"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 3,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "0 1\n",
159 | "1 -1\n",
160 | "2 1\n",
161 | "3 -1\n",
162 | "4 1\n",
163 | "5 -1\n",
164 | "6 1\n",
165 | "7 -1\n",
166 | "8 1\n",
167 | "9 1\n",
168 | "10 -1\n",
169 | "11 1\n",
170 | "12 -1\n",
171 | "13 1\n",
172 | "14 1\n",
173 | "15 1\n",
174 | "16 1\n",
175 | "17 1\n",
176 | "18 -1\n",
177 | "19 1\n",
178 | "20 -1\n",
179 | "21 -1\n",
180 | "22 1\n",
181 | "23 1\n",
182 | "24 1\n",
183 | "25 1\n",
184 | "26 1\n",
185 | "27 -1\n",
186 | "28 -1\n",
187 | "29 -1\n",
188 | " ..\n",
189 | "738 -1\n",
190 | "739 1\n",
191 | "740 1\n",
192 | "741 -1\n",
193 | "742 -1\n",
194 | "743 1\n",
195 | "744 -1\n",
196 | "745 -1\n",
197 | "746 1\n",
198 | "747 -1\n",
199 | "748 1\n",
200 | "749 1\n",
201 | "750 1\n",
202 | "751 -1\n",
203 | "752 -1\n",
204 | "753 1\n",
205 | "754 1\n",
206 | "755 1\n",
207 | "756 -1\n",
208 | "757 1\n",
209 | "758 -1\n",
210 | "759 1\n",
211 | "760 -1\n",
212 | "761 1\n",
213 | "762 -1\n",
214 | "763 -1\n",
215 | "764 -1\n",
216 | "765 -1\n",
217 | "766 1\n",
218 | "767 -1\n",
219 | "Name: 9, Length: 768, dtype: int64\n"
220 | ]
221 | }
222 | ],
223 | "source": [
224 | "train['9'] = train['9'].map({1: 1, 0: -1})\n",
225 | "print(train['9'])"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 4,
231 | "metadata": {},
232 | "outputs": [],
233 | "source": [
234 | "\n",
235 | "train=np.asarray(train)"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 5,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "min_train=np.zeros((268,9))\n",
245 | "max_train=np.zeros((500,9))\n",
246 | "min_train=np.asarray(min_train)\n",
247 | "max_train=np.asarray(max_train)"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 6,
253 | "metadata": {},
254 | "outputs": [],
255 | "source": [
256 | "\n",
257 | "k=0\n",
258 | "l=0\n",
259 | "for i in range(0,768):\n",
260 | " if(train[i][8]==1):\n",
261 | " for j in range(0,9):\n",
262 | " min_train[k][j]=train[i][j] \n",
263 | " k=k+1\n",
264 | " else :\n",
265 | " for j in range(0,9):\n",
266 | " max_train[l][j]=train[i][j]\n",
267 | " l=l+1"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 7,
273 | "metadata": {},
274 | "outputs": [
275 | {
276 | "name": "stdout",
277 | "output_type": "stream",
278 | "text": [
279 | "[[ 6. 148. 72. ... 0.627 50. 1. ]\n",
280 | " [ 8. 183. 64. ... 0.672 32. 1. ]\n",
281 | " [ 0. 137. 40. ... 2.288 33. 1. ]\n",
282 | " ...\n",
283 | " [ 6. 190. 92. ... 0.278 66. 1. ]\n",
284 | " [ 9. 170. 74. ... 0.403 43. 1. ]\n",
285 | " [ 1. 126. 60. ... 0.349 47. 1. ]]\n"
286 | ]
287 | }
288 | ],
289 | "source": [
290 | "print(min_train)"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "metadata": {},
296 | "source": [
297 | "#768/5 =153.6\n",
298 | "#So we have to make no. of instances in data1,2 as 153 and data3,4,5 as 154 \n",
299 | "#Also min class value will be 35% of 153=53 and max class=100"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": 21,
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "data1=np.zeros((153,9))\n",
309 | "data2=np.zeros((153,9))\n",
310 | "data3=np.zeros((154,9))\n",
311 | "data4=np.zeros((154,9))\n",
312 | "data5=np.zeros((154,9))"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": null,
318 | "metadata": {},
319 | "outputs": [],
320 | "source": []
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 22,
325 | "metadata": {},
326 | "outputs": [
327 | {
328 | "name": "stdout",
329 | "output_type": "stream",
330 | "text": [
331 | "[0. 0. 0. 0. 0. 0. 0. 0. 0.]\n"
332 | ]
333 | }
334 | ],
335 | "source": [
336 | "for i in range(0,268):\n",
337 | " for j in range(0,9):\n",
338 | " if(i<53):\n",
339 | " data1[i][j]=min_train[i][j]\n",
340 | " elif(52\n",
30 | "\n",
43 | "\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " 0 | \n",
48 | " 1 | \n",
49 | " 2 | \n",
50 | " 3 | \n",
51 | " 4 | \n",
52 | " 5 | \n",
53 | " 6 | \n",
54 | " 7 | \n",
55 | " 8 | \n",
56 | " 9 | \n",
57 | " 10 | \n",
58 | "
\n",
59 | " \n",
60 | " \n",
61 | " \n",
62 | " | 0 | \n",
63 | " 5 | \n",
64 | " 7 | \n",
65 | " 35 | \n",
66 | " 1.400 | \n",
67 | " 0.400 | \n",
68 | " 0.657 | \n",
69 | " 2.33 | \n",
70 | " 14 | \n",
71 | " 23 | \n",
72 | " 6 | \n",
73 | " 1 | \n",
74 | "
\n",
75 | " \n",
76 | " | 1 | \n",
77 | " 6 | \n",
78 | " 7 | \n",
79 | " 42 | \n",
80 | " 1.167 | \n",
81 | " 0.429 | \n",
82 | " 0.881 | \n",
83 | " 3.60 | \n",
84 | " 18 | \n",
85 | " 37 | \n",
86 | " 5 | \n",
87 | " 1 | \n",
88 | "
\n",
89 | " \n",
90 | " | 2 | \n",
91 | " 6 | \n",
92 | " 18 | \n",
93 | " 108 | \n",
94 | " 3.000 | \n",
95 | " 0.287 | \n",
96 | " 0.741 | \n",
97 | " 4.43 | \n",
98 | " 31 | \n",
99 | " 80 | \n",
100 | " 7 | \n",
101 | " 1 | \n",
102 | "
\n",
103 | " \n",
104 | " | 3 | \n",
105 | " 5 | \n",
106 | " 7 | \n",
107 | " 35 | \n",
108 | " 1.400 | \n",
109 | " 0.371 | \n",
110 | " 0.743 | \n",
111 | " 4.33 | \n",
112 | " 13 | \n",
113 | " 26 | \n",
114 | " 3 | \n",
115 | " 1 | \n",
116 | "
\n",
117 | " \n",
118 | " | 4 | \n",
119 | " 6 | \n",
120 | " 3 | \n",
121 | " 18 | \n",
122 | " 0.500 | \n",
123 | " 0.500 | \n",
124 | " 0.944 | \n",
125 | " 2.25 | \n",
126 | " 9 | \n",
127 | " 17 | \n",
128 | " 4 | \n",
129 | " 1 | \n",
130 | "
\n",
131 | " \n",
132 | "
\n",
133 | ""
134 | ],
135 | "text/plain": [
136 | " 0 1 2 3 4 5 6 7 8 9 10\n",
137 | "0 5 7 35 1.400 0.400 0.657 2.33 14 23 6 1\n",
138 | "1 6 7 42 1.167 0.429 0.881 3.60 18 37 5 1\n",
139 | "2 6 18 108 3.000 0.287 0.741 4.43 31 80 7 1\n",
140 | "3 5 7 35 1.400 0.371 0.743 4.33 13 26 3 1\n",
141 | "4 6 3 18 0.500 0.500 0.944 2.25 9 17 4 1"
142 | ]
143 | },
144 | "execution_count": 31,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "train = pd.read_table(\"page-blocks.data\", sep=\"\\s+\",header=None)\n",
151 | "train.head()"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 32,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | "0 -1\n",
164 | "1 -1\n",
165 | "2 -1\n",
166 | "3 -1\n",
167 | "4 -1\n",
168 | "5 -1\n",
169 | "6 -1\n",
170 | "7 -1\n",
171 | "8 -1\n",
172 | "9 -1\n",
173 | "10 -1\n",
174 | "11 -1\n",
175 | "12 -1\n",
176 | "13 -1\n",
177 | "14 -1\n",
178 | "15 -1\n",
179 | "16 -1\n",
180 | "17 -1\n",
181 | "18 -1\n",
182 | "19 -1\n",
183 | "20 -1\n",
184 | "21 -1\n",
185 | "22 -1\n",
186 | "23 -1\n",
187 | "24 -1\n",
188 | "25 -1\n",
189 | "26 -1\n",
190 | "27 -1\n",
191 | "28 -1\n",
192 | "29 -1\n",
193 | " ..\n",
194 | "5443 -1\n",
195 | "5444 -1\n",
196 | "5445 -1\n",
197 | "5446 -1\n",
198 | "5447 -1\n",
199 | "5448 -1\n",
200 | "5449 -1\n",
201 | "5450 -1\n",
202 | "5451 -1\n",
203 | "5452 -1\n",
204 | "5453 -1\n",
205 | "5454 -1\n",
206 | "5455 -1\n",
207 | "5456 -1\n",
208 | "5457 -1\n",
209 | "5458 -1\n",
210 | "5459 -1\n",
211 | "5460 -1\n",
212 | "5461 -1\n",
213 | "5462 -1\n",
214 | "5463 -1\n",
215 | "5464 -1\n",
216 | "5465 -1\n",
217 | "5466 -1\n",
218 | "5467 -1\n",
219 | "5468 -1\n",
220 | "5469 -1\n",
221 | "5470 -1\n",
222 | "5471 -1\n",
223 | "5472 -1\n",
224 | "Name: 10, Length: 5473, dtype: int64\n"
225 | ]
226 | }
227 | ],
228 | "source": [
229 | "train[10] = train[10].map({5: 1, 1:-1,2:-1,3:-1,4:-1})\n",
230 | "print(train[10])"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 33,
236 | "metadata": {},
237 | "outputs": [],
238 | "source": [
239 | "train=np.asarray(train)"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": 34,
245 | "metadata": {},
246 | "outputs": [],
247 | "source": [
248 | "min_train=np.zeros((115,11))\n",
249 | "max_train=np.zeros((5358,11))\n",
250 | "min_train=np.asarray(min_train)\n",
251 | "max_train=np.asarray(max_train)"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 62,
257 | "metadata": {},
258 | "outputs": [],
259 | "source": [
260 | "k=0\n",
261 | "l=0\n",
262 | "for i in range(0,5473):\n",
263 | " if(train[i][10]==1):\n",
264 | " for j in range(0,11):\n",
265 | " min_train[k][j]=train[i][j] \n",
266 | " k=k+1\n",
267 | " else :\n",
268 | " for j in range(0,11):\n",
269 | " max_train[l][j]=train[i][j]\n",
270 | " l=l+1"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 63,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "name": "stdout",
280 | "output_type": "stream",
281 | "text": [
282 | "[[ 5. 7. 35. ... 23. 6. -1.]\n",
283 | " [ 6. 7. 42. ... 37. 5. -1.]\n",
284 | " [ 6. 18. 108. ... 80. 7. -1.]\n",
285 | " ...\n",
286 | " [ 6. 95. 570. ... 519. 104. -1.]\n",
287 | " [ 7. 41. 287. ... 230. 45. -1.]\n",
288 | " [ 8. 1. 8. ... 8. 1. -1.]]\n"
289 | ]
290 | }
291 | ],
292 | "source": [
293 | "print(max_train)"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 64,
299 | "metadata": {},
300 | "outputs": [],
301 | "source": [
302 | "data1=np.zeros((1094,11))\n",
303 | "data2=np.zeros((1094,11))\n",
304 | "data3=np.zeros((1095,11))\n",
305 | "data4=np.zeros((1095,11))\n",
306 | "data5=np.zeros((1095,11))"
307 | ]
308 | },
309 | {
310 | "cell_type": "code",
311 | "execution_count": null,
312 | "metadata": {},
313 | "outputs": [],
314 | "source": []
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 65,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "name": "stdout",
323 | "output_type": "stream",
324 | "text": [
325 | "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n"
326 | ]
327 | }
328 | ],
329 | "source": [
330 | "for i in range(0,115):\n",
331 | " for j in range(0,11):\n",
332 | " if(i<23):\n",
333 | " data1[i][j]=min_train[i][j]\n",
334 | " elif(22\n",
30 | "\n",
43 | "\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " 0 | \n",
48 | " 1 | \n",
49 | " 2 | \n",
50 | " 3 | \n",
51 | "
\n",
52 | " \n",
53 | " \n",
54 | " \n",
55 | " | 0 | \n",
56 | " 30 | \n",
57 | " 64 | \n",
58 | " 1 | \n",
59 | " 1 | \n",
60 | "
\n",
61 | " \n",
62 | " | 1 | \n",
63 | " 30 | \n",
64 | " 62 | \n",
65 | " 3 | \n",
66 | " 1 | \n",
67 | "
\n",
68 | " \n",
69 | " | 2 | \n",
70 | " 30 | \n",
71 | " 65 | \n",
72 | " 0 | \n",
73 | " 1 | \n",
74 | "
\n",
75 | " \n",
76 | " | 3 | \n",
77 | " 31 | \n",
78 | " 59 | \n",
79 | " 2 | \n",
80 | " 1 | \n",
81 | "
\n",
82 | " \n",
83 | " | 4 | \n",
84 | " 31 | \n",
85 | " 65 | \n",
86 | " 4 | \n",
87 | " 1 | \n",
88 | "
\n",
89 | " \n",
90 | "
\n",
91 | ""
92 | ],
93 | "text/plain": [
94 | " 0 1 2 3\n",
95 | "0 30 64 1 1\n",
96 | "1 30 62 3 1\n",
97 | "2 30 65 0 1\n",
98 | "3 31 59 2 1\n",
99 | "4 31 65 4 1"
100 | ]
101 | },
102 | "execution_count": 4,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "train = pd.read_csv(\"haberman.csv\",header=None)\n",
109 | "train.head()"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 6,
115 | "metadata": {},
116 | "outputs": [
117 | {
118 | "name": "stdout",
119 | "output_type": "stream",
120 | "text": [
121 | "0 -1\n",
122 | "1 -1\n",
123 | "2 -1\n",
124 | "3 -1\n",
125 | "4 -1\n",
126 | "5 -1\n",
127 | "6 -1\n",
128 | "7 1\n",
129 | "8 1\n",
130 | "9 -1\n",
131 | "10 -1\n",
132 | "11 -1\n",
133 | "12 -1\n",
134 | "13 -1\n",
135 | "14 -1\n",
136 | "15 -1\n",
137 | "16 -1\n",
138 | "17 -1\n",
139 | "18 -1\n",
140 | "19 -1\n",
141 | "20 -1\n",
142 | "21 -1\n",
143 | "22 -1\n",
144 | "23 -1\n",
145 | "24 1\n",
146 | "25 -1\n",
147 | "26 -1\n",
148 | "27 -1\n",
149 | "28 -1\n",
150 | "29 -1\n",
151 | " ..\n",
152 | "276 -1\n",
153 | "277 -1\n",
154 | "278 -1\n",
155 | "279 -1\n",
156 | "280 -1\n",
157 | "281 1\n",
158 | "282 -1\n",
159 | "283 -1\n",
160 | "284 -1\n",
161 | "285 1\n",
162 | "286 1\n",
163 | "287 -1\n",
164 | "288 -1\n",
165 | "289 -1\n",
166 | "290 -1\n",
167 | "291 -1\n",
168 | "292 -1\n",
169 | "293 1\n",
170 | "294 -1\n",
171 | "295 -1\n",
172 | "296 -1\n",
173 | "297 -1\n",
174 | "298 -1\n",
175 | "299 1\n",
176 | "300 -1\n",
177 | "301 -1\n",
178 | "302 -1\n",
179 | "303 -1\n",
180 | "304 1\n",
181 | "305 1\n",
182 | "Name: 3, Length: 306, dtype: int64\n"
183 | ]
184 | }
185 | ],
186 | "source": [
187 | "train[3] = train[3].map({2:1,1:-1})\n",
188 | "print(train[3])"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 7,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "train=np.asarray(train)"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": 19,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": [
206 | "min_train=np.zeros((81,4))\n",
207 | "max_train=np.zeros((225,4))\n",
208 | "min_train=np.asarray(min_train)\n",
209 | "max_train=np.asarray(max_train)"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 20,
215 | "metadata": {},
216 | "outputs": [],
217 | "source": [
218 | "k=0\n",
219 | "l=0\n",
220 | "for i in range(0,306):\n",
221 | " if(train[i][3]==1):\n",
222 | " for j in range(0,4):\n",
223 | " min_train[k][j]=train[i][j] \n",
224 | " k=k+1\n",
225 | " else :\n",
226 | " for j in range(0,4):\n",
227 | " max_train[l][j]=train[i][j]\n",
228 | " l=l+1"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 21,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "name": "stdout",
238 | "output_type": "stream",
239 | "text": [
240 | "[[34. 59. 0. 1.]\n",
241 | " [34. 66. 9. 1.]\n",
242 | " [38. 69. 21. 1.]\n",
243 | " [39. 66. 0. 1.]\n",
244 | " [41. 60. 23. 1.]\n",
245 | " [41. 64. 0. 1.]\n",
246 | " [41. 67. 0. 1.]\n",
247 | " [42. 69. 1. 1.]\n",
248 | " [42. 59. 0. 1.]\n",
249 | " [43. 58. 52. 1.]\n",
250 | " [43. 59. 2. 1.]\n",
251 | " [43. 64. 0. 1.]\n",
252 | " [43. 64. 0. 1.]\n",
253 | " [44. 64. 6. 1.]\n",
254 | " [44. 58. 9. 1.]\n",
255 | " [44. 63. 19. 1.]\n",
256 | " [45. 65. 6. 1.]\n",
257 | " [45. 66. 0. 1.]\n",
258 | " [45. 67. 1. 1.]\n",
259 | " [46. 58. 2. 1.]\n",
260 | " [46. 69. 3. 1.]\n",
261 | " [46. 62. 5. 1.]\n",
262 | " [46. 65. 20. 1.]\n",
263 | " [47. 63. 23. 1.]\n",
264 | " [47. 62. 0. 1.]\n",
265 | " [47. 65. 0. 1.]\n",
266 | " [48. 58. 11. 1.]\n",
267 | " [48. 58. 11. 1.]\n",
268 | " [48. 67. 7. 1.]\n",
269 | " [49. 63. 0. 1.]\n",
270 | " [49. 64. 10. 1.]\n",
271 | " [50. 63. 13. 1.]\n",
272 | " [50. 64. 0. 1.]\n",
273 | " [51. 59. 13. 1.]\n",
274 | " [51. 59. 3. 1.]\n",
275 | " [52. 69. 3. 1.]\n",
276 | " [52. 59. 2. 1.]\n",
277 | " [52. 62. 3. 1.]\n",
278 | " [52. 66. 4. 1.]\n",
279 | " [53. 58. 4. 1.]\n",
280 | " [53. 65. 1. 1.]\n",
281 | " [53. 59. 3. 1.]\n",
282 | " [53. 60. 9. 1.]\n",
283 | " [53. 63. 24. 1.]\n",
284 | " [53. 65. 12. 1.]\n",
285 | " [54. 60. 11. 1.]\n",
286 | " [54. 65. 23. 1.]\n",
287 | " [54. 65. 5. 1.]\n",
288 | " [54. 68. 7. 1.]\n",
289 | " [55. 63. 6. 1.]\n",
290 | " [55. 68. 15. 1.]\n",
291 | " [56. 65. 9. 1.]\n",
292 | " [56. 66. 3. 1.]\n",
293 | " [57. 61. 5. 1.]\n",
294 | " [57. 62. 14. 1.]\n",
295 | " [57. 64. 1. 1.]\n",
296 | " [59. 62. 35. 1.]\n",
297 | " [60. 59. 17. 1.]\n",
298 | " [60. 65. 0. 1.]\n",
299 | " [61. 62. 5. 1.]\n",
300 | " [61. 65. 0. 1.]\n",
301 | " [61. 68. 1. 1.]\n",
302 | " [62. 59. 13. 1.]\n",
303 | " [62. 58. 0. 1.]\n",
304 | " [62. 65. 19. 1.]\n",
305 | " [63. 60. 1. 1.]\n",
306 | " [65. 58. 0. 1.]\n",
307 | " [65. 61. 2. 1.]\n",
308 | " [65. 62. 22. 1.]\n",
309 | " [65. 66. 15. 1.]\n",
310 | " [66. 58. 0. 1.]\n",
311 | " [66. 61. 13. 1.]\n",
312 | " [67. 64. 8. 1.]\n",
313 | " [67. 63. 1. 1.]\n",
314 | " [69. 67. 8. 1.]\n",
315 | " [70. 58. 0. 1.]\n",
316 | " [70. 58. 4. 1.]\n",
317 | " [72. 63. 0. 1.]\n",
318 | " [74. 65. 3. 1.]\n",
319 | " [78. 65. 1. 1.]\n",
320 | " [83. 58. 2. 1.]]\n"
321 | ]
322 | }
323 | ],
324 | "source": [
325 | "print(min_train)"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 23,
331 | "metadata": {},
332 | "outputs": [],
333 | "source": [
334 | "data1=np.zeros((61,4))\n",
335 | "data2=np.zeros((61,4))\n",
336 | "data3=np.zeros((61,4))\n",
337 | "data4=np.zeros((61,4))\n",
338 | "data5=np.zeros((62,4))"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": null,
344 | "metadata": {},
345 | "outputs": [],
346 | "source": []
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": 28,
351 | "metadata": {},
352 | "outputs": [
353 | {
354 | "name": "stdout",
355 | "output_type": "stream",
356 | "text": [
357 | "[0. 0. 0. 0.]\n"
358 | ]
359 | }
360 | ],
361 | "source": [
362 | "for i in range(0,81):\n",
363 | " for j in range(0,4):\n",
364 | " if(i<16):\n",
365 | " data1[i][j]=min_train[i][j]\n",
366 | " elif(15\n",
37 | "\n",
50 | "\n",
51 | " \n",
52 | " \n",
53 | " | \n",
54 | " sex | \n",
55 | " length | \n",
56 | " diameter | \n",
57 | " height | \n",
58 | " weight.w | \n",
59 | " weight.s | \n",
60 | " weight.v | \n",
61 | " weight.sh | \n",
62 | " rings | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 0 | \n",
68 | " M | \n",
69 | " 0.455 | \n",
70 | " 0.365 | \n",
71 | " 0.095 | \n",
72 | " 0.5140 | \n",
73 | " 0.2245 | \n",
74 | " 0.1010 | \n",
75 | " 0.150 | \n",
76 | " 15 | \n",
77 | "
\n",
78 | " \n",
79 | " | 1 | \n",
80 | " M | \n",
81 | " 0.350 | \n",
82 | " 0.265 | \n",
83 | " 0.090 | \n",
84 | " 0.2255 | \n",
85 | " 0.0995 | \n",
86 | " 0.0485 | \n",
87 | " 0.070 | \n",
88 | " 7 | \n",
89 | "
\n",
90 | " \n",
91 | " | 2 | \n",
92 | " F | \n",
93 | " 0.530 | \n",
94 | " 0.420 | \n",
95 | " 0.135 | \n",
96 | " 0.6770 | \n",
97 | " 0.2565 | \n",
98 | " 0.1415 | \n",
99 | " 0.210 | \n",
100 | " 9 | \n",
101 | "
\n",
102 | " \n",
103 | " | 3 | \n",
104 | " M | \n",
105 | " 0.440 | \n",
106 | " 0.365 | \n",
107 | " 0.125 | \n",
108 | " 0.5160 | \n",
109 | " 0.2155 | \n",
110 | " 0.1140 | \n",
111 | " 0.155 | \n",
112 | " 10 | \n",
113 | "
\n",
114 | " \n",
115 | " | 4 | \n",
116 | " I | \n",
117 | " 0.330 | \n",
118 | " 0.255 | \n",
119 | " 0.080 | \n",
120 | " 0.2050 | \n",
121 | " 0.0895 | \n",
122 | " 0.0395 | \n",
123 | " 0.055 | \n",
124 | " 7 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | ""
129 | ],
130 | "text/plain": [
131 | " sex length diameter height weight.w weight.s weight.v weight.sh \\\n",
132 | "0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 0.150 \n",
133 | "1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 \n",
134 | "2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 \n",
135 | "3 M 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 \n",
136 | "4 I 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 \n",
137 | "\n",
138 | " rings \n",
139 | "0 15 \n",
140 | "1 7 \n",
141 | "2 9 \n",
142 | "3 10 \n",
143 | "4 7 "
144 | ]
145 | },
146 | "execution_count": 2,
147 | "metadata": {},
148 | "output_type": "execute_result"
149 | }
150 | ],
151 | "source": [
152 | "train = pd.read_csv(\"Abalone.csv\")\n",
153 | "train.head()"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 3,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "train=train.replace(to_replace=['M', 'F', 'I'], value=[1, 2, 3])"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 4,
168 | "metadata": {},
169 | "outputs": [
170 | {
171 | "name": "stdout",
172 | "output_type": "stream",
173 | "text": [
174 | "0 1\n",
175 | "1 -1\n",
176 | "2 -1\n",
177 | "3 -1\n",
178 | "4 -1\n",
179 | "5 -1\n",
180 | "6 -1\n",
181 | "7 -1\n",
182 | "8 -1\n",
183 | "9 -1\n",
184 | "10 -1\n",
185 | "11 -1\n",
186 | "12 -1\n",
187 | "13 -1\n",
188 | "14 -1\n",
189 | "15 -1\n",
190 | "16 -1\n",
191 | "17 -1\n",
192 | "18 -1\n",
193 | "19 -1\n",
194 | "20 -1\n",
195 | "21 -1\n",
196 | "22 -1\n",
197 | "23 -1\n",
198 | "24 -1\n",
199 | "25 -1\n",
200 | "26 -1\n",
201 | "27 -1\n",
202 | "28 1\n",
203 | "29 -1\n",
204 | " ..\n",
205 | "4147 -1\n",
206 | "4148 -1\n",
207 | "4149 -1\n",
208 | "4150 -1\n",
209 | "4151 -1\n",
210 | "4152 -1\n",
211 | "4153 -1\n",
212 | "4154 -1\n",
213 | "4155 -1\n",
214 | "4156 -1\n",
215 | "4157 -1\n",
216 | "4158 -1\n",
217 | "4159 -1\n",
218 | "4160 -1\n",
219 | "4161 -1\n",
220 | "4162 -1\n",
221 | "4163 -1\n",
222 | "4164 -1\n",
223 | "4165 -1\n",
224 | "4166 -1\n",
225 | "4167 -1\n",
226 | "4168 -1\n",
227 | "4169 -1\n",
228 | "4170 -1\n",
229 | "4171 -1\n",
230 | "4172 -1\n",
231 | "4173 -1\n",
232 | "4174 -1\n",
233 | "4175 -1\n",
234 | "4176 -1\n",
235 | "Name: rings, Length: 4177, dtype: int64\n"
236 | ]
237 | }
238 | ],
239 | "source": [
240 | "train['rings'] = train['rings'].map({15: 1, 1:-1,2:-1,3:-1,4:-1,5:-1,6:-1,7:-1,8:-1,9:-1,10:-1,11:-1,12:-1,13:-1,14:-1,16:-1,17:-1,18:-1,19:-1,20:-1,21:-1,22:-1,23:-1,24:-1,25:-1,26:-1,27:-1,28:-1,29:-1})\n",
241 | "print(train['rings'])"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 5,
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "train=np.asarray(train)"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 6,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "min_train=np.zeros((103,9))\n",
260 | "max_train=np.zeros((4074,9))\n",
261 | "min_train=np.asarray(min_train)\n",
262 | "max_train=np.asarray(max_train)"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 7,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "k=0\n",
272 | "l=0\n",
273 | "for i in range(0,4177):\n",
274 | " if(train[i][8]==1):\n",
275 | " for j in range(0,9):\n",
276 | " min_train[k][j]=train[i][j] \n",
277 | " k=k+1\n",
278 | " else :\n",
279 | " for j in range(0,9):\n",
280 | " max_train[l][j]=train[i][j]\n",
281 | " l=l+1"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 8,
287 | "metadata": {},
288 | "outputs": [
289 | {
290 | "name": "stdout",
291 | "output_type": "stream",
292 | "text": [
293 | "[[1. 0.455 0.365 0.095 0.514 0.2245 0.101 0.15 1. ]\n",
294 | " [1. 0.605 0.475 0.18 0.9365 0.394 0.219 0.295 1. ]\n",
295 | " [2. 0.68 0.56 0.165 1.639 0.6055 0.2805 0.46 1. ]\n",
296 | " [2. 0.6 0.475 0.15 1.0075 0.4425 0.221 0.28 1. ]\n",
297 | " [1. 0.565 0.425 0.135 0.8115 0.341 0.1675 0.255 1. ]\n",
298 | " [1. 0.695 0.56 0.19 1.494 0.588 0.3425 0.485 1. ]\n",
299 | " [1. 0.55 0.435 0.145 0.843 0.328 0.1915 0.255 1. ]\n",
300 | " [1. 0.53 0.435 0.16 0.883 0.316 0.164 0.335 1. ]\n",
301 | " [1. 0.59 0.475 0.145 1.053 0.4415 0.262 0.325 1. ]\n",
302 | " [1. 0.56 0.45 0.16 0.922 0.432 0.178 0.26 1. ]\n",
303 | " [2. 0.53 0.415 0.16 0.783 0.2935 0.158 0.245 1. ]\n",
304 | " [2. 0.575 0.46 0.185 1.094 0.4485 0.217 0.345 1. ]\n",
305 | " [1. 0.6 0.495 0.165 1.2415 0.485 0.2775 0.34 1. ]\n",
306 | " [1. 0.56 0.45 0.175 1.011 0.3835 0.2065 0.37 1. ]\n",
307 | " [2. 0.635 0.505 0.17 1.415 0.605 0.297 0.365 1. ]\n",
308 | " [1. 0.63 0.505 0.225 1.525 0.56 0.3335 0.45 1. ]\n",
309 | " [2. 0.535 0.415 0.185 0.8415 0.314 0.1585 0.3 1. ]\n",
310 | " [1. 0.61 0.475 0.165 1.116 0.428 0.2205 0.315 1. ]\n",
311 | " [2. 0.565 0.45 0.195 1.0035 0.406 0.2505 0.285 1. ]\n",
312 | " [1. 0.565 0.465 0.175 0.995 0.3895 0.183 0.37 1. ]\n",
313 | " [1. 0.605 0.47 0.18 1.1405 0.3755 0.2805 0.385 1. ]\n",
314 | " [1. 0.59 0.5 0.165 1.1045 0.4565 0.2425 0.34 1. ]\n",
315 | " [2. 0.62 0.47 0.14 1.0325 0.3605 0.224 0.36 1. ]\n",
316 | " [2. 0.64 0.54 0.175 1.221 0.51 0.259 0.39 1. ]\n",
317 | " [1. 0.57 0.465 0.125 0.849 0.3785 0.1765 0.24 1. ]\n",
318 | " [2. 0.625 0.515 0.15 1.2415 0.5235 0.3065 0.36 1. ]\n",
319 | " [1. 0.655 0.53 0.175 1.2635 0.486 0.2635 0.415 1. ]\n",
320 | " [2. 0.625 0.5 0.15 0.953 0.3445 0.2235 0.305 1. ]\n",
321 | " [2. 0.62 0.47 0.225 1.115 0.378 0.2145 0.36 1. ]\n",
322 | " [1. 0.6 0.47 0.175 1.105 0.4865 0.247 0.315 1. ]\n",
323 | " [1. 0.585 0.455 0.225 1.055 0.3815 0.221 0.365 1. ]\n",
324 | " [2. 0.5 0.375 0.14 0.604 0.242 0.1415 0.179 1. ]\n",
325 | " [1. 0.42 0.325 0.115 0.2885 0.1 0.057 0.1135 1. ]\n",
326 | " [3. 0.45 0.35 0.145 0.525 0.2085 0.1 0.1655 1. ]\n",
327 | " [3. 0.465 0.36 0.105 0.498 0.214 0.116 0.14 1. ]\n",
328 | " [2. 0.485 0.38 0.15 0.605 0.2155 0.14 0.18 1. ]\n",
329 | " [1. 0.565 0.44 0.185 0.909 0.344 0.2325 0.255 1. ]\n",
330 | " [1. 0.555 0.44 0.15 1.092 0.416 0.212 0.4405 1. ]\n",
331 | " [1. 0.525 0.41 0.13 0.99 0.3865 0.243 0.295 1. ]\n",
332 | " [2. 0.52 0.4 0.12 0.6515 0.261 0.2015 0.165 1. ]\n",
333 | " [1. 0.52 0.4 0.12 0.823 0.298 0.1805 0.265 1. ]\n",
334 | " [1. 0.695 0.515 0.175 1.5165 0.578 0.4105 0.39 1. ]\n",
335 | " [2. 0.605 0.495 0.19 1.437 0.469 0.2655 0.41 1. ]\n",
336 | " [1. 0.57 0.43 0.12 1.0615 0.348 0.167 0.31 1. ]\n",
337 | " [1. 0.585 0.405 0.15 1.2565 0.435 0.202 0.325 1. ]\n",
338 | " [1. 0.505 0.385 0.145 0.6775 0.236 0.179 0.2 1. ]\n",
339 | " [1. 0.465 0.35 0.14 0.5755 0.2015 0.1505 0.19 1. ]\n",
340 | " [2. 0.47 0.36 0.145 0.537 0.1725 0.1375 0.195 1. ]\n",
341 | " [1. 0.55 0.415 0.175 1.042 0.3295 0.2325 0.2905 1. ]\n",
342 | " [1. 0.515 0.405 0.145 0.695 0.215 0.1635 0.234 1. ]\n",
343 | " [2. 0.48 0.4 0.125 0.759 0.2125 0.179 0.24 1. ]\n",
344 | " [1. 0.66 0.53 0.17 1.3905 0.5905 0.212 0.453 1. ]\n",
345 | " [1. 0.64 0.565 0.23 1.521 0.644 0.372 0.406 1. ]\n",
346 | " [2. 0.7 0.535 0.175 1.773 0.6805 0.48 0.512 1. ]\n",
347 | " [1. 0.62 0.495 0.195 1.5145 0.579 0.346 0.5195 1. ]\n",
348 | " [2. 0.675 0.55 0.18 1.6885 0.562 0.3705 0.6 1. ]\n",
349 | " [2. 0.595 0.48 0.2 0.975 0.358 0.2035 0.34 1. ]\n",
350 | " [1. 0.645 0.495 0.185 1.4935 0.5265 0.2785 0.455 1. ]\n",
351 | " [2. 0.56 0.435 0.185 1.106 0.422 0.2435 0.33 1. ]\n",
352 | " [2. 0.61 0.48 0.175 1.0675 0.391 0.216 0.42 1. ]\n",
353 | " [1. 0.635 0.51 0.21 1.598 0.6535 0.2835 0.58 1. ]\n",
354 | " [1. 0.695 0.57 0.2 2.033 0.751 0.4255 0.685 1. ]\n",
355 | " [2. 0.505 0.395 0.145 0.6515 0.2695 0.153 0.205 1. ]\n",
356 | " [2. 0.525 0.425 0.145 0.7995 0.3345 0.209 0.24 1. ]\n",
357 | " [3. 0.48 0.39 0.145 0.5825 0.2315 0.121 0.255 1. ]\n",
358 | " [1. 0.59 0.46 0.155 0.906 0.327 0.1485 0.335 1. ]\n",
359 | " [2. 0.6 0.47 0.2 1.031 0.392 0.2035 0.29 1. ]\n",
360 | " [1. 0.65 0.545 0.16 1.2425 0.487 0.296 0.48 1. ]\n",
361 | " [3. 0.555 0.455 0.17 0.8435 0.309 0.1905 0.3 1. ]\n",
362 | " [3. 0.655 0.515 0.145 1.25 0.5265 0.283 0.315 1. ]\n",
363 | " [3. 0.62 0.485 0.17 1.208 0.4805 0.3045 0.33 1. ]\n",
364 | " [3. 0.52 0.415 0.16 0.595 0.2105 0.142 0.26 1. ]\n",
365 | " [1. 0.49 0.39 0.135 0.592 0.242 0.096 0.1835 1. ]\n",
366 | " [2. 0.52 0.4 0.13 0.6245 0.215 0.2065 0.17 1. ]\n",
367 | " [1. 0.495 0.4 0.14 0.7775 0.2015 0.18 0.25 1. ]\n",
368 | " [1. 0.66 0.535 0.2 1.791 0.733 0.318 0.54 1. ]\n",
369 | " [1. 0.65 0.52 0.195 1.676 0.693 0.44 0.47 1. ]\n",
370 | " [1. 0.64 0.49 0.14 1.194 0.4445 0.238 0.375 1. ]\n",
371 | " [1. 0.605 0.49 0.155 1.153 0.503 0.2505 0.295 1. ]\n",
372 | " [1. 0.605 0.47 0.115 1.114 0.3925 0.291 0.31 1. ]\n",
373 | " [2. 0.505 0.41 0.135 0.657 0.291 0.133 0.195 1. ]\n",
374 | " [2. 0.665 0.53 0.185 1.3955 0.456 0.3205 0.49 1. ]\n",
375 | " [3. 0.48 0.38 0.125 0.523 0.2105 0.1045 0.175 1. ]\n",
376 | " [2. 0.69 0.54 0.185 1.5715 0.6935 0.318 0.47 1. ]\n",
377 | " [1. 0.555 0.435 0.135 0.858 0.377 0.1585 0.29 1. ]\n",
378 | " [1. 0.635 0.48 0.19 1.467 0.5825 0.303 0.42 1. ]\n",
379 | " [2. 0.61 0.495 0.19 1.213 0.464 0.306 0.365 1. ]\n",
380 | " [2. 0.465 0.39 0.14 0.5555 0.213 0.1075 0.215 1. ]\n",
381 | " [2. 0.605 0.475 0.145 1.0185 0.4695 0.225 0.27 1. ]\n",
382 | " [1. 0.535 0.42 0.16 0.72 0.275 0.164 0.225 1. ]\n",
383 | " [2. 0.71 0.575 0.175 1.555 0.6465 0.3705 0.52 1. ]\n",
384 | " [2. 0.48 0.37 0.13 0.5885 0.2475 0.1505 0.1595 1. ]\n",
385 | " [3. 0.66 0.525 0.18 1.6935 0.6025 0.4005 0.42 1. ]\n",
386 | " [2. 0.52 0.405 0.145 0.829 0.3535 0.1685 0.205 1. ]\n",
387 | " [1. 0.495 0.4 0.12 0.6605 0.2605 0.161 0.19 1. ]\n",
388 | " [2. 0.5 0.39 0.13 0.6355 0.2505 0.1635 0.195 1. ]\n",
389 | " [1. 0.545 0.44 0.165 0.744 0.2875 0.204 0.25 1. ]\n",
390 | " [2. 0.645 0.5 0.225 1.626 0.587 0.4055 0.41 1. ]\n",
391 | " [2. 0.61 0.49 0.17 1.1775 0.5655 0.2385 0.295 1. ]\n",
392 | " [2. 0.67 0.545 0.16 1.5415 0.5985 0.2565 0.495 1. ]\n",
393 | " [1. 0.445 0.345 0.14 0.476 0.2055 0.1015 0.1085 1. ]\n",
394 | " [3. 0.52 0.405 0.14 0.6765 0.2865 0.146 0.205 1. ]\n",
395 | " [2. 0.54 0.44 0.16 1.0905 0.391 0.2295 0.355 1. ]]\n"
396 | ]
397 | }
398 | ],
399 | "source": [
400 | "print(min_train)"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": 14,
406 | "metadata": {},
407 | "outputs": [],
408 | "source": [
409 | "data1=np.zeros((834,9))\n",
410 | "data2=np.zeros((835,9))\n",
411 | "data3=np.zeros((836,9))\n",
412 | "data4=np.zeros((836,9))\n",
413 | "data5=np.zeros((836,9))"
414 | ]
415 | },
416 | {
417 | "cell_type": "code",
418 | "execution_count": null,
419 | "metadata": {},
420 | "outputs": [],
421 | "source": []
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": 16,
426 | "metadata": {},
427 | "outputs": [
428 | {
429 | "name": "stdout",
430 | "output_type": "stream",
431 | "text": [
432 | "[2. 0.54 0.44 0.16 1.0905 0.391 0.2295 0.355 1. ]\n"
433 | ]
434 | }
435 | ],
436 | "source": [
437 | "for i in range(0,103):\n",
438 | " for j in range(0,9):\n",
439 | " if(i<20):\n",
440 | " data1[i][j]=min_train[i][j]\n",
441 | " elif(19\n",
41 | "\n",
54 | "\n",
55 | " \n",
56 | " \n",
57 | " | \n",
58 | " 0 | \n",
59 | " 1 | \n",
60 | " 2 | \n",
61 | " 3 | \n",
62 | "
\n",
63 | " \n",
64 | " \n",
65 | " \n",
66 | " | 0 | \n",
67 | " 34.0 | \n",
68 | " 59.0 | \n",
69 | " 0.0 | \n",
70 | " 1.0 | \n",
71 | "
\n",
72 | " \n",
73 | " | 1 | \n",
74 | " 34.0 | \n",
75 | " 66.0 | \n",
76 | " 9.0 | \n",
77 | " 1.0 | \n",
78 | "
\n",
79 | " \n",
80 | " | 2 | \n",
81 | " 38.0 | \n",
82 | " 69.0 | \n",
83 | " 21.0 | \n",
84 | " 1.0 | \n",
85 | "
\n",
86 | " \n",
87 | " | 3 | \n",
88 | " 39.0 | \n",
89 | " 66.0 | \n",
90 | " 0.0 | \n",
91 | " 1.0 | \n",
92 | "
\n",
93 | " \n",
94 | " | 4 | \n",
95 | " 41.0 | \n",
96 | " 60.0 | \n",
97 | " 23.0 | \n",
98 | " 1.0 | \n",
99 | "
\n",
100 | " \n",
101 | "
\n",
102 | ""
103 | ],
104 | "text/plain": [
105 | " 0 1 2 3\n",
106 | "0 34.0 59.0 0.0 1.0\n",
107 | "1 34.0 66.0 9.0 1.0\n",
108 | "2 38.0 69.0 21.0 1.0\n",
109 | "3 39.0 66.0 0.0 1.0\n",
110 | "4 41.0 60.0 23.0 1.0"
111 | ]
112 | },
113 | "execution_count": 2,
114 | "metadata": {},
115 | "output_type": "execute_result"
116 | }
117 | ],
118 | "source": [
119 | "train = pd.read_csv(\"modifiedhaberman.csv\", header=None)\n",
120 | "train.head()"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 3,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "data": {
130 | "text/html": [
131 | "\n",
132 | "\n",
145 | "
\n",
146 | " \n",
147 | " \n",
148 | " | \n",
149 | " 0 | \n",
150 | " 1 | \n",
151 | " 2 | \n",
152 | "
\n",
153 | " \n",
154 | " \n",
155 | " \n",
156 | " | 0 | \n",
157 | " 34.0 | \n",
158 | " 59.0 | \n",
159 | " 0.0 | \n",
160 | "
\n",
161 | " \n",
162 | " | 1 | \n",
163 | " 34.0 | \n",
164 | " 66.0 | \n",
165 | " 9.0 | \n",
166 | "
\n",
167 | " \n",
168 | " | 2 | \n",
169 | " 38.0 | \n",
170 | " 69.0 | \n",
171 | " 21.0 | \n",
172 | "
\n",
173 | " \n",
174 | " | 3 | \n",
175 | " 39.0 | \n",
176 | " 66.0 | \n",
177 | " 0.0 | \n",
178 | "
\n",
179 | " \n",
180 | " | 4 | \n",
181 | " 41.0 | \n",
182 | " 60.0 | \n",
183 | " 23.0 | \n",
184 | "
\n",
185 | " \n",
186 | "
\n",
187 | "
"
188 | ],
189 | "text/plain": [
190 | " 0 1 2\n",
191 | "0 34.0 59.0 0.0\n",
192 | "1 34.0 66.0 9.0\n",
193 | "2 38.0 69.0 21.0\n",
194 | "3 39.0 66.0 0.0\n",
195 | "4 41.0 60.0 23.0"
196 | ]
197 | },
198 | "execution_count": 3,
199 | "metadata": {},
200 | "output_type": "execute_result"
201 | }
202 | ],
203 | "source": [
204 | "features = train.columns[0:3]\n",
205 | "X = train[features]\n",
206 | "y = train[3]\n",
207 | "X.head()"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 4,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=0)\n"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 5,
222 | "metadata": {},
223 | "outputs": [
224 | {
225 | "name": "stdout",
226 | "output_type": "stream",
227 | "text": [
228 | "(244, 3) (62, 3)\n"
229 | ]
230 | }
231 | ],
232 | "source": [
233 | "print(X_train.shape,X_test.shape)"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 6,
239 | "metadata": {},
240 | "outputs": [],
241 | "source": [
242 | "X_train=np.asarray(X_train)\n",
243 | "y_train=np.asarray(y_train)"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 7,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "def linear_kernel(x1, x2):\n",
253 | " return np.dot(x1, x2)\n",
254 | "\n",
255 | "def polynomial_kernel(x, y, p=3):\n",
256 | " return (1 + np.dot(x, y)) ** p\n",
257 | "\n",
258 | "def gaussian_kernel(x, y, sigma=100.0):\n",
259 | " # print(-linalg.norm(x-y)**2)\n",
260 | " x=np.asarray(x)\n",
261 | " y=np.asarray(y)\n",
262 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n",
263 | "\n",
264 | "def gm(y_predict,y_test):\n",
265 | " test_min=0\n",
266 | " test_max=0\n",
267 | " pred_min=0\n",
268 | " pred_max=0\n",
269 | " y_test=np.asarray(y_test)\n",
270 | " for i in range(0,62):\n",
271 | " if(y_test[i]==1):\n",
272 | " test_min=test_min+1\n",
273 | " else:\n",
274 | " test_max=test_max+1\n",
275 | " print(\"y_test min\",test_min) \n",
276 | " print(\"y_test max\",test_max)\n",
277 | " for i in range(0,62):\n",
278 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n",
279 | " pred_min=pred_min+1\n",
280 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n",
281 | " pred_max=pred_max+1\n",
282 | " print(\"y_pred min\",pred_min) \n",
283 | " print(\"y_pred max\",pred_max)\n",
284 | " se=pred_min/test_min\n",
285 | " sp=pred_max/test_max\n",
286 | " print(se,sp)\n",
287 | " gm=math.sqrt(se*sp)\n",
288 | " print(\"GM\",gm)"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "# FSVM using Hyperplane"
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": 14,
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "from cvxopt import matrix\n",
305 | "class HYP_SVM(object):\n",
306 | "\n",
307 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
308 | " self.kernel = kernel\n",
309 | " self.C = C\n",
310 | " if self.C is not None: self.C = float(self.C)\n",
311 | " def m_func(self, X_train,X_test, y):\n",
312 | " n_samples, n_features = X_train.shape \n",
313 | " nt_samples, nt_features= X_test.shape\n",
314 | " self.K = np.zeros((n_samples, n_samples))\n",
315 | " for i in range(n_samples):\n",
316 | " for j in range(n_samples):\n",
317 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
318 | " # print(K[i,j])\n",
319 | " X_train=np.asarray(X_train)\n",
320 | " X_test=np.asarray(X_test)\n",
321 | " K1 = np.zeros((n_samples, n_samples))\n",
322 | " for i in range(n_samples):\n",
323 | " for j in range(n_samples):\n",
324 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
325 | " # print(K[i,j])\n",
326 | " print(K1.shape)\n",
327 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
328 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
329 | " A = cvxopt.matrix(y, (1,n_samples))\n",
330 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
331 | " b = cvxopt.matrix(0.0)\n",
332 | " #print(P,q,A,b)\n",
333 | " if self.C is None:\n",
334 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
335 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
336 | " \n",
337 | " else:\n",
338 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
339 | " tmp2 = np.identity(n_samples)\n",
340 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
341 | " tmp1 = np.zeros(n_samples)\n",
342 | " tmp2 = np.ones(n_samples) * self.C\n",
343 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
344 | " # solve QP problem\n",
345 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
346 | " print(solution['status'])\n",
347 | " # Lagrange multipliers\n",
348 | " a = np.ravel(solution['x'])\n",
349 | " a_org = np.ravel(solution['x'])\n",
350 | " # Support vectors have non zero lagrange multipliers\n",
351 | " sv = a > 1e-5\n",
352 | " #print(sv.shape)\n",
353 | " ind = np.arange(len(a))[sv]\n",
354 | " self.a_org=a\n",
355 | " self.a = a[sv]\n",
356 | " self.sv = X_train[sv]\n",
357 | " self.sv_y = y[sv]\n",
358 | " self.sv_yorg=y\n",
359 | " self.kernel = gaussian_kernel\n",
360 | " X_train=np.asarray(X_train)\n",
361 | " b = 0\n",
362 | " for n in range(len(self.a)):\n",
363 | " b += self.sv_y[n]\n",
364 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
365 | " b /= len(self.a)\n",
366 | " # print(self.a_org[1])\n",
367 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n",
368 | " w_phi=0\n",
369 | " total=0\n",
370 | " for n in range(len(self.a_org)):\n",
371 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n",
372 | " self.d_hyp=np.zeros(n_samples)\n",
373 | " for n in range(len(self.a_org)):\n",
374 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n",
375 | " func=np.zeros((n_samples))\n",
376 | " func=np.asarray(func)\n",
377 | " typ=2\n",
378 | " if(typ==1):\n",
379 | " for i in range(n_samples):\n",
380 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n",
381 | " beta=0.8\n",
382 | " if(typ==2):\n",
383 | " for i in range(n_samples):\n",
384 | " func[i]=2/(1+beta*self.d_hyp[i])\n",
385 | " r_max=26/74\n",
386 | " r_min=1\n",
387 | " self.m=func[0:81]*r_min\n",
388 | " print(self.m.shape)\n",
389 | " self.m=np.append(self.m,func[81:306]*r_max)\n",
390 | " print(self.m.shape)\n",
391 | " \n",
392 | " ##############################################################################\n",
393 | "\n",
394 | "\n",
395 | " def fit(self, X_train,X_test, y):\n",
396 | " self.kernel = gaussian_kernel\n",
397 | " n_samples, n_features = X_train.shape \n",
398 | " nt_samples, nt_features = X_test.shape\n",
399 | " # Gram matrix\n",
400 | "\n",
401 | " print(self.K.shape)\n",
402 | "\n",
403 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
404 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
405 | " A = cvxopt.matrix(y, (1,n_samples))\n",
406 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
407 | " b = cvxopt.matrix(0.0)\n",
408 | " #print(P,q,A,b)\n",
409 | " if self.C is None:\n",
410 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
411 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
412 | " \n",
413 | " else:\n",
414 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
415 | " tmp2 = np.identity(n_samples)\n",
416 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
417 | " tmp1 = np.zeros(n_samples)\n",
418 | " tmp2 = np.ones(n_samples) * self.C\n",
419 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
420 | " # solve QP problem\n",
421 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
422 | " print(solution['status'])\n",
423 | " # Lagrange multipliers\n",
424 | " a = np.ravel(solution['x'])\n",
425 | " a_org = np.ravel(solution['x'])\n",
426 | " # Support vectors have non zero lagrange multipliers\n",
427 | " for i in range(n_samples):\n",
428 | " sv=np.logical_or(self.a_org 1e-5)\n",
429 | " #print(sv.shape)\n",
430 | " ind = np.arange(len(a))[sv]\n",
431 | " self.a = a[sv]\n",
432 | " self.sv = X_train[sv]\n",
433 | " self.sv_y = y[sv]\n",
434 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
435 | "\n",
436 | " # Intercept\n",
437 | " self.b = 0\n",
438 | " for n in range(len(self.a)):\n",
439 | " self.b += self.sv_y[n]\n",
440 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
441 | " self.b /= len(self.a)\n",
442 | " print(self.b)\n",
443 | "\n",
444 | " # Weight vector\n",
445 | " if self.kernel == gaussian_kernel:\n",
446 | " self.w = np.zeros(n_features)\n",
447 | " for n in range(len(self.a)):\n",
448 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
449 | " else :\n",
450 | " self.w = None \n",
451 | " \n",
452 | " def project(self, X):\n",
453 | " if self.w is None:\n",
454 | " return np.dot(X, self.w) + self.b\n",
455 | " else:\n",
456 | " y_predict = np.zeros(len(X))\n",
457 | " X=np.asarray(X)\n",
458 | " for i in range(len(X)):\n",
459 | " s = 0\n",
460 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
461 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
462 | " y_predict[i] = s\n",
463 | " # print(y_predict[i])\n",
464 | " return y_predict + self.b\n",
465 | "\n",
466 | " def predict(self, X):\n",
467 | " return np.sign(self.project(X))"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 15,
473 | "metadata": {},
474 | "outputs": [
475 | {
476 | "name": "stdout",
477 | "output_type": "stream",
478 | "text": [
479 | "(244, 244)\n",
480 | " pcost dcost gap pres dres\n",
481 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n",
482 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n",
483 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n",
484 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n",
485 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n",
486 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n",
487 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n",
488 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n",
489 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n",
490 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n",
491 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n",
492 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n",
493 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n",
494 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n",
495 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n",
496 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n",
497 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n",
498 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n",
499 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n",
500 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n",
501 | "Optimal solution found.\n",
502 | "optimal\n",
503 | "(81,)\n",
504 | "(244,)\n",
505 | "(244, 244)\n",
506 | " pcost dcost gap pres dres\n",
507 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n",
508 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n",
509 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n",
510 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n",
511 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n",
512 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n",
513 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n",
514 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n",
515 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n",
516 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n",
517 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n",
518 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n",
519 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n",
520 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n",
521 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n",
522 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n",
523 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n",
524 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n",
525 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n",
526 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n",
527 | "Optimal solution found.\n",
528 | "optimal\n",
529 | "-0.7094793824855187\n",
530 | "y_test min 21\n",
531 | "y_test max 41\n",
532 | "y_pred min 5\n",
533 | "y_pred max 35\n",
534 | "0.23809523809523808 0.8536585365853658\n",
535 | "GM 0.45083481733371616\n",
536 | "40 out of 62 predictions correct\n",
537 | "Accuracy 0.6451612903225806\n"
538 | ]
539 | }
540 | ],
541 | "source": [
542 | "\n",
543 | "if __name__ == \"__main__\":\n",
544 | " import pylab as pl \n",
545 | " def hyp_svm():\n",
546 | " \n",
547 | " clf = HYP_SVM(C=100.0)\n",
548 | " clf.m_func(X_train,X_test,y_train)\n",
549 | " clf.fit(X_train,X_test, y_train)\n",
550 | " y_predict = clf.predict(X_test)\n",
551 | " gm(y_predict,y_test)\n",
552 | " correct = np.sum(y_predict == y_test)\n",
553 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
554 | " print(\"Accuracy\",correct/len(y_predict))\n",
555 | "\n",
556 | " hyp_svm() "
557 | ]
558 | },
559 | {
560 | "cell_type": "code",
561 | "execution_count": null,
562 | "metadata": {},
563 | "outputs": [],
564 | "source": []
565 | },
566 | {
567 | "cell_type": "code",
568 | "execution_count": null,
569 | "metadata": {},
570 | "outputs": [],
571 | "source": []
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": 13,
576 | "metadata": {},
577 | "outputs": [
578 | {
579 | "name": "stdout",
580 | "output_type": "stream",
581 | "text": [
582 | "Overall RBF KERNEL SVM accuracy: 0.6290322580645161\n"
583 | ]
584 | }
585 | ],
586 | "source": [
587 | "clf_svm = svm.SVC(kernel='rbf', gamma=0.0001, C=100)\n",
588 | "clf_svm.fit(X_train, y_train)\n",
589 | "y_pred_svm = clf_svm.predict(X_test) \n",
590 | "acc_svm = accuracy_score(y_test, y_pred_svm)\n",
591 | "print (\"Overall RBF KERNEL SVM accuracy: \",acc_svm)"
592 | ]
593 | },
594 | {
595 | "cell_type": "code",
596 | "execution_count": null,
597 | "metadata": {},
598 | "outputs": [],
599 | "source": []
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": null,
604 | "metadata": {},
605 | "outputs": [],
606 | "source": []
607 | },
608 | {
609 | "cell_type": "code",
610 | "execution_count": null,
611 | "metadata": {},
612 | "outputs": [],
613 | "source": []
614 | },
615 | {
616 | "cell_type": "code",
617 | "execution_count": null,
618 | "metadata": {},
619 | "outputs": [],
620 | "source": []
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": null,
625 | "metadata": {},
626 | "outputs": [],
627 | "source": []
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": null,
632 | "metadata": {},
633 | "outputs": [],
634 | "source": []
635 | },
636 | {
637 | "cell_type": "code",
638 | "execution_count": null,
639 | "metadata": {},
640 | "outputs": [],
641 | "source": []
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": null,
646 | "metadata": {},
647 | "outputs": [],
648 | "source": []
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": null,
653 | "metadata": {},
654 | "outputs": [],
655 | "source": []
656 | },
657 | {
658 | "cell_type": "code",
659 | "execution_count": null,
660 | "metadata": {},
661 | "outputs": [],
662 | "source": []
663 | },
664 | {
665 | "cell_type": "code",
666 | "execution_count": null,
667 | "metadata": {},
668 | "outputs": [],
669 | "source": []
670 | },
671 | {
672 | "cell_type": "code",
673 | "execution_count": null,
674 | "metadata": {},
675 | "outputs": [],
676 | "source": []
677 | },
678 | {
679 | "cell_type": "markdown",
680 | "metadata": {},
681 | "source": [
682 | "# Normal SVM using CVXOPT"
683 | ]
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": 8,
688 | "metadata": {},
689 | "outputs": [],
690 | "source": [
691 | "from cvxopt import matrix\n",
692 | "class SVM(object):\n",
693 | "\n",
694 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
695 | " self.kernel = kernel\n",
696 | " self.C = C\n",
697 | " if self.C is not None: self.C = float(self.C)\n",
698 | " def fit(self, X, y):\n",
699 | " self.kernel = gaussian_kernel\n",
700 | " n_samples, n_features = X.shape\n",
701 | " # Gram matrix\n",
702 | " K = np.zeros((n_samples, n_samples))\n",
703 | " for i in range(n_samples):\n",
704 | " for j in range(n_samples):\n",
705 | " K[i,j] = gaussian_kernel(X[i], X[j])\n",
706 | " # print(K[i,j])\n",
707 | " print(K.shape)\n",
708 | "\n",
709 | " P = cvxopt.matrix(np.outer(y,y) * K)\n",
710 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
711 | " A = cvxopt.matrix(y, (1,n_samples))\n",
712 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
713 | " b = cvxopt.matrix(0.0)\n",
714 | " #print(P,q,A,b)\n",
715 | " if self.C is None:\n",
716 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
717 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
718 | " \n",
719 | " else:\n",
720 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
721 | " tmp2 = np.identity(n_samples)\n",
722 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
723 | " tmp1 = np.zeros(n_samples)\n",
724 | " tmp2 = np.ones(n_samples) * self.C\n",
725 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
726 | " # solve QP problem\n",
727 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
728 | " print(solution['status'])\n",
729 | " # Lagrange multipliers\n",
730 | " a = np.ravel(solution['x'])\n",
731 | " # print(a)\n",
732 | " # Support vectors have non zero lagrange multipliers\n",
733 | " sv = a > 1e-5\n",
734 | " print(sv.shape)\n",
735 | " ind = np.arange(len(a))[sv]\n",
736 | " self.a = a[sv]\n",
737 | " self.sv = X[sv]\n",
738 | " self.sv_y = y[sv]\n",
739 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
740 | "\n",
741 | " # Intercept\n",
742 | " self.b = 0\n",
743 | " for n in range(len(self.a)):\n",
744 | " self.b += self.sv_y[n]\n",
745 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n",
746 | " self.b /= len(self.a)\n",
747 | "\n",
748 | " # Weight vector\n",
749 | " if self.kernel == gaussian_kernel:\n",
750 | " self.w = np.zeros(n_features)\n",
751 | " for n in range(len(self.a)):\n",
752 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
753 | " #print(self.w)\n",
754 | " else:\n",
755 | " self.w = None\n",
756 | "\n",
757 | " def project(self, X):\n",
758 | " if self.w is None:\n",
759 | " return np.dot(X, self.w) + self.b\n",
760 | " else:\n",
761 | " y_predict = np.zeros(len(X))\n",
762 | " X=np.asarray(X)\n",
763 | " for i in range(len(X)):\n",
764 | " s = 0\n",
765 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
766 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
767 | " y_predict[i] = s\n",
768 | " # print(y_predict[i])\n",
769 | " return y_predict + self.b\n",
770 | "\n",
771 | " def predict(self, X):\n",
772 | " return np.sign(self.project(X))"
773 | ]
774 | },
775 | {
776 | "cell_type": "code",
777 | "execution_count": 9,
778 | "metadata": {},
779 | "outputs": [
780 | {
781 | "name": "stdout",
782 | "output_type": "stream",
783 | "text": [
784 | "(244, 244)\n",
785 | " pcost dcost gap pres dres\n",
786 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n",
787 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n",
788 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n",
789 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n",
790 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n",
791 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n",
792 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n",
793 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n",
794 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n",
795 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n",
796 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n",
797 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n",
798 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n",
799 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n",
800 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n",
801 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n",
802 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n",
803 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n",
804 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n",
805 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n",
806 | "Optimal solution found.\n",
807 | "optimal\n",
808 | "(244,)\n",
809 | "206 support vectors out of 244 points\n",
810 | "y_test min 21\n",
811 | "y_test max 41\n",
812 | "y_pred min 5\n",
813 | "y_pred max 35\n",
814 | "0.23809523809523808 0.8536585365853658\n",
815 | "GM 0.45083481733371616\n",
816 | "40 out of 62 predictions correct\n",
817 | "Accuracy 0.6451612903225806\n"
818 | ]
819 | }
820 | ],
821 | "source": [
822 | "\n",
823 | "if __name__ == \"__main__\":\n",
824 | " import pylab as pl \n",
825 | " def normal_svm():\n",
826 | " \n",
827 | " clf = SVM(C=100.0)\n",
828 | " clf.fit(X_train, y_train)\n",
829 | " y_predict = clf.predict(X_test)\n",
830 | " gm(y_predict,y_test)\n",
831 | " correct = np.sum(y_predict == y_test)\n",
832 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
833 | " print(\"Accuracy\",correct/len(y_predict))\n",
834 | "\n",
835 | " normal_svm() "
836 | ]
837 | },
838 | {
839 | "cell_type": "code",
840 | "execution_count": null,
841 | "metadata": {},
842 | "outputs": [],
843 | "source": []
844 | }
845 | ],
846 | "metadata": {
847 | "kernelspec": {
848 | "display_name": "Python 3",
849 | "language": "python",
850 | "name": "python3"
851 | },
852 | "language_info": {
853 | "codemirror_mode": {
854 | "name": "ipython",
855 | "version": 3
856 | },
857 | "file_extension": ".py",
858 | "mimetype": "text/x-python",
859 | "name": "python",
860 | "nbconvert_exporter": "python",
861 | "pygments_lexer": "ipython3",
862 | "version": "3.6.5"
863 | }
864 | },
865 | "nbformat": 4,
866 | "nbformat_minor": 2
867 | }
868 |
--------------------------------------------------------------------------------
/FUZZY SVM.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "C:\\Users\\HP\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n"
14 | ]
15 | }
16 | ],
17 | "source": [
18 | "import numpy as np\n",
19 | "from numpy import linalg\n",
20 | "import cvxopt\n",
21 | "import cvxopt.solvers\n",
22 | "import pandas as pd\n",
23 | "from sklearn import cross_validation\n",
24 | "from sklearn.metrics import classification_report\n",
25 | "from sklearn.metrics import accuracy_score\n",
26 | "from cvxopt import matrix as cvxopt_matrix\n",
27 | "from cvxopt import solvers as cvxopt_solvers\n",
28 | "from sklearn import svm\n",
29 | "import math "
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "data": {
39 | "text/html": [
40 | "\n",
41 | "\n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " | \n",
58 | " 0 | \n",
59 | " 1 | \n",
60 | " 2 | \n",
61 | " 3 | \n",
62 | " 4 | \n",
63 | " 5 | \n",
64 | " 6 | \n",
65 | " 7 | \n",
66 | " 8 | \n",
67 | "
\n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " | 0 | \n",
72 | " 6.0 | \n",
73 | " 148.0 | \n",
74 | " 72.0 | \n",
75 | " 35.0 | \n",
76 | " 0.0 | \n",
77 | " 33.6 | \n",
78 | " 0.627 | \n",
79 | " 50.0 | \n",
80 | " 1.0 | \n",
81 | "
\n",
82 | " \n",
83 | " | 1 | \n",
84 | " 8.0 | \n",
85 | " 183.0 | \n",
86 | " 64.0 | \n",
87 | " 0.0 | \n",
88 | " 0.0 | \n",
89 | " 23.3 | \n",
90 | " 0.672 | \n",
91 | " 32.0 | \n",
92 | " 1.0 | \n",
93 | "
\n",
94 | " \n",
95 | " | 2 | \n",
96 | " 0.0 | \n",
97 | " 137.0 | \n",
98 | " 40.0 | \n",
99 | " 35.0 | \n",
100 | " 168.0 | \n",
101 | " 43.1 | \n",
102 | " 2.288 | \n",
103 | " 33.0 | \n",
104 | " 1.0 | \n",
105 | "
\n",
106 | " \n",
107 | " | 3 | \n",
108 | " 3.0 | \n",
109 | " 78.0 | \n",
110 | " 50.0 | \n",
111 | " 32.0 | \n",
112 | " 88.0 | \n",
113 | " 31.0 | \n",
114 | " 0.248 | \n",
115 | " 26.0 | \n",
116 | " 1.0 | \n",
117 | "
\n",
118 | " \n",
119 | " | 4 | \n",
120 | " 2.0 | \n",
121 | " 197.0 | \n",
122 | " 70.0 | \n",
123 | " 45.0 | \n",
124 | " 543.0 | \n",
125 | " 30.5 | \n",
126 | " 0.158 | \n",
127 | " 53.0 | \n",
128 | " 1.0 | \n",
129 | "
\n",
130 | " \n",
131 | "
\n",
132 | "
"
133 | ],
134 | "text/plain": [
135 | " 0 1 2 3 4 5 6 7 8\n",
136 | "0 6.0 148.0 72.0 35.0 0.0 33.6 0.627 50.0 1.0\n",
137 | "1 8.0 183.0 64.0 0.0 0.0 23.3 0.672 32.0 1.0\n",
138 | "2 0.0 137.0 40.0 35.0 168.0 43.1 2.288 33.0 1.0\n",
139 | "3 3.0 78.0 50.0 32.0 88.0 31.0 0.248 26.0 1.0\n",
140 | "4 2.0 197.0 70.0 45.0 543.0 30.5 0.158 53.0 1.0"
141 | ]
142 | },
143 | "execution_count": 2,
144 | "metadata": {},
145 | "output_type": "execute_result"
146 | }
147 | ],
148 | "source": [
149 | "train = pd.read_csv(\"modifiedpima.csv\", header=None)\n",
150 | "train.head()"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": 3,
156 | "metadata": {},
157 | "outputs": [
158 | {
159 | "data": {
160 | "text/html": [
161 | "\n",
162 | "\n",
175 | "
\n",
176 | " \n",
177 | " \n",
178 | " | \n",
179 | " 0 | \n",
180 | " 1 | \n",
181 | " 2 | \n",
182 | " 3 | \n",
183 | " 4 | \n",
184 | " 5 | \n",
185 | " 6 | \n",
186 | " 7 | \n",
187 | "
\n",
188 | " \n",
189 | " \n",
190 | " \n",
191 | " | 0 | \n",
192 | " 6.0 | \n",
193 | " 148.0 | \n",
194 | " 72.0 | \n",
195 | " 35.0 | \n",
196 | " 0.0 | \n",
197 | " 33.6 | \n",
198 | " 0.627 | \n",
199 | " 50.0 | \n",
200 | "
\n",
201 | " \n",
202 | " | 1 | \n",
203 | " 8.0 | \n",
204 | " 183.0 | \n",
205 | " 64.0 | \n",
206 | " 0.0 | \n",
207 | " 0.0 | \n",
208 | " 23.3 | \n",
209 | " 0.672 | \n",
210 | " 32.0 | \n",
211 | "
\n",
212 | " \n",
213 | " | 2 | \n",
214 | " 0.0 | \n",
215 | " 137.0 | \n",
216 | " 40.0 | \n",
217 | " 35.0 | \n",
218 | " 168.0 | \n",
219 | " 43.1 | \n",
220 | " 2.288 | \n",
221 | " 33.0 | \n",
222 | "
\n",
223 | " \n",
224 | " | 3 | \n",
225 | " 3.0 | \n",
226 | " 78.0 | \n",
227 | " 50.0 | \n",
228 | " 32.0 | \n",
229 | " 88.0 | \n",
230 | " 31.0 | \n",
231 | " 0.248 | \n",
232 | " 26.0 | \n",
233 | "
\n",
234 | " \n",
235 | " | 4 | \n",
236 | " 2.0 | \n",
237 | " 197.0 | \n",
238 | " 70.0 | \n",
239 | " 45.0 | \n",
240 | " 543.0 | \n",
241 | " 30.5 | \n",
242 | " 0.158 | \n",
243 | " 53.0 | \n",
244 | "
\n",
245 | " \n",
246 | "
\n",
247 | "
"
248 | ],
249 | "text/plain": [
250 | " 0 1 2 3 4 5 6 7\n",
251 | "0 6.0 148.0 72.0 35.0 0.0 33.6 0.627 50.0\n",
252 | "1 8.0 183.0 64.0 0.0 0.0 23.3 0.672 32.0\n",
253 | "2 0.0 137.0 40.0 35.0 168.0 43.1 2.288 33.0\n",
254 | "3 3.0 78.0 50.0 32.0 88.0 31.0 0.248 26.0\n",
255 | "4 2.0 197.0 70.0 45.0 543.0 30.5 0.158 53.0"
256 | ]
257 | },
258 | "execution_count": 3,
259 | "metadata": {},
260 | "output_type": "execute_result"
261 | }
262 | ],
263 | "source": [
264 | "features = train.columns[0:8]\n",
265 | "X = train[features]\n",
266 | "y = train[8]\n",
267 | "X.head()"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 53,
273 | "metadata": {},
274 | "outputs": [],
275 | "source": [
276 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=10)\n"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": 54,
282 | "metadata": {},
283 | "outputs": [
284 | {
285 | "name": "stdout",
286 | "output_type": "stream",
287 | "text": [
288 | "(614, 8) (154, 8)\n"
289 | ]
290 | }
291 | ],
292 | "source": [
293 | "print(X_train.shape,X_test.shape)"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 55,
299 | "metadata": {},
300 | "outputs": [],
301 | "source": [
302 | "X_train=np.asarray(X_train)\n",
303 | "y_train=np.asarray(y_train)"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 56,
309 | "metadata": {},
310 | "outputs": [],
311 | "source": [
312 | "def linear_kernel(x1, x2):\n",
313 | " return np.dot(x1, x2)\n",
314 | "\n",
315 | "def polynomial_kernel(x, y, p=3):\n",
316 | " return (1 + np.dot(x, y)) ** p\n",
317 | "\n",
318 | "def gaussian_kernel(x, y, sigma=100.0):\n",
319 | " # print(-linalg.norm(x-y)**2)\n",
320 | " x=np.asarray(x)\n",
321 | " y=np.asarray(y)\n",
322 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n",
323 | "\n",
324 | "def gm(y_predict,y_test):\n",
325 | " test_min=0\n",
326 | " test_max=0\n",
327 | " pred_min=0\n",
328 | " pred_max=0\n",
329 | " y_test=np.asarray(y_test)\n",
330 | " for i in range(0,154):\n",
331 | " if(y_test[i]==1):\n",
332 | " test_min=test_min+1\n",
333 | " else:\n",
334 | " test_max=test_max+1\n",
335 | " print(\"y_test min\",test_min) \n",
336 | " print(\"y_test max\",test_max)\n",
337 | " for i in range(0,154):\n",
338 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n",
339 | " pred_min=pred_min+1\n",
340 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n",
341 | " pred_max=pred_max+1\n",
342 | " print(\"y_pred min\",pred_min) \n",
343 | " print(\"y_pred max\",pred_max)\n",
344 | " se=pred_min/test_min\n",
345 | " sp=pred_max/test_max\n",
346 | " print(se,sp)\n",
347 | " gm=math.sqrt(se*sp)\n",
348 | " print(\"GM\",gm)"
349 | ]
350 | },
351 | {
352 | "cell_type": "markdown",
353 | "metadata": {},
354 | "source": [
355 | "# FSVM using Hyperplane"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 68,
361 | "metadata": {},
362 | "outputs": [],
363 | "source": [
364 | "from cvxopt import matrix\n",
365 | "class HYP_SVM(object):\n",
366 | "\n",
367 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
368 | " self.kernel = kernel\n",
369 | " self.C = C\n",
370 | " if self.C is not None: self.C = float(self.C)\n",
371 | " def m_func(self, X_train,X_test, y):\n",
372 | " n_samples, n_features = X_train.shape \n",
373 | " nt_samples, nt_features= X_test.shape\n",
374 | " self.K = np.zeros((n_samples, n_samples))\n",
375 | " for i in range(n_samples):\n",
376 | " for j in range(n_samples):\n",
377 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
378 | " # print(K[i,j])\n",
379 | " X_train=np.asarray(X_train)\n",
380 | " X_test=np.asarray(X_test)\n",
381 | " K1 = np.zeros((n_samples, n_samples))\n",
382 | " for i in range(n_samples):\n",
383 | " for j in range(n_samples):\n",
384 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
385 | " # print(K[i,j])\n",
386 | " print(K1.shape)\n",
387 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
388 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
389 | " A = cvxopt.matrix(y, (1,n_samples))\n",
390 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
391 | " b = cvxopt.matrix(0.0)\n",
392 | " #print(P,q,A,b)\n",
393 | " if self.C is None:\n",
394 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
395 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
396 | " \n",
397 | " else:\n",
398 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
399 | " tmp2 = np.identity(n_samples)\n",
400 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
401 | " tmp1 = np.zeros(n_samples)\n",
402 | " tmp2 = np.ones(n_samples) * self.C\n",
403 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
404 | " # solve QP problem\n",
405 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
406 | " print(solution['status'])\n",
407 | " # Lagrange multipliers\n",
408 | " a = np.ravel(solution['x'])\n",
409 | " a_org = np.ravel(solution['x'])\n",
410 | " # Support vectors have non zero lagrange multipliers\n",
411 | " sv = a > 1e-5\n",
412 | " #print(sv.shape)\n",
413 | " ind = np.arange(len(a))[sv]\n",
414 | " self.a_org=a\n",
415 | " self.a = a[sv]\n",
416 | " self.sv = X_train[sv]\n",
417 | " self.sv_y = y[sv]\n",
418 | " self.sv_yorg=y\n",
419 | " self.kernel = gaussian_kernel\n",
420 | " X_train=np.asarray(X_train)\n",
421 | " b = 0\n",
422 | " for n in range(len(self.a)):\n",
423 | " b += self.sv_y[n]\n",
424 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
425 | " b /= len(self.a)\n",
426 | " # print(self.a_org[1])\n",
427 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n",
428 | " w_phi=0\n",
429 | " total=0\n",
430 | " for n in range(len(self.a_org)):\n",
431 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n",
432 | " self.d_hyp=np.zeros(n_samples)\n",
433 | " for n in range(len(self.a_org)):\n",
434 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n",
435 | " func=np.zeros((n_samples))\n",
436 | " func=np.asarray(func)\n",
437 | " typ=2\n",
438 | " if(typ==1):\n",
439 | " for i in range(n_samples):\n",
440 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n",
441 | " beta=0.2\n",
442 | " if(typ==2):\n",
443 | " for i in range(n_samples):\n",
444 | " func[i]=2/(1+beta*self.d_hyp[i])\n",
445 | " r_max=268/500\n",
446 | " r_min=1\n",
447 | " self.m=func[0:268]*r_min\n",
448 | " print(self.m.shape)\n",
449 | " self.m=np.append(self.m,func[268:768]*r_max)\n",
450 | " print(self.m.shape)\n",
451 | " \n",
452 | " ##############################################################################\n",
453 | "\n",
454 | "\n",
455 | " def fit(self, X_train,X_test, y):\n",
456 | " self.kernel = gaussian_kernel\n",
457 | " n_samples, n_features = X_train.shape \n",
458 | " nt_samples, nt_features = X_test.shape\n",
459 | " # Gram matrix\n",
460 | "\n",
461 | " print(self.K.shape)\n",
462 | "\n",
463 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
464 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
465 | " A = cvxopt.matrix(y, (1,n_samples))\n",
466 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
467 | " b = cvxopt.matrix(0.0)\n",
468 | " #print(P,q,A,b)\n",
469 | " if self.C is None:\n",
470 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
471 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
472 | " \n",
473 | " else:\n",
474 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
475 | " tmp2 = np.identity(n_samples)\n",
476 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
477 | " tmp1 = np.zeros(n_samples)\n",
478 | " tmp2 = np.ones(n_samples) * self.C\n",
479 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
480 | " # solve QP problem\n",
481 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
482 | " print(solution['status'])\n",
483 | " # Lagrange multipliers\n",
484 | " a = np.ravel(solution['x'])\n",
485 | " a_org = np.ravel(solution['x'])\n",
486 | " # Support vectors have non zero lagrange multipliers\n",
487 | " for i in range(n_samples):\n",
488 | " sv=np.logical_or(self.a_org 1e-5)\n",
489 | " #print(sv.shape)\n",
490 | " ind = np.arange(len(a))[sv]\n",
491 | " self.a = a[sv]\n",
492 | " self.sv = X_train[sv]\n",
493 | " self.sv_y = y[sv]\n",
494 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
495 | "\n",
496 | " # Intercept\n",
497 | " self.b = 0\n",
498 | " for n in range(len(self.a)):\n",
499 | " self.b += self.sv_y[n]\n",
500 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
501 | " self.b /= len(self.a)\n",
502 | " print(self.b)\n",
503 | "\n",
504 | " # Weight vector\n",
505 | " if self.kernel == gaussian_kernel:\n",
506 | " self.w = np.zeros(n_features)\n",
507 | " for n in range(len(self.a)):\n",
508 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
509 | " else :\n",
510 | " self.w = None \n",
511 | " \n",
512 | " def project(self, X):\n",
513 | " if self.w is None:\n",
514 | " return np.dot(X, self.w) + self.b\n",
515 | " else:\n",
516 | " y_predict = np.zeros(len(X))\n",
517 | " X=np.asarray(X)\n",
518 | " for i in range(len(X)):\n",
519 | " s = 0\n",
520 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
521 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
522 | " y_predict[i] = s\n",
523 | " # print(y_predict[i])\n",
524 | " return y_predict + self.b\n",
525 | "\n",
526 | " def predict(self, X):\n",
527 | " return np.sign(self.project(X))"
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "execution_count": 69,
533 | "metadata": {},
534 | "outputs": [
535 | {
536 | "name": "stdout",
537 | "output_type": "stream",
538 | "text": [
539 | "(614, 614)\n",
540 | " pcost dcost gap pres dres\n",
541 | " 0: 3.1997e+04 -2.6998e+06 3e+06 1e-01 3e-13\n",
542 | " 1: 7.0493e+02 -3.7645e+05 4e+05 1e-02 2e-13\n",
543 | " 2: -1.5080e+04 -8.7294e+04 7e+04 2e-03 2e-13\n",
544 | " 3: -2.0698e+04 -4.2280e+04 2e+04 3e-04 2e-13\n",
545 | " 4: -2.3448e+04 -3.5410e+04 1e+04 2e-04 3e-13\n",
546 | " 5: -2.4919e+04 -3.1494e+04 7e+03 7e-05 3e-13\n",
547 | " 6: -2.5867e+04 -2.9015e+04 3e+03 3e-05 3e-13\n",
548 | " 7: -2.6304e+04 -2.8026e+04 2e+03 1e-05 3e-13\n",
549 | " 8: -2.6625e+04 -2.7314e+04 7e+02 2e-06 4e-13\n",
550 | " 9: -2.6775e+04 -2.7035e+04 3e+02 7e-07 4e-13\n",
551 | "10: -2.6855e+04 -2.6900e+04 4e+01 4e-08 4e-13\n",
552 | "11: -2.6872e+04 -2.6874e+04 2e+00 1e-09 4e-13\n",
553 | "12: -2.6873e+04 -2.6873e+04 4e-02 2e-11 4e-13\n",
554 | "13: -2.6873e+04 -2.6873e+04 6e-04 2e-12 4e-13\n",
555 | "Optimal solution found.\n",
556 | "optimal\n",
557 | "(268,)\n",
558 | "(614,)\n",
559 | "(614, 614)\n",
560 | " pcost dcost gap pres dres\n",
561 | " 0: 3.1997e+04 -2.6998e+06 3e+06 1e-01 3e-13\n",
562 | " 1: 7.0493e+02 -3.7645e+05 4e+05 1e-02 2e-13\n",
563 | " 2: -1.5080e+04 -8.7294e+04 7e+04 2e-03 2e-13\n",
564 | " 3: -2.0698e+04 -4.2280e+04 2e+04 3e-04 2e-13\n",
565 | " 4: -2.3448e+04 -3.5410e+04 1e+04 2e-04 3e-13\n",
566 | " 5: -2.4919e+04 -3.1494e+04 7e+03 7e-05 3e-13\n",
567 | " 6: -2.5867e+04 -2.9015e+04 3e+03 3e-05 3e-13\n",
568 | " 7: -2.6304e+04 -2.8026e+04 2e+03 1e-05 3e-13\n",
569 | " 8: -2.6625e+04 -2.7314e+04 7e+02 2e-06 4e-13\n",
570 | " 9: -2.6775e+04 -2.7035e+04 3e+02 7e-07 4e-13\n",
571 | "10: -2.6855e+04 -2.6900e+04 4e+01 4e-08 4e-13\n",
572 | "11: -2.6872e+04 -2.6874e+04 2e+00 1e-09 4e-13\n",
573 | "12: -2.6873e+04 -2.6873e+04 4e-02 2e-11 4e-13\n",
574 | "13: -2.6873e+04 -2.6873e+04 6e-04 2e-12 4e-13\n",
575 | "Optimal solution found.\n",
576 | "optimal\n",
577 | "-1.0325312375429936\n",
578 | "y_test min 53\n",
579 | "y_test max 101\n",
580 | "y_pred min 30\n",
581 | "y_pred max 89\n",
582 | "0.5660377358490566 0.8811881188118812\n",
583 | "GM 0.7062476390256938\n",
584 | "119 out of 154 predictions correct\n",
585 | "Accuracy 0.7727272727272727\n"
586 | ]
587 | }
588 | ],
589 | "source": [
590 | "\n",
591 | "if __name__ == \"__main__\":\n",
592 | " import pylab as pl \n",
593 | " def hyp_svm():\n",
594 | " \n",
595 | " clf = HYP_SVM(C=100.0)\n",
596 | " typ=2\n",
597 | " clf.m_func(X_train,X_test,y_train)\n",
598 | " clf.fit(X_train,X_test, y_train)\n",
599 | " y_predict = clf.predict(X_test)\n",
600 | " gm(y_predict,y_test)\n",
601 | " correct = np.sum(y_predict == y_test)\n",
602 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
603 | " print(\"Accuracy\",correct/len(y_predict))\n",
604 | "\n",
605 | " hyp_svm() "
606 | ]
607 | },
608 | {
609 | "cell_type": "code",
610 | "execution_count": null,
611 | "metadata": {},
612 | "outputs": [],
613 | "source": []
614 | },
615 | {
616 | "cell_type": "code",
617 | "execution_count": null,
618 | "metadata": {},
619 | "outputs": [],
620 | "source": []
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": null,
625 | "metadata": {},
626 | "outputs": [],
627 | "source": []
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": null,
632 | "metadata": {},
633 | "outputs": [],
634 | "source": []
635 | },
636 | {
637 | "cell_type": "code",
638 | "execution_count": null,
639 | "metadata": {},
640 | "outputs": [],
641 | "source": []
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": null,
646 | "metadata": {},
647 | "outputs": [],
648 | "source": []
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": null,
653 | "metadata": {},
654 | "outputs": [],
655 | "source": []
656 | },
657 | {
658 | "cell_type": "code",
659 | "execution_count": null,
660 | "metadata": {},
661 | "outputs": [],
662 | "source": []
663 | },
664 | {
665 | "cell_type": "code",
666 | "execution_count": null,
667 | "metadata": {},
668 | "outputs": [],
669 | "source": []
670 | },
671 | {
672 | "cell_type": "code",
673 | "execution_count": null,
674 | "metadata": {},
675 | "outputs": [],
676 | "source": []
677 | },
678 | {
679 | "cell_type": "code",
680 | "execution_count": null,
681 | "metadata": {},
682 | "outputs": [],
683 | "source": []
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": null,
688 | "metadata": {},
689 | "outputs": [],
690 | "source": []
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": null,
695 | "metadata": {},
696 | "outputs": [],
697 | "source": []
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": null,
702 | "metadata": {},
703 | "outputs": [],
704 | "source": []
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": null,
709 | "metadata": {},
710 | "outputs": [],
711 | "source": []
712 | },
713 | {
714 | "cell_type": "markdown",
715 | "metadata": {},
716 | "source": [
717 | "# Normal SVM using CVXOPT"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": 145,
723 | "metadata": {},
724 | "outputs": [],
725 | "source": [
726 | "from cvxopt import matrix\n",
727 | "class SVM(object):\n",
728 | "\n",
729 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
730 | " self.kernel = kernel\n",
731 | " self.C = C\n",
732 | " if self.C is not None: self.C = float(self.C)\n",
733 | " def fit(self, X, y):\n",
734 | " self.kernel = gaussian_kernel\n",
735 | " n_samples, n_features = X.shape\n",
736 | " # Gram matrix\n",
737 | " K = np.zeros((n_samples, n_samples))\n",
738 | " for i in range(n_samples):\n",
739 | " for j in range(n_samples):\n",
740 | " K[i,j] = gaussian_kernel(X[i], X[j])\n",
741 | " # print(K[i,j])\n",
742 | " print(K.shape)\n",
743 | "\n",
744 | " P = cvxopt.matrix(np.outer(y,y) * K)\n",
745 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
746 | " A = cvxopt.matrix(y, (1,n_samples))\n",
747 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
748 | " b = cvxopt.matrix(0.0)\n",
749 | " #print(P,q,A,b)\n",
750 | " if self.C is None:\n",
751 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
752 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
753 | " \n",
754 | " else:\n",
755 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
756 | " tmp2 = np.identity(n_samples)\n",
757 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
758 | " tmp1 = np.zeros(n_samples)\n",
759 | " tmp2 = np.ones(n_samples) * self.C\n",
760 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
761 | " # solve QP problem\n",
762 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
763 | " print(solution['status'])\n",
764 | " # Lagrange multipliers\n",
765 | " a = np.ravel(solution['x'])\n",
766 | " # print(a)\n",
767 | " # Support vectors have non zero lagrange multipliers\n",
768 | " sv = a > 1e-5\n",
769 | " print(sv.shape)\n",
770 | " ind = np.arange(len(a))[sv]\n",
771 | " self.a = a[sv]\n",
772 | " self.sv = X[sv]\n",
773 | " self.sv_y = y[sv]\n",
774 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
775 | "\n",
776 | " # Intercept\n",
777 | " self.b = 0\n",
778 | " for n in range(len(self.a)):\n",
779 | " self.b += self.sv_y[n]\n",
780 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n",
781 | " self.b /= len(self.a)\n",
782 | "\n",
783 | " # Weight vector\n",
784 | " if self.kernel == gaussian_kernel:\n",
785 | " self.w = np.zeros(n_features)\n",
786 | " for n in range(len(self.a)):\n",
787 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
788 | " #print(self.w)\n",
789 | " else:\n",
790 | " self.w = None\n",
791 | "\n",
792 | " def project(self, X):\n",
793 | " if self.w is None:\n",
794 | " return np.dot(X, self.w) + self.b\n",
795 | " else:\n",
796 | " y_predict = np.zeros(len(X))\n",
797 | " X=np.asarray(X)\n",
798 | " for i in range(len(X)):\n",
799 | " s = 0\n",
800 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
801 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
802 | " y_predict[i] = s\n",
803 | " # print(y_predict[i])\n",
804 | " return y_predict + self.b\n",
805 | "\n",
806 | " def predict(self, X):\n",
807 | " return np.sign(self.project(X))"
808 | ]
809 | },
810 | {
811 | "cell_type": "code",
812 | "execution_count": 146,
813 | "metadata": {},
814 | "outputs": [
815 | {
816 | "name": "stdout",
817 | "output_type": "stream",
818 | "text": [
819 | "(614, 614)\n",
820 | " pcost dcost gap pres dres\n",
821 | " 0: 4.0124e+04 -2.8615e+06 4e+06 2e-01 3e-13\n",
822 | " 1: 1.0960e+04 -3.9278e+05 4e+05 1e-02 3e-13\n",
823 | " 2: -1.1678e+04 -1.0938e+05 1e+05 2e-03 2e-13\n",
824 | " 3: -1.8013e+04 -4.6040e+04 3e+04 4e-04 3e-13\n",
825 | " 4: -2.0703e+04 -3.7635e+04 2e+04 2e-04 3e-13\n",
826 | " 5: -2.2896e+04 -3.0055e+04 7e+03 7e-05 3e-13\n",
827 | " 6: -2.3582e+04 -2.8515e+04 5e+03 4e-05 3e-13\n",
828 | " 7: -2.4334e+04 -2.6705e+04 2e+03 2e-05 3e-13\n",
829 | " 8: -2.4847e+04 -2.5598e+04 8e+02 3e-06 4e-13\n",
830 | " 9: -2.5003e+04 -2.5317e+04 3e+02 1e-06 3e-13\n",
831 | "10: -2.5063e+04 -2.5207e+04 1e+02 2e-12 4e-13\n",
832 | "11: -2.5108e+04 -2.5145e+04 4e+01 1e-12 4e-13\n",
833 | "12: -2.5120e+04 -2.5128e+04 8e+00 7e-13 4e-13\n",
834 | "13: -2.5124e+04 -2.5124e+04 2e-01 5e-13 4e-13\n",
835 | "14: -2.5124e+04 -2.5124e+04 5e-03 5e-13 4e-13\n",
836 | "Optimal solution found.\n",
837 | "optimal\n",
838 | "(614,)\n",
839 | "404 support vectors out of 614 points\n",
840 | "y_test min 54\n",
841 | "y_test max 100\n",
842 | "y_pred min 29\n",
843 | "y_pred max 78\n",
844 | "0.5370370370370371 0.78\n",
845 | "GM 0.6472162612982533\n",
846 | "107 out of 154 predictions correct\n",
847 | "Accuracy 0.6948051948051948\n"
848 | ]
849 | }
850 | ],
851 | "source": [
852 | "\n",
853 | "if __name__ == \"__main__\":\n",
854 | " import pylab as pl \n",
855 | " def normal_svm():\n",
856 | " \n",
857 | " clf = SVM(C=100.0)\n",
858 | " clf.fit(X_train, y_train)\n",
859 | " y_predict = clf.predict(X_test)\n",
860 | " gm(y_predict,y_test)\n",
861 | " correct = np.sum(y_predict == y_test)\n",
862 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
863 | " print(\"Accuracy\",correct/len(y_predict))\n",
864 | "\n",
865 | " normal_svm() "
866 | ]
867 | },
868 | {
869 | "cell_type": "code",
870 | "execution_count": null,
871 | "metadata": {},
872 | "outputs": [],
873 | "source": []
874 | }
875 | ],
876 | "metadata": {
877 | "kernelspec": {
878 | "display_name": "Python 3",
879 | "language": "python",
880 | "name": "python3"
881 | },
882 | "language_info": {
883 | "codemirror_mode": {
884 | "name": "ipython",
885 | "version": 3
886 | },
887 | "file_extension": ".py",
888 | "mimetype": "text/x-python",
889 | "name": "python",
890 | "nbconvert_exporter": "python",
891 | "pygments_lexer": "ipython3",
892 | "version": "3.6.5"
893 | }
894 | },
895 | "nbformat": 4,
896 | "nbformat_minor": 2
897 | }
898 |
--------------------------------------------------------------------------------
/FUZZY SVM Pageblock.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "C:\\Users\\HP\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n"
14 | ]
15 | }
16 | ],
17 | "source": [
18 | "import numpy as np\n",
19 | "from numpy import linalg\n",
20 | "import cvxopt\n",
21 | "import cvxopt.solvers\n",
22 | "import pandas as pd\n",
23 | "from sklearn import cross_validation\n",
24 | "from sklearn.metrics import classification_report\n",
25 | "from sklearn.metrics import accuracy_score\n",
26 | "from cvxopt import matrix as cvxopt_matrix\n",
27 | "from cvxopt import solvers as cvxopt_solvers\n",
28 | "from sklearn import svm\n",
29 | "import math "
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "data": {
39 | "text/html": [
40 | "\n",
41 | "\n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " | \n",
58 | " 0 | \n",
59 | " 1 | \n",
60 | " 2 | \n",
61 | " 3 | \n",
62 | " 4 | \n",
63 | " 5 | \n",
64 | " 6 | \n",
65 | " 7 | \n",
66 | " 8 | \n",
67 | " 9 | \n",
68 | " 10 | \n",
69 | "
\n",
70 | " \n",
71 | " \n",
72 | " \n",
73 | " | 0 | \n",
74 | " 48.0 | \n",
75 | " 402.0 | \n",
76 | " 19296.0 | \n",
77 | " 8.375 | \n",
78 | " 0.055 | \n",
79 | " 0.094 | \n",
80 | " 4.13 | \n",
81 | " 1058.0 | \n",
82 | " 1814.0 | \n",
83 | " 256.0 | \n",
84 | " 1.0 | \n",
85 | "
\n",
86 | " \n",
87 | " | 1 | \n",
88 | " 304.0 | \n",
89 | " 463.0 | \n",
90 | " 140752.0 | \n",
91 | " 1.523 | \n",
92 | " 0.063 | \n",
93 | " 0.121 | \n",
94 | " 3.91 | \n",
95 | " 8898.0 | \n",
96 | " 17081.0 | \n",
97 | " 2273.0 | \n",
98 | " 1.0 | \n",
99 | "
\n",
100 | " \n",
101 | " | 2 | \n",
102 | " 306.0 | \n",
103 | " 465.0 | \n",
104 | " 142290.0 | \n",
105 | " 1.520 | \n",
106 | " 0.055 | \n",
107 | " 0.123 | \n",
108 | " 2.69 | \n",
109 | " 7861.0 | \n",
110 | " 17452.0 | \n",
111 | " 2925.0 | \n",
112 | " 1.0 | \n",
113 | "
\n",
114 | " \n",
115 | " | 3 | \n",
116 | " 45.0 | \n",
117 | " 79.0 | \n",
118 | " 3555.0 | \n",
119 | " 1.756 | \n",
120 | " 0.087 | \n",
121 | " 0.195 | \n",
122 | " 4.81 | \n",
123 | " 308.0 | \n",
124 | " 693.0 | \n",
125 | " 64.0 | \n",
126 | " 1.0 | \n",
127 | "
\n",
128 | " \n",
129 | " | 4 | \n",
130 | " 311.0 | \n",
131 | " 463.0 | \n",
132 | " 143993.0 | \n",
133 | " 1.489 | \n",
134 | " 0.088 | \n",
135 | " 0.160 | \n",
136 | " 3.93 | \n",
137 | " 12631.0 | \n",
138 | " 23092.0 | \n",
139 | " 3212.0 | \n",
140 | " 1.0 | \n",
141 | "
\n",
142 | " \n",
143 | "
\n",
144 | "
"
145 | ],
146 | "text/plain": [
147 | " 0 1 2 3 4 5 6 7 8 \\\n",
148 | "0 48.0 402.0 19296.0 8.375 0.055 0.094 4.13 1058.0 1814.0 \n",
149 | "1 304.0 463.0 140752.0 1.523 0.063 0.121 3.91 8898.0 17081.0 \n",
150 | "2 306.0 465.0 142290.0 1.520 0.055 0.123 2.69 7861.0 17452.0 \n",
151 | "3 45.0 79.0 3555.0 1.756 0.087 0.195 4.81 308.0 693.0 \n",
152 | "4 311.0 463.0 143993.0 1.489 0.088 0.160 3.93 12631.0 23092.0 \n",
153 | "\n",
154 | " 9 10 \n",
155 | "0 256.0 1.0 \n",
156 | "1 2273.0 1.0 \n",
157 | "2 2925.0 1.0 \n",
158 | "3 64.0 1.0 \n",
159 | "4 3212.0 1.0 "
160 | ]
161 | },
162 | "execution_count": 2,
163 | "metadata": {},
164 | "output_type": "execute_result"
165 | }
166 | ],
167 | "source": [
168 | "train = pd.read_csv(\"modifiedpage.csv\", header=None)\n",
169 | "train.head()"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 3,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/html": [
180 | "\n",
181 | "\n",
194 | "
\n",
195 | " \n",
196 | " \n",
197 | " | \n",
198 | " 0 | \n",
199 | " 1 | \n",
200 | " 2 | \n",
201 | " 3 | \n",
202 | " 4 | \n",
203 | " 5 | \n",
204 | " 6 | \n",
205 | " 7 | \n",
206 | " 8 | \n",
207 | " 9 | \n",
208 | "
\n",
209 | " \n",
210 | " \n",
211 | " \n",
212 | " | 0 | \n",
213 | " 48.0 | \n",
214 | " 402.0 | \n",
215 | " 19296.0 | \n",
216 | " 8.375 | \n",
217 | " 0.055 | \n",
218 | " 0.094 | \n",
219 | " 4.13 | \n",
220 | " 1058.0 | \n",
221 | " 1814.0 | \n",
222 | " 256.0 | \n",
223 | "
\n",
224 | " \n",
225 | " | 1 | \n",
226 | " 304.0 | \n",
227 | " 463.0 | \n",
228 | " 140752.0 | \n",
229 | " 1.523 | \n",
230 | " 0.063 | \n",
231 | " 0.121 | \n",
232 | " 3.91 | \n",
233 | " 8898.0 | \n",
234 | " 17081.0 | \n",
235 | " 2273.0 | \n",
236 | "
\n",
237 | " \n",
238 | " | 2 | \n",
239 | " 306.0 | \n",
240 | " 465.0 | \n",
241 | " 142290.0 | \n",
242 | " 1.520 | \n",
243 | " 0.055 | \n",
244 | " 0.123 | \n",
245 | " 2.69 | \n",
246 | " 7861.0 | \n",
247 | " 17452.0 | \n",
248 | " 2925.0 | \n",
249 | "
\n",
250 | " \n",
251 | " | 3 | \n",
252 | " 45.0 | \n",
253 | " 79.0 | \n",
254 | " 3555.0 | \n",
255 | " 1.756 | \n",
256 | " 0.087 | \n",
257 | " 0.195 | \n",
258 | " 4.81 | \n",
259 | " 308.0 | \n",
260 | " 693.0 | \n",
261 | " 64.0 | \n",
262 | "
\n",
263 | " \n",
264 | " | 4 | \n",
265 | " 311.0 | \n",
266 | " 463.0 | \n",
267 | " 143993.0 | \n",
268 | " 1.489 | \n",
269 | " 0.088 | \n",
270 | " 0.160 | \n",
271 | " 3.93 | \n",
272 | " 12631.0 | \n",
273 | " 23092.0 | \n",
274 | " 3212.0 | \n",
275 | "
\n",
276 | " \n",
277 | "
\n",
278 | "
"
279 | ],
280 | "text/plain": [
281 | " 0 1 2 3 4 5 6 7 8 9\n",
282 | "0 48.0 402.0 19296.0 8.375 0.055 0.094 4.13 1058.0 1814.0 256.0\n",
283 | "1 304.0 463.0 140752.0 1.523 0.063 0.121 3.91 8898.0 17081.0 2273.0\n",
284 | "2 306.0 465.0 142290.0 1.520 0.055 0.123 2.69 7861.0 17452.0 2925.0\n",
285 | "3 45.0 79.0 3555.0 1.756 0.087 0.195 4.81 308.0 693.0 64.0\n",
286 | "4 311.0 463.0 143993.0 1.489 0.088 0.160 3.93 12631.0 23092.0 3212.0"
287 | ]
288 | },
289 | "execution_count": 3,
290 | "metadata": {},
291 | "output_type": "execute_result"
292 | }
293 | ],
294 | "source": [
295 | "features = train.columns[0:10]\n",
296 | "X = train[features]\n",
297 | "y = train[10]\n",
298 | "X.head()"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": 4,
304 | "metadata": {},
305 | "outputs": [],
306 | "source": [
307 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=40)\n"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": 5,
313 | "metadata": {},
314 | "outputs": [
315 | {
316 | "name": "stdout",
317 | "output_type": "stream",
318 | "text": [
319 | "(4378, 10) (1095, 10)\n"
320 | ]
321 | }
322 | ],
323 | "source": [
324 | "print(X_train.shape,X_test.shape)"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 6,
330 | "metadata": {},
331 | "outputs": [],
332 | "source": [
333 | "X_train=np.asarray(X_train)\n",
334 | "y_train=np.asarray(y_train)"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": 17,
340 | "metadata": {},
341 | "outputs": [],
342 | "source": [
343 | "def linear_kernel(x1, x2):\n",
344 | " return np.dot(x1, x2)\n",
345 | "\n",
346 | "def polynomial_kernel(x, y, p=3):\n",
347 | " return (1 + np.dot(x, y)) ** p\n",
348 | "\n",
349 | "def gaussian_kernel(x, y, sigma=90.0):\n",
350 | " # print(-linalg.norm(x-y)**2)\n",
351 | " x=np.asarray(x)\n",
352 | " y=np.asarray(y)\n",
353 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n",
354 | "\n",
355 | "def gm(y_predict,y_test):\n",
356 | " test_min=0\n",
357 | " test_max=0\n",
358 | " pred_min=0\n",
359 | " pred_max=0\n",
360 | " y_test=np.asarray(y_test)\n",
361 | " for i in range(0,1095):\n",
362 | " if(y_test[i]==1):\n",
363 | " test_min=test_min+1\n",
364 | " else:\n",
365 | " test_max=test_max+1\n",
366 | " print(\"y_test min\",test_min) \n",
367 | " print(\"y_test max\",test_max)\n",
368 | " for i in range(0,1095):\n",
369 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n",
370 | " pred_min=pred_min+1\n",
371 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n",
372 | " pred_max=pred_max+1\n",
373 | " print(\"y_pred min\",pred_min) \n",
374 | " print(\"y_pred max\",pred_max)\n",
375 | " se=pred_min/test_min\n",
376 | " sp=pred_max/test_max\n",
377 | " print(se,sp)\n",
378 | " gm=math.sqrt(se*sp)\n",
379 | " print(\"GM\",gm)"
380 | ]
381 | },
382 | {
383 | "cell_type": "markdown",
384 | "metadata": {},
385 | "source": [
386 | "# FSVM using Hyperplane"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 18,
392 | "metadata": {},
393 | "outputs": [],
394 | "source": [
395 | "from cvxopt import matrix\n",
396 | "class HYP_SVM(object):\n",
397 | "\n",
398 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
399 | " self.kernel = kernel\n",
400 | " self.C = C\n",
401 | " if self.C is not None: self.C = float(self.C)\n",
402 | " def m_func(self, X_train,X_test, y):\n",
403 | " n_samples, n_features = X_train.shape \n",
404 | " nt_samples, nt_features= X_test.shape\n",
405 | " self.K = np.zeros((n_samples, n_samples))\n",
406 | " for i in range(n_samples):\n",
407 | " for j in range(n_samples):\n",
408 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
409 | " # print(K[i,j])\n",
410 | " X_train=np.asarray(X_train)\n",
411 | " X_test=np.asarray(X_test)\n",
412 | " K1 = np.zeros((n_samples, n_samples))\n",
413 | " for i in range(n_samples):\n",
414 | " for j in range(n_samples):\n",
415 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n",
416 | " # print(K[i,j])\n",
417 | " print(K1.shape)\n",
418 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
419 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
420 | " A = cvxopt.matrix(y, (1,n_samples))\n",
421 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
422 | " b = cvxopt.matrix(0.0)\n",
423 | " #print(P,q,A,b)\n",
424 | " if self.C is None:\n",
425 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
426 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
427 | " \n",
428 | " else:\n",
429 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
430 | " tmp2 = np.identity(n_samples)\n",
431 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
432 | " tmp1 = np.zeros(n_samples)\n",
433 | " tmp2 = np.ones(n_samples) * self.C\n",
434 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
435 | " # solve QP problem\n",
436 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
437 | " print(solution['status'])\n",
438 | " # Lagrange multipliers\n",
439 | " a = np.ravel(solution['x'])\n",
440 | " a_org = np.ravel(solution['x'])\n",
441 | " # Support vectors have non zero lagrange multipliers\n",
442 | " sv = a > 1e-5\n",
443 | " #print(sv.shape)\n",
444 | " ind = np.arange(len(a))[sv]\n",
445 | " self.a_org=a\n",
446 | " self.a = a[sv]\n",
447 | " self.sv = X_train[sv]\n",
448 | " self.sv_y = y[sv]\n",
449 | " self.sv_yorg=y\n",
450 | " self.kernel = gaussian_kernel\n",
451 | " X_train=np.asarray(X_train)\n",
452 | " b = 0\n",
453 | " for n in range(len(self.a)):\n",
454 | " b += self.sv_y[n]\n",
455 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
456 | " b /= len(self.a)\n",
457 | " # print(self.a_org[1])\n",
458 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n",
459 | " w_phi=0\n",
460 | " total=0\n",
461 | " for n in range(len(self.a_org)):\n",
462 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n",
463 | " self.d_hyp=np.zeros(n_samples)\n",
464 | " for n in range(len(self.a_org)):\n",
465 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n",
466 | " func=np.zeros((n_samples))\n",
467 | " func=np.asarray(func)\n",
468 | " typ=1\n",
469 | " if(typ==1):\n",
470 | " for i in range(n_samples):\n",
471 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n",
472 | " beta=0.8\n",
473 | " if(typ==2):\n",
474 | " for i in range(n_samples):\n",
475 | " func[i]=2/(1+beta*self.d_hyp[i])\n",
476 | " r_max=103/4074\n",
477 | " r_min=1\n",
478 | " self.m=func[0:115]*r_min\n",
479 | " print(self.m.shape)\n",
480 | " self.m=np.append(self.m,func[115:5473]*r_max)\n",
481 | " print(self.m.shape)\n",
482 | " \n",
483 | " ##############################################################################\n",
484 | "\n",
485 | "\n",
486 | " def fit(self, X_train,X_test, y):\n",
487 | " self.kernel = gaussian_kernel\n",
488 | " n_samples, n_features = X_train.shape \n",
489 | " nt_samples, nt_features = X_test.shape\n",
490 | " # Gram matrix\n",
491 | "\n",
492 | " print(self.K.shape)\n",
493 | "\n",
494 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n",
495 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
496 | " A = cvxopt.matrix(y, (1,n_samples))\n",
497 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
498 | " b = cvxopt.matrix(0.0)\n",
499 | " #print(P,q,A,b)\n",
500 | " if self.C is None:\n",
501 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
502 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
503 | " \n",
504 | " else:\n",
505 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
506 | " tmp2 = np.identity(n_samples)\n",
507 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
508 | " tmp1 = np.zeros(n_samples)\n",
509 | " tmp2 = np.ones(n_samples) * self.C\n",
510 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
511 | " # solve QP problem\n",
512 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
513 | " print(solution['status'])\n",
514 | " # Lagrange multipliers\n",
515 | " a = np.ravel(solution['x'])\n",
516 | " a_org = np.ravel(solution['x'])\n",
517 | " # Support vectors have non zero lagrange multipliers\n",
518 | " for i in range(n_samples):\n",
519 | " sv=np.logical_or(self.a_org 1e-5)\n",
520 | " #print(sv.shape)\n",
521 | " ind = np.arange(len(a))[sv]\n",
522 | " self.a = a[sv]\n",
523 | " self.sv = X_train[sv]\n",
524 | " self.sv_y = y[sv]\n",
525 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
526 | "\n",
527 | " # Intercept\n",
528 | " self.b = 0\n",
529 | " for n in range(len(self.a)):\n",
530 | " self.b += self.sv_y[n]\n",
531 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n",
532 | " self.b /= len(self.a)\n",
533 | " print(self.b)\n",
534 | "\n",
535 | " # Weight vector\n",
536 | " if self.kernel == gaussian_kernel:\n",
537 | " self.w = np.zeros(n_features)\n",
538 | " for n in range(len(self.a)):\n",
539 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
540 | " else :\n",
541 | " self.w = None \n",
542 | " \n",
543 | " def project(self, X):\n",
544 | " if self.w is None:\n",
545 | " return np.dot(X, self.w) + self.b\n",
546 | " else:\n",
547 | " y_predict = np.zeros(len(X))\n",
548 | " X=np.asarray(X)\n",
549 | " for i in range(len(X)):\n",
550 | " s = 0\n",
551 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
552 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
553 | " y_predict[i] = s\n",
554 | " # print(y_predict[i])\n",
555 | " return y_predict + self.b\n",
556 | "\n",
557 | " def predict(self, X):\n",
558 | " return np.sign(self.project(X))"
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": null,
564 | "metadata": {},
565 | "outputs": [],
566 | "source": [
567 | "\n",
568 | "if __name__ == \"__main__\":\n",
569 | " import pylab as pl \n",
570 | " def hyp_svm():\n",
571 | " \n",
572 | " clf = HYP_SVM(C=100.0)\n",
573 | " typ=2\n",
574 | " clf.m_func(X_train,X_test,y_train)\n",
575 | " clf.fit(X_train,X_test, y_train)\n",
576 | " y_predict = clf.predict(X_test)\n",
577 | " gm(y_predict,y_test)\n",
578 | " correct = np.sum(y_predict == y_test)\n",
579 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
580 | " print(\"Accuracy\",correct/len(y_predict))\n",
581 | "\n",
582 | " hyp_svm() "
583 | ]
584 | },
585 | {
586 | "cell_type": "code",
587 | "execution_count": null,
588 | "metadata": {},
589 | "outputs": [],
590 | "source": []
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": null,
595 | "metadata": {},
596 | "outputs": [],
597 | "source": []
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": 10,
602 | "metadata": {},
603 | "outputs": [
604 | {
605 | "name": "stdout",
606 | "output_type": "stream",
607 | "text": [
608 | "Overall RBF KERNEL SVM accuracy: 0.9808219178082191\n"
609 | ]
610 | }
611 | ],
612 | "source": [
613 | "clf_svm = svm.SVC(kernel='rbf', gamma=0.001, C=100)\n",
614 | "clf_svm.fit(X_train, y_train)\n",
615 | "y_pred_svm = clf_svm.predict(X_test) \n",
616 | "acc_svm = accuracy_score(y_test, y_pred_svm)\n",
617 | "print (\"Overall RBF KERNEL SVM accuracy: \",acc_svm)"
618 | ]
619 | },
704 | {
705 | "cell_type": "markdown",
706 | "metadata": {},
707 | "source": [
708 | "# Normal SVM using CVXOPT"
709 | ]
710 | },
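 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
   "The `fit` method below passes the soft-margin SVM dual,\n",
   "$\\max_{\\alpha} \\; \\sum_i \\alpha_i - \\tfrac{1}{2}\\sum_{i,j} \\alpha_i \\alpha_j y_i y_j K(x_i, x_j)$ subject to $0 \\le \\alpha_i \\le C$ and $\\sum_i \\alpha_i y_i = 0$,\n",
   "to `cvxopt.solvers.qp`, which minimises $\\tfrac{1}{2} x^T P x + q^T x$ subject to $G x \\le h$ and $A x = b$.\n",
   "Hence $P_{ij} = y_i y_j K(x_i, x_j)$, $q = -\\mathbf{1}$, $A = y^T$, $b = 0$, and the box constraints are stacked into $G = [-I;\\, I]$, $h = [\\mathbf{0};\\, C\\,\\mathbf{1}]$."
  ]
 },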
711 | {
712 | "cell_type": "code",
713 | "execution_count": 21,
714 | "metadata": {},
715 | "outputs": [],
716 | "source": [
717 | "from cvxopt import matrix\n",
718 | "class SVM(object):\n",
719 | "\n",
720 | " def __init__(self, kernel=gaussian_kernel, C=None):\n",
721 | " self.kernel = kernel\n",
722 | " self.C = C\n",
723 | " if self.C is not None: self.C = float(self.C)\n",
724 | " def fit(self, X, y):\n",
725 | " self.kernel = gaussian_kernel\n",
726 | " n_samples, n_features = X.shape\n",
727 | " # Gram matrix\n",
728 | " K = np.zeros((n_samples, n_samples))\n",
729 | " for i in range(n_samples):\n",
730 | " for j in range(n_samples):\n",
731 | " K[i,j] = gaussian_kernel(X[i], X[j])\n",
732 | " # print(K[i,j])\n",
733 | " print(K.shape)\n",
734 | "\n",
735 | " P = cvxopt.matrix(np.outer(y,y) * K)\n",
736 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
737 | " A = cvxopt.matrix(y, (1,n_samples))\n",
738 | " A = matrix(A, (1,n_samples), 'd') #changes done\n",
739 | " b = cvxopt.matrix(0.0)\n",
740 | " #print(P,q,A,b)\n",
741 | " if self.C is None:\n",
742 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
743 | " h = cvxopt.matrix(np.zeros(n_samples))\n",
744 | " \n",
745 | " else:\n",
746 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n",
747 | " tmp2 = np.identity(n_samples)\n",
748 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
749 | " tmp1 = np.zeros(n_samples)\n",
750 | " tmp2 = np.ones(n_samples) * self.C\n",
751 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
752 | " # solve QP problem\n",
753 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
754 | " print(solution['status'])\n",
755 | " # Lagrange multipliers\n",
756 | " a = np.ravel(solution['x'])\n",
757 | " # print(a)\n",
758 | " # Support vectors have non zero lagrange multipliers\n",
759 | " sv = a > 1e-5\n",
760 | " print(sv.shape)\n",
761 | " ind = np.arange(len(a))[sv]\n",
762 | " self.a = a[sv]\n",
763 | " self.sv = X[sv]\n",
764 | " self.sv_y = y[sv]\n",
765 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n",
766 | "\n",
767 | " # Intercept\n",
768 | " self.b = 0\n",
769 | " for n in range(len(self.a)):\n",
770 | " self.b += self.sv_y[n]\n",
771 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n",
772 | " self.b /= len(self.a)\n",
773 | "\n",
774 | " # Weight vector\n",
775 | " if self.kernel == gaussian_kernel:\n",
776 | " self.w = np.zeros(n_features)\n",
777 | " for n in range(len(self.a)):\n",
778 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n",
779 | " #print(self.w)\n",
780 | " else:\n",
781 | " self.w = None\n",
782 | "\n",
783 | " def project(self, X):\n",
784 | " if self.w is None:\n",
785 | " return np.dot(X, self.w) + self.b\n",
786 | " else:\n",
787 | " y_predict = np.zeros(len(X))\n",
788 | " X=np.asarray(X)\n",
789 | " for i in range(len(X)):\n",
790 | " s = 0\n",
791 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n",
792 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n",
793 | " y_predict[i] = s\n",
794 | " # print(y_predict[i])\n",
795 | " return y_predict + self.b\n",
796 | "\n",
797 | " def predict(self, X):\n",
798 | " return np.sign(self.project(X))"
799 | ]
800 | },
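 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
   "The nested loops that build the Gram matrix in `fit` above cost $O(n^2)$ Python-level kernel calls. A vectorised sketch, assuming `gaussian_kernel` is the standard $\\exp(-\\lVert x - z\\rVert^2 / (2\\sigma^2))$ with some bandwidth $\\sigma$ (adjust to match the definition used earlier in this notebook):"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
   "from scipy.spatial.distance import cdist\n",
   "import numpy as np\n",
   "\n",
   "def gram_matrix_rbf(X, sigma=5.0):\n",
   "    # vectorised RBF Gram matrix; sigma is an assumed bandwidth, not taken from this notebook\n",
   "    sq_dists = cdist(X, X, 'sqeuclidean')\n",
   "    return np.exp(-sq_dists / (2.0 * sigma ** 2))"
  ]
 },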
801 | {
802 | "cell_type": "code",
803 | "execution_count": 22,
804 | "metadata": {},
805 | "outputs": [
806 | {
807 | "name": "stdout",
808 | "output_type": "stream",
809 | "text": [
810 | "(3341, 3341)\n",
811 | " pcost dcost gap pres dres\n",
812 | " 0: -1.4904e+04 -2.0036e+06 2e+06 2e-02 9e-13\n",
813 | " 1: -1.3268e+04 -1.4210e+05 1e+05 4e-04 8e-13\n",
814 | " 2: -1.5305e+04 -5.5521e+04 4e+04 3e-05 8e-13\n",
815 | " 3: -1.5484e+04 -5.3758e+04 4e+04 3e-05 8e-13\n",
816 | " 4: -1.6200e+04 -3.0338e+04 1e+04 5e-06 8e-13\n",
817 | " 5: -1.6293e+04 -2.9163e+04 1e+04 3e-06 7e-13\n",
818 | " 6: -1.6519e+04 -2.4440e+04 8e+03 1e-06 7e-13\n",
819 | " 7: -1.6658e+04 -2.0918e+04 4e+03 6e-07 7e-13\n",
820 | " 8: -1.6753e+04 -1.8317e+04 2e+03 2e-07 7e-13\n",
821 | " 9: -1.6785e+04 -1.7377e+04 6e+02 5e-08 7e-13\n",
822 | "10: -1.6797e+04 -1.6906e+04 1e+02 5e-09 8e-13\n",
823 | "11: -1.6799e+04 -1.6829e+04 3e+01 8e-11 8e-13\n",
824 | "12: -1.6799e+04 -1.6820e+04 2e+01 4e-11 7e-13\n",
825 | "13: -1.6800e+04 -1.6806e+04 6e+00 9e-12 8e-13\n",
826 | "14: -1.6800e+04 -1.6805e+04 6e+00 7e-12 8e-13\n",
827 | "15: -1.6800e+04 -1.6803e+04 4e+00 1e-12 9e-13\n",
828 | "16: -1.6800e+04 -1.6802e+04 2e+00 2e-13 8e-13\n",
829 | "17: -1.6800e+04 -1.6801e+04 1e+00 9e-14 7e-13\n",
830 | "18: -1.6800e+04 -1.6801e+04 1e+00 6e-13 7e-13\n",
831 | "19: -1.6800e+04 -1.6801e+04 9e-01 1e-12 7e-13\n",
832 | "20: -1.6800e+04 -1.6801e+04 8e-01 1e-12 7e-13\n",
833 | "21: -1.6800e+04 -1.6800e+04 3e-01 3e-13 7e-13\n",
834 | "22: -1.6800e+04 -1.6800e+04 1e-01 7e-13 8e-13\n",
835 | "23: -1.6800e+04 -1.6800e+04 8e-02 1e-12 7e-13\n",
836 | "24: -1.6800e+04 -1.6800e+04 2e-02 1e-12 7e-13\n",
837 | "25: -1.6800e+04 -1.6800e+04 1e-02 7e-13 8e-13\n",
838 | "Optimal solution found.\n",
839 | "optimal\n",
840 | "(3341,)\n",
841 | "3341 support vectors out of 3341 points\n",
842 | "y_test min 19\n",
843 | "y_test max 817\n",
844 | "y_pred min 0\n",
845 | "y_pred max 817\n",
846 | "0.0 1.0\n",
847 | "GM 0.0\n",
848 | "817 out of 836 predictions correct\n",
849 | "Accuracy 0.9772727272727273\n"
850 | ]
851 | }
852 | ],
853 | "source": [
854 | "\n",
855 | "if __name__ == \"__main__\":\n",
856 | " import pylab as pl \n",
857 | " def normal_svm():\n",
858 | " \n",
859 | " clf = SVM(C=100.0)\n",
860 | " clf.fit(X_train, y_train)\n",
861 | " y_predict = clf.predict(X_test)\n",
862 | " gm(y_predict,y_test)\n",
863 | " correct = np.sum(y_predict == y_test)\n",
864 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n",
865 | " print(\"Accuracy\",correct/len(y_predict))\n",
866 | "\n",
867 | " normal_svm() "
868 | ]
869 | },
870 | {
871 | "cell_type": "code",
872 | "execution_count": null,
873 | "metadata": {},
874 | "outputs": [],
875 | "source": []
876 | }
877 | ],
878 | "metadata": {
879 | "kernelspec": {
880 | "display_name": "Python 3",
881 | "language": "python",
882 | "name": "python3"
883 | },
884 | "language_info": {
885 | "codemirror_mode": {
886 | "name": "ipython",
887 | "version": 3
888 | },
889 | "file_extension": ".py",
890 | "mimetype": "text/x-python",
891 | "name": "python",
892 | "nbconvert_exporter": "python",
893 | "pygments_lexer": "ipython3",
894 | "version": "3.6.5"
895 | }
896 | },
897 | "nbformat": 4,
898 | "nbformat_minor": 2
899 | }
900 |
--------------------------------------------------------------------------------