├── FSVM-CIL Fuzzy Support Vector Machines.pdf ├── README.md ├── LICENSE ├── haberman.csv ├── modifiedhaberman.csv ├── Preprocessing PIma_indians.ipynb ├── Preprocessing pagebreak.ipynb ├── Preprocessing Haberman.ipynb ├── Preprocessing Abalone.ipynb ├── pima-indians-diabetes.csv ├── FUZZY SVM Haberman.ipynb ├── FUZZY SVM.ipynb └── FUZZY SVM Pageblock.ipynb /FSVM-CIL Fuzzy Support Vector Machines.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityasahugit/Fuzzy-SVM/HEAD/FSVM-CIL Fuzzy Support Vector Machines.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fuzzy-SVM 2 | 3 | Based on research paper “FSVM-CIL: Fuzzy Support Vector Machines for Class Imbalance Learning” by Rukshan Batuwita and Vasile Palade which discuss Fuzzy concept 4 | 5 | It is used for optimazation of algorithm for imbalanced datasets which do not have 1:1 no. of instances of each class. 6 | 7 | # Datasets used 8 | 9 | Pima Indians diabities dataset has 35:65 ratio in their class. 10 | haberman, abalone,pageblocks which have diff class ratio. 11 | 12 | # Notetbooks 13 | 14 | Preprocessing notebooks are to make a new version of each dataset such that thier class ratio after splitting in test and training is same. 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 ADITYA SAHU 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /haberman.csv: -------------------------------------------------------------------------------- 1 | 30,64,1,1 2 | 30,62,3,1 3 | 30,65,0,1 4 | 31,59,2,1 5 | 31,65,4,1 6 | 33,58,10,1 7 | 33,60,0,1 8 | 34,59,0,2 9 | 34,66,9,2 10 | 34,58,30,1 11 | 34,60,1,1 12 | 34,61,10,1 13 | 34,67,7,1 14 | 34,60,0,1 15 | 35,64,13,1 16 | 35,63,0,1 17 | 36,60,1,1 18 | 36,69,0,1 19 | 37,60,0,1 20 | 37,63,0,1 21 | 37,58,0,1 22 | 37,59,6,1 23 | 37,60,15,1 24 | 37,63,0,1 25 | 38,69,21,2 26 | 38,59,2,1 27 | 38,60,0,1 28 | 38,60,0,1 29 | 38,62,3,1 30 | 38,64,1,1 31 | 38,66,0,1 32 | 38,66,11,1 33 | 38,60,1,1 34 | 38,67,5,1 35 | 39,66,0,2 36 | 39,63,0,1 37 | 39,67,0,1 38 | 39,58,0,1 39 | 39,59,2,1 40 | 39,63,4,1 41 | 40,58,2,1 42 | 40,58,0,1 43 | 40,65,0,1 44 | 41,60,23,2 45 | 41,64,0,2 46 | 41,67,0,2 47 | 41,58,0,1 48 | 41,59,8,1 49 | 41,59,0,1 50 | 41,64,0,1 51 | 41,69,8,1 52 | 41,65,0,1 53 | 41,65,0,1 54 | 42,69,1,2 55 | 42,59,0,2 56 | 42,58,0,1 57 | 42,60,1,1 58 | 42,59,2,1 59 | 42,61,4,1 60 | 42,62,20,1 61 | 42,65,0,1 62 | 42,63,1,1 63 | 43,58,52,2 64 | 43,59,2,2 65 | 43,64,0,2 66 | 43,64,0,2 67 | 43,63,14,1 68 | 43,64,2,1 69 | 43,64,3,1 70 | 43,60,0,1 71 | 43,63,2,1 72 | 43,65,0,1 73 | 43,66,4,1 74 | 44,64,6,2 75 | 44,58,9,2 76 | 44,63,19,2 77 | 44,61,0,1 78 | 44,63,1,1 79 | 44,61,0,1 80 | 44,67,16,1 81 | 45,65,6,2 82 | 45,66,0,2 83 | 45,67,1,2 84 | 45,60,0,1 85 | 45,67,0,1 86 | 45,59,14,1 87 | 45,64,0,1 88 | 45,68,0,1 89 | 45,67,1,1 90 | 46,58,2,2 91 | 46,69,3,2 92 | 46,62,5,2 93 | 46,65,20,2 94 | 46,62,0,1 95 | 46,58,3,1 96 | 46,63,0,1 97 | 47,63,23,2 98 | 47,62,0,2 99 | 47,65,0,2 100 | 47,61,0,1 101 | 47,63,6,1 102 | 47,66,0,1 103 | 47,67,0,1 104 | 47,58,3,1 105 | 47,60,4,1 106 | 47,68,4,1 107 | 47,66,12,1 108 | 48,58,11,2 109 | 48,58,11,2 110 | 48,67,7,2 111 | 48,61,8,1 112 | 48,62,2,1 113 | 48,64,0,1 114 | 48,66,0,1 115 | 49,63,0,2 116 | 49,64,10,2 117 | 49,61,1,1 118 | 49,62,0,1 119 | 49,66,0,1 120 | 49,60,1,1 121 | 49,62,1,1 122 | 49,63,3,1 123 | 49,61,0,1 124 | 49,67,1,1 125 | 50,63,13,2 126 | 50,64,0,2 127 | 50,59,0,1 128 | 50,61,6,1 129 | 50,61,0,1 130 | 50,63,1,1 131 | 50,58,1,1 132 | 50,59,2,1 133 | 50,61,0,1 134 | 50,64,0,1 135 | 50,65,4,1 136 | 50,66,1,1 137 | 51,59,13,2 138 | 51,59,3,2 139 | 51,64,7,1 140 | 51,59,1,1 141 | 51,65,0,1 142 | 51,66,1,1 143 | 52,69,3,2 144 | 52,59,2,2 145 | 52,62,3,2 146 | 52,66,4,2 147 | 52,61,0,1 148 | 52,63,4,1 149 | 52,69,0,1 150 | 52,60,4,1 151 | 52,60,5,1 152 | 52,62,0,1 153 | 52,62,1,1 154 | 52,64,0,1 155 | 52,65,0,1 156 | 52,68,0,1 157 | 53,58,4,2 158 | 53,65,1,2 159 | 53,59,3,2 160 | 53,60,9,2 161 | 53,63,24,2 162 | 53,65,12,2 163 | 53,58,1,1 164 | 53,60,1,1 165 | 53,60,2,1 166 | 53,61,1,1 167 | 53,63,0,1 168 | 54,60,11,2 169 | 54,65,23,2 170 | 54,65,5,2 171 | 54,68,7,2 172 | 54,59,7,1 173 | 54,60,3,1 174 | 54,66,0,1 175 | 54,67,46,1 176 | 54,62,0,1 177 | 54,69,7,1 178 | 54,63,19,1 179 | 54,58,1,1 180 | 54,62,0,1 181 | 55,63,6,2 182 | 55,68,15,2 183 | 55,58,1,1 184 | 55,58,0,1 185 | 55,58,1,1 186 | 55,66,18,1 187 | 55,66,0,1 188 | 55,69,3,1 189 | 55,69,22,1 190 | 55,67,1,1 191 | 56,65,9,2 192 | 56,66,3,2 193 | 56,60,0,1 194 | 56,66,2,1 195 | 56,66,1,1 196 | 56,67,0,1 197 | 56,60,0,1 198 | 57,61,5,2 199 | 57,62,14,2 200 | 57,64,1,2 201 | 57,64,9,1 202 | 57,69,0,1 203 | 57,61,0,1 204 | 57,62,0,1 205 | 57,63,0,1 206 | 57,64,0,1 207 | 57,64,0,1 208 | 57,67,0,1 209 | 58,59,0,1 210 | 58,60,3,1 211 | 58,61,1,1 212 | 58,67,0,1 213 | 58,58,0,1 214 | 58,58,3,1 215 | 58,61,2,1 216 | 59,62,35,2 217 | 59,60,0,1 218 | 59,63,0,1 219 | 59,64,1,1 220 | 59,64,4,1 221 | 59,64,0,1 222 | 59,64,7,1 223 | 59,67,3,1 224 | 60,59,17,2 225 | 60,65,0,2 226 | 60,61,1,1 227 | 60,67,2,1 228 | 60,61,25,1 229 | 60,64,0,1 230 | 61,62,5,2 231 | 61,65,0,2 232 | 61,68,1,2 233 | 61,59,0,1 234 | 61,59,0,1 235 | 61,64,0,1 236 | 61,65,8,1 237 | 61,68,0,1 238 | 61,59,0,1 239 | 62,59,13,2 240 | 62,58,0,2 241 | 62,65,19,2 242 | 62,62,6,1 243 | 62,66,0,1 244 | 62,66,0,1 245 | 62,58,0,1 246 | 63,60,1,2 247 | 63,61,0,1 248 | 63,62,0,1 249 | 63,63,0,1 250 | 63,63,0,1 251 | 63,66,0,1 252 | 63,61,9,1 253 | 63,61,28,1 254 | 64,58,0,1 255 | 64,65,22,1 256 | 64,66,0,1 257 | 64,61,0,1 258 | 64,68,0,1 259 | 65,58,0,2 260 | 65,61,2,2 261 | 65,62,22,2 262 | 65,66,15,2 263 | 65,58,0,1 264 | 65,64,0,1 265 | 65,67,0,1 266 | 65,59,2,1 267 | 65,64,0,1 268 | 65,67,1,1 269 | 66,58,0,2 270 | 66,61,13,2 271 | 66,58,0,1 272 | 66,58,1,1 273 | 66,68,0,1 274 | 67,64,8,2 275 | 67,63,1,2 276 | 67,66,0,1 277 | 67,66,0,1 278 | 67,61,0,1 279 | 67,65,0,1 280 | 68,67,0,1 281 | 68,68,0,1 282 | 69,67,8,2 283 | 69,60,0,1 284 | 69,65,0,1 285 | 69,66,0,1 286 | 70,58,0,2 287 | 70,58,4,2 288 | 70,66,14,1 289 | 70,67,0,1 290 | 70,68,0,1 291 | 70,59,8,1 292 | 70,63,0,1 293 | 71,68,2,1 294 | 72,63,0,2 295 | 72,58,0,1 296 | 72,64,0,1 297 | 72,67,3,1 298 | 73,62,0,1 299 | 73,68,0,1 300 | 74,65,3,2 301 | 74,63,0,1 302 | 75,62,1,1 303 | 76,67,0,1 304 | 77,65,3,1 305 | 78,65,1,2 306 | 83,58,2,2 307 | -------------------------------------------------------------------------------- /modifiedhaberman.csv: -------------------------------------------------------------------------------- 1 | 34.0,59.0,0.0,1.0 2 | 34.0,66.0,9.0,1.0 3 | 38.0,69.0,21.0,1.0 4 | 39.0,66.0,0.0,1.0 5 | 41.0,60.0,23.0,1.0 6 | 41.0,64.0,0.0,1.0 7 | 41.0,67.0,0.0,1.0 8 | 42.0,69.0,1.0,1.0 9 | 42.0,59.0,0.0,1.0 10 | 43.0,58.0,52.0,1.0 11 | 43.0,59.0,2.0,1.0 12 | 43.0,64.0,0.0,1.0 13 | 43.0,64.0,0.0,1.0 14 | 44.0,64.0,6.0,1.0 15 | 44.0,58.0,9.0,1.0 16 | 44.0,63.0,19.0,1.0 17 | 30.0,64.0,1.0,-1.0 18 | 30.0,62.0,3.0,-1.0 19 | 30.0,65.0,0.0,-1.0 20 | 31.0,59.0,2.0,-1.0 21 | 31.0,65.0,4.0,-1.0 22 | 33.0,58.0,10.0,-1.0 23 | 33.0,60.0,0.0,-1.0 24 | 34.0,58.0,30.0,-1.0 25 | 34.0,60.0,1.0,-1.0 26 | 34.0,61.0,10.0,-1.0 27 | 34.0,67.0,7.0,-1.0 28 | 34.0,60.0,0.0,-1.0 29 | 35.0,64.0,13.0,-1.0 30 | 35.0,63.0,0.0,-1.0 31 | 36.0,60.0,1.0,-1.0 32 | 36.0,69.0,0.0,-1.0 33 | 37.0,60.0,0.0,-1.0 34 | 37.0,63.0,0.0,-1.0 35 | 37.0,58.0,0.0,-1.0 36 | 37.0,59.0,6.0,-1.0 37 | 37.0,60.0,15.0,-1.0 38 | 37.0,63.0,0.0,-1.0 39 | 38.0,59.0,2.0,-1.0 40 | 38.0,60.0,0.0,-1.0 41 | 38.0,60.0,0.0,-1.0 42 | 38.0,62.0,3.0,-1.0 43 | 38.0,64.0,1.0,-1.0 44 | 38.0,66.0,0.0,-1.0 45 | 38.0,66.0,11.0,-1.0 46 | 38.0,60.0,1.0,-1.0 47 | 38.0,67.0,5.0,-1.0 48 | 39.0,63.0,0.0,-1.0 49 | 39.0,67.0,0.0,-1.0 50 | 39.0,58.0,0.0,-1.0 51 | 39.0,59.0,2.0,-1.0 52 | 39.0,63.0,4.0,-1.0 53 | 40.0,58.0,2.0,-1.0 54 | 40.0,58.0,0.0,-1.0 55 | 40.0,65.0,0.0,-1.0 56 | 41.0,58.0,0.0,-1.0 57 | 41.0,59.0,8.0,-1.0 58 | 41.0,59.0,0.0,-1.0 59 | 41.0,64.0,0.0,-1.0 60 | 41.0,69.0,8.0,-1.0 61 | 41.0,65.0,0.0,-1.0 62 | 45.0,65.0,6.0,1.0 63 | 45.0,66.0,0.0,1.0 64 | 45.0,67.0,1.0,1.0 65 | 46.0,58.0,2.0,1.0 66 | 46.0,69.0,3.0,1.0 67 | 46.0,62.0,5.0,1.0 68 | 46.0,65.0,20.0,1.0 69 | 47.0,63.0,23.0,1.0 70 | 47.0,62.0,0.0,1.0 71 | 47.0,65.0,0.0,1.0 72 | 48.0,58.0,11.0,1.0 73 | 48.0,58.0,11.0,1.0 74 | 48.0,67.0,7.0,1.0 75 | 49.0,63.0,0.0,1.0 76 | 49.0,64.0,10.0,1.0 77 | 50.0,63.0,13.0,1.0 78 | 41.0,65.0,0.0,-1.0 79 | 42.0,58.0,0.0,-1.0 80 | 42.0,60.0,1.0,-1.0 81 | 42.0,59.0,2.0,-1.0 82 | 42.0,61.0,4.0,-1.0 83 | 42.0,62.0,20.0,-1.0 84 | 42.0,65.0,0.0,-1.0 85 | 42.0,63.0,1.0,-1.0 86 | 43.0,63.0,14.0,-1.0 87 | 43.0,64.0,2.0,-1.0 88 | 43.0,64.0,3.0,-1.0 89 | 43.0,60.0,0.0,-1.0 90 | 43.0,63.0,2.0,-1.0 91 | 43.0,65.0,0.0,-1.0 92 | 43.0,66.0,4.0,-1.0 93 | 44.0,61.0,0.0,-1.0 94 | 44.0,63.0,1.0,-1.0 95 | 44.0,61.0,0.0,-1.0 96 | 44.0,67.0,16.0,-1.0 97 | 45.0,60.0,0.0,-1.0 98 | 45.0,67.0,0.0,-1.0 99 | 45.0,59.0,14.0,-1.0 100 | 45.0,64.0,0.0,-1.0 101 | 45.0,68.0,0.0,-1.0 102 | 45.0,67.0,1.0,-1.0 103 | 46.0,62.0,0.0,-1.0 104 | 46.0,58.0,3.0,-1.0 105 | 46.0,63.0,0.0,-1.0 106 | 47.0,61.0,0.0,-1.0 107 | 47.0,63.0,6.0,-1.0 108 | 47.0,66.0,0.0,-1.0 109 | 47.0,67.0,0.0,-1.0 110 | 47.0,58.0,3.0,-1.0 111 | 47.0,60.0,4.0,-1.0 112 | 47.0,68.0,4.0,-1.0 113 | 47.0,66.0,12.0,-1.0 114 | 48.0,61.0,8.0,-1.0 115 | 48.0,62.0,2.0,-1.0 116 | 48.0,64.0,0.0,-1.0 117 | 48.0,66.0,0.0,-1.0 118 | 49.0,61.0,1.0,-1.0 119 | 49.0,62.0,0.0,-1.0 120 | 49.0,66.0,0.0,-1.0 121 | 49.0,60.0,1.0,-1.0 122 | 49.0,62.0,1.0,-1.0 123 | 50.0,64.0,0.0,1.0 124 | 51.0,59.0,13.0,1.0 125 | 51.0,59.0,3.0,1.0 126 | 52.0,69.0,3.0,1.0 127 | 52.0,59.0,2.0,1.0 128 | 52.0,62.0,3.0,1.0 129 | 52.0,66.0,4.0,1.0 130 | 53.0,58.0,4.0,1.0 131 | 53.0,65.0,1.0,1.0 132 | 53.0,59.0,3.0,1.0 133 | 53.0,60.0,9.0,1.0 134 | 53.0,63.0,24.0,1.0 135 | 53.0,65.0,12.0,1.0 136 | 54.0,60.0,11.0,1.0 137 | 54.0,65.0,23.0,1.0 138 | 54.0,65.0,5.0,1.0 139 | 49.0,63.0,3.0,-1.0 140 | 49.0,61.0,0.0,-1.0 141 | 49.0,67.0,1.0,-1.0 142 | 50.0,59.0,0.0,-1.0 143 | 50.0,61.0,6.0,-1.0 144 | 50.0,61.0,0.0,-1.0 145 | 50.0,63.0,1.0,-1.0 146 | 50.0,58.0,1.0,-1.0 147 | 50.0,59.0,2.0,-1.0 148 | 50.0,61.0,0.0,-1.0 149 | 50.0,64.0,0.0,-1.0 150 | 50.0,65.0,4.0,-1.0 151 | 50.0,66.0,1.0,-1.0 152 | 51.0,64.0,7.0,-1.0 153 | 51.0,59.0,1.0,-1.0 154 | 51.0,65.0,0.0,-1.0 155 | 51.0,66.0,1.0,-1.0 156 | 52.0,61.0,0.0,-1.0 157 | 52.0,63.0,4.0,-1.0 158 | 52.0,69.0,0.0,-1.0 159 | 52.0,60.0,4.0,-1.0 160 | 52.0,60.0,5.0,-1.0 161 | 52.0,62.0,0.0,-1.0 162 | 52.0,62.0,1.0,-1.0 163 | 52.0,64.0,0.0,-1.0 164 | 52.0,65.0,0.0,-1.0 165 | 52.0,68.0,0.0,-1.0 166 | 53.0,58.0,1.0,-1.0 167 | 53.0,60.0,1.0,-1.0 168 | 53.0,60.0,2.0,-1.0 169 | 53.0,61.0,1.0,-1.0 170 | 53.0,63.0,0.0,-1.0 171 | 54.0,59.0,7.0,-1.0 172 | 54.0,60.0,3.0,-1.0 173 | 54.0,66.0,0.0,-1.0 174 | 54.0,67.0,46.0,-1.0 175 | 54.0,62.0,0.0,-1.0 176 | 54.0,69.0,7.0,-1.0 177 | 54.0,63.0,19.0,-1.0 178 | 54.0,58.0,1.0,-1.0 179 | 54.0,62.0,0.0,-1.0 180 | 55.0,58.0,1.0,-1.0 181 | 55.0,58.0,0.0,-1.0 182 | 55.0,58.0,1.0,-1.0 183 | 55.0,66.0,18.0,-1.0 184 | 54.0,68.0,7.0,1.0 185 | 55.0,63.0,6.0,1.0 186 | 55.0,68.0,15.0,1.0 187 | 56.0,65.0,9.0,1.0 188 | 56.0,66.0,3.0,1.0 189 | 57.0,61.0,5.0,1.0 190 | 57.0,62.0,14.0,1.0 191 | 57.0,64.0,1.0,1.0 192 | 59.0,62.0,35.0,1.0 193 | 60.0,59.0,17.0,1.0 194 | 60.0,65.0,0.0,1.0 195 | 61.0,62.0,5.0,1.0 196 | 61.0,65.0,0.0,1.0 197 | 61.0,68.0,1.0,1.0 198 | 62.0,59.0,13.0,1.0 199 | 62.0,58.0,0.0,1.0 200 | 55.0,66.0,0.0,-1.0 201 | 55.0,69.0,3.0,-1.0 202 | 55.0,69.0,22.0,-1.0 203 | 55.0,67.0,1.0,-1.0 204 | 56.0,60.0,0.0,-1.0 205 | 56.0,66.0,2.0,-1.0 206 | 56.0,66.0,1.0,-1.0 207 | 56.0,67.0,0.0,-1.0 208 | 56.0,60.0,0.0,-1.0 209 | 57.0,64.0,9.0,-1.0 210 | 57.0,69.0,0.0,-1.0 211 | 57.0,61.0,0.0,-1.0 212 | 57.0,62.0,0.0,-1.0 213 | 57.0,63.0,0.0,-1.0 214 | 57.0,64.0,0.0,-1.0 215 | 57.0,64.0,0.0,-1.0 216 | 57.0,67.0,0.0,-1.0 217 | 58.0,59.0,0.0,-1.0 218 | 58.0,60.0,3.0,-1.0 219 | 58.0,61.0,1.0,-1.0 220 | 58.0,67.0,0.0,-1.0 221 | 58.0,58.0,0.0,-1.0 222 | 58.0,58.0,3.0,-1.0 223 | 58.0,61.0,2.0,-1.0 224 | 59.0,60.0,0.0,-1.0 225 | 59.0,63.0,0.0,-1.0 226 | 59.0,64.0,1.0,-1.0 227 | 59.0,64.0,4.0,-1.0 228 | 59.0,64.0,0.0,-1.0 229 | 59.0,64.0,7.0,-1.0 230 | 59.0,67.0,3.0,-1.0 231 | 60.0,61.0,1.0,-1.0 232 | 60.0,67.0,2.0,-1.0 233 | 60.0,61.0,25.0,-1.0 234 | 60.0,64.0,0.0,-1.0 235 | 61.0,59.0,0.0,-1.0 236 | 61.0,59.0,0.0,-1.0 237 | 61.0,64.0,0.0,-1.0 238 | 61.0,65.0,8.0,-1.0 239 | 61.0,68.0,0.0,-1.0 240 | 61.0,59.0,0.0,-1.0 241 | 62.0,62.0,6.0,-1.0 242 | 62.0,66.0,0.0,-1.0 243 | 62.0,66.0,0.0,-1.0 244 | 62.0,58.0,0.0,-1.0 245 | 62.0,65.0,19.0,1.0 246 | 63.0,60.0,1.0,1.0 247 | 65.0,58.0,0.0,1.0 248 | 65.0,61.0,2.0,1.0 249 | 65.0,62.0,22.0,1.0 250 | 65.0,66.0,15.0,1.0 251 | 66.0,58.0,0.0,1.0 252 | 66.0,61.0,13.0,1.0 253 | 67.0,64.0,8.0,1.0 254 | 67.0,63.0,1.0,1.0 255 | 69.0,67.0,8.0,1.0 256 | 70.0,58.0,0.0,1.0 257 | 70.0,58.0,4.0,1.0 258 | 72.0,63.0,0.0,1.0 259 | 74.0,65.0,3.0,1.0 260 | 78.0,65.0,1.0,1.0 261 | 83.0,58.0,2.0,1.0 262 | 63.0,61.0,0.0,-1.0 263 | 63.0,62.0,0.0,-1.0 264 | 63.0,63.0,0.0,-1.0 265 | 63.0,63.0,0.0,-1.0 266 | 63.0,66.0,0.0,-1.0 267 | 63.0,61.0,9.0,-1.0 268 | 63.0,61.0,28.0,-1.0 269 | 64.0,58.0,0.0,-1.0 270 | 64.0,65.0,22.0,-1.0 271 | 64.0,66.0,0.0,-1.0 272 | 64.0,61.0,0.0,-1.0 273 | 64.0,68.0,0.0,-1.0 274 | 65.0,58.0,0.0,-1.0 275 | 65.0,64.0,0.0,-1.0 276 | 65.0,67.0,0.0,-1.0 277 | 65.0,59.0,2.0,-1.0 278 | 65.0,64.0,0.0,-1.0 279 | 65.0,67.0,1.0,-1.0 280 | 66.0,58.0,0.0,-1.0 281 | 66.0,58.0,1.0,-1.0 282 | 66.0,68.0,0.0,-1.0 283 | 67.0,66.0,0.0,-1.0 284 | 67.0,66.0,0.0,-1.0 285 | 67.0,61.0,0.0,-1.0 286 | 67.0,65.0,0.0,-1.0 287 | 68.0,67.0,0.0,-1.0 288 | 68.0,68.0,0.0,-1.0 289 | 69.0,60.0,0.0,-1.0 290 | 69.0,65.0,0.0,-1.0 291 | 69.0,66.0,0.0,-1.0 292 | 70.0,66.0,14.0,-1.0 293 | 70.0,67.0,0.0,-1.0 294 | 70.0,68.0,0.0,-1.0 295 | 70.0,59.0,8.0,-1.0 296 | 70.0,63.0,0.0,-1.0 297 | 71.0,68.0,2.0,-1.0 298 | 72.0,58.0,0.0,-1.0 299 | 72.0,64.0,0.0,-1.0 300 | 72.0,67.0,3.0,-1.0 301 | 73.0,62.0,0.0,-1.0 302 | 73.0,68.0,0.0,-1.0 303 | 74.0,63.0,0.0,-1.0 304 | 75.0,62.0,1.0,-1.0 305 | 76.0,67.0,0.0,-1.0 306 | 77.0,65.0,3.0,-1.0 307 | -------------------------------------------------------------------------------- /Preprocessing PIma_indians.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ADITYA SAHU" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Pre processing of Pima indians dataset to make imbalance ratio of training and testing same" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "from numpy import linalg\n", 25 | "import pandas as pd" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/html": [ 36 | "
\n", 37 | "\n", 50 | "\n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | "
123456789
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", 128 | "
" 129 | ], 130 | "text/plain": [ 131 | " 1 2 3 4 5 6 7 8 9\n", 132 | "0 6 148 72 35 0 33.6 0.627 50 1\n", 133 | "1 1 85 66 29 0 26.6 0.351 31 0\n", 134 | "2 8 183 64 0 0 23.3 0.672 32 1\n", 135 | "3 1 89 66 23 94 28.1 0.167 21 0\n", 136 | "4 0 137 40 35 168 43.1 2.288 33 1" 137 | ] 138 | }, 139 | "execution_count": 2, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "train = pd.read_csv(\"pima-indians-diabetes.csv\")\n", 146 | "train.head()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 3, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "0 1\n", 159 | "1 -1\n", 160 | "2 1\n", 161 | "3 -1\n", 162 | "4 1\n", 163 | "5 -1\n", 164 | "6 1\n", 165 | "7 -1\n", 166 | "8 1\n", 167 | "9 1\n", 168 | "10 -1\n", 169 | "11 1\n", 170 | "12 -1\n", 171 | "13 1\n", 172 | "14 1\n", 173 | "15 1\n", 174 | "16 1\n", 175 | "17 1\n", 176 | "18 -1\n", 177 | "19 1\n", 178 | "20 -1\n", 179 | "21 -1\n", 180 | "22 1\n", 181 | "23 1\n", 182 | "24 1\n", 183 | "25 1\n", 184 | "26 1\n", 185 | "27 -1\n", 186 | "28 -1\n", 187 | "29 -1\n", 188 | " ..\n", 189 | "738 -1\n", 190 | "739 1\n", 191 | "740 1\n", 192 | "741 -1\n", 193 | "742 -1\n", 194 | "743 1\n", 195 | "744 -1\n", 196 | "745 -1\n", 197 | "746 1\n", 198 | "747 -1\n", 199 | "748 1\n", 200 | "749 1\n", 201 | "750 1\n", 202 | "751 -1\n", 203 | "752 -1\n", 204 | "753 1\n", 205 | "754 1\n", 206 | "755 1\n", 207 | "756 -1\n", 208 | "757 1\n", 209 | "758 -1\n", 210 | "759 1\n", 211 | "760 -1\n", 212 | "761 1\n", 213 | "762 -1\n", 214 | "763 -1\n", 215 | "764 -1\n", 216 | "765 -1\n", 217 | "766 1\n", 218 | "767 -1\n", 219 | "Name: 9, Length: 768, dtype: int64\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "train['9'] = train['9'].map({1: 1, 0: -1})\n", 225 | "print(train['9'])" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 4, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "\n", 235 | "train=np.asarray(train)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 5, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "min_train=np.zeros((268,9))\n", 245 | "max_train=np.zeros((500,9))\n", 246 | "min_train=np.asarray(min_train)\n", 247 | "max_train=np.asarray(max_train)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 6, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "\n", 257 | "k=0\n", 258 | "l=0\n", 259 | "for i in range(0,768):\n", 260 | " if(train[i][8]==1):\n", 261 | " for j in range(0,9):\n", 262 | " min_train[k][j]=train[i][j] \n", 263 | " k=k+1\n", 264 | " else :\n", 265 | " for j in range(0,9):\n", 266 | " max_train[l][j]=train[i][j]\n", 267 | " l=l+1" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 7, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "[[ 6. 148. 72. ... 0.627 50. 1. ]\n", 280 | " [ 8. 183. 64. ... 0.672 32. 1. ]\n", 281 | " [ 0. 137. 40. ... 2.288 33. 1. ]\n", 282 | " ...\n", 283 | " [ 6. 190. 92. ... 0.278 66. 1. ]\n", 284 | " [ 9. 170. 74. ... 0.403 43. 1. ]\n", 285 | " [ 1. 126. 60. ... 0.349 47. 1. ]]\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "print(min_train)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "#768/5 =153.6\n", 298 | "#So we have to make no. of instances in data1,2 as 153 and data3,4,5 as 154 \n", 299 | "#Also min class value will be 35% of 153=53 and max class=100" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 21, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "data1=np.zeros((153,9))\n", 309 | "data2=np.zeros((153,9))\n", 310 | "data3=np.zeros((154,9))\n", 311 | "data4=np.zeros((154,9))\n", 312 | "data5=np.zeros((154,9))" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 22, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "[0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "for i in range(0,268):\n", 337 | " for j in range(0,9):\n", 338 | " if(i<53):\n", 339 | " data1[i][j]=min_train[i][j]\n", 340 | " elif(52\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | "
012345678910
057351.4000.4000.6572.33142361
167421.1670.4290.8813.60183751
26181083.0000.2870.7414.43318071
357351.4000.3710.7434.33132631
463180.5000.5000.9442.2591741
\n", 133 | "" 134 | ], 135 | "text/plain": [ 136 | " 0 1 2 3 4 5 6 7 8 9 10\n", 137 | "0 5 7 35 1.400 0.400 0.657 2.33 14 23 6 1\n", 138 | "1 6 7 42 1.167 0.429 0.881 3.60 18 37 5 1\n", 139 | "2 6 18 108 3.000 0.287 0.741 4.43 31 80 7 1\n", 140 | "3 5 7 35 1.400 0.371 0.743 4.33 13 26 3 1\n", 141 | "4 6 3 18 0.500 0.500 0.944 2.25 9 17 4 1" 142 | ] 143 | }, 144 | "execution_count": 31, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "train = pd.read_table(\"page-blocks.data\", sep=\"\\s+\",header=None)\n", 151 | "train.head()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 32, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "0 -1\n", 164 | "1 -1\n", 165 | "2 -1\n", 166 | "3 -1\n", 167 | "4 -1\n", 168 | "5 -1\n", 169 | "6 -1\n", 170 | "7 -1\n", 171 | "8 -1\n", 172 | "9 -1\n", 173 | "10 -1\n", 174 | "11 -1\n", 175 | "12 -1\n", 176 | "13 -1\n", 177 | "14 -1\n", 178 | "15 -1\n", 179 | "16 -1\n", 180 | "17 -1\n", 181 | "18 -1\n", 182 | "19 -1\n", 183 | "20 -1\n", 184 | "21 -1\n", 185 | "22 -1\n", 186 | "23 -1\n", 187 | "24 -1\n", 188 | "25 -1\n", 189 | "26 -1\n", 190 | "27 -1\n", 191 | "28 -1\n", 192 | "29 -1\n", 193 | " ..\n", 194 | "5443 -1\n", 195 | "5444 -1\n", 196 | "5445 -1\n", 197 | "5446 -1\n", 198 | "5447 -1\n", 199 | "5448 -1\n", 200 | "5449 -1\n", 201 | "5450 -1\n", 202 | "5451 -1\n", 203 | "5452 -1\n", 204 | "5453 -1\n", 205 | "5454 -1\n", 206 | "5455 -1\n", 207 | "5456 -1\n", 208 | "5457 -1\n", 209 | "5458 -1\n", 210 | "5459 -1\n", 211 | "5460 -1\n", 212 | "5461 -1\n", 213 | "5462 -1\n", 214 | "5463 -1\n", 215 | "5464 -1\n", 216 | "5465 -1\n", 217 | "5466 -1\n", 218 | "5467 -1\n", 219 | "5468 -1\n", 220 | "5469 -1\n", 221 | "5470 -1\n", 222 | "5471 -1\n", 223 | "5472 -1\n", 224 | "Name: 10, Length: 5473, dtype: int64\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "train[10] = train[10].map({5: 1, 1:-1,2:-1,3:-1,4:-1})\n", 230 | "print(train[10])" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 33, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "train=np.asarray(train)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 34, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "min_train=np.zeros((115,11))\n", 249 | "max_train=np.zeros((5358,11))\n", 250 | "min_train=np.asarray(min_train)\n", 251 | "max_train=np.asarray(max_train)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 62, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "k=0\n", 261 | "l=0\n", 262 | "for i in range(0,5473):\n", 263 | " if(train[i][10]==1):\n", 264 | " for j in range(0,11):\n", 265 | " min_train[k][j]=train[i][j] \n", 266 | " k=k+1\n", 267 | " else :\n", 268 | " for j in range(0,11):\n", 269 | " max_train[l][j]=train[i][j]\n", 270 | " l=l+1" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 63, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | "[[ 5. 7. 35. ... 23. 6. -1.]\n", 283 | " [ 6. 7. 42. ... 37. 5. -1.]\n", 284 | " [ 6. 18. 108. ... 80. 7. -1.]\n", 285 | " ...\n", 286 | " [ 6. 95. 570. ... 519. 104. -1.]\n", 287 | " [ 7. 41. 287. ... 230. 45. -1.]\n", 288 | " [ 8. 1. 8. ... 8. 1. -1.]]\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "print(max_train)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 64, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "data1=np.zeros((1094,11))\n", 303 | "data2=np.zeros((1094,11))\n", 304 | "data3=np.zeros((1095,11))\n", 305 | "data4=np.zeros((1095,11))\n", 306 | "data5=np.zeros((1095,11))" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 65, 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "for i in range(0,115):\n", 331 | " for j in range(0,11):\n", 332 | " if(i<23):\n", 333 | " data1[i][j]=min_train[i][j]\n", 334 | " elif(22\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | "
0123
0306411
1306231
2306501
3315921
4316541
\n", 91 | "" 92 | ], 93 | "text/plain": [ 94 | " 0 1 2 3\n", 95 | "0 30 64 1 1\n", 96 | "1 30 62 3 1\n", 97 | "2 30 65 0 1\n", 98 | "3 31 59 2 1\n", 99 | "4 31 65 4 1" 100 | ] 101 | }, 102 | "execution_count": 4, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "train = pd.read_csv(\"haberman.csv\",header=None)\n", 109 | "train.head()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "0 -1\n", 122 | "1 -1\n", 123 | "2 -1\n", 124 | "3 -1\n", 125 | "4 -1\n", 126 | "5 -1\n", 127 | "6 -1\n", 128 | "7 1\n", 129 | "8 1\n", 130 | "9 -1\n", 131 | "10 -1\n", 132 | "11 -1\n", 133 | "12 -1\n", 134 | "13 -1\n", 135 | "14 -1\n", 136 | "15 -1\n", 137 | "16 -1\n", 138 | "17 -1\n", 139 | "18 -1\n", 140 | "19 -1\n", 141 | "20 -1\n", 142 | "21 -1\n", 143 | "22 -1\n", 144 | "23 -1\n", 145 | "24 1\n", 146 | "25 -1\n", 147 | "26 -1\n", 148 | "27 -1\n", 149 | "28 -1\n", 150 | "29 -1\n", 151 | " ..\n", 152 | "276 -1\n", 153 | "277 -1\n", 154 | "278 -1\n", 155 | "279 -1\n", 156 | "280 -1\n", 157 | "281 1\n", 158 | "282 -1\n", 159 | "283 -1\n", 160 | "284 -1\n", 161 | "285 1\n", 162 | "286 1\n", 163 | "287 -1\n", 164 | "288 -1\n", 165 | "289 -1\n", 166 | "290 -1\n", 167 | "291 -1\n", 168 | "292 -1\n", 169 | "293 1\n", 170 | "294 -1\n", 171 | "295 -1\n", 172 | "296 -1\n", 173 | "297 -1\n", 174 | "298 -1\n", 175 | "299 1\n", 176 | "300 -1\n", 177 | "301 -1\n", 178 | "302 -1\n", 179 | "303 -1\n", 180 | "304 1\n", 181 | "305 1\n", 182 | "Name: 3, Length: 306, dtype: int64\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "train[3] = train[3].map({2:1,1:-1})\n", 188 | "print(train[3])" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 7, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "train=np.asarray(train)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 19, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "min_train=np.zeros((81,4))\n", 207 | "max_train=np.zeros((225,4))\n", 208 | "min_train=np.asarray(min_train)\n", 209 | "max_train=np.asarray(max_train)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 20, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "k=0\n", 219 | "l=0\n", 220 | "for i in range(0,306):\n", 221 | " if(train[i][3]==1):\n", 222 | " for j in range(0,4):\n", 223 | " min_train[k][j]=train[i][j] \n", 224 | " k=k+1\n", 225 | " else :\n", 226 | " for j in range(0,4):\n", 227 | " max_train[l][j]=train[i][j]\n", 228 | " l=l+1" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 21, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | "[[34. 59. 0. 1.]\n", 241 | " [34. 66. 9. 1.]\n", 242 | " [38. 69. 21. 1.]\n", 243 | " [39. 66. 0. 1.]\n", 244 | " [41. 60. 23. 1.]\n", 245 | " [41. 64. 0. 1.]\n", 246 | " [41. 67. 0. 1.]\n", 247 | " [42. 69. 1. 1.]\n", 248 | " [42. 59. 0. 1.]\n", 249 | " [43. 58. 52. 1.]\n", 250 | " [43. 59. 2. 1.]\n", 251 | " [43. 64. 0. 1.]\n", 252 | " [43. 64. 0. 1.]\n", 253 | " [44. 64. 6. 1.]\n", 254 | " [44. 58. 9. 1.]\n", 255 | " [44. 63. 19. 1.]\n", 256 | " [45. 65. 6. 1.]\n", 257 | " [45. 66. 0. 1.]\n", 258 | " [45. 67. 1. 1.]\n", 259 | " [46. 58. 2. 1.]\n", 260 | " [46. 69. 3. 1.]\n", 261 | " [46. 62. 5. 1.]\n", 262 | " [46. 65. 20. 1.]\n", 263 | " [47. 63. 23. 1.]\n", 264 | " [47. 62. 0. 1.]\n", 265 | " [47. 65. 0. 1.]\n", 266 | " [48. 58. 11. 1.]\n", 267 | " [48. 58. 11. 1.]\n", 268 | " [48. 67. 7. 1.]\n", 269 | " [49. 63. 0. 1.]\n", 270 | " [49. 64. 10. 1.]\n", 271 | " [50. 63. 13. 1.]\n", 272 | " [50. 64. 0. 1.]\n", 273 | " [51. 59. 13. 1.]\n", 274 | " [51. 59. 3. 1.]\n", 275 | " [52. 69. 3. 1.]\n", 276 | " [52. 59. 2. 1.]\n", 277 | " [52. 62. 3. 1.]\n", 278 | " [52. 66. 4. 1.]\n", 279 | " [53. 58. 4. 1.]\n", 280 | " [53. 65. 1. 1.]\n", 281 | " [53. 59. 3. 1.]\n", 282 | " [53. 60. 9. 1.]\n", 283 | " [53. 63. 24. 1.]\n", 284 | " [53. 65. 12. 1.]\n", 285 | " [54. 60. 11. 1.]\n", 286 | " [54. 65. 23. 1.]\n", 287 | " [54. 65. 5. 1.]\n", 288 | " [54. 68. 7. 1.]\n", 289 | " [55. 63. 6. 1.]\n", 290 | " [55. 68. 15. 1.]\n", 291 | " [56. 65. 9. 1.]\n", 292 | " [56. 66. 3. 1.]\n", 293 | " [57. 61. 5. 1.]\n", 294 | " [57. 62. 14. 1.]\n", 295 | " [57. 64. 1. 1.]\n", 296 | " [59. 62. 35. 1.]\n", 297 | " [60. 59. 17. 1.]\n", 298 | " [60. 65. 0. 1.]\n", 299 | " [61. 62. 5. 1.]\n", 300 | " [61. 65. 0. 1.]\n", 301 | " [61. 68. 1. 1.]\n", 302 | " [62. 59. 13. 1.]\n", 303 | " [62. 58. 0. 1.]\n", 304 | " [62. 65. 19. 1.]\n", 305 | " [63. 60. 1. 1.]\n", 306 | " [65. 58. 0. 1.]\n", 307 | " [65. 61. 2. 1.]\n", 308 | " [65. 62. 22. 1.]\n", 309 | " [65. 66. 15. 1.]\n", 310 | " [66. 58. 0. 1.]\n", 311 | " [66. 61. 13. 1.]\n", 312 | " [67. 64. 8. 1.]\n", 313 | " [67. 63. 1. 1.]\n", 314 | " [69. 67. 8. 1.]\n", 315 | " [70. 58. 0. 1.]\n", 316 | " [70. 58. 4. 1.]\n", 317 | " [72. 63. 0. 1.]\n", 318 | " [74. 65. 3. 1.]\n", 319 | " [78. 65. 1. 1.]\n", 320 | " [83. 58. 2. 1.]]\n" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "print(min_train)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 23, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "data1=np.zeros((61,4))\n", 335 | "data2=np.zeros((61,4))\n", 336 | "data3=np.zeros((61,4))\n", 337 | "data4=np.zeros((61,4))\n", 338 | "data5=np.zeros((62,4))" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 28, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "name": "stdout", 355 | "output_type": "stream", 356 | "text": [ 357 | "[0. 0. 0. 0.]\n" 358 | ] 359 | } 360 | ], 361 | "source": [ 362 | "for i in range(0,81):\n", 363 | " for j in range(0,4):\n", 364 | " if(i<16):\n", 365 | " data1[i][j]=min_train[i][j]\n", 366 | " elif(15\n", 37 | "\n", 50 | "\n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | "
sexlengthdiameterheightweight.wweight.sweight.vweight.shrings
0M0.4550.3650.0950.51400.22450.10100.15015
1M0.3500.2650.0900.22550.09950.04850.0707
2F0.5300.4200.1350.67700.25650.14150.2109
3M0.4400.3650.1250.51600.21550.11400.15510
4I0.3300.2550.0800.20500.08950.03950.0557
\n", 128 | "" 129 | ], 130 | "text/plain": [ 131 | " sex length diameter height weight.w weight.s weight.v weight.sh \\\n", 132 | "0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 0.150 \n", 133 | "1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 \n", 134 | "2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 \n", 135 | "3 M 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 \n", 136 | "4 I 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 \n", 137 | "\n", 138 | " rings \n", 139 | "0 15 \n", 140 | "1 7 \n", 141 | "2 9 \n", 142 | "3 10 \n", 143 | "4 7 " 144 | ] 145 | }, 146 | "execution_count": 2, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "train = pd.read_csv(\"Abalone.csv\")\n", 153 | "train.head()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 3, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "train=train.replace(to_replace=['M', 'F', 'I'], value=[1, 2, 3])" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 4, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "0 1\n", 175 | "1 -1\n", 176 | "2 -1\n", 177 | "3 -1\n", 178 | "4 -1\n", 179 | "5 -1\n", 180 | "6 -1\n", 181 | "7 -1\n", 182 | "8 -1\n", 183 | "9 -1\n", 184 | "10 -1\n", 185 | "11 -1\n", 186 | "12 -1\n", 187 | "13 -1\n", 188 | "14 -1\n", 189 | "15 -1\n", 190 | "16 -1\n", 191 | "17 -1\n", 192 | "18 -1\n", 193 | "19 -1\n", 194 | "20 -1\n", 195 | "21 -1\n", 196 | "22 -1\n", 197 | "23 -1\n", 198 | "24 -1\n", 199 | "25 -1\n", 200 | "26 -1\n", 201 | "27 -1\n", 202 | "28 1\n", 203 | "29 -1\n", 204 | " ..\n", 205 | "4147 -1\n", 206 | "4148 -1\n", 207 | "4149 -1\n", 208 | "4150 -1\n", 209 | "4151 -1\n", 210 | "4152 -1\n", 211 | "4153 -1\n", 212 | "4154 -1\n", 213 | "4155 -1\n", 214 | "4156 -1\n", 215 | "4157 -1\n", 216 | "4158 -1\n", 217 | "4159 -1\n", 218 | "4160 -1\n", 219 | "4161 -1\n", 220 | "4162 -1\n", 221 | "4163 -1\n", 222 | "4164 -1\n", 223 | "4165 -1\n", 224 | "4166 -1\n", 225 | "4167 -1\n", 226 | "4168 -1\n", 227 | "4169 -1\n", 228 | "4170 -1\n", 229 | "4171 -1\n", 230 | "4172 -1\n", 231 | "4173 -1\n", 232 | "4174 -1\n", 233 | "4175 -1\n", 234 | "4176 -1\n", 235 | "Name: rings, Length: 4177, dtype: int64\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "train['rings'] = train['rings'].map({15: 1, 1:-1,2:-1,3:-1,4:-1,5:-1,6:-1,7:-1,8:-1,9:-1,10:-1,11:-1,12:-1,13:-1,14:-1,16:-1,17:-1,18:-1,19:-1,20:-1,21:-1,22:-1,23:-1,24:-1,25:-1,26:-1,27:-1,28:-1,29:-1})\n", 241 | "print(train['rings'])" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 5, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "train=np.asarray(train)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 6, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "min_train=np.zeros((103,9))\n", 260 | "max_train=np.zeros((4074,9))\n", 261 | "min_train=np.asarray(min_train)\n", 262 | "max_train=np.asarray(max_train)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 7, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "k=0\n", 272 | "l=0\n", 273 | "for i in range(0,4177):\n", 274 | " if(train[i][8]==1):\n", 275 | " for j in range(0,9):\n", 276 | " min_train[k][j]=train[i][j] \n", 277 | " k=k+1\n", 278 | " else :\n", 279 | " for j in range(0,9):\n", 280 | " max_train[l][j]=train[i][j]\n", 281 | " l=l+1" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 8, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "name": "stdout", 291 | "output_type": "stream", 292 | "text": [ 293 | "[[1. 0.455 0.365 0.095 0.514 0.2245 0.101 0.15 1. ]\n", 294 | " [1. 0.605 0.475 0.18 0.9365 0.394 0.219 0.295 1. ]\n", 295 | " [2. 0.68 0.56 0.165 1.639 0.6055 0.2805 0.46 1. ]\n", 296 | " [2. 0.6 0.475 0.15 1.0075 0.4425 0.221 0.28 1. ]\n", 297 | " [1. 0.565 0.425 0.135 0.8115 0.341 0.1675 0.255 1. ]\n", 298 | " [1. 0.695 0.56 0.19 1.494 0.588 0.3425 0.485 1. ]\n", 299 | " [1. 0.55 0.435 0.145 0.843 0.328 0.1915 0.255 1. ]\n", 300 | " [1. 0.53 0.435 0.16 0.883 0.316 0.164 0.335 1. ]\n", 301 | " [1. 0.59 0.475 0.145 1.053 0.4415 0.262 0.325 1. ]\n", 302 | " [1. 0.56 0.45 0.16 0.922 0.432 0.178 0.26 1. ]\n", 303 | " [2. 0.53 0.415 0.16 0.783 0.2935 0.158 0.245 1. ]\n", 304 | " [2. 0.575 0.46 0.185 1.094 0.4485 0.217 0.345 1. ]\n", 305 | " [1. 0.6 0.495 0.165 1.2415 0.485 0.2775 0.34 1. ]\n", 306 | " [1. 0.56 0.45 0.175 1.011 0.3835 0.2065 0.37 1. ]\n", 307 | " [2. 0.635 0.505 0.17 1.415 0.605 0.297 0.365 1. ]\n", 308 | " [1. 0.63 0.505 0.225 1.525 0.56 0.3335 0.45 1. ]\n", 309 | " [2. 0.535 0.415 0.185 0.8415 0.314 0.1585 0.3 1. ]\n", 310 | " [1. 0.61 0.475 0.165 1.116 0.428 0.2205 0.315 1. ]\n", 311 | " [2. 0.565 0.45 0.195 1.0035 0.406 0.2505 0.285 1. ]\n", 312 | " [1. 0.565 0.465 0.175 0.995 0.3895 0.183 0.37 1. ]\n", 313 | " [1. 0.605 0.47 0.18 1.1405 0.3755 0.2805 0.385 1. ]\n", 314 | " [1. 0.59 0.5 0.165 1.1045 0.4565 0.2425 0.34 1. ]\n", 315 | " [2. 0.62 0.47 0.14 1.0325 0.3605 0.224 0.36 1. ]\n", 316 | " [2. 0.64 0.54 0.175 1.221 0.51 0.259 0.39 1. ]\n", 317 | " [1. 0.57 0.465 0.125 0.849 0.3785 0.1765 0.24 1. ]\n", 318 | " [2. 0.625 0.515 0.15 1.2415 0.5235 0.3065 0.36 1. ]\n", 319 | " [1. 0.655 0.53 0.175 1.2635 0.486 0.2635 0.415 1. ]\n", 320 | " [2. 0.625 0.5 0.15 0.953 0.3445 0.2235 0.305 1. ]\n", 321 | " [2. 0.62 0.47 0.225 1.115 0.378 0.2145 0.36 1. ]\n", 322 | " [1. 0.6 0.47 0.175 1.105 0.4865 0.247 0.315 1. ]\n", 323 | " [1. 0.585 0.455 0.225 1.055 0.3815 0.221 0.365 1. ]\n", 324 | " [2. 0.5 0.375 0.14 0.604 0.242 0.1415 0.179 1. ]\n", 325 | " [1. 0.42 0.325 0.115 0.2885 0.1 0.057 0.1135 1. ]\n", 326 | " [3. 0.45 0.35 0.145 0.525 0.2085 0.1 0.1655 1. ]\n", 327 | " [3. 0.465 0.36 0.105 0.498 0.214 0.116 0.14 1. ]\n", 328 | " [2. 0.485 0.38 0.15 0.605 0.2155 0.14 0.18 1. ]\n", 329 | " [1. 0.565 0.44 0.185 0.909 0.344 0.2325 0.255 1. ]\n", 330 | " [1. 0.555 0.44 0.15 1.092 0.416 0.212 0.4405 1. ]\n", 331 | " [1. 0.525 0.41 0.13 0.99 0.3865 0.243 0.295 1. ]\n", 332 | " [2. 0.52 0.4 0.12 0.6515 0.261 0.2015 0.165 1. ]\n", 333 | " [1. 0.52 0.4 0.12 0.823 0.298 0.1805 0.265 1. ]\n", 334 | " [1. 0.695 0.515 0.175 1.5165 0.578 0.4105 0.39 1. ]\n", 335 | " [2. 0.605 0.495 0.19 1.437 0.469 0.2655 0.41 1. ]\n", 336 | " [1. 0.57 0.43 0.12 1.0615 0.348 0.167 0.31 1. ]\n", 337 | " [1. 0.585 0.405 0.15 1.2565 0.435 0.202 0.325 1. ]\n", 338 | " [1. 0.505 0.385 0.145 0.6775 0.236 0.179 0.2 1. ]\n", 339 | " [1. 0.465 0.35 0.14 0.5755 0.2015 0.1505 0.19 1. ]\n", 340 | " [2. 0.47 0.36 0.145 0.537 0.1725 0.1375 0.195 1. ]\n", 341 | " [1. 0.55 0.415 0.175 1.042 0.3295 0.2325 0.2905 1. ]\n", 342 | " [1. 0.515 0.405 0.145 0.695 0.215 0.1635 0.234 1. ]\n", 343 | " [2. 0.48 0.4 0.125 0.759 0.2125 0.179 0.24 1. ]\n", 344 | " [1. 0.66 0.53 0.17 1.3905 0.5905 0.212 0.453 1. ]\n", 345 | " [1. 0.64 0.565 0.23 1.521 0.644 0.372 0.406 1. ]\n", 346 | " [2. 0.7 0.535 0.175 1.773 0.6805 0.48 0.512 1. ]\n", 347 | " [1. 0.62 0.495 0.195 1.5145 0.579 0.346 0.5195 1. ]\n", 348 | " [2. 0.675 0.55 0.18 1.6885 0.562 0.3705 0.6 1. ]\n", 349 | " [2. 0.595 0.48 0.2 0.975 0.358 0.2035 0.34 1. ]\n", 350 | " [1. 0.645 0.495 0.185 1.4935 0.5265 0.2785 0.455 1. ]\n", 351 | " [2. 0.56 0.435 0.185 1.106 0.422 0.2435 0.33 1. ]\n", 352 | " [2. 0.61 0.48 0.175 1.0675 0.391 0.216 0.42 1. ]\n", 353 | " [1. 0.635 0.51 0.21 1.598 0.6535 0.2835 0.58 1. ]\n", 354 | " [1. 0.695 0.57 0.2 2.033 0.751 0.4255 0.685 1. ]\n", 355 | " [2. 0.505 0.395 0.145 0.6515 0.2695 0.153 0.205 1. ]\n", 356 | " [2. 0.525 0.425 0.145 0.7995 0.3345 0.209 0.24 1. ]\n", 357 | " [3. 0.48 0.39 0.145 0.5825 0.2315 0.121 0.255 1. ]\n", 358 | " [1. 0.59 0.46 0.155 0.906 0.327 0.1485 0.335 1. ]\n", 359 | " [2. 0.6 0.47 0.2 1.031 0.392 0.2035 0.29 1. ]\n", 360 | " [1. 0.65 0.545 0.16 1.2425 0.487 0.296 0.48 1. ]\n", 361 | " [3. 0.555 0.455 0.17 0.8435 0.309 0.1905 0.3 1. ]\n", 362 | " [3. 0.655 0.515 0.145 1.25 0.5265 0.283 0.315 1. ]\n", 363 | " [3. 0.62 0.485 0.17 1.208 0.4805 0.3045 0.33 1. ]\n", 364 | " [3. 0.52 0.415 0.16 0.595 0.2105 0.142 0.26 1. ]\n", 365 | " [1. 0.49 0.39 0.135 0.592 0.242 0.096 0.1835 1. ]\n", 366 | " [2. 0.52 0.4 0.13 0.6245 0.215 0.2065 0.17 1. ]\n", 367 | " [1. 0.495 0.4 0.14 0.7775 0.2015 0.18 0.25 1. ]\n", 368 | " [1. 0.66 0.535 0.2 1.791 0.733 0.318 0.54 1. ]\n", 369 | " [1. 0.65 0.52 0.195 1.676 0.693 0.44 0.47 1. ]\n", 370 | " [1. 0.64 0.49 0.14 1.194 0.4445 0.238 0.375 1. ]\n", 371 | " [1. 0.605 0.49 0.155 1.153 0.503 0.2505 0.295 1. ]\n", 372 | " [1. 0.605 0.47 0.115 1.114 0.3925 0.291 0.31 1. ]\n", 373 | " [2. 0.505 0.41 0.135 0.657 0.291 0.133 0.195 1. ]\n", 374 | " [2. 0.665 0.53 0.185 1.3955 0.456 0.3205 0.49 1. ]\n", 375 | " [3. 0.48 0.38 0.125 0.523 0.2105 0.1045 0.175 1. ]\n", 376 | " [2. 0.69 0.54 0.185 1.5715 0.6935 0.318 0.47 1. ]\n", 377 | " [1. 0.555 0.435 0.135 0.858 0.377 0.1585 0.29 1. ]\n", 378 | " [1. 0.635 0.48 0.19 1.467 0.5825 0.303 0.42 1. ]\n", 379 | " [2. 0.61 0.495 0.19 1.213 0.464 0.306 0.365 1. ]\n", 380 | " [2. 0.465 0.39 0.14 0.5555 0.213 0.1075 0.215 1. ]\n", 381 | " [2. 0.605 0.475 0.145 1.0185 0.4695 0.225 0.27 1. ]\n", 382 | " [1. 0.535 0.42 0.16 0.72 0.275 0.164 0.225 1. ]\n", 383 | " [2. 0.71 0.575 0.175 1.555 0.6465 0.3705 0.52 1. ]\n", 384 | " [2. 0.48 0.37 0.13 0.5885 0.2475 0.1505 0.1595 1. ]\n", 385 | " [3. 0.66 0.525 0.18 1.6935 0.6025 0.4005 0.42 1. ]\n", 386 | " [2. 0.52 0.405 0.145 0.829 0.3535 0.1685 0.205 1. ]\n", 387 | " [1. 0.495 0.4 0.12 0.6605 0.2605 0.161 0.19 1. ]\n", 388 | " [2. 0.5 0.39 0.13 0.6355 0.2505 0.1635 0.195 1. ]\n", 389 | " [1. 0.545 0.44 0.165 0.744 0.2875 0.204 0.25 1. ]\n", 390 | " [2. 0.645 0.5 0.225 1.626 0.587 0.4055 0.41 1. ]\n", 391 | " [2. 0.61 0.49 0.17 1.1775 0.5655 0.2385 0.295 1. ]\n", 392 | " [2. 0.67 0.545 0.16 1.5415 0.5985 0.2565 0.495 1. ]\n", 393 | " [1. 0.445 0.345 0.14 0.476 0.2055 0.1015 0.1085 1. ]\n", 394 | " [3. 0.52 0.405 0.14 0.6765 0.2865 0.146 0.205 1. ]\n", 395 | " [2. 0.54 0.44 0.16 1.0905 0.391 0.2295 0.355 1. ]]\n" 396 | ] 397 | } 398 | ], 399 | "source": [ 400 | "print(min_train)" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 14, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "data1=np.zeros((834,9))\n", 410 | "data2=np.zeros((835,9))\n", 411 | "data3=np.zeros((836,9))\n", 412 | "data4=np.zeros((836,9))\n", 413 | "data5=np.zeros((836,9))" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 16, 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "name": "stdout", 430 | "output_type": "stream", 431 | "text": [ 432 | "[2. 0.54 0.44 0.16 1.0905 0.391 0.2295 0.355 1. ]\n" 433 | ] 434 | } 435 | ], 436 | "source": [ 437 | "for i in range(0,103):\n", 438 | " for j in range(0,9):\n", 439 | " if(i<20):\n", 440 | " data1[i][j]=min_train[i][j]\n", 441 | " elif(19\n", 41 | "\n", 54 | "\n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | "
0123
034.059.00.01.0
134.066.09.01.0
238.069.021.01.0
339.066.00.01.0
441.060.023.01.0
\n", 102 | "" 103 | ], 104 | "text/plain": [ 105 | " 0 1 2 3\n", 106 | "0 34.0 59.0 0.0 1.0\n", 107 | "1 34.0 66.0 9.0 1.0\n", 108 | "2 38.0 69.0 21.0 1.0\n", 109 | "3 39.0 66.0 0.0 1.0\n", 110 | "4 41.0 60.0 23.0 1.0" 111 | ] 112 | }, 113 | "execution_count": 2, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "train = pd.read_csv(\"modifiedhaberman.csv\", header=None)\n", 120 | "train.head()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 3, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/html": [ 131 | "
\n", 132 | "\n", 145 | "\n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | "
012
034.059.00.0
134.066.09.0
238.069.021.0
339.066.00.0
441.060.023.0
\n", 187 | "
" 188 | ], 189 | "text/plain": [ 190 | " 0 1 2\n", 191 | "0 34.0 59.0 0.0\n", 192 | "1 34.0 66.0 9.0\n", 193 | "2 38.0 69.0 21.0\n", 194 | "3 39.0 66.0 0.0\n", 195 | "4 41.0 60.0 23.0" 196 | ] 197 | }, 198 | "execution_count": 3, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "features = train.columns[0:3]\n", 205 | "X = train[features]\n", 206 | "y = train[3]\n", 207 | "X.head()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 4, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=0)\n" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 5, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "(244, 3) (62, 3)\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "print(X_train.shape,X_test.shape)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 6, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "X_train=np.asarray(X_train)\n", 243 | "y_train=np.asarray(y_train)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 7, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "def linear_kernel(x1, x2):\n", 253 | " return np.dot(x1, x2)\n", 254 | "\n", 255 | "def polynomial_kernel(x, y, p=3):\n", 256 | " return (1 + np.dot(x, y)) ** p\n", 257 | "\n", 258 | "def gaussian_kernel(x, y, sigma=100.0):\n", 259 | " # print(-linalg.norm(x-y)**2)\n", 260 | " x=np.asarray(x)\n", 261 | " y=np.asarray(y)\n", 262 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n", 263 | "\n", 264 | "def gm(y_predict,y_test):\n", 265 | " test_min=0\n", 266 | " test_max=0\n", 267 | " pred_min=0\n", 268 | " pred_max=0\n", 269 | " y_test=np.asarray(y_test)\n", 270 | " for i in range(0,62):\n", 271 | " if(y_test[i]==1):\n", 272 | " test_min=test_min+1\n", 273 | " else:\n", 274 | " test_max=test_max+1\n", 275 | " print(\"y_test min\",test_min) \n", 276 | " print(\"y_test max\",test_max)\n", 277 | " for i in range(0,62):\n", 278 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n", 279 | " pred_min=pred_min+1\n", 280 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n", 281 | " pred_max=pred_max+1\n", 282 | " print(\"y_pred min\",pred_min) \n", 283 | " print(\"y_pred max\",pred_max)\n", 284 | " se=pred_min/test_min\n", 285 | " sp=pred_max/test_max\n", 286 | " print(se,sp)\n", 287 | " gm=math.sqrt(se*sp)\n", 288 | " print(\"GM\",gm)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "# FSVM using Hyperplane" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 14, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "from cvxopt import matrix\n", 305 | "class HYP_SVM(object):\n", 306 | "\n", 307 | " def __init__(self, kernel=gaussian_kernel, C=None):\n", 308 | " self.kernel = kernel\n", 309 | " self.C = C\n", 310 | " if self.C is not None: self.C = float(self.C)\n", 311 | " def m_func(self, X_train,X_test, y):\n", 312 | " n_samples, n_features = X_train.shape \n", 313 | " nt_samples, nt_features= X_test.shape\n", 314 | " self.K = np.zeros((n_samples, n_samples))\n", 315 | " for i in range(n_samples):\n", 316 | " for j in range(n_samples):\n", 317 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n", 318 | " # print(K[i,j])\n", 319 | " X_train=np.asarray(X_train)\n", 320 | " X_test=np.asarray(X_test)\n", 321 | " K1 = np.zeros((n_samples, n_samples))\n", 322 | " for i in range(n_samples):\n", 323 | " for j in range(n_samples):\n", 324 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n", 325 | " # print(K[i,j])\n", 326 | " print(K1.shape)\n", 327 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n", 328 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 329 | " A = cvxopt.matrix(y, (1,n_samples))\n", 330 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 331 | " b = cvxopt.matrix(0.0)\n", 332 | " #print(P,q,A,b)\n", 333 | " if self.C is None:\n", 334 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 335 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 336 | " \n", 337 | " else:\n", 338 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 339 | " tmp2 = np.identity(n_samples)\n", 340 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 341 | " tmp1 = np.zeros(n_samples)\n", 342 | " tmp2 = np.ones(n_samples) * self.C\n", 343 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 344 | " # solve QP problem\n", 345 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 346 | " print(solution['status'])\n", 347 | " # Lagrange multipliers\n", 348 | " a = np.ravel(solution['x'])\n", 349 | " a_org = np.ravel(solution['x'])\n", 350 | " # Support vectors have non zero lagrange multipliers\n", 351 | " sv = a > 1e-5\n", 352 | " #print(sv.shape)\n", 353 | " ind = np.arange(len(a))[sv]\n", 354 | " self.a_org=a\n", 355 | " self.a = a[sv]\n", 356 | " self.sv = X_train[sv]\n", 357 | " self.sv_y = y[sv]\n", 358 | " self.sv_yorg=y\n", 359 | " self.kernel = gaussian_kernel\n", 360 | " X_train=np.asarray(X_train)\n", 361 | " b = 0\n", 362 | " for n in range(len(self.a)):\n", 363 | " b += self.sv_y[n]\n", 364 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n", 365 | " b /= len(self.a)\n", 366 | " # print(self.a_org[1])\n", 367 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n", 368 | " w_phi=0\n", 369 | " total=0\n", 370 | " for n in range(len(self.a_org)):\n", 371 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n", 372 | " self.d_hyp=np.zeros(n_samples)\n", 373 | " for n in range(len(self.a_org)):\n", 374 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n", 375 | " func=np.zeros((n_samples))\n", 376 | " func=np.asarray(func)\n", 377 | " typ=2\n", 378 | " if(typ==1):\n", 379 | " for i in range(n_samples):\n", 380 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n", 381 | " beta=0.8\n", 382 | " if(typ==2):\n", 383 | " for i in range(n_samples):\n", 384 | " func[i]=2/(1+beta*self.d_hyp[i])\n", 385 | " r_max=26/74\n", 386 | " r_min=1\n", 387 | " self.m=func[0:81]*r_min\n", 388 | " print(self.m.shape)\n", 389 | " self.m=np.append(self.m,func[81:306]*r_max)\n", 390 | " print(self.m.shape)\n", 391 | " \n", 392 | " ##############################################################################\n", 393 | "\n", 394 | "\n", 395 | " def fit(self, X_train,X_test, y):\n", 396 | " self.kernel = gaussian_kernel\n", 397 | " n_samples, n_features = X_train.shape \n", 398 | " nt_samples, nt_features = X_test.shape\n", 399 | " # Gram matrix\n", 400 | "\n", 401 | " print(self.K.shape)\n", 402 | "\n", 403 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n", 404 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 405 | " A = cvxopt.matrix(y, (1,n_samples))\n", 406 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 407 | " b = cvxopt.matrix(0.0)\n", 408 | " #print(P,q,A,b)\n", 409 | " if self.C is None:\n", 410 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 411 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 412 | " \n", 413 | " else:\n", 414 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 415 | " tmp2 = np.identity(n_samples)\n", 416 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 417 | " tmp1 = np.zeros(n_samples)\n", 418 | " tmp2 = np.ones(n_samples) * self.C\n", 419 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 420 | " # solve QP problem\n", 421 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 422 | " print(solution['status'])\n", 423 | " # Lagrange multipliers\n", 424 | " a = np.ravel(solution['x'])\n", 425 | " a_org = np.ravel(solution['x'])\n", 426 | " # Support vectors have non zero lagrange multipliers\n", 427 | " for i in range(n_samples):\n", 428 | " sv=np.logical_or(self.a_org 1e-5)\n", 429 | " #print(sv.shape)\n", 430 | " ind = np.arange(len(a))[sv]\n", 431 | " self.a = a[sv]\n", 432 | " self.sv = X_train[sv]\n", 433 | " self.sv_y = y[sv]\n", 434 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n", 435 | "\n", 436 | " # Intercept\n", 437 | " self.b = 0\n", 438 | " for n in range(len(self.a)):\n", 439 | " self.b += self.sv_y[n]\n", 440 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n", 441 | " self.b /= len(self.a)\n", 442 | " print(self.b)\n", 443 | "\n", 444 | " # Weight vector\n", 445 | " if self.kernel == gaussian_kernel:\n", 446 | " self.w = np.zeros(n_features)\n", 447 | " for n in range(len(self.a)):\n", 448 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n", 449 | " else :\n", 450 | " self.w = None \n", 451 | " \n", 452 | " def project(self, X):\n", 453 | " if self.w is None:\n", 454 | " return np.dot(X, self.w) + self.b\n", 455 | " else:\n", 456 | " y_predict = np.zeros(len(X))\n", 457 | " X=np.asarray(X)\n", 458 | " for i in range(len(X)):\n", 459 | " s = 0\n", 460 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n", 461 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n", 462 | " y_predict[i] = s\n", 463 | " # print(y_predict[i])\n", 464 | " return y_predict + self.b\n", 465 | "\n", 466 | " def predict(self, X):\n", 467 | " return np.sign(self.project(X))" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 15, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "(244, 244)\n", 480 | " pcost dcost gap pres dres\n", 481 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n", 482 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n", 483 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n", 484 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n", 485 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n", 486 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n", 487 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n", 488 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n", 489 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n", 490 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n", 491 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n", 492 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n", 493 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n", 494 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n", 495 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n", 496 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n", 497 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n", 498 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n", 499 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n", 500 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n", 501 | "Optimal solution found.\n", 502 | "optimal\n", 503 | "(81,)\n", 504 | "(244,)\n", 505 | "(244, 244)\n", 506 | " pcost dcost gap pres dres\n", 507 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n", 508 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n", 509 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n", 510 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n", 511 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n", 512 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n", 513 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n", 514 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n", 515 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n", 516 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n", 517 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n", 518 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n", 519 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n", 520 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n", 521 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n", 522 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n", 523 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n", 524 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n", 525 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n", 526 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n", 527 | "Optimal solution found.\n", 528 | "optimal\n", 529 | "-0.7094793824855187\n", 530 | "y_test min 21\n", 531 | "y_test max 41\n", 532 | "y_pred min 5\n", 533 | "y_pred max 35\n", 534 | "0.23809523809523808 0.8536585365853658\n", 535 | "GM 0.45083481733371616\n", 536 | "40 out of 62 predictions correct\n", 537 | "Accuracy 0.6451612903225806\n" 538 | ] 539 | } 540 | ], 541 | "source": [ 542 | "\n", 543 | "if __name__ == \"__main__\":\n", 544 | " import pylab as pl \n", 545 | " def hyp_svm():\n", 546 | " \n", 547 | " clf = HYP_SVM(C=100.0)\n", 548 | " clf.m_func(X_train,X_test,y_train)\n", 549 | " clf.fit(X_train,X_test, y_train)\n", 550 | " y_predict = clf.predict(X_test)\n", 551 | " gm(y_predict,y_test)\n", 552 | " correct = np.sum(y_predict == y_test)\n", 553 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n", 554 | " print(\"Accuracy\",correct/len(y_predict))\n", 555 | "\n", 556 | " hyp_svm() " 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": null, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "metadata": {}, 570 | "outputs": [], 571 | "source": [] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 13, 576 | "metadata": {}, 577 | "outputs": [ 578 | { 579 | "name": "stdout", 580 | "output_type": "stream", 581 | "text": [ 582 | "Overall RBF KERNEL SVM accuracy: 0.6290322580645161\n" 583 | ] 584 | } 585 | ], 586 | "source": [ 587 | "clf_svm = svm.SVC(kernel='rbf', gamma=0.0001, C=100)\n", 588 | "clf_svm.fit(X_train, y_train)\n", 589 | "y_pred_svm = clf_svm.predict(X_test) \n", 590 | "acc_svm = accuracy_score(y_test, y_pred_svm)\n", 591 | "print (\"Overall RBF KERNEL SVM accuracy: \",acc_svm)" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": null, 611 | "metadata": {}, 612 | "outputs": [], 613 | "source": [] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": null, 625 | "metadata": {}, 626 | "outputs": [], 627 | "source": [] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": {}, 633 | "outputs": [], 634 | "source": [] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": null, 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": null, 660 | "metadata": {}, 661 | "outputs": [], 662 | "source": [] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": null, 667 | "metadata": {}, 668 | "outputs": [], 669 | "source": [] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "metadata": {}, 675 | "outputs": [], 676 | "source": [] 677 | }, 678 | { 679 | "cell_type": "markdown", 680 | "metadata": {}, 681 | "source": [ 682 | "# Normal SVM using CVXOPT" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": 8, 688 | "metadata": {}, 689 | "outputs": [], 690 | "source": [ 691 | "from cvxopt import matrix\n", 692 | "class SVM(object):\n", 693 | "\n", 694 | " def __init__(self, kernel=gaussian_kernel, C=None):\n", 695 | " self.kernel = kernel\n", 696 | " self.C = C\n", 697 | " if self.C is not None: self.C = float(self.C)\n", 698 | " def fit(self, X, y):\n", 699 | " self.kernel = gaussian_kernel\n", 700 | " n_samples, n_features = X.shape\n", 701 | " # Gram matrix\n", 702 | " K = np.zeros((n_samples, n_samples))\n", 703 | " for i in range(n_samples):\n", 704 | " for j in range(n_samples):\n", 705 | " K[i,j] = gaussian_kernel(X[i], X[j])\n", 706 | " # print(K[i,j])\n", 707 | " print(K.shape)\n", 708 | "\n", 709 | " P = cvxopt.matrix(np.outer(y,y) * K)\n", 710 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 711 | " A = cvxopt.matrix(y, (1,n_samples))\n", 712 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 713 | " b = cvxopt.matrix(0.0)\n", 714 | " #print(P,q,A,b)\n", 715 | " if self.C is None:\n", 716 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 717 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 718 | " \n", 719 | " else:\n", 720 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 721 | " tmp2 = np.identity(n_samples)\n", 722 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 723 | " tmp1 = np.zeros(n_samples)\n", 724 | " tmp2 = np.ones(n_samples) * self.C\n", 725 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 726 | " # solve QP problem\n", 727 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 728 | " print(solution['status'])\n", 729 | " # Lagrange multipliers\n", 730 | " a = np.ravel(solution['x'])\n", 731 | " # print(a)\n", 732 | " # Support vectors have non zero lagrange multipliers\n", 733 | " sv = a > 1e-5\n", 734 | " print(sv.shape)\n", 735 | " ind = np.arange(len(a))[sv]\n", 736 | " self.a = a[sv]\n", 737 | " self.sv = X[sv]\n", 738 | " self.sv_y = y[sv]\n", 739 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n", 740 | "\n", 741 | " # Intercept\n", 742 | " self.b = 0\n", 743 | " for n in range(len(self.a)):\n", 744 | " self.b += self.sv_y[n]\n", 745 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n", 746 | " self.b /= len(self.a)\n", 747 | "\n", 748 | " # Weight vector\n", 749 | " if self.kernel == gaussian_kernel:\n", 750 | " self.w = np.zeros(n_features)\n", 751 | " for n in range(len(self.a)):\n", 752 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n", 753 | " #print(self.w)\n", 754 | " else:\n", 755 | " self.w = None\n", 756 | "\n", 757 | " def project(self, X):\n", 758 | " if self.w is None:\n", 759 | " return np.dot(X, self.w) + self.b\n", 760 | " else:\n", 761 | " y_predict = np.zeros(len(X))\n", 762 | " X=np.asarray(X)\n", 763 | " for i in range(len(X)):\n", 764 | " s = 0\n", 765 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n", 766 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n", 767 | " y_predict[i] = s\n", 768 | " # print(y_predict[i])\n", 769 | " return y_predict + self.b\n", 770 | "\n", 771 | " def predict(self, X):\n", 772 | " return np.sign(self.project(X))" 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": 9, 778 | "metadata": {}, 779 | "outputs": [ 780 | { 781 | "name": "stdout", 782 | "output_type": "stream", 783 | "text": [ 784 | "(244, 244)\n", 785 | " pcost dcost gap pres dres\n", 786 | " 0: 8.9609e+03 -4.9961e+05 5e+05 1e-13 2e-13\n", 787 | " 1: -2.9301e+03 -8.8506e+04 9e+04 3e-13 1e-13\n", 788 | " 2: -7.7538e+03 -2.2899e+04 2e+04 6e-14 1e-13\n", 789 | " 3: -9.3958e+03 -1.5271e+04 6e+03 1e-13 2e-13\n", 790 | " 4: -9.9982e+03 -1.3503e+04 4e+03 3e-13 2e-13\n", 791 | " 5: -1.0390e+04 -1.2500e+04 2e+03 1e-13 2e-13\n", 792 | " 6: -1.0647e+04 -1.1812e+04 1e+03 8e-14 2e-13\n", 793 | " 7: -1.0823e+04 -1.1430e+04 6e+02 3e-13 2e-13\n", 794 | " 8: -1.0890e+04 -1.1286e+04 4e+02 2e-13 2e-13\n", 795 | " 9: -1.0957e+04 -1.1157e+04 2e+02 3e-13 2e-13\n", 796 | "10: -1.0999e+04 -1.1094e+04 1e+02 3e-13 2e-13\n", 797 | "11: -1.1013e+04 -1.1065e+04 5e+01 3e-13 2e-13\n", 798 | "12: -1.1026e+04 -1.1045e+04 2e+01 2e-13 3e-13\n", 799 | "13: -1.1029e+04 -1.1040e+04 1e+01 3e-13 2e-13\n", 800 | "14: -1.1030e+04 -1.1039e+04 1e+01 1e-13 2e-13\n", 801 | "15: -1.1033e+04 -1.1036e+04 3e+00 3e-13 3e-13\n", 802 | "16: -1.1034e+04 -1.1035e+04 1e+00 9e-14 3e-13\n", 803 | "17: -1.1034e+04 -1.1034e+04 4e-01 2e-13 2e-13\n", 804 | "18: -1.1034e+04 -1.1034e+04 6e-02 2e-13 3e-13\n", 805 | "19: -1.1034e+04 -1.1034e+04 9e-04 1e-13 3e-13\n", 806 | "Optimal solution found.\n", 807 | "optimal\n", 808 | "(244,)\n", 809 | "206 support vectors out of 244 points\n", 810 | "y_test min 21\n", 811 | "y_test max 41\n", 812 | "y_pred min 5\n", 813 | "y_pred max 35\n", 814 | "0.23809523809523808 0.8536585365853658\n", 815 | "GM 0.45083481733371616\n", 816 | "40 out of 62 predictions correct\n", 817 | "Accuracy 0.6451612903225806\n" 818 | ] 819 | } 820 | ], 821 | "source": [ 822 | "\n", 823 | "if __name__ == \"__main__\":\n", 824 | " import pylab as pl \n", 825 | " def normal_svm():\n", 826 | " \n", 827 | " clf = SVM(C=100.0)\n", 828 | " clf.fit(X_train, y_train)\n", 829 | " y_predict = clf.predict(X_test)\n", 830 | " gm(y_predict,y_test)\n", 831 | " correct = np.sum(y_predict == y_test)\n", 832 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n", 833 | " print(\"Accuracy\",correct/len(y_predict))\n", 834 | "\n", 835 | " normal_svm() " 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": null, 841 | "metadata": {}, 842 | "outputs": [], 843 | "source": [] 844 | } 845 | ], 846 | "metadata": { 847 | "kernelspec": { 848 | "display_name": "Python 3", 849 | "language": "python", 850 | "name": "python3" 851 | }, 852 | "language_info": { 853 | "codemirror_mode": { 854 | "name": "ipython", 855 | "version": 3 856 | }, 857 | "file_extension": ".py", 858 | "mimetype": "text/x-python", 859 | "name": "python", 860 | "nbconvert_exporter": "python", 861 | "pygments_lexer": "ipython3", 862 | "version": "3.6.5" 863 | } 864 | }, 865 | "nbformat": 4, 866 | "nbformat_minor": 2 867 | } 868 | -------------------------------------------------------------------------------- /FUZZY SVM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "C:\\Users\\HP\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import numpy as np\n", 19 | "from numpy import linalg\n", 20 | "import cvxopt\n", 21 | "import cvxopt.solvers\n", 22 | "import pandas as pd\n", 23 | "from sklearn import cross_validation\n", 24 | "from sklearn.metrics import classification_report\n", 25 | "from sklearn.metrics import accuracy_score\n", 26 | "from cvxopt import matrix as cvxopt_matrix\n", 27 | "from cvxopt import solvers as cvxopt_solvers\n", 28 | "from sklearn import svm\n", 29 | "import math " 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/html": [ 40 | "
\n", 41 | "\n", 54 | "\n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | "
012345678
06.0148.072.035.00.033.60.62750.01.0
18.0183.064.00.00.023.30.67232.01.0
20.0137.040.035.0168.043.12.28833.01.0
33.078.050.032.088.031.00.24826.01.0
42.0197.070.045.0543.030.50.15853.01.0
\n", 132 | "
" 133 | ], 134 | "text/plain": [ 135 | " 0 1 2 3 4 5 6 7 8\n", 136 | "0 6.0 148.0 72.0 35.0 0.0 33.6 0.627 50.0 1.0\n", 137 | "1 8.0 183.0 64.0 0.0 0.0 23.3 0.672 32.0 1.0\n", 138 | "2 0.0 137.0 40.0 35.0 168.0 43.1 2.288 33.0 1.0\n", 139 | "3 3.0 78.0 50.0 32.0 88.0 31.0 0.248 26.0 1.0\n", 140 | "4 2.0 197.0 70.0 45.0 543.0 30.5 0.158 53.0 1.0" 141 | ] 142 | }, 143 | "execution_count": 2, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "train = pd.read_csv(\"modifiedpima.csv\", header=None)\n", 150 | "train.head()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 3, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 175 | "\n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | "
01234567
06.0148.072.035.00.033.60.62750.0
18.0183.064.00.00.023.30.67232.0
20.0137.040.035.0168.043.12.28833.0
33.078.050.032.088.031.00.24826.0
42.0197.070.045.0543.030.50.15853.0
\n", 247 | "
" 248 | ], 249 | "text/plain": [ 250 | " 0 1 2 3 4 5 6 7\n", 251 | "0 6.0 148.0 72.0 35.0 0.0 33.6 0.627 50.0\n", 252 | "1 8.0 183.0 64.0 0.0 0.0 23.3 0.672 32.0\n", 253 | "2 0.0 137.0 40.0 35.0 168.0 43.1 2.288 33.0\n", 254 | "3 3.0 78.0 50.0 32.0 88.0 31.0 0.248 26.0\n", 255 | "4 2.0 197.0 70.0 45.0 543.0 30.5 0.158 53.0" 256 | ] 257 | }, 258 | "execution_count": 3, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "features = train.columns[0:8]\n", 265 | "X = train[features]\n", 266 | "y = train[8]\n", 267 | "X.head()" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 53, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=10)\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 54, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "(614, 8) (154, 8)\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "print(X_train.shape,X_test.shape)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 55, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "X_train=np.asarray(X_train)\n", 303 | "y_train=np.asarray(y_train)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 56, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "def linear_kernel(x1, x2):\n", 313 | " return np.dot(x1, x2)\n", 314 | "\n", 315 | "def polynomial_kernel(x, y, p=3):\n", 316 | " return (1 + np.dot(x, y)) ** p\n", 317 | "\n", 318 | "def gaussian_kernel(x, y, sigma=100.0):\n", 319 | " # print(-linalg.norm(x-y)**2)\n", 320 | " x=np.asarray(x)\n", 321 | " y=np.asarray(y)\n", 322 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n", 323 | "\n", 324 | "def gm(y_predict,y_test):\n", 325 | " test_min=0\n", 326 | " test_max=0\n", 327 | " pred_min=0\n", 328 | " pred_max=0\n", 329 | " y_test=np.asarray(y_test)\n", 330 | " for i in range(0,154):\n", 331 | " if(y_test[i]==1):\n", 332 | " test_min=test_min+1\n", 333 | " else:\n", 334 | " test_max=test_max+1\n", 335 | " print(\"y_test min\",test_min) \n", 336 | " print(\"y_test max\",test_max)\n", 337 | " for i in range(0,154):\n", 338 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n", 339 | " pred_min=pred_min+1\n", 340 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n", 341 | " pred_max=pred_max+1\n", 342 | " print(\"y_pred min\",pred_min) \n", 343 | " print(\"y_pred max\",pred_max)\n", 344 | " se=pred_min/test_min\n", 345 | " sp=pred_max/test_max\n", 346 | " print(se,sp)\n", 347 | " gm=math.sqrt(se*sp)\n", 348 | " print(\"GM\",gm)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "# FSVM using Hyperplane" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 68, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "from cvxopt import matrix\n", 365 | "class HYP_SVM(object):\n", 366 | "\n", 367 | " def __init__(self, kernel=gaussian_kernel, C=None):\n", 368 | " self.kernel = kernel\n", 369 | " self.C = C\n", 370 | " if self.C is not None: self.C = float(self.C)\n", 371 | " def m_func(self, X_train,X_test, y):\n", 372 | " n_samples, n_features = X_train.shape \n", 373 | " nt_samples, nt_features= X_test.shape\n", 374 | " self.K = np.zeros((n_samples, n_samples))\n", 375 | " for i in range(n_samples):\n", 376 | " for j in range(n_samples):\n", 377 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n", 378 | " # print(K[i,j])\n", 379 | " X_train=np.asarray(X_train)\n", 380 | " X_test=np.asarray(X_test)\n", 381 | " K1 = np.zeros((n_samples, n_samples))\n", 382 | " for i in range(n_samples):\n", 383 | " for j in range(n_samples):\n", 384 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n", 385 | " # print(K[i,j])\n", 386 | " print(K1.shape)\n", 387 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n", 388 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 389 | " A = cvxopt.matrix(y, (1,n_samples))\n", 390 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 391 | " b = cvxopt.matrix(0.0)\n", 392 | " #print(P,q,A,b)\n", 393 | " if self.C is None:\n", 394 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 395 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 396 | " \n", 397 | " else:\n", 398 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 399 | " tmp2 = np.identity(n_samples)\n", 400 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 401 | " tmp1 = np.zeros(n_samples)\n", 402 | " tmp2 = np.ones(n_samples) * self.C\n", 403 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 404 | " # solve QP problem\n", 405 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 406 | " print(solution['status'])\n", 407 | " # Lagrange multipliers\n", 408 | " a = np.ravel(solution['x'])\n", 409 | " a_org = np.ravel(solution['x'])\n", 410 | " # Support vectors have non zero lagrange multipliers\n", 411 | " sv = a > 1e-5\n", 412 | " #print(sv.shape)\n", 413 | " ind = np.arange(len(a))[sv]\n", 414 | " self.a_org=a\n", 415 | " self.a = a[sv]\n", 416 | " self.sv = X_train[sv]\n", 417 | " self.sv_y = y[sv]\n", 418 | " self.sv_yorg=y\n", 419 | " self.kernel = gaussian_kernel\n", 420 | " X_train=np.asarray(X_train)\n", 421 | " b = 0\n", 422 | " for n in range(len(self.a)):\n", 423 | " b += self.sv_y[n]\n", 424 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n", 425 | " b /= len(self.a)\n", 426 | " # print(self.a_org[1])\n", 427 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n", 428 | " w_phi=0\n", 429 | " total=0\n", 430 | " for n in range(len(self.a_org)):\n", 431 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n", 432 | " self.d_hyp=np.zeros(n_samples)\n", 433 | " for n in range(len(self.a_org)):\n", 434 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n", 435 | " func=np.zeros((n_samples))\n", 436 | " func=np.asarray(func)\n", 437 | " typ=2\n", 438 | " if(typ==1):\n", 439 | " for i in range(n_samples):\n", 440 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n", 441 | " beta=0.2\n", 442 | " if(typ==2):\n", 443 | " for i in range(n_samples):\n", 444 | " func[i]=2/(1+beta*self.d_hyp[i])\n", 445 | " r_max=268/500\n", 446 | " r_min=1\n", 447 | " self.m=func[0:268]*r_min\n", 448 | " print(self.m.shape)\n", 449 | " self.m=np.append(self.m,func[268:768]*r_max)\n", 450 | " print(self.m.shape)\n", 451 | " \n", 452 | " ##############################################################################\n", 453 | "\n", 454 | "\n", 455 | " def fit(self, X_train,X_test, y):\n", 456 | " self.kernel = gaussian_kernel\n", 457 | " n_samples, n_features = X_train.shape \n", 458 | " nt_samples, nt_features = X_test.shape\n", 459 | " # Gram matrix\n", 460 | "\n", 461 | " print(self.K.shape)\n", 462 | "\n", 463 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n", 464 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 465 | " A = cvxopt.matrix(y, (1,n_samples))\n", 466 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 467 | " b = cvxopt.matrix(0.0)\n", 468 | " #print(P,q,A,b)\n", 469 | " if self.C is None:\n", 470 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 471 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 472 | " \n", 473 | " else:\n", 474 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 475 | " tmp2 = np.identity(n_samples)\n", 476 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 477 | " tmp1 = np.zeros(n_samples)\n", 478 | " tmp2 = np.ones(n_samples) * self.C\n", 479 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 480 | " # solve QP problem\n", 481 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 482 | " print(solution['status'])\n", 483 | " # Lagrange multipliers\n", 484 | " a = np.ravel(solution['x'])\n", 485 | " a_org = np.ravel(solution['x'])\n", 486 | " # Support vectors have non zero lagrange multipliers\n", 487 | " for i in range(n_samples):\n", 488 | " sv=np.logical_or(self.a_org 1e-5)\n", 489 | " #print(sv.shape)\n", 490 | " ind = np.arange(len(a))[sv]\n", 491 | " self.a = a[sv]\n", 492 | " self.sv = X_train[sv]\n", 493 | " self.sv_y = y[sv]\n", 494 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n", 495 | "\n", 496 | " # Intercept\n", 497 | " self.b = 0\n", 498 | " for n in range(len(self.a)):\n", 499 | " self.b += self.sv_y[n]\n", 500 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n", 501 | " self.b /= len(self.a)\n", 502 | " print(self.b)\n", 503 | "\n", 504 | " # Weight vector\n", 505 | " if self.kernel == gaussian_kernel:\n", 506 | " self.w = np.zeros(n_features)\n", 507 | " for n in range(len(self.a)):\n", 508 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n", 509 | " else :\n", 510 | " self.w = None \n", 511 | " \n", 512 | " def project(self, X):\n", 513 | " if self.w is None:\n", 514 | " return np.dot(X, self.w) + self.b\n", 515 | " else:\n", 516 | " y_predict = np.zeros(len(X))\n", 517 | " X=np.asarray(X)\n", 518 | " for i in range(len(X)):\n", 519 | " s = 0\n", 520 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n", 521 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n", 522 | " y_predict[i] = s\n", 523 | " # print(y_predict[i])\n", 524 | " return y_predict + self.b\n", 525 | "\n", 526 | " def predict(self, X):\n", 527 | " return np.sign(self.project(X))" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 69, 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "name": "stdout", 537 | "output_type": "stream", 538 | "text": [ 539 | "(614, 614)\n", 540 | " pcost dcost gap pres dres\n", 541 | " 0: 3.1997e+04 -2.6998e+06 3e+06 1e-01 3e-13\n", 542 | " 1: 7.0493e+02 -3.7645e+05 4e+05 1e-02 2e-13\n", 543 | " 2: -1.5080e+04 -8.7294e+04 7e+04 2e-03 2e-13\n", 544 | " 3: -2.0698e+04 -4.2280e+04 2e+04 3e-04 2e-13\n", 545 | " 4: -2.3448e+04 -3.5410e+04 1e+04 2e-04 3e-13\n", 546 | " 5: -2.4919e+04 -3.1494e+04 7e+03 7e-05 3e-13\n", 547 | " 6: -2.5867e+04 -2.9015e+04 3e+03 3e-05 3e-13\n", 548 | " 7: -2.6304e+04 -2.8026e+04 2e+03 1e-05 3e-13\n", 549 | " 8: -2.6625e+04 -2.7314e+04 7e+02 2e-06 4e-13\n", 550 | " 9: -2.6775e+04 -2.7035e+04 3e+02 7e-07 4e-13\n", 551 | "10: -2.6855e+04 -2.6900e+04 4e+01 4e-08 4e-13\n", 552 | "11: -2.6872e+04 -2.6874e+04 2e+00 1e-09 4e-13\n", 553 | "12: -2.6873e+04 -2.6873e+04 4e-02 2e-11 4e-13\n", 554 | "13: -2.6873e+04 -2.6873e+04 6e-04 2e-12 4e-13\n", 555 | "Optimal solution found.\n", 556 | "optimal\n", 557 | "(268,)\n", 558 | "(614,)\n", 559 | "(614, 614)\n", 560 | " pcost dcost gap pres dres\n", 561 | " 0: 3.1997e+04 -2.6998e+06 3e+06 1e-01 3e-13\n", 562 | " 1: 7.0493e+02 -3.7645e+05 4e+05 1e-02 2e-13\n", 563 | " 2: -1.5080e+04 -8.7294e+04 7e+04 2e-03 2e-13\n", 564 | " 3: -2.0698e+04 -4.2280e+04 2e+04 3e-04 2e-13\n", 565 | " 4: -2.3448e+04 -3.5410e+04 1e+04 2e-04 3e-13\n", 566 | " 5: -2.4919e+04 -3.1494e+04 7e+03 7e-05 3e-13\n", 567 | " 6: -2.5867e+04 -2.9015e+04 3e+03 3e-05 3e-13\n", 568 | " 7: -2.6304e+04 -2.8026e+04 2e+03 1e-05 3e-13\n", 569 | " 8: -2.6625e+04 -2.7314e+04 7e+02 2e-06 4e-13\n", 570 | " 9: -2.6775e+04 -2.7035e+04 3e+02 7e-07 4e-13\n", 571 | "10: -2.6855e+04 -2.6900e+04 4e+01 4e-08 4e-13\n", 572 | "11: -2.6872e+04 -2.6874e+04 2e+00 1e-09 4e-13\n", 573 | "12: -2.6873e+04 -2.6873e+04 4e-02 2e-11 4e-13\n", 574 | "13: -2.6873e+04 -2.6873e+04 6e-04 2e-12 4e-13\n", 575 | "Optimal solution found.\n", 576 | "optimal\n", 577 | "-1.0325312375429936\n", 578 | "y_test min 53\n", 579 | "y_test max 101\n", 580 | "y_pred min 30\n", 581 | "y_pred max 89\n", 582 | "0.5660377358490566 0.8811881188118812\n", 583 | "GM 0.7062476390256938\n", 584 | "119 out of 154 predictions correct\n", 585 | "Accuracy 0.7727272727272727\n" 586 | ] 587 | } 588 | ], 589 | "source": [ 590 | "\n", 591 | "if __name__ == \"__main__\":\n", 592 | " import pylab as pl \n", 593 | " def hyp_svm():\n", 594 | " \n", 595 | " clf = HYP_SVM(C=100.0)\n", 596 | " typ=2\n", 597 | " clf.m_func(X_train,X_test,y_train)\n", 598 | " clf.fit(X_train,X_test, y_train)\n", 599 | " y_predict = clf.predict(X_test)\n", 600 | " gm(y_predict,y_test)\n", 601 | " correct = np.sum(y_predict == y_test)\n", 602 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n", 603 | " print(\"Accuracy\",correct/len(y_predict))\n", 604 | "\n", 605 | " hyp_svm() " 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": null, 611 | "metadata": {}, 612 | "outputs": [], 613 | "source": [] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": null, 625 | "metadata": {}, 626 | "outputs": [], 627 | "source": [] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": {}, 633 | "outputs": [], 634 | "source": [] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": null, 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": null, 660 | "metadata": {}, 661 | "outputs": [], 662 | "source": [] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": null, 667 | "metadata": {}, 668 | "outputs": [], 669 | "source": [] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "metadata": {}, 675 | "outputs": [], 676 | "source": [] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": {}, 682 | "outputs": [], 683 | "source": [] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": null, 688 | "metadata": {}, 689 | "outputs": [], 690 | "source": [] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": null, 695 | "metadata": {}, 696 | "outputs": [], 697 | "source": [] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": null, 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": null, 709 | "metadata": {}, 710 | "outputs": [], 711 | "source": [] 712 | }, 713 | { 714 | "cell_type": "markdown", 715 | "metadata": {}, 716 | "source": [ 717 | "# Normal SVM using CVXOPT" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": 145, 723 | "metadata": {}, 724 | "outputs": [], 725 | "source": [ 726 | "from cvxopt import matrix\n", 727 | "class SVM(object):\n", 728 | "\n", 729 | " def __init__(self, kernel=gaussian_kernel, C=None):\n", 730 | " self.kernel = kernel\n", 731 | " self.C = C\n", 732 | " if self.C is not None: self.C = float(self.C)\n", 733 | " def fit(self, X, y):\n", 734 | " self.kernel = gaussian_kernel\n", 735 | " n_samples, n_features = X.shape\n", 736 | " # Gram matrix\n", 737 | " K = np.zeros((n_samples, n_samples))\n", 738 | " for i in range(n_samples):\n", 739 | " for j in range(n_samples):\n", 740 | " K[i,j] = gaussian_kernel(X[i], X[j])\n", 741 | " # print(K[i,j])\n", 742 | " print(K.shape)\n", 743 | "\n", 744 | " P = cvxopt.matrix(np.outer(y,y) * K)\n", 745 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 746 | " A = cvxopt.matrix(y, (1,n_samples))\n", 747 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 748 | " b = cvxopt.matrix(0.0)\n", 749 | " #print(P,q,A,b)\n", 750 | " if self.C is None:\n", 751 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 752 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 753 | " \n", 754 | " else:\n", 755 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 756 | " tmp2 = np.identity(n_samples)\n", 757 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 758 | " tmp1 = np.zeros(n_samples)\n", 759 | " tmp2 = np.ones(n_samples) * self.C\n", 760 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 761 | " # solve QP problem\n", 762 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 763 | " print(solution['status'])\n", 764 | " # Lagrange multipliers\n", 765 | " a = np.ravel(solution['x'])\n", 766 | " # print(a)\n", 767 | " # Support vectors have non zero lagrange multipliers\n", 768 | " sv = a > 1e-5\n", 769 | " print(sv.shape)\n", 770 | " ind = np.arange(len(a))[sv]\n", 771 | " self.a = a[sv]\n", 772 | " self.sv = X[sv]\n", 773 | " self.sv_y = y[sv]\n", 774 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n", 775 | "\n", 776 | " # Intercept\n", 777 | " self.b = 0\n", 778 | " for n in range(len(self.a)):\n", 779 | " self.b += self.sv_y[n]\n", 780 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n", 781 | " self.b /= len(self.a)\n", 782 | "\n", 783 | " # Weight vector\n", 784 | " if self.kernel == gaussian_kernel:\n", 785 | " self.w = np.zeros(n_features)\n", 786 | " for n in range(len(self.a)):\n", 787 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n", 788 | " #print(self.w)\n", 789 | " else:\n", 790 | " self.w = None\n", 791 | "\n", 792 | " def project(self, X):\n", 793 | " if self.w is None:\n", 794 | " return np.dot(X, self.w) + self.b\n", 795 | " else:\n", 796 | " y_predict = np.zeros(len(X))\n", 797 | " X=np.asarray(X)\n", 798 | " for i in range(len(X)):\n", 799 | " s = 0\n", 800 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n", 801 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n", 802 | " y_predict[i] = s\n", 803 | " # print(y_predict[i])\n", 804 | " return y_predict + self.b\n", 805 | "\n", 806 | " def predict(self, X):\n", 807 | " return np.sign(self.project(X))" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": 146, 813 | "metadata": {}, 814 | "outputs": [ 815 | { 816 | "name": "stdout", 817 | "output_type": "stream", 818 | "text": [ 819 | "(614, 614)\n", 820 | " pcost dcost gap pres dres\n", 821 | " 0: 4.0124e+04 -2.8615e+06 4e+06 2e-01 3e-13\n", 822 | " 1: 1.0960e+04 -3.9278e+05 4e+05 1e-02 3e-13\n", 823 | " 2: -1.1678e+04 -1.0938e+05 1e+05 2e-03 2e-13\n", 824 | " 3: -1.8013e+04 -4.6040e+04 3e+04 4e-04 3e-13\n", 825 | " 4: -2.0703e+04 -3.7635e+04 2e+04 2e-04 3e-13\n", 826 | " 5: -2.2896e+04 -3.0055e+04 7e+03 7e-05 3e-13\n", 827 | " 6: -2.3582e+04 -2.8515e+04 5e+03 4e-05 3e-13\n", 828 | " 7: -2.4334e+04 -2.6705e+04 2e+03 2e-05 3e-13\n", 829 | " 8: -2.4847e+04 -2.5598e+04 8e+02 3e-06 4e-13\n", 830 | " 9: -2.5003e+04 -2.5317e+04 3e+02 1e-06 3e-13\n", 831 | "10: -2.5063e+04 -2.5207e+04 1e+02 2e-12 4e-13\n", 832 | "11: -2.5108e+04 -2.5145e+04 4e+01 1e-12 4e-13\n", 833 | "12: -2.5120e+04 -2.5128e+04 8e+00 7e-13 4e-13\n", 834 | "13: -2.5124e+04 -2.5124e+04 2e-01 5e-13 4e-13\n", 835 | "14: -2.5124e+04 -2.5124e+04 5e-03 5e-13 4e-13\n", 836 | "Optimal solution found.\n", 837 | "optimal\n", 838 | "(614,)\n", 839 | "404 support vectors out of 614 points\n", 840 | "y_test min 54\n", 841 | "y_test max 100\n", 842 | "y_pred min 29\n", 843 | "y_pred max 78\n", 844 | "0.5370370370370371 0.78\n", 845 | "GM 0.6472162612982533\n", 846 | "107 out of 154 predictions correct\n", 847 | "Accuracy 0.6948051948051948\n" 848 | ] 849 | } 850 | ], 851 | "source": [ 852 | "\n", 853 | "if __name__ == \"__main__\":\n", 854 | " import pylab as pl \n", 855 | " def normal_svm():\n", 856 | " \n", 857 | " clf = SVM(C=100.0)\n", 858 | " clf.fit(X_train, y_train)\n", 859 | " y_predict = clf.predict(X_test)\n", 860 | " gm(y_predict,y_test)\n", 861 | " correct = np.sum(y_predict == y_test)\n", 862 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n", 863 | " print(\"Accuracy\",correct/len(y_predict))\n", 864 | "\n", 865 | " normal_svm() " 866 | ] 867 | }, 868 | { 869 | "cell_type": "code", 870 | "execution_count": null, 871 | "metadata": {}, 872 | "outputs": [], 873 | "source": [] 874 | } 875 | ], 876 | "metadata": { 877 | "kernelspec": { 878 | "display_name": "Python 3", 879 | "language": "python", 880 | "name": "python3" 881 | }, 882 | "language_info": { 883 | "codemirror_mode": { 884 | "name": "ipython", 885 | "version": 3 886 | }, 887 | "file_extension": ".py", 888 | "mimetype": "text/x-python", 889 | "name": "python", 890 | "nbconvert_exporter": "python", 891 | "pygments_lexer": "ipython3", 892 | "version": "3.6.5" 893 | } 894 | }, 895 | "nbformat": 4, 896 | "nbformat_minor": 2 897 | } 898 | -------------------------------------------------------------------------------- /FUZZY SVM Pageblock.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "C:\\Users\\HP\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import numpy as np\n", 19 | "from numpy import linalg\n", 20 | "import cvxopt\n", 21 | "import cvxopt.solvers\n", 22 | "import pandas as pd\n", 23 | "from sklearn import cross_validation\n", 24 | "from sklearn.metrics import classification_report\n", 25 | "from sklearn.metrics import accuracy_score\n", 26 | "from cvxopt import matrix as cvxopt_matrix\n", 27 | "from cvxopt import solvers as cvxopt_solvers\n", 28 | "from sklearn import svm\n", 29 | "import math " 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/html": [ 40 | "
\n", 41 | "\n", 54 | "\n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | "
012345678910
048.0402.019296.08.3750.0550.0944.131058.01814.0256.01.0
1304.0463.0140752.01.5230.0630.1213.918898.017081.02273.01.0
2306.0465.0142290.01.5200.0550.1232.697861.017452.02925.01.0
345.079.03555.01.7560.0870.1954.81308.0693.064.01.0
4311.0463.0143993.01.4890.0880.1603.9312631.023092.03212.01.0
\n", 144 | "
" 145 | ], 146 | "text/plain": [ 147 | " 0 1 2 3 4 5 6 7 8 \\\n", 148 | "0 48.0 402.0 19296.0 8.375 0.055 0.094 4.13 1058.0 1814.0 \n", 149 | "1 304.0 463.0 140752.0 1.523 0.063 0.121 3.91 8898.0 17081.0 \n", 150 | "2 306.0 465.0 142290.0 1.520 0.055 0.123 2.69 7861.0 17452.0 \n", 151 | "3 45.0 79.0 3555.0 1.756 0.087 0.195 4.81 308.0 693.0 \n", 152 | "4 311.0 463.0 143993.0 1.489 0.088 0.160 3.93 12631.0 23092.0 \n", 153 | "\n", 154 | " 9 10 \n", 155 | "0 256.0 1.0 \n", 156 | "1 2273.0 1.0 \n", 157 | "2 2925.0 1.0 \n", 158 | "3 64.0 1.0 \n", 159 | "4 3212.0 1.0 " 160 | ] 161 | }, 162 | "execution_count": 2, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "train = pd.read_csv(\"modifiedpage.csv\", header=None)\n", 169 | "train.head()" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 3, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/html": [ 180 | "
\n", 181 | "\n", 194 | "\n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | "
0123456789
048.0402.019296.08.3750.0550.0944.131058.01814.0256.0
1304.0463.0140752.01.5230.0630.1213.918898.017081.02273.0
2306.0465.0142290.01.5200.0550.1232.697861.017452.02925.0
345.079.03555.01.7560.0870.1954.81308.0693.064.0
4311.0463.0143993.01.4890.0880.1603.9312631.023092.03212.0
\n", 278 | "
" 279 | ], 280 | "text/plain": [ 281 | " 0 1 2 3 4 5 6 7 8 9\n", 282 | "0 48.0 402.0 19296.0 8.375 0.055 0.094 4.13 1058.0 1814.0 256.0\n", 283 | "1 304.0 463.0 140752.0 1.523 0.063 0.121 3.91 8898.0 17081.0 2273.0\n", 284 | "2 306.0 465.0 142290.0 1.520 0.055 0.123 2.69 7861.0 17452.0 2925.0\n", 285 | "3 45.0 79.0 3555.0 1.756 0.087 0.195 4.81 308.0 693.0 64.0\n", 286 | "4 311.0 463.0 143993.0 1.489 0.088 0.160 3.93 12631.0 23092.0 3212.0" 287 | ] 288 | }, 289 | "execution_count": 3, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "features = train.columns[0:10]\n", 296 | "X = train[features]\n", 297 | "y = train[10]\n", 298 | "X.head()" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 4, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=40)\n" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 5, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "name": "stdout", 317 | "output_type": "stream", 318 | "text": [ 319 | "(4378, 10) (1095, 10)\n" 320 | ] 321 | } 322 | ], 323 | "source": [ 324 | "print(X_train.shape,X_test.shape)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 6, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "X_train=np.asarray(X_train)\n", 334 | "y_train=np.asarray(y_train)" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 17, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "def linear_kernel(x1, x2):\n", 344 | " return np.dot(x1, x2)\n", 345 | "\n", 346 | "def polynomial_kernel(x, y, p=3):\n", 347 | " return (1 + np.dot(x, y)) ** p\n", 348 | "\n", 349 | "def gaussian_kernel(x, y, sigma=90.0):\n", 350 | " # print(-linalg.norm(x-y)**2)\n", 351 | " x=np.asarray(x)\n", 352 | " y=np.asarray(y)\n", 353 | " return np.exp((-linalg.norm(x-y)**2) / (2 * (sigma ** 2)))\n", 354 | "\n", 355 | "def gm(y_predict,y_test):\n", 356 | " test_min=0\n", 357 | " test_max=0\n", 358 | " pred_min=0\n", 359 | " pred_max=0\n", 360 | " y_test=np.asarray(y_test)\n", 361 | " for i in range(0,1095):\n", 362 | " if(y_test[i]==1):\n", 363 | " test_min=test_min+1\n", 364 | " else:\n", 365 | " test_max=test_max+1\n", 366 | " print(\"y_test min\",test_min) \n", 367 | " print(\"y_test max\",test_max)\n", 368 | " for i in range(0,1095):\n", 369 | " if(y_predict[i]==1 and y_predict[i]==y_test[i]):\n", 370 | " pred_min=pred_min+1\n", 371 | " elif(y_predict[i]==-1 and y_predict[i]==y_test[i]):\n", 372 | " pred_max=pred_max+1\n", 373 | " print(\"y_pred min\",pred_min) \n", 374 | " print(\"y_pred max\",pred_max)\n", 375 | " se=pred_min/test_min\n", 376 | " sp=pred_max/test_max\n", 377 | " print(se,sp)\n", 378 | " gm=math.sqrt(se*sp)\n", 379 | " print(\"GM\",gm)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "# FSVM using Hyperplane" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 18, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "from cvxopt import matrix\n", 396 | "class HYP_SVM(object):\n", 397 | "\n", 398 | " def __init__(self, kernel=gaussian_kernel, C=None):\n", 399 | " self.kernel = kernel\n", 400 | " self.C = C\n", 401 | " if self.C is not None: self.C = float(self.C)\n", 402 | " def m_func(self, X_train,X_test, y):\n", 403 | " n_samples, n_features = X_train.shape \n", 404 | " nt_samples, nt_features= X_test.shape\n", 405 | " self.K = np.zeros((n_samples, n_samples))\n", 406 | " for i in range(n_samples):\n", 407 | " for j in range(n_samples):\n", 408 | " self.K[i,j] = gaussian_kernel(X_train[i], X_train[j])\n", 409 | " # print(K[i,j])\n", 410 | " X_train=np.asarray(X_train)\n", 411 | " X_test=np.asarray(X_test)\n", 412 | " K1 = np.zeros((n_samples, n_samples))\n", 413 | " for i in range(n_samples):\n", 414 | " for j in range(n_samples):\n", 415 | " K1[i,j] = gaussian_kernel(X_train[i], X_train[j])\n", 416 | " # print(K[i,j])\n", 417 | " print(K1.shape)\n", 418 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n", 419 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 420 | " A = cvxopt.matrix(y, (1,n_samples))\n", 421 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 422 | " b = cvxopt.matrix(0.0)\n", 423 | " #print(P,q,A,b)\n", 424 | " if self.C is None:\n", 425 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 426 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 427 | " \n", 428 | " else:\n", 429 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 430 | " tmp2 = np.identity(n_samples)\n", 431 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 432 | " tmp1 = np.zeros(n_samples)\n", 433 | " tmp2 = np.ones(n_samples) * self.C\n", 434 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 435 | " # solve QP problem\n", 436 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 437 | " print(solution['status'])\n", 438 | " # Lagrange multipliers\n", 439 | " a = np.ravel(solution['x'])\n", 440 | " a_org = np.ravel(solution['x'])\n", 441 | " # Support vectors have non zero lagrange multipliers\n", 442 | " sv = a > 1e-5\n", 443 | " #print(sv.shape)\n", 444 | " ind = np.arange(len(a))[sv]\n", 445 | " self.a_org=a\n", 446 | " self.a = a[sv]\n", 447 | " self.sv = X_train[sv]\n", 448 | " self.sv_y = y[sv]\n", 449 | " self.sv_yorg=y\n", 450 | " self.kernel = gaussian_kernel\n", 451 | " X_train=np.asarray(X_train)\n", 452 | " b = 0\n", 453 | " for n in range(len(self.a)):\n", 454 | " b += self.sv_y[n]\n", 455 | " b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n", 456 | " b /= len(self.a)\n", 457 | " # print(self.a_org[1])\n", 458 | " #print(self.a_org.shape,self.sv_yorg.shape,K.shape)\n", 459 | " w_phi=0\n", 460 | " total=0\n", 461 | " for n in range(len(self.a_org)):\n", 462 | " w_phi = self.a_org[n] * self.sv_yorg[n] * K1[n] \n", 463 | " self.d_hyp=np.zeros(n_samples)\n", 464 | " for n in range(len(self.a_org)):\n", 465 | " self.d_hyp += self.sv_yorg[n]*(w_phi+b)\n", 466 | " func=np.zeros((n_samples))\n", 467 | " func=np.asarray(func)\n", 468 | " typ=1\n", 469 | " if(typ==1):\n", 470 | " for i in range(n_samples):\n", 471 | " func[i]=1-(self.d_hyp[i]/(np.amax(self.d_hyp[i])+0.000001))\n", 472 | " beta=0.8\n", 473 | " if(typ==2):\n", 474 | " for i in range(n_samples):\n", 475 | " func[i]=2/(1+beta*self.d_hyp[i])\n", 476 | " r_max=103/4074\n", 477 | " r_min=1\n", 478 | " self.m=func[0:115]*r_min\n", 479 | " print(self.m.shape)\n", 480 | " self.m=np.append(self.m,func[115:5473]*r_max)\n", 481 | " print(self.m.shape)\n", 482 | " \n", 483 | " ##############################################################################\n", 484 | "\n", 485 | "\n", 486 | " def fit(self, X_train,X_test, y):\n", 487 | " self.kernel = gaussian_kernel\n", 488 | " n_samples, n_features = X_train.shape \n", 489 | " nt_samples, nt_features = X_test.shape\n", 490 | " # Gram matrix\n", 491 | "\n", 492 | " print(self.K.shape)\n", 493 | "\n", 494 | " P = cvxopt.matrix(np.outer(y,y) * self.K)\n", 495 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 496 | " A = cvxopt.matrix(y, (1,n_samples))\n", 497 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 498 | " b = cvxopt.matrix(0.0)\n", 499 | " #print(P,q,A,b)\n", 500 | " if self.C is None:\n", 501 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 502 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 503 | " \n", 504 | " else:\n", 505 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 506 | " tmp2 = np.identity(n_samples)\n", 507 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 508 | " tmp1 = np.zeros(n_samples)\n", 509 | " tmp2 = np.ones(n_samples) * self.C\n", 510 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 511 | " # solve QP problem\n", 512 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 513 | " print(solution['status'])\n", 514 | " # Lagrange multipliers\n", 515 | " a = np.ravel(solution['x'])\n", 516 | " a_org = np.ravel(solution['x'])\n", 517 | " # Support vectors have non zero lagrange multipliers\n", 518 | " for i in range(n_samples):\n", 519 | " sv=np.logical_or(self.a_org 1e-5)\n", 520 | " #print(sv.shape)\n", 521 | " ind = np.arange(len(a))[sv]\n", 522 | " self.a = a[sv]\n", 523 | " self.sv = X_train[sv]\n", 524 | " self.sv_y = y[sv]\n", 525 | " #print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n", 526 | "\n", 527 | " # Intercept\n", 528 | " self.b = 0\n", 529 | " for n in range(len(self.a)):\n", 530 | " self.b += self.sv_y[n]\n", 531 | " self.b -= np.sum(self.a * self.sv_y * self.K[ind[n],sv])\n", 532 | " self.b /= len(self.a)\n", 533 | " print(self.b)\n", 534 | "\n", 535 | " # Weight vector\n", 536 | " if self.kernel == gaussian_kernel:\n", 537 | " self.w = np.zeros(n_features)\n", 538 | " for n in range(len(self.a)):\n", 539 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n", 540 | " else :\n", 541 | " self.w = None \n", 542 | " \n", 543 | " def project(self, X):\n", 544 | " if self.w is None:\n", 545 | " return np.dot(X, self.w) + self.b\n", 546 | " else:\n", 547 | " y_predict = np.zeros(len(X))\n", 548 | " X=np.asarray(X)\n", 549 | " for i in range(len(X)):\n", 550 | " s = 0\n", 551 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n", 552 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n", 553 | " y_predict[i] = s\n", 554 | " # print(y_predict[i])\n", 555 | " return y_predict + self.b\n", 556 | "\n", 557 | " def predict(self, X):\n", 558 | " return np.sign(self.project(X))" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": {}, 565 | "outputs": [], 566 | "source": [ 567 | "\n", 568 | "if __name__ == \"__main__\":\n", 569 | " import pylab as pl \n", 570 | " def hyp_svm():\n", 571 | " \n", 572 | " clf = HYP_SVM(C=100.0)\n", 573 | " typ=2\n", 574 | " clf.m_func(X_train,X_test,y_train)\n", 575 | " clf.fit(X_train,X_test, y_train)\n", 576 | " y_predict = clf.predict(X_test)\n", 577 | " gm(y_predict,y_test)\n", 578 | " correct = np.sum(y_predict == y_test)\n", 579 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n", 580 | " print(\"Accuracy\",correct/len(y_predict))\n", 581 | "\n", 582 | " hyp_svm() " 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "metadata": {}, 589 | "outputs": [], 590 | "source": [] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": null, 595 | "metadata": {}, 596 | "outputs": [], 597 | "source": [] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 10, 602 | "metadata": {}, 603 | "outputs": [ 604 | { 605 | "name": "stdout", 606 | "output_type": "stream", 607 | "text": [ 608 | "Overall RBF KERNEL SVM accuracy: 0.9808219178082191\n" 609 | ] 610 | } 611 | ], 612 | "source": [ 613 | "clf_svm = svm.SVC(kernel='rbf', gamma=0.001, C=100)\n", 614 | "clf_svm.fit(X_train, y_train)\n", 615 | "y_pred_svm = clf_svm.predict(X_test) \n", 616 | "acc_svm = accuracy_score(y_test, y_pred_svm)\n", 617 | "print (\"Overall RBF KERNEL SVM accuracy: \",acc_svm)" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": null, 623 | "metadata": {}, 624 | "outputs": [], 625 | "source": [] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": null, 630 | "metadata": {}, 631 | "outputs": [], 632 | "source": [] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": null, 637 | "metadata": {}, 638 | "outputs": [], 639 | "source": [] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 646 | "source": [] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": null, 651 | "metadata": {}, 652 | "outputs": [], 653 | "source": [] 654 | }, 655 | { 656 | "cell_type": "code", 657 | "execution_count": null, 658 | "metadata": {}, 659 | "outputs": [], 660 | "source": [] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": null, 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": {}, 673 | "outputs": [], 674 | "source": [] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": null, 679 | "metadata": {}, 680 | "outputs": [], 681 | "source": [] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "execution_count": null, 686 | "metadata": {}, 687 | "outputs": [], 688 | "source": [] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": null, 693 | "metadata": {}, 694 | "outputs": [], 695 | "source": [] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": null, 700 | "metadata": {}, 701 | "outputs": [], 702 | "source": [] 703 | }, 704 | { 705 | "cell_type": "markdown", 706 | "metadata": {}, 707 | "source": [ 708 | "# Normal SVM using CVXOPT" 709 | ] 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": 21, 714 | "metadata": {}, 715 | "outputs": [], 716 | "source": [ 717 | "from cvxopt import matrix\n", 718 | "class SVM(object):\n", 719 | "\n", 720 | " def __init__(self, kernel=gaussian_kernel, C=None):\n", 721 | " self.kernel = kernel\n", 722 | " self.C = C\n", 723 | " if self.C is not None: self.C = float(self.C)\n", 724 | " def fit(self, X, y):\n", 725 | " self.kernel = gaussian_kernel\n", 726 | " n_samples, n_features = X.shape\n", 727 | " # Gram matrix\n", 728 | " K = np.zeros((n_samples, n_samples))\n", 729 | " for i in range(n_samples):\n", 730 | " for j in range(n_samples):\n", 731 | " K[i,j] = gaussian_kernel(X[i], X[j])\n", 732 | " # print(K[i,j])\n", 733 | " print(K.shape)\n", 734 | "\n", 735 | " P = cvxopt.matrix(np.outer(y,y) * K)\n", 736 | " q = cvxopt.matrix(np.ones(n_samples) * -1)\n", 737 | " A = cvxopt.matrix(y, (1,n_samples))\n", 738 | " A = matrix(A, (1,n_samples), 'd') #changes done\n", 739 | " b = cvxopt.matrix(0.0)\n", 740 | " #print(P,q,A,b)\n", 741 | " if self.C is None:\n", 742 | " G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n", 743 | " h = cvxopt.matrix(np.zeros(n_samples))\n", 744 | " \n", 745 | " else:\n", 746 | " tmp1 = np.diag(np.ones(n_samples) * -1)\n", 747 | " tmp2 = np.identity(n_samples)\n", 748 | " G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n", 749 | " tmp1 = np.zeros(n_samples)\n", 750 | " tmp2 = np.ones(n_samples) * self.C\n", 751 | " h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n", 752 | " # solve QP problem\n", 753 | " solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n", 754 | " print(solution['status'])\n", 755 | " # Lagrange multipliers\n", 756 | " a = np.ravel(solution['x'])\n", 757 | " # print(a)\n", 758 | " # Support vectors have non zero lagrange multipliers\n", 759 | " sv = a > 1e-5\n", 760 | " print(sv.shape)\n", 761 | " ind = np.arange(len(a))[sv]\n", 762 | " self.a = a[sv]\n", 763 | " self.sv = X[sv]\n", 764 | " self.sv_y = y[sv]\n", 765 | " print(\"%d support vectors out of %d points\" % (len(self.a), n_samples))\n", 766 | "\n", 767 | " # Intercept\n", 768 | " self.b = 0\n", 769 | " for n in range(len(self.a)):\n", 770 | " self.b += self.sv_y[n]\n", 771 | " self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])\n", 772 | " self.b /= len(self.a)\n", 773 | "\n", 774 | " # Weight vector\n", 775 | " if self.kernel == gaussian_kernel:\n", 776 | " self.w = np.zeros(n_features)\n", 777 | " for n in range(len(self.a)):\n", 778 | " self.w += self.a[n] * self.sv_y[n] * self.sv[n]\n", 779 | " #print(self.w)\n", 780 | " else:\n", 781 | " self.w = None\n", 782 | "\n", 783 | " def project(self, X):\n", 784 | " if self.w is None:\n", 785 | " return np.dot(X, self.w) + self.b\n", 786 | " else:\n", 787 | " y_predict = np.zeros(len(X))\n", 788 | " X=np.asarray(X)\n", 789 | " for i in range(len(X)):\n", 790 | " s = 0\n", 791 | " for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):\n", 792 | " s += a * sv_y * gaussian_kernel(X[i], sv)\n", 793 | " y_predict[i] = s\n", 794 | " # print(y_predict[i])\n", 795 | " return y_predict + self.b\n", 796 | "\n", 797 | " def predict(self, X):\n", 798 | " return np.sign(self.project(X))" 799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "execution_count": 22, 804 | "metadata": {}, 805 | "outputs": [ 806 | { 807 | "name": "stdout", 808 | "output_type": "stream", 809 | "text": [ 810 | "(3341, 3341)\n", 811 | " pcost dcost gap pres dres\n", 812 | " 0: -1.4904e+04 -2.0036e+06 2e+06 2e-02 9e-13\n", 813 | " 1: -1.3268e+04 -1.4210e+05 1e+05 4e-04 8e-13\n", 814 | " 2: -1.5305e+04 -5.5521e+04 4e+04 3e-05 8e-13\n", 815 | " 3: -1.5484e+04 -5.3758e+04 4e+04 3e-05 8e-13\n", 816 | " 4: -1.6200e+04 -3.0338e+04 1e+04 5e-06 8e-13\n", 817 | " 5: -1.6293e+04 -2.9163e+04 1e+04 3e-06 7e-13\n", 818 | " 6: -1.6519e+04 -2.4440e+04 8e+03 1e-06 7e-13\n", 819 | " 7: -1.6658e+04 -2.0918e+04 4e+03 6e-07 7e-13\n", 820 | " 8: -1.6753e+04 -1.8317e+04 2e+03 2e-07 7e-13\n", 821 | " 9: -1.6785e+04 -1.7377e+04 6e+02 5e-08 7e-13\n", 822 | "10: -1.6797e+04 -1.6906e+04 1e+02 5e-09 8e-13\n", 823 | "11: -1.6799e+04 -1.6829e+04 3e+01 8e-11 8e-13\n", 824 | "12: -1.6799e+04 -1.6820e+04 2e+01 4e-11 7e-13\n", 825 | "13: -1.6800e+04 -1.6806e+04 6e+00 9e-12 8e-13\n", 826 | "14: -1.6800e+04 -1.6805e+04 6e+00 7e-12 8e-13\n", 827 | "15: -1.6800e+04 -1.6803e+04 4e+00 1e-12 9e-13\n", 828 | "16: -1.6800e+04 -1.6802e+04 2e+00 2e-13 8e-13\n", 829 | "17: -1.6800e+04 -1.6801e+04 1e+00 9e-14 7e-13\n", 830 | "18: -1.6800e+04 -1.6801e+04 1e+00 6e-13 7e-13\n", 831 | "19: -1.6800e+04 -1.6801e+04 9e-01 1e-12 7e-13\n", 832 | "20: -1.6800e+04 -1.6801e+04 8e-01 1e-12 7e-13\n", 833 | "21: -1.6800e+04 -1.6800e+04 3e-01 3e-13 7e-13\n", 834 | "22: -1.6800e+04 -1.6800e+04 1e-01 7e-13 8e-13\n", 835 | "23: -1.6800e+04 -1.6800e+04 8e-02 1e-12 7e-13\n", 836 | "24: -1.6800e+04 -1.6800e+04 2e-02 1e-12 7e-13\n", 837 | "25: -1.6800e+04 -1.6800e+04 1e-02 7e-13 8e-13\n", 838 | "Optimal solution found.\n", 839 | "optimal\n", 840 | "(3341,)\n", 841 | "3341 support vectors out of 3341 points\n", 842 | "y_test min 19\n", 843 | "y_test max 817\n", 844 | "y_pred min 0\n", 845 | "y_pred max 817\n", 846 | "0.0 1.0\n", 847 | "GM 0.0\n", 848 | "817 out of 836 predictions correct\n", 849 | "Accuracy 0.9772727272727273\n" 850 | ] 851 | } 852 | ], 853 | "source": [ 854 | "\n", 855 | "if __name__ == \"__main__\":\n", 856 | " import pylab as pl \n", 857 | " def normal_svm():\n", 858 | " \n", 859 | " clf = SVM(C=100.0)\n", 860 | " clf.fit(X_train, y_train)\n", 861 | " y_predict = clf.predict(X_test)\n", 862 | " gm(y_predict,y_test)\n", 863 | " correct = np.sum(y_predict == y_test)\n", 864 | " print(\"%d out of %d predictions correct\" % (correct, len(y_predict)))\n", 865 | " print(\"Accuracy\",correct/len(y_predict))\n", 866 | "\n", 867 | " normal_svm() " 868 | ] 869 | }, 870 | { 871 | "cell_type": "code", 872 | "execution_count": null, 873 | "metadata": {}, 874 | "outputs": [], 875 | "source": [] 876 | } 877 | ], 878 | "metadata": { 879 | "kernelspec": { 880 | "display_name": "Python 3", 881 | "language": "python", 882 | "name": "python3" 883 | }, 884 | "language_info": { 885 | "codemirror_mode": { 886 | "name": "ipython", 887 | "version": 3 888 | }, 889 | "file_extension": ".py", 890 | "mimetype": "text/x-python", 891 | "name": "python", 892 | "nbconvert_exporter": "python", 893 | "pygments_lexer": "ipython3", 894 | "version": "3.6.5" 895 | } 896 | }, 897 | "nbformat": 4, 898 | "nbformat_minor": 2 899 | } 900 | --------------------------------------------------------------------------------