├── README.md ├── dataset.txt └── src └── Logistic.java /README.md: -------------------------------------------------------------------------------- 1 | logistic-regression 2 | =================== 3 | 4 | A simple implementation of logistic regression in Java -------------------------------------------------------------------------------- /dataset.txt: -------------------------------------------------------------------------------- 1 | # http://www.sph.emory.edu/~dkleinb/datasets/cancer.dat 2 | 10009 1 0 0 1 0 1 3 | 10025 0 0 1 2 0 0 4 | 10038 1 0 0 1 1 0 5 | 10042 0 0 0 0 1 0 6 | 10049 0 0 1 0 0 0 7 | 10113 0 0 1 0 1 0 8 | 10131 0 0 1 2 1 0 9 | 10160 1 0 0 0 0 0 10 | 10164 0 0 1 0 1 0 11 | 10189 1 0 1 0 0 0 12 | 10215 0 0 1 0 1 0 13 | 10216 0 0 1 0 0 0 14 | 10235 0 0 1 0 1 0 15 | 10270 1 0 0 1 0 0 16 | 10282 1 0 0 0 1 0 17 | 10303 2 0 0 0 1 0 18 | 10346 1 0 0 2 1 0 19 | 10380 2 0 0 0 1 0 20 | 10429 2 0 1 0 0 0 21 | 10441 0 0 1 0 1 0 22 | 10443 0 0 1 2 0 0 23 | 10463 0 0 0 0 0 0 24 | 10475 0 0 1 0 1 0 25 | 10489 1 0 1 0 1 1 26 | 10518 0 0 1 2 1 0 27 | 10529 1 0 1 0 0 0 28 | 10545 0 0 1 0 0 0 29 | 10546 0 0 0 2 0 0 30 | 10575 1 0 0 0 1 0 31 | 10579 2 0 1 0 0 0 32 | 10581 2 0 1 1 1 0 33 | 10600 1 0 1 1 0 0 34 | 10627 1 0 1 2 0 0 35 | 10653 1 0 0 1 1 0 36 | 10664 0 0 0 0 1 0 37 | 10691 1 1 0 0 1 0 38 | 10692 1 0 1 2 1 0 39 | 10711 0 0 0 0 1 0 40 | 10714 0 0 1 0 0 0 41 | 10739 1 0 1 1 1 0 42 | 10750 1 0 1 0 1 0 43 | 10764 2 0 1 2 0 0 44 | 10770 0 0 1 2 1 0 45 | 10780 0 0 1 0 1 0 46 | 10784 2 0 1 0 1 0 47 | 10785 0 0 1 0 1 0 48 | 10788 1 0 0 0 0 0 49 | 10815 1 0 0 0 1 0 50 | 10816 0 0 0 0 1 0 51 | 10818 0 0 1 2 1 0 52 | 11095 0 1 1 0 0 0 53 | 11146 0 1 0 0 1 0 54 | 11206 2 1 0 0 0 0 55 | 11223 2 1 0 0 0 0 56 | 11236 1 1 0 2 0 0 57 | 11244 1 1 0 0 0 1 58 | 11245 0 1 0 0 0 0 59 | 11278 2 1 0 0 1 0 60 | 11322 0 1 0 0 1 0 61 | 11326 2 1 0 2 1 0 62 | 11329 2 1 0 2 1 0 63 | 11344 1 1 0 2 1 0 64 | 11358 0 1 0 0 0 1 65 | 11417 2 1 1 0 1 0 66 | 11421 2 1 0 1 1 0 67 | 11484 1 1 
0 0 0 1 68 | 11499 2 1 0 0 0 0 69 | 11503 1 1 0 0 1 0 70 | 11527 1 1 0 0 0 0 71 | 11540 2 1 0 1 1 0 72 | 11580 1 1 0 0 1 0 73 | 11583 1 0 1 1 0 1 74 | 11592 2 1 0 1 1 0 75 | 11604 0 1 0 0 1 0 76 | 11625 1 0 1 0 0 0 77 | 20035 0 0 1 0 0 1 78 | 20053 1 0 0 0 0 0 79 | 20070 0 0 0 2 1 0 80 | 20074 1 0 1 2 0 1 81 | 20146 1 0 0 1 1 0 82 | 20149 2 0 1 2 1 0 83 | 20158 2 0 0 0 1 0 84 | 20185 1 0 0 1 1 0 85 | 20193 1 0 1 0 1 0 86 | 20194 0 0 1 0 0 0 87 | 20205 1 0 0 2 1 0 88 | 20206 2 0 1 1 1 0 89 | 20265 0 0 1 0 1 0 90 | 20311 0 0 0 0 1 0 91 | 20328 2 0 0 1 0 1 92 | 20353 0 0 1 0 0 0 93 | 20372 0 0 0 0 0 0 94 | 20405 1 0 1 1 1 1 95 | 20413 2 0 1 0 1 0 96 | 20427 0 0 0 0 0 0 97 | 20455 1 0 1 0 1 0 98 | 20462 0 0 0 0 1 0 99 | 20472 0 0 0 2 0 0 100 | 20485 0 0 0 0 0 0 101 | 20523 0 0 1 2 0 0 102 | 20539 0 0 1 0 1 0 103 | 20554 0 0 1 0 0 1 104 | 20565 0 0 0 2 1 0 105 | 20566 1 0 1 1 1 0 106 | 20567 1 0 0 1 1 0 107 | 20568 0 0 1 0 1 0 108 | 20569 1 0 0 0 0 0 109 | 20571 1 0 1 0 1 0 110 | 20581 2 0 0 0 1 0 111 | 20583 1 0 0 0 1 0 112 | 20585 2 0 0 1 1 0 113 | 20586 0 0 1 2 1 0 114 | 20591 1 0 1 2 0 0 115 | 20595 0 0 1 2 1 0 116 | 20597 1 0 0 0 0 0 117 | 20599 0 0 1 0 1 0 118 | 20607 0 0 0 1 1 0 119 | 20611 1 0 0 0 1 0 120 | 20612 2 0 0 1 1 0 121 | 20614 1 0 0 1 1 0 122 | 20615 1 0 1 0 0 0 123 | 21017 1 1 0 1 1 0 124 | 21058 2 1 0 0 1 0 125 | 21063 0 1 0 0 0 0 126 | 21084 1 1 0 1 0 1 127 | 21087 1 1 0 2 1 0 128 | 21098 0 1 0 0 0 0 129 | 21099 1 1 0 2 0 0 130 | 21113 0 1 0 0 1 0 131 | 21114 1 1 0 0 1 1 132 | 21116 1 1 0 2 1 0 133 | 21117 1 0 0 2 1 0 134 | 21138 2 1 1 1 1 0 135 | 21154 0 1 0 0 1 0 136 | 21165 0 1 0 0 1 0 137 | 21181 2 1 0 0 0 1 138 | 21183 1 1 0 2 1 0 139 | 21231 1 1 0 0 1 0 140 | 21234 1 1 1 0 0 0 141 | 21286 2 1 0 2 1 0 142 | 21352 2 1 1 1 0 0 143 | 21395 0 1 0 0 1 0 144 | 21417 1 1 0 2 1 0 145 | 21423 0 1 0 0 1 0 146 | 21426 1 1 0 1 1 0 147 | 21433 0 1 0 0 1 0 148 | 21435 0 1 0 0 0 0 149 | 21436 1 1 0 0 0 0 150 | 21439 1 1 0 2 1 0 151 | 21446 1 1 0 0 0 0 152 | 
21448 0 1 1 2 0 0 153 | 21453 2 1 0 0 1 0 154 | 30042 2 0 1 0 0 1 155 | 30080 0 0 1 0 1 0 156 | 301003 1 0 1 0 0 0 157 | 301009 0 0 1 2 1 0 158 | 301017 0 0 1 0 0 0 159 | 30154 1 0 1 0 1 0 160 | 30176 0 0 1 0 1 0 161 | 30210 0 0 1 0 1 0 162 | 30239 1 0 1 0 1 0 163 | 30311 0 0 0 0 0 1 164 | 30382 0 0 1 2 1 0 165 | 30387 0 0 1 0 1 0 166 | 30415 0 0 1 0 1 0 167 | 30428 0 0 1 0 0 0 168 | 30479 0 0 1 0 0 1 169 | 30485 0 0 1 2 1 0 170 | 30493 2 0 1 2 1 0 171 | 30519 0 0 1 0 1 0 172 | 30532 0 0 1 0 1 0 173 | 30541 0 0 1 0 1 0 174 | 30567 1 0 0 0 0 0 175 | 30569 2 0 1 1 1 0 176 | 30578 0 0 1 0 0 1 177 | 30579 1 0 1 0 0 0 178 | 30596 1 0 1 1 1 0 179 | 30597 1 0 1 1 0 0 180 | 30618 0 0 1 0 0 0 181 | 30622 1 0 1 1 1 0 182 | 30627 1 0 1 2 0 0 183 | 30648 2 0 0 0 1 0 184 | 30655 0 0 1 0 0 1 185 | 30658 0 0 1 0 1 0 186 | 30667 0 0 1 0 1 0 187 | 30678 1 0 1 0 0 0 188 | 30701 0 0 1 0 0 0 189 | 30703 2 0 1 1 0 0 190 | 30710 0 0 1 2 0 0 191 | 30713 1 0 0 1 1 1 192 | 30716 0 0 0 0 1 0 193 | 30721 0 0 0 0 0 1 194 | 30723 0 0 1 0 1 0 195 | 30724 2 0 1 2 1 0 196 | 30733 1 0 0 1 0 0 197 | 30734 0 0 1 0 0 0 198 | 30736 2 0 0 1 1 1 199 | 30737 0 0 1 0 0 0 200 | 30740 0 0 1 0 1 0 201 | 30742 2 0 1 0 1 0 202 | 30743 0 0 1 0 1 0 203 | 30745 2 0 0 0 1 0 204 | 30754 1 0 1 0 1 0 205 | 30758 1 0 0 0 1 0 206 | 30764 0 0 1 0 0 1 207 | 30765 2 0 0 0 0 0 208 | 30769 2 0 0 1 1 0 209 | 30772 0 0 1 0 1 0 210 | 30774 0 0 0 0 1 0 211 | 30784 2 0 1 0 0 0 212 | 30786 1 0 1 0 1 0 213 | 30787 0 0 0 0 1 0 214 | 30789 1 0 1 0 1 0 215 | 30800 0 0 1 0 0 0 216 | 30801 1 0 1 0 1 0 217 | 30803 1 0 1 0 1 0 218 | 30806 1 0 1 0 1 0 219 | 30817 0 0 1 2 0 0 220 | 30819 2 0 1 0 1 1 221 | 30822 0 0 1 0 1 0 222 | 30823 0 0 1 2 1 0 223 | 30834 0 0 0 0 0 0 224 | 30836 0 0 1 0 1 0 225 | 30837 1 0 1 0 1 0 226 | 30840 0 0 1 0 1 0 227 | 30841 1 0 1 0 0 0 228 | 30844 0 0 1 0 1 0 229 | 30845 0 0 1 0 0 0 230 | 30847 1 0 1 0 0 0 231 | 30848 0 0 1 0 1 0 232 | 30850 0 0 1 0 1 0 233 | 30856 1 0 0 0 1 0 234 | 30858 0 0 1 0 0 0 235 | 
30860 0 0 0 0 1 0 236 | 30862 1 0 1 1 1 0 237 | 30864 0 0 0 2 0 0 238 | 30867 0 0 1 0 1 0 239 | 30869 0 0 1 0 1 0 240 | 30887 0 0 1 0 1 0 241 | 30900 1 0 0 1 1 0 242 | 30913 2 0 0 0 1 0 243 | 30914 1 0 0 0 0 0 244 | 30922 2 0 0 2 1 0 245 | 30923 0 0 1 2 1 0 246 | 30927 1 0 1 0 0 1 247 | 30929 0 0 1 2 1 0 248 | 30933 0 0 1 2 1 0 249 | 30940 0 0 1 0 1 0 250 | 30943 1 0 1 2 1 0 251 | 30945 0 0 0 2 0 0 252 | 30951 1 0 0 0 0 0 253 | 30964 0 0 0 2 1 0 254 | 30969 0 0 1 0 1 0 255 | 30979 2 0 0 0 1 0 256 | 30980 1 0 0 0 0 0 257 | 30982 1 0 0 1 1 0 258 | 30990 1 0 1 1 1 0 259 | 30991 1 0 1 0 1 1 260 | 30999 0 0 1 0 1 0 261 | 31056 1 1 0 2 1 0 262 | 31068 1 1 0 1 0 0 263 | 31108 2 1 0 2 1 0 264 | 31168 1 1 1 0 0 0 265 | 31191 0 1 1 0 0 0 266 | 31229 0 1 1 0 0 1 267 | 31263 0 1 0 0 1 0 268 | 31281 1 1 1 0 0 0 269 | 31340 1 1 1 0 1 0 270 | 31375 0 1 0 0 1 0 271 | 31401 0 1 1 0 0 1 272 | 31480 1 1 1 1 1 0 273 | 31501 1 1 0 2 1 0 274 | 31514 0 1 0 2 0 0 275 | 31518 1 1 0 2 1 0 276 | 31532 0 0 1 2 1 0 277 | 31543 2 1 1 1 1 0 278 | 31588 0 1 0 0 1 0 279 | 31590 0 0 1 0 1 0 280 | 31591 2 1 0 1 1 0 281 | 31595 0 1 0 0 1 0 282 | 31596 1 1 0 0 0 0 283 | 31598 1 1 0 0 1 0 284 | 31599 0 1 0 0 0 0 285 | 31605 0 1 1 0 0 0 286 | 31612 2 1 0 0 1 0 287 | 31615 2 1 0 0 0 0 288 | 31628 1 1 0 0 1 0 289 | 31640 2 1 0 1 1 0 290 | -------------------------------------------------------------------------------- /src/Logistic.java: -------------------------------------------------------------------------------- 1 | import java.io.File; 2 | import java.io.FileNotFoundException; 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import java.util.Scanner; 7 | 8 | /** 9 | * Performs simple logistic regression. 
10 | * User: tpeng 11 | * Date: 6/22/12 12 | * Time: 11:01 PM 13 | * 14 | * @author tpeng 15 | * @author Matthieu Labas 16 | */ 17 | public class Logistic { 18 | 19 | /** the learning rate */ 20 | private double rate; 21 | 22 | /** the weight to learn */ 23 | private double[] weights; 24 | 25 | /** the number of iterations */ 26 | private int ITERATIONS = 3000; 27 | 28 | public Logistic(int n) { 29 | this.rate = 0.0001; 30 | weights = new double[n]; 31 | } 32 | 33 | private static double sigmoid(double z) { 34 | return 1.0 / (1.0 + Math.exp(-z)); 35 | } 36 | 37 | public void train(List instances) { 38 | for (int n=0; n readDataSet(String file) throws FileNotFoundException { 73 | List dataset = new ArrayList(); 74 | Scanner scanner = null; 75 | try { 76 | scanner = new Scanner(new File(file)); 77 | while(scanner.hasNextLine()) { 78 | String line = scanner.nextLine(); 79 | if (line.startsWith("#")) { 80 | continue; 81 | } 82 | String[] columns = line.split("\\s+"); 83 | 84 | // skip first column and last column is the label 85 | int i = 1; 86 | int[] data = new int[columns.length-2]; 87 | for (i=1; i instances = readDataSet("dataset.txt"); 104 | Logistic logistic = new Logistic(5); 105 | logistic.train(instances); 106 | int[] x = {2, 1, 1, 0, 1}; 107 | System.out.println("prob(1|x) = " + logistic.classify(x)); 108 | 109 | int[] x2 = {1, 0, 1, 0, 0}; 110 | System.out.println("prob(1|x2) = " + logistic.classify(x2)); 111 | 112 | } 113 | 114 | } 115 | --------------------------------------------------------------------------------