├── README.md
├── LICENSE
└── linearRegression.js

/README.md:
--------------------------------------------------------------------------------
# NodeJS Machine Learning
I am learning about machine learning and will use this repository to code some examples with Node.js.
Do you have improvements? Create an issue!

## General setup
No Node.js packages are used at the moment, so just download this repo and run a file with the plain `node filename` command.

## linearRegression
Run `node linearRegression.js` to train a linear regression model.

Steps:

- specify your training data using the `startData` variable inside `linearRegression.js`
- to change the learning rate `alpha` and the other settings, edit the `options` object (see the example below)
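
For reference, `startData` holds one training example per row in the form `[x1, x2, x3, y]` (the number of features is taken from the row length, so you can use more or fewer columns as long as the last entry is `y`), and `options` controls the training run. The defaults shipped in `linearRegression.js` look like this:

```js
// one row per training example: [x1, x2, x3, y]
var startData = [
    [30, 2, 4, 424],
    [40, 3, 10, 556],
    [43, 2, 2, 544],
    [50, 2, 8, 644],
    [80, 3, 10, 956],
    [100, 3, 4, 1126]
];

var options = {
    alpha: 1,        // learning rate for gradient descent
    changeBreak: 1,  // stop once every theta changes by less than 1 % per iteration
    steps: 100000    // maximum number of gradient descent iterations
};
```
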
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Ole Kröger

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/linearRegression.js:
--------------------------------------------------------------------------------
console.time('time');

// define all options
var options = {
    // the learning rate alpha for gradient descent
    alpha: 1,
    // break when every theta value changes by less than changeBreak percent between iterations
    changeBreak: 1,
    // the maximum number of steps for the minimize function
    steps: 100000
};
// convert changeBreak from percent to a fraction
options.changeBreak /= 100;

// the training data (x1, x2, x3, y)
var startData = [
    [30, 2, 4, 424],
    [40, 3, 10, 556],
    [43, 2, 2, 544],
    [50, 2, 8, 644],
    [80, 3, 10, 956],
    [100, 3, 4, 1126]
];

// n => number of features
var n = startData[0].length - 1;

// m => number of training examples
var m = startData.length;

// set all thetas to 0
var theta = [];
for (var i = 0; i <= n; i++) {
    theta[i] = 0;
}

// average and range values for the features
var averageValues = [];
var rangeValues = [];

// cache for the hypothesis errors, reused across the theta updates of one iteration
var cache = {derivation: []};

// normalize the data using (feature - average) / range
var data = normalize();

// run gradient descent to minimize the difference between hypothesis and training data
minimize();

// log the hypothesis for all training examples
testLog();

console.timeEnd('time');

/**
 * Normalizes the startData using
 * {@link range range values} & {@link average average}
 * @returns {Array} the normalized data
 */
function normalize() {
    for (var i = 0; i < n; i++) {
        averageValues[i] = average(i);
        rangeValues[i] = range(i);
    }

    var newData = [];
    for (var i = 0; i < m; i++) {
        newData[i] = [];
        for (var c = 0; c < n; c++) {
            newData[i][c] = (startData[i][c] - averageValues[c]) / rangeValues[c];
        }
        newData[i][n] = startData[i][n];
    }
    return newData;
}

/**
 * Return the range of a specific feature column
 * @param {Number} feature feature number (0 for x1)
 * @returns {Number} maximum value - minimum value
 */
function range(feature) {
    var max = startData[0][feature];
    var min = max;
    for (var i = 1; i < m; i++) {
        max = (max < startData[i][feature]) ? startData[i][feature] : max;
        min = (min > startData[i][feature]) ? startData[i][feature] : min;
    }
    return max - min;
}

/**
 * Return the average value of a specific feature column
 * @param {Number} feature feature number (0 for x1)
 * @returns {Number} average value
 */
function average(feature) {
    var sum = 0;
    for (var i = 0; i < m; i++) {
        sum += startData[i][feature];
    }
    return sum / m;
}
/**
 * Calculate the hypothesis for a given input
 * @param {Array} x the x values
 * @param {Boolean} [doNormalize=false] true => normalize the x values using the stored average/range values
 * @returns {Number} the hypothesis y for the given input
 */
function hypothesis(x, doNormalize) {
    // default to false; an explicitly passed false stays false
    doNormalize = (typeof doNormalize === 'undefined') ? false : Boolean(doNormalize);
    // work on a copy so the caller's array is not changed
    x = x.slice();
    if (doNormalize) {
        for (var i = 0; i < n; i++) {
            x[i] = (x[i] - averageValues[i]) / rangeValues[i];
        }
    }

    // prepend x0 = 1 for the bias term and compute the dot product of theta and [1, x]
    var result = 0;
    x.unshift(1);
    for (var i = 0; i <= n; i++) {
        result += theta[i] * x[i];
    }
    return result;
}

/**
 * Return the partial derivative term of the squared-error cost function
 * @param {Array} params [i, index_x] => training example index and theta index (0 for the bias term)
 * @returns {Number} the derivative term for this training example
 */
function derivation(params) {
    var i = params[0];
    var index_x = params[1];
    if (!index_x) {
        // bias term: cache the hypothesis error for this training example
        var hypo = hypothesis(data[i].slice(0, n)) - data[i][n];
        cache.derivation[i] = hypo;
        return hypo;
    } else {
        if (typeof cache.derivation[i] !== 'undefined') {
            // reuse the error cached while updating the bias term
            return cache.derivation[i] * data[i][index_x - 1];
        }
        return (hypothesis(data[i].slice(0, n)) - data[i][n]) * data[i][index_x - 1];
    }
}

/**
 * Sum the return values of a function
 * @param {Function} func function that should be called once per iteration
 * @param {Array} params params of func; the entry matching the iteration variable name is replaced by the current index
 * @param {Array} iteration [iteration variable name (e.g. 'i'), start, end]
 * @example Sum(derivation, ['i', t], ['i', 0, m - 1]) // sums derivation([i, t]) for i = 0..m-1
 * @returns {Number} the sum
 */
function Sum(func, params, iteration) {
    var result = 0;
    var itParam = false;
    for (var p = 0; p < params.length; p++) {
        if (iteration[0] === params[p]) {
            itParam = p;
            break;
        }
    }
    for (var i = iteration[1]; i <= iteration[2]; i++) {
        if (itParam !== false) {
            params[itParam] = i;
        }
        result += func(params);
    }
    return result;
}

/**
 * Minimize the difference between hypothesis and training data using gradient descent
 */
function minimize() {
    var minChangeBreak = 1 - options.changeBreak;
    var maxChangeBreak = 1 + options.changeBreak;
    for (var i = 1; i <= options.steps; i++) {
        var temp = [];
        var breakNr = 0;
        for (var t = 0; t <= n; t++) {
            // gradient descent update: theta[t] := theta[t] - (alpha / m) * sum_i (h(x_i) - y_i) * x_i[t]
            temp[t] = theta[t] - (options.alpha / m) * Sum(derivation, ['i', t], ['i', 0, m - 1]);
            if ((temp[t] / theta[t]) > minChangeBreak && (temp[t] / theta[t]) < maxChangeBreak) {
                breakNr++;
            }
        }
        theta = temp;
        // stop early once every theta changed by less than changeBreak
        if (breakNr == n + 1) {
            console.log('break after iteration: ' + i);
            break;
        }
    }
    console.log('theta: ', theta);
}

/**
 * Log the hypothesis for every training example (using the un-normalized inputs)
 */
function testLog() {
    for (var i = 0; i < m; i++) {
        console.log('hypo for ', startData[i].slice(0, n), ': ', hypothesis(startData[i].slice(0, n), true));
    }
}
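
// Example: after minimize() has run, the learned theta can be reused for
// inputs outside the training set by calling hypothesis() with normalization
// enabled (second argument true). The feature values below are made up for
// illustration only; replace them with your own.
console.log('prediction for [60, 3, 5]: ', hypothesis([60, 3, 5], true));

--------------------------------------------------------------------------------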