├── README.md
├── LICENSE
└── linearRegression.js

/README.md:
--------------------------------------------------------------------------------
# NodeJS Machine Learning
I am learning about machine learning and will use this repository to code some examples with Node.js.
Do you have improvements? Create an issue!

## General setup
No Node.js packages are used at the moment, so just download this repo and run a file with the plain `node filename` command.

## linearRegression
Run `node linearRegression.js` to train a linear regression model.

Steps:

- specify your training data using the `startData` variable inside `linearRegression.js`
- to change the learning rate `alpha` and the other settings, edit the `options` object (see the example below)
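
For reference, `startData` holds one training example per row in the form `[x1, x2, x3, y]` (the number of features is taken from the row length, so you can use more or fewer columns as long as the last entry is `y`), and `options` controls the training run. The defaults shipped in `linearRegression.js` look like this:

```js
// one row per training example: [x1, x2, x3, y]
var startData = [
    [30, 2, 4, 424],
    [40, 3, 10, 556],
    [43, 2, 2, 544],
    [50, 2, 8, 644],
    [80, 3, 10, 956],
    [100, 3, 4, 1126]
];

var options = {
    alpha: 1,        // learning rate for gradient descent
    changeBreak: 1,  // stop once every theta changes by less than 1 % per iteration
    steps: 100000    // maximum number of gradient descent iterations
};
```
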
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Ole Kröger

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/linearRegression.js:
--------------------------------------------------------------------------------
console.time('time');

// define all options
var options = {
    // the learning rate alpha for gradient descent
    alpha: 1,
    // break when every theta value changes by less than changeBreak percent between iterations
    changeBreak: 1,
    // the maximum number of steps for the minimize function
    steps: 100000
};
// convert changeBreak from percent to a fraction
options.changeBreak /= 100;

// the training data (x1, x2, x3, y)
var startData = [
    [30, 2, 4, 424],
    [40, 3, 10, 556],
    [43, 2, 2, 544],
    [50, 2, 8, 644],
    [80, 3, 10, 956],
    [100, 3, 4, 1126]
];

// n => number of features
var n = startData[0].length - 1;

// m => number of training examples
var m = startData.length;

// set all thetas to 0
var theta = [];
for (var i = 0; i <= n; i++) {
    theta[i] = 0;
}

// average and range values for the features
var averageValues = [];
var rangeValues = [];

// cache for the hypothesis errors, reused across the theta updates of one iteration
var cache = {derivation: []};

// normalize the data using (feature - average) / range
var data = normalize();

// run gradient descent to minimize the difference between hypothesis and training data
minimize();

// log the hypothesis for all training examples
testLog();

console.timeEnd('time');

/**
 * Normalizes the startData using
 * {@link range range values} & {@link average average}
 * @returns {Array} the normalized data
 */
function normalize() {
    for (var i = 0; i < n; i++) {
        averageValues[i] = average(i);
        rangeValues[i] = range(i);
    }

    var newData = [];
    for (var i = 0; i < m; i++) {
        newData[i] = [];
        for (var c = 0; c < n; c++) {
            newData[i][c] = (startData[i][c] - averageValues[c]) / rangeValues[c];
        }
        newData[i][n] = startData[i][n];
    }
    return newData;
}

/**
 * Return the range of a specific feature column
 * @param {Number} feature feature number (0 for x1)
 * @returns {Number} maximum value - minimum value
 */
function range(feature) {
    var max = startData[0][feature];
    var min = max;
    for (var i = 1; i < m; i++) {
        max = (max < startData[i][feature]) ? startData[i][feature] : max;
        min = (min > startData[i][feature]) ? startData[i][feature] : min;
    }
    return max - min;
}

/**
 * Return the average value of a specific feature column
 * @param {Number} feature feature number (0 for x1)
 * @returns {Number} average value
 */
function average(feature) {
    var sum = 0;
    for (var i = 0; i < m; i++) {
        sum += startData[i][feature];
    }
    return sum / m;
}
/**
 * Calculate the hypothesis for a given input
 * @param {Array} x the x values
 * @param {Boolean} [doNormalize=false] true => normalize the x values using the stored average/range values
 * @returns {Number} the hypothesis y for the given input
 */
function hypothesis(x, doNormalize) {
    // default to false; an explicitly passed false stays false
    doNormalize = (typeof doNormalize === 'undefined') ? false : Boolean(doNormalize);
    // work on a copy so the caller's array is not changed
    x = x.slice();
    if (doNormalize) {
        for (var i = 0; i < n; i++) {
            x[i] = (x[i] - averageValues[i]) / rangeValues[i];
        }
    }

    // prepend x0 = 1 for the bias term and compute the dot product of theta and [1, x]
    var result = 0;
    x.unshift(1);
    for (var i = 0; i <= n; i++) {
        result += theta[i] * x[i];
    }
    return result;
}

/**
 * Return the partial derivative term of the squared-error cost function
 * @param {Array} params [i, index_x] => training example index and theta index (0 for the bias term)
 * @returns {Number} the derivative term for this training example
 */
function derivation(params) {
    var i = params[0];
    var index_x = params[1];
    if (!index_x) {
        // bias term: cache the hypothesis error for this training example
        var hypo = hypothesis(data[i].slice(0, n)) - data[i][n];
        cache.derivation[i] = hypo;
        return hypo;
    } else {
        if (typeof cache.derivation[i] !== 'undefined') {
            // reuse the error cached while updating the bias term
            return cache.derivation[i] * data[i][index_x - 1];
        }
        return (hypothesis(data[i].slice(0, n)) - data[i][n]) * data[i][index_x - 1];
    }
}

/**
 * Sum the return values of a function
 * @param {Function} func function that should be called once per iteration
 * @param {Array} params params of func; the entry matching the iteration variable name is replaced by the current index
 * @param {Array} iteration [iteration variable name (e.g. 'i'), start, end]
 * @example Sum(derivation, ['i', t], ['i', 0, m - 1]) // sums derivation([i, t]) for i = 0..m-1
 * @returns {Number} the sum
 */
function Sum(func, params, iteration) {
    var result = 0;
    var itParam = false;
    for (var p = 0; p < params.length; p++) {
        if (iteration[0] === params[p]) {
            itParam = p;
            break;
        }
    }
    for (var i = iteration[1]; i <= iteration[2]; i++) {
        if (itParam !== false) {
            params[itParam] = i;
        }
        result += func(params);
    }
    return result;
}

/**
 * Minimize the difference between hypothesis and training data using gradient descent
 */
function minimize() {
    var minChangeBreak = 1 - options.changeBreak;
    var maxChangeBreak = 1 + options.changeBreak;
    for (var i = 1; i <= options.steps; i++) {
        var temp = [];
        var breakNr = 0;
        for (var t = 0; t <= n; t++) {
            // gradient descent update: theta[t] := theta[t] - (alpha / m) * sum_i (h(x_i) - y_i) * x_i[t]
            temp[t] = theta[t] - (options.alpha / m) * Sum(derivation, ['i', t], ['i', 0, m - 1]);
            if ((temp[t] / theta[t]) > minChangeBreak && (temp[t] / theta[t]) < maxChangeBreak) {
                breakNr++;
            }
        }
        theta = temp;
        // stop early once every theta changed by less than changeBreak
        if (breakNr == n + 1) {
            console.log('break after iteration: ' + i);
            break;
        }
    }
    console.log('theta: ', theta);
}

/**
 * Log the hypothesis for every training example (using the un-normalized inputs)
 */
function testLog() {
    for (var i = 0; i < m; i++) {
        console.log('hypo for ', startData[i].slice(0, n), ': ', hypothesis(startData[i].slice(0, n), true));
    }
}
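
// Example: after minimize() has run, the learned theta can be reused for
// inputs outside the training set by calling hypothesis() with normalization
// enabled (second argument true). The feature values below are made up for
// illustration only; replace them with your own.
console.log('prediction for [60, 3, 5]: ', hypothesis([60, 3, 5], true));

--------------------------------------------------------------------------------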