├── .DS_Store
├── .gitignore
├── 001_knn
├── knn.js
└── knn_specs.js
├── 002_kmeans
├── .DS_Store
├── kmeans.js
└── kmeans_specs.js
├── 003_reinforcement
├── .DS_Store
├── display.html
├── lib
│ ├── misc.js
│ ├── rl_run.js
│ ├── rl_visible.js
│ └── style.css
├── rl.js
└── rl_specs.js
├── lib
├── mnist_reader.js
├── rand.js
├── small_mnist_data.json
└── writer.js
├── package.json
└── readme.md
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/js-ml-workshop/d121931df51cc42ad3e91736f0842f77c07f7387/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | nodemodules
3 |
--------------------------------------------------------------------------------
/001_knn/knn.js:
--------------------------------------------------------------------------------
1 |
//Constructor for a k-nearest-neighbors classifier.
//`kSize` is the number of nearest neighbors consulted when classifying a
//vector; `points` accumulates the labeled training data added via train().
function KNN(kSize){
	this.kSize = kSize;
	this.points = [];
}
7 |
8 |
9 |
10 | module.exports = KNN
--------------------------------------------------------------------------------
/001_knn/knn_specs.js:
--------------------------------------------------------------------------------
1 |
2 | var KNN = require('./knn');
3 | var expect = require('chai').expect;
4 | var mnist = require('../lib/mnist_reader');
5 | var writer = require('../lib/writer');
6 | var randomPoints = require('../lib/rand');
7 | /*randomPoints is a function. You would use it like this:
8 |
9 | var testPoints = randomPoints(100,[1,1],[2,2]);
10 |
11 | This would return an array of length 100.
12 |
13 | Each element of the array would be an array / vector of length two, because
14 | the second and third parameter are of length two.
15 |
16 | The values in each vector would be between [2,2] and [3,3]--that is,
17 | the n-th value in each vector is chosen by multiplying the n-th
18 | value in the first parameter with a random number from 0 to 1,
19 | and then adding the n-th value of the third parameter.
20 |
21 | A more natural way to say this is that randomPoints gives you an array of
22 | points (of the length of the first argument) filling a hypercube (with
 23 |   the dimensions of the second argument) and starting at the point
 24 |   defined by the third argument.
25 | */
26 |
27 |
28 |
29 |
30 | describe('Testing the basic KNN functionality.', function(){
31 |
32 | //KNN should be a constructor function.
33 | it('is a function', function(){
34 | expect(typeof KNN).to.equal('function');
35 | });
36 |
37 | //Objects created by the KNN should have a kSize variable,
38 | //set by the constructor, and a points array.
39 | //The points array holds the training data.
40 | it('creates instances with k-size and an empty points array', function(){
41 | var knn = new KNN(10);
42 | expect(knn.kSize).to.equal(10);
43 | expect(knn.points).to.be.empty;
44 | });
45 |
46 | //The train function should add to the points array.
47 | describe('The train function works', function(){
48 |
49 | it('has a train function', function(){
50 | var knn = new KNN(10);
51 | expect(typeof knn.train).to.equal('function');
52 | });
53 |
54 | /*
55 | Train takes an array of training data and stores it.
56 |
57 | Each training element, in an array of training data, is an array of length two.
58 | The first element of the subarray is a vector -- an array of numbers, of arbitrary length.
59 | The second element of the subarray is a classification -- a number.
60 |
61 | This is the standard way that training data is represented throughout
62 | this file.
63 |
64 | Example Training Data: [ [ [1,2,3],0 ] , [ [1,2,4],0 ] , [ [-1,2,3],1 ] , ... ]
65 | */
66 | it('concatenates the points handed to the train function onto the points already within it', function(){
67 | var knn = new KNN(10);
68 | //Making fake data
69 | var typeA = randomPoints(100,[1,1],[0,0]).map(function(n){ return [n,1] });
70 | var typeB = randomPoints(100,[1,1],[1,0]).map(function(n){ return [n,0] });
71 | knn.train(typeA);
72 | expect(knn.points.length).to.equal(100);
73 | expect(knn.points[0][0]).to.equal(typeA[0][0])
74 | knn.train(typeB);
75 | expect(knn.points.length).to.equal(200);
76 | expect(knn.points[100][0]).to.equal(typeB[0][0])
77 | });
78 |
79 | });
80 |
81 | describe('The predict and predictSingle function works.', function(){
82 |
83 | /*If you want to build the predict & predictSingle functions from scratch, without
84 | any guidance, you can leave this as an xdescribe rather than a describe.
85 |
86 | However, if you implement these functions, it will be much easier to write predictSingle,
87 | and they do provide a useful way to break down the things that predictSingle will need to do.
88 | */
89 | xdescribe('Optional helper functions to be used in predict and predictSingle', function(){
90 |
91 | /* The purpose of the function '_distance' is to find the Euclidean norm
92 | as between two vectors.
93 |
94 | As input, it should take two vectors of any size.
95 |
96 | As output, it should return the distance between them as determined by the
97 | Euclidean norm.
98 |
99 | */
100 | it('has function _distance, which works as expected', function(){
101 | var knn = new KNN(3);
102 | expect(typeof knn._distance).to.equal('function');
103 | var one_dim = knn._distance([0],[1]);
104 | var two_dim = knn._distance([0,0],[1,0]);
105 | var thr_dim = knn._distance([0,1,0],[0,2,0]);
106 | var for_dim = knn._distance([0,2,10,10],[0,2,11,10]);
107 | expect(one_dim).to.equal(1)
108 | expect(two_dim).to.equal(1)
109 | expect(thr_dim).to.equal(1)
110 | expect(for_dim).to.equal(1)
111 | });
112 |
113 | /* The purpose of the function '_distances' is to take a single, unclassified vector,
114 | and find the distance between that vector and a bunch of other, already-classified vectors.
115 |
116 | As input, it should take a vector as the first argument.
117 |
118 | It should take an array of training data as the second argument.
119 | See the comment on "train" to see what this will look like.
120 |
121 | As output, it should give an array of sub-arrays which have length two.
122 | The first element of each n-th sub-array should be
123 | the distance between the first vector handed in, and the n-th
124 | element of the training data.
125 |
126 | And the second element should be the classification
127 | of the n-th element of the input array.
128 |
129 | Example Output: [ [.1,0] , [.2,0] , [.3, 1] , [.4,0] ... ]
130 | */
131 | it('has function _distances, which works as expected', function(){
132 | var knn = new KNN(10);
133 | expect(typeof knn._distances).to.equal('function');
134 | //Making fake data
135 | var typeA = randomPoints(100,[1,1],[0,0]).map(function(n){ return [n,1] });
136 | var typeB = randomPoints(100,[1,1],[1,0]).map(function(n){ return [n,0] });
137 | var distances = knn._distances([0,0], [[[1,0],1]].concat(typeA.concat(typeB)));
138 | expect(distances.length).to.equal(201);
139 | expect(distances[0].length).to.equal(2);
140 | expect(distances[0][0]).to.equal(1);
141 | });
142 |
143 | /*The basic purpose of _sorted is to take the output of _distances and sort it by distance.
144 |
145 | The input should be an array of sub-arrays of length two.
146 | The first element is a distance.
147 | 		The second element is the classification corresponding to that distance.
148 |
149 | 		The output is an array of classifications,
150 | ordered by the distances that (used to) accompany each classification.
151 | Example Output: [1,0,1,0,2,3,2,1,1,2,2,0,0]
152 | */
153 | it('has function _sorted, which works as expected', function(){
154 | var knn = new KNN(3);
155 | expect(typeof knn._sorted).to.equal('function');
156 | var sorted = knn._sorted([[100,0],[10,1],[20,1],[110,0],[120,1]]);
157 | expect(sorted[0]).to.equal(1)
158 | expect(sorted[1]).to.equal(1)
159 | expect(sorted[2]).to.equal(0)
160 | expect(sorted[3]).to.equal(0)
161 | expect(sorted[4]).to.equal(1)
162 | });
163 |
164 | /*The basic purpose of _majority is to tell you what classification is most common
165 | 	   among the first k elements of a sorted list of distances and classifications.
166 |
167 | It takes as input two parameters.
168 | The first is k.
169 | The second is the output of _sorted--a sorted list of classifications,
170 | going from closest to furthest.
171 |
172 | It gives as output the most common classification among the
173 | elements from index 0 to index k in the array passed in.
174 |
175 | Example Behavior:
176 | --knn._majority(3,[1,2,1,2,2,2,1,1,1,1]) would return 1, because 1 is the most common element among the first three.
177 | --knn._majority(5,[1,2,1,2,2,2,1,1,1,1]) would return 2, because 2 is the most common element among the first five.
178 | */
179 | it('has function _majority, which works as expected', function(){
180 | var knn = new KNN(3);
181 | expect(typeof knn._majority).to.equal('function');
182 | expect(knn._majority(3,[1,2,1,2,2,2,1,1,1,1])).to.equal(1)
183 | expect(knn._majority(5,[1,2,1,2,2,2,1,1,1,1])).to.equal(2)
184 | });
185 |
186 | })
187 |
188 | /* The purpose of predictSingle is to take a single vector
189 | and use the training-data in the knn function to predict
190 | what classification it has. */
191 | it('has a predictSingle function, which takes a single vector', function(){
192 | var knn = new KNN(10);
193 | expect(typeof knn.predictSingle).to.equal('function');
194 | var knn = new KNN(10);
195 | //Making fake data
196 | var typeA = randomPoints(100,[1,1],[0,0]).map(function(n){ return [n,1] });
197 | var typeB = randomPoints(100,[1,1],[0,1]).map(function(n){ return [n,0] });
198 | knn.train(typeA);
199 | knn.train(typeB);
200 | expect(knn.predictSingle([0,.5])).to.equal(1)
201 | expect(knn.predictSingle([0,.5])).to.equal(1)
202 | expect(knn.predictSingle([1,1.5])).to.equal(0)
203 | expect(knn.predictSingle([1,1.5])).to.equal(0)
204 | });
205 |
206 | /* This is just like predictSingle, except it applies it across
207 | an array of vectors.
208 | */
209 | it('has a predict function, which takes an array of vectors', function(){
210 | var knn = new KNN(10);
211 | expect(typeof knn.predict).to.equal('function');
212 | //Making fake data
213 | var typeA = randomPoints(100,[1,1],[0,0]).map(function(n){ return [n,1] });
214 | var typeB = randomPoints(100,[1,1],[1,0]).map(function(n){ return [n,0] });
215 | knn.train(typeA);
216 | knn.train(typeB);
217 | var sample = randomPoints(100,[1,1],[1,0])
218 | var results = knn.predict(sample);
219 | //Note this isn't actually testing anything it gets back, just
220 | //that it runs.
221 | });
222 |
223 | /* The purpose of score is to take in another set of data in the same format as training data.
224 | Rather than add this data to the array of training data in the object, however,
225 | it should run "predict" on the data and see how often the predictions of the algorithm
226 | square with the actual value in the data.
227 |
228 | It is important, when running such validation on an algorithm, to use different data
229 | than you trained with. I.e.--you should use one set of data to train the algorithm,
230 | then an entirely different set of data to test it. If you don't do this, you cannot
231 | get an accurate view of how good your algorithm-as-trained is at generalizing and predicting.
232 |
233 | Why would this be?
234 |
235 | */
236 | it('has a score function, which takes another set of the testing data and returns a number from 1 to 0', function(){
237 | var knn = new KNN(10);
238 | expect(typeof knn.score).to.equal('function');
239 | //Making fake data
240 | var typeA = randomPoints(100,[1,1],[0,0]).map(function(n){ return [n,0] });
241 | var typeB = randomPoints(100,[1,1],[1,0]).map(function(n){ return [n,1] });
242 | knn.train(typeA);
243 | knn.train(typeB);
244 | var typeB = randomPoints(100,[1,1],[.95,0]).map(function(n){ return [n,1] });
245 | expect(knn.score(typeB) > 0.6).to.equal(true);
246 | expect(knn.score(typeB) <= 1).to.equal(true);
247 | });
248 |
249 | });
250 | });
251 |
252 |
253 | /*Switch this to a describe after you've completed the above.*/
254 | xdescribe('Testing the KNN with data from the MNIST', function(){
255 |
256 | //Need more time, to handle all the data.
257 | this.timeout(10000);
258 |
259 | xit('Can handle somewhat chaotic data', function(){
260 | var knn = new KNN(1);
261 | var typeA = randomPoints(1000,[1,1],[0,0]).map(function(n){ return [n,0] });
262 | var typeB = randomPoints(1000,[1,1],[.75,0]).map(function(n){ return [n,1] });
263 | knn.train(typeA);
264 | knn.train(typeB);
265 | var typeB = randomPoints(100,[1,1],[0,0]).map(function(n){ return [n,0] });
266 | var score = knn.score(typeB);
267 | console.log("The program got a score of " + score + ", which means it got " + (score * 100) + "% correct.");
268 | });
269 |
270 | /* The data given to the algorithm here is the mnist data.
271 | This means the algorithm is trying to classify into 10 different
272 | 	   classes, rather than 2, as in the above. The input vectors are 784 elements
273 | long as well, rather than 2, as in the above. So this is a significantly
274 | more complex problem, with more chaotic data.
275 |
276 | Note that initially, while working from only 100 elements of training
277 | data, the algorithm is only able to get a fairly low score--although
278 | still far better than random chance.
279 |
280 | Try to improve that score. Does fiddling with the k-number alter it
281 | very much? Or does adding more training data alter it more?
282 |
283 | Running this will create .gif images, each showing how your program classified a particular image.
284 | What kind of mistakes does the program make?
285 | Are any of them mistakes you might make, in reading a digit?
286 | How could you improve accuracy of the program?
287 |
288 | */
289 | xit('Can be trained off the mnist data', function(done){
290 | var allElements = mnist.allElements(); //Should load up all 5000 elements
291 | var knn = new KNN(2);
292 | var trainingSet = allElements.slice(0,600); //Make the training set
293 | var testingSet = allElements.slice(1000,1100); //Make the testing set
294 | knn.train(trainingSet)
295 | var score = knn.score(testingSet);
296 | console.log("The program got a score of " + score + ", which means it got " + (score * 100) + "% correct.");
297 |
298 | var toClassify = testingSet.map(function(n){return n[0]});
299 | var toExport = knn.predict(toClassify).map(function(n, index){ return [toClassify[index],n]; } );
300 | writer.exportClassified(toExport, done);
301 | });
302 |
303 | });
--------------------------------------------------------------------------------
/002_kmeans/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/js-ml-workshop/d121931df51cc42ad3e91736f0842f77c07f7387/002_kmeans/.DS_Store
--------------------------------------------------------------------------------
/002_kmeans/kmeans.js:
--------------------------------------------------------------------------------
1 |
//Constructor for a k-means clusterer.
//
//options (optional object):
//  minClusterMove - when every centroid moves less than this between
//                   iterations, training stops (default 0.0001).
//
//`clusterAttempts` is how many independent k-means runs `.clusters` tries
//before keeping the best result; `points` accumulates training vectors
//added via train().
function KMeans(options){
	if (options === undefined || options === null){options = {};}
	//Explicit undefined-check so a caller-supplied falsy value (e.g. 0)
	//is honored rather than silently replaced by the default, as the
	//previous `||` fallback did.
	this.minClusterMove = options.minClusterMove !== undefined
		? options.minClusterMove
		: 0.0001;
	this.clusterAttempts = 10;
	this.points = [];
}
9 |
10 |
11 | module.exports = KMeans
--------------------------------------------------------------------------------
/002_kmeans/kmeans_specs.js:
--------------------------------------------------------------------------------
1 |
2 | var KMeans = require('./kmeans');
3 | var expect = require('chai').expect;
4 | var randomPoints = require('../lib/rand');
5 | var mnist = require('../lib/mnist_reader');
6 | var writer = require('../lib/writer');
7 |
8 | /*The function 'distance' is useful for tests, to see if values are converging on
9 | the correct location for the centroid.
10 |
11 | Yes, you can use it for the '_distance' function below if you wish.
12 | */
//Euclidean distance between two equal-length vectors: the square root of
//the sum of squared per-component differences.
var distance = function(one, two){
	var sumOfSquares = 0;
	for (var i = 0; i < one.length; i++){
		var diff = one[i] - two[i];
		sumOfSquares += diff * diff;
	}
	return Math.sqrt(sumOfSquares);
}
16 |
17 |
18 | describe('Testing required k-means functionality.', function(){
19 |
20 | it('is a function', function(){
21 | expect(typeof KMeans).to.equal('function');
22 | });
23 |
24 | it('should have all the requisite functions', function(){
25 | var km = new KMeans();
26 | //The array 'points' is the set of vectors with which the algorithm is to be trained.
27 | expect(km.points).to.be.empty;
28 | //When all the clusters move less than this per training period, k-means stops adjusting
29 | expect(km.minClusterMove !== undefined).to.be.true;
30 | expect(typeof km.train).to.equal('function'); //Adds vectors to points
31 | expect(typeof km.clusters).to.equal('function'); //Returns a list of centroids
32 | });
33 |
34 | it(' adds points to the list of points that it trains with, when train is called', function(){
35 | var knn = new KMeans();
36 | var typeA = randomPoints(100,[1,1],[0,0]);
37 | knn.train(typeA);
38 | expect(knn.points.length).to.equal(100);
39 | });
40 |
41 | /* As in the prior exercise, the following are functions which might make
42 | the task of writing the function 'clusters' much easier. But it is
43 | not necessary that you use them in any way. I suggest that you
44 | look through them in any event, though, to get an idea of the kinds of
45 | functions you will need to write.
46 | */
47 | xdescribe('Optional helper functions to help build the k-means algorithm', function(){
48 |
49 | /* The function '_distance' takes as input two vectors of any length,
50 | and returns the Euclidean norm of the difference between them
51 | That is, it takes two vectors of length n and returns the Euclidean distance
52 | between the positions that they indicate in n-dimensional space.
53 | */
54 | it('has _distance, which determines the Euclidean norm / distance between two vectors', function(){
55 | var km = new KMeans();
56 | expect(typeof km._distance).to.equal('function');
57 | expect(km._distance([0,0],[3,4])).to.equal(5);
58 | expect(km._distance([20,0],[21,0])).to.equal(1);
59 | expect(km._distance([20,20],[20,20])).to.equal(0);
60 | });
61 |
62 | /* The function '_max' takes as input an array and a function.
63 | The function should return a number when any element of the
64 | array is fed into it. '_max' itself will return the value
65 | from the array which is greatest when fed into the function
66 | passed to max.
67 |
68 | Example: knn._max(['a','bb','c','aaa','cc'], function(n){return n.length; }) will return 'aaa'
69 |
70 | This is actually pretty much the same as the lodash max,
71 | so if you want to you can just use that.
72 | */
73 | it('has _max, which takes an array and a function and returns the element from the array for which what the function returns is highest', function(){
74 | var km = new KMeans();
75 | expect(typeof km._max).to.equal('function');
76 | expect(km._max([0,1,2,3,4,5,6], function(n){return n;})).to.equal(6);
77 | expect(km._max([0,1,2,3,4,5,6], function(n){return -n;})).to.equal(0);
78 | expect(km._max(['a','sas','asdasd','ssd'], function(n){return n.length;})).to.equal('asdasd');
79 | //With index
80 | expect(km._max([0,1,2,3,4,5,6], function(n, index){return -index;})).to.equal(0);
81 | expect(km._max([10,1,2,3,4,5,6], function(n, index){return -index;})).to.equal(10);
82 | expect(km._max([7,1,2,6,4,3,2], function(n, index){return index+n;})).to.equal(6);
83 | });
84 |
85 | /* The function '_clusterEvaluator' takes as input two things--an array of
86 | centroids (vectors) and an array of training points (vectors).
87 |
88 | It then evaluates how good the clustering indicated by the centroids
89 | is, by returning the sum of the squares of the distances from each element in the training points
90 | to the closest of the centroids; that is, it returns the sum of the squares of the
91 | centroid to training-point distances. A list of centroids which has a
92 | smaller such sum is better than a list of centroids
93 | which has a larger such sum. This is because the goal of k-means is to, after all,
94 | minimize this distance; the standard algorithm which we implement is merely
95 | a way of trying to do so.
96 |
97 | Note that you would NOT want to use this function in a straightforward way in
98 | '_max' above, because the LOWER the value returned from this the
99 | better the clustering, while '_max' returns the HIGHEST value. But you could use a function
100 | that calls it.
101 |
102 | Might want to use '_distance'.
103 | */
104 | it('has _clusterEvaluator, which scores clusters according to the average distances from points to centroids in each', function(){
105 | var km = new KMeans();
106 | expect(typeof km._clusterEvaluator).to.equal('function');
107 | expect(km._clusterEvaluator( [[0,0],[100,100]],[[1,0],[0,1],[101,100],[100,101]] ) ).to.equal(4);
108 | expect(km._clusterEvaluator( [[0,0],[100,100]],[[2,0],[0,2],[102,100],[100,102]] ) ).to.equal(16);
109 | expect(km._clusterEvaluator( [[0,0]],[[3,0],[0,3],[0,-3],[-3,0]] ) ).to.equal(36);
110 | });
111 |
112 | /* The function '_averageLocation' takes an array of vectors and returns the mean
113 | location of the vectors in the array.
114 |
115 | This could obviously be useful when determining the center of a group of vectors.
116 | */
117 | it('has _averageLocation, which takes an array of vectors and returns the mean location', function(){
118 | var km = new KMeans();
119 | expect(typeof km._averageLocation).to.equal('function');
120 | expect(km._averageLocation([[1,1],[1,1],[4,4]])).to.eql([2,2])
121 | expect(km._averageLocation([[1,1],[1,1],[2,2],[2,2]])).to.eql([1.5,1.5])
122 | expect(km._averageLocation([[1,1],[2,2],[3,3]])).to.eql([2,2])
123 | expect(km._averageLocation([[1,1],[1,1]])).to.eql([1,1])
124 | expect(km._averageLocation([[1,2],[1,2],[2,3],[3,4],[3,4]])).to.eql([2,3])
125 | expect(km._averageLocation([[1,10],[1,10],[2,10],[3,10],[3,10]])).to.eql([2,10])
126 | });
127 |
128 | /* The following function, '_shiftCentroids', is rather the heart of k-means.
129 |
130 | It takes as input (1) an array of centroids, and an (1) array of all the training data.
131 | Each of the elements of these are vectors--that is, simply arrays of numbers.
132 |
133 | It outputs a new list of shifted centroids. This list is produced by
134 | dividing the training data into groups, each group consisting of the data points
135 | closer to one centroid than to any other centroid, and then shifting that centroid
136 | to the mean location of the groups.
137 |
138 | This should probably be the longest function that you write.
139 | I got it down to 12 lines; see if you can do better.
140 |
141 | 		May want to use '_distance', '_averageLocation'.
142 | */
143 | it('has _shiftCentroids, which takes centroids, and all the points and shifts centroids a step', function(){
144 | var km = new KMeans();
145 | expect(typeof km._shiftCentroids).to.equal('function');
146 | //Shouldn't shift anything at all.
147 | expect(km._shiftCentroids([[0,0],[100,100]],[[1,0],[0,1],[-1,0],[0,-1],[100,101],[101,100],[99,100],[100,99]])).to.eql([[0,0],[100,100]]);
148 | //Should shift one of the centroids, but not the other one.
149 | expect(km._shiftCentroids([[0,0],[100,100]],[[1,0],[0,1],[-1,0],[0,-1],[200,201],[201,200],[199,200],[200,199]])).to.eql([[0,0],[200,200]]);
150 | //Should shift both of the centroids.
151 | expect(km._shiftCentroids([[0,1],[101,100]],[[4,0],[0,4],[-4,0],[0,-4],[200,202],[202,200],[198,200],[200,198]])).to.eql([[0,0],[200,200]]);
152 | });
153 |
154 | /* The function '_haveShifted' takes two lists of vectors.
155 | The n-th elements of each of which are taken to correspond to before
156 | and after states for centroids shifted through the above.
157 |
158 | If any of them have shifted more than a tiny amount, it returns true.
159 |
160 | 		Might want to use '_distance'.
161 | */
162 | it('has _haveShifted, which takes two arrays of centroids, and determines if they have shifted', function(){
163 | var km = new KMeans();
164 | expect(typeof km._shiftCentroids).to.equal('function');
165 | expect(km._haveShifted([[1,1,1]],[[1,1,1]])).to.equal(false)
166 | expect(km._haveShifted([[1,1,1],[2,2,2]],[[1,1,1],[2,2,2]])).to.equal(false)
167 | expect(km._haveShifted([[1,1,1],[2,2,2]],[[1,1,1],[2,2,2.4]])).to.equal(true)
168 | expect(km._haveShifted([[1,1,1.1],[2,2,2]],[[1,1,1],[2,2,2]])).to.equal(true)
169 | expect(km._haveShifted([[1,1.01,1],[2,2,2]],[[1,1,1],[2,2,2]])).to.equal(true)
170 | expect(km._haveShifted([[1,1],[2,2]],[[1,1],[2,2]])).to.equal(false)
171 | expect(km._haveShifted([[1],[2]],[[1],[2]])).to.equal(false)
172 | expect(km._haveShifted([[1],[2]],[[1],[2.1]])).to.equal(true)
173 | });
174 |
175 | /* If you follow the path lined out here, .cluster will return the best cluster
176 | of several iterations of k-means.
177 |
178 | This function, '_clusters', simply returns a single group of clusters produced by one
179 | complete iteration of k-means. That is, it loops through Lloyd's algorithm until
180 | the centroids have ceased moving or ceased moving by more than a certain amount.
181 |
182 | This will probably use '_shiftCentroids' and '_haveShifted'. You might find it useful to
183 | write an ancillary function that helps you choose random initial locations from the
184 | vectors passed in; I did this in a somewhat complex functional map, and there are
185 | probably more straightforward ways to do it.
186 |
187 | This is probably the second most difficult function to write, after _shiftCentroids.
188 |
189 | It takes as input (1) the number of clusters whose centers it is trying to locate as well as
190 | (2) the data it is trying to find clusters on.
191 | */
192 | it('has _clusters, which returns a single cluster, but without trying multiple iterations of k-means', function(){
193 | var km = new KMeans();
194 | expect(typeof km._clusters).to.equal('function');
195 | //Returns the right number.
196 | expect(km._clusters(1,[[1,1],[0,0]]).length).to.equal(1);
197 | expect(km._clusters(2,[[1,1],[0,0]]).length).to.equal(2);
198 | expect(km._clusters(3,[[1,1],[0,0],[2,2]]).length).to.equal(3);
199 | //Returns in a sensical location
200 | var temp = km._clusters(2,[[1,0],[0,0]]);
201 | expect( distance([0,0], temp[0]) == 1 || distance([0,0], temp[0]) == 0 ).to.equal(true)
202 | expect( distance([0,0], temp[1]) == 1 || distance([0,0], temp[1]) == 0 ).to.equal(true)
203 | });
204 |
205 | /* The function '_manyClusters' invokes '_clusters' several times, and each time
206 | adds the results of '_clusters' to an array it returns
207 |
208 | It takes as input (1) the number of times to invoke '_clusters' and (2) the
209 | number of clusters to tell '_clusters' to locate.
210 |
211 | (It pulls the vectors that it will be passing to _clusters'
212 | second argument from this.points.)
213 | */
214 | it('has _manyClusters, which is returns an array of clusters', function(){
215 | var km = new KMeans();
216 | var typeA = randomPoints(100,[1,1],[0,0]);
217 | km.train(typeA);
218 | expect(typeof km._manyClusters).to.equal('function');
219 | expect(km._manyClusters(10,2).length).to.equal(10);
220 | expect(km._manyClusters(10,2)[0].length).to.equal(2);
221 | });
222 |
223 | });
224 |
225 | /*Alright, all of that prep work done, now for the rest of the problem.*/
226 | describe('The algorithm can find locations successfully with the function clusters.', function(){
227 |
228 | /* The function .clusters takes a number. It returns an array of vectors,
229 | each vector being the location of a centroid determined by the function.
230 |
231 | To make this function accurate, you'll probably need to iterate through
232 | the basic k-means algorithm several times and return the best result.
233 |
234 | ------
235 |
236 | 	   If you slogged through the entirety of the above preparatory work, here's
237 | a bonus. This is the entire amount of code needed for '.clusters',
238 | assuming you pass all the above tests (and assuming that you do so in a
239 | stable fashion).
240 |
241 | KMeans.prototype.clusters = function(clusterNum){
242 | var self = this;
243 | return this._max( this._manyClusters( this.clusterAttempts, clusterNum ) , function(cluster){
244 | return -self._clusterEvaluator(cluster, self.points);
245 | });
246 | }
247 |
248 | Tada! That's it.
249 | */
250 |
251 | it(' can determine the location of two centroids with .clusters, being told there are two', function(){
252 | var knn = new KMeans();
253 | var typeA = randomPoints(100,[1,1],[0,0]);
254 | var typeB = randomPoints(100,[1,1],[50,0]);
255 | var both = typeB.concat(typeA);
256 | knn.train(both);
257 | var res = knn.clusters(2);
258 | expect(res.length).to.equal(2);
259 | expect( (distance(res[0], [0.5,0.5]) < .1) || (distance(res[0], [50.5,0.5]) < .1) ).to.be.true;
260 | expect( (distance(res[1], [0.5,0.5]) < .1) || (distance(res[1], [50.5,0.5]) < .1) ).to.be.true;
261 | });
262 |
263 | it(' can determine the location of three centroids, being told there are three', function(){
264 | var knn = new KMeans();
265 | var typeA = randomPoints(100,[1,1],[0,0]);
266 | var typeB = randomPoints(100,[1,1],[30,0]);
267 | var typeC = randomPoints(100,[1,1],[15,20]);
268 | var all = typeA.concat(typeB.concat(typeC));
269 | knn.train(all);
270 | var res = knn.clusters(3);
271 | expect(res.length).to.equal(3);
272 | expect( (distance(res[0], [0.5,0.5]) < .1) || (distance(res[0], [30.5,0.5]) < .1) || (distance(res[0], [15.5,20.5]) < .1)).to.be.true;
273 | expect( (distance(res[1], [0.5,0.5]) < .1) || (distance(res[1], [30.5,0.5]) < .1) || (distance(res[1], [15.5,20.5]) < .1)).to.be.true;
274 | expect( (distance(res[2], [0.5,0.5]) < .1) || (distance(res[2], [30.5,0.5]) < .1) || (distance(res[2], [15.5,20.5]) < .1)).to.be.true;
275 | });
276 |
277 | it(' can determine the location of three closer centroids, being told there are three', function(){
278 | var knn = new KMeans();
279 | var typeA = randomPoints(100,[1,1],[0,0]);
280 | var typeB = randomPoints(100,[1,1],[3,0]);
281 | var typeC = randomPoints(100,[1,1],[1.5,2]);
282 | var all = typeA.concat(typeB.concat(typeC));
283 | knn.train(all);
284 | var res = knn.clusters(3);
285 | expect(res.length).to.equal(3);
286 | expect( (distance(res[0], [0.5,0.5]) < .1) || (distance(res[0], [3.5,0.5]) < .1) || (distance(res[0], [2,2.5]) < .1)).to.be.true;
287 | expect( (distance(res[1], [0.5,0.5]) < .1) || (distance(res[1], [3.5,0.5]) < .1) || (distance(res[1], [2,2.5]) < .1)).to.be.true;
288 | expect( (distance(res[2], [0.5,0.5]) < .1) || (distance(res[2], [3.5,0.5]) < .1) || (distance(res[2], [2,2.5]) < .1)).to.be.true;
289 | });
290 | });
291 |
292 | /* So, "successfully" is in quotation marks because there is no very rigorous idea of what success is.
293 | The below tests will always pass when run, assuming you give them sufficient
294 | time to finish. They produce, however, some data in the folder in which this runs,
295 | which will display grouped members of the clusters produced by the algorithm.
296 |
297 |    In this context, "success" means that each of the clusters seems to have something in common with
298 | each other. It would be a little too much to hope for, with this rather crude algorithm, to have
299 | all the members of each cluster be the same character.
300 |
301 |    Consider how well the algorithm sorts the characters 1 and 0 into separate groups,
302 | as well as the characters 5 and 9, and the characters 4, 6, and 8. Some
303 | of these groupings are "better" than others. Which and why?
304 | */
305 | describe('The algorithm finds centroids for the MNIST data "successfully"', function(){
306 |
307 | it('finds centroids data consisting of the characters 1 and 0', function(done){
308 | this.timeout(10000);
309 | var knn = new KMeans();
310 | var someElements = mnist.zeroAndOne().map(function(n){return n[0]}).slice(0,100);
311 | knn.train(someElements)
312 | console.log('Finding clusters for images consisting of 1 and 0')
313 | var res = knn.clusters(2);
314 | console.log('Writing images...');
315 | writer.exportGrouped(res, someElements, done, "zeroOne")
316 | });
317 |
318 | it('finds centroids for data consisting of the characters 5 and 9', function(done){
319 | this.timeout(10000);
320 | var knn = new KMeans();
321 | var someElements = mnist.fiveAndNine().map(function(n){return n[0]}).slice(0,120);
322 | knn.train(someElements)
323 | console.log('Finding clusters for images consisting of 5 and 9')
324 | var res = knn.clusters(2);
325 | console.log('Writing images...');
326 | writer.exportGrouped(res, someElements, done, "fiveNine")
327 | });
328 |
329 | it('finds centroids for data consisting of the characters 4 and 6 and 8', function(done){
330 | this.timeout(10000);
331 | var knn = new KMeans();
332 | var someElements = mnist.fourAndSixAndEight().map(function(n){return n[0]}).slice(0,150);
333 | knn.train(someElements)
334 | console.log('Finding clusters for images consisting of 4 and 6 and 8')
335 | var res = knn.clusters(3);
336 | console.log('Writing images...');
337 | writer.exportGrouped(res, someElements, done, "fourSixEight")
338 | });
339 |
340 | });
341 |
342 | });
343 |
344 | /*EXTRA CREDIT:
345 |
346 | As stated in the text of the workshop, there are two distinct clustering
347 | tasks: (1) determining where a given number of clusters are located in space
348 | given a particular data set and (2) determining how many clusters are located
349 | in a given space, given a particular dataset. All of the above pertains to
350 | the first task. The extra credit pertains to the second.
351 |
352 | The basic task is to write an additional function, 'findClusters', which can
353 | determine how many clusters are in a given set of data. The tests below start
354 | out with very obvious separations of clusters (i.e., two clumps of data which
355 | are very, very far apart) and continue on to much more difficult separations.
356 |
357 | The basic strategy that you'll want to follow is to use the existing 'clusters'
358 | function to find the best set of 1, 2, 3... k clusters. Each of these will
359 | almost certainly have a smaller average centroid-training point distance than
360 | the prior--this would be true even if the data were completely randomly
361 | distributed without any structure at all. However, if there are clusters in the
362 | data, then the rate at which the average centroid-training point distance decreases
363 | will drop once you've run out of clusters.
364 |
365 | For instance, if there are two tight clusters with centers at [0,0] and [0,50],
366 | then a clustering of 1 cluster will probably have an average centroid-training point
367 | distance of 25 or so. A clustering with 2 clusters might have an average distance
368 | of .5--and a clustering with 3 might have an average distance of 0.45, with 4 might have
369 | 0.4, and so on. The distance decreases as the number of clusters increases, but after the
370 | average distance begins to decrease at a slow and steady rate then it probably
371 | means you have exceeded the optimum number of clusters.
372 |
373 | ...of course, most cases will not be nearly as clear as the above.
374 | */
375 |
//NOTE(review): skipped by default (xdescribe) -- these are the optional
//extra-credit tests. The variable name 'knn' is historical; each instance
//is actually a KMeans object.
xdescribe('Testing optional k-means functionality', function(){

	xdescribe('The algorithm can determine the number of clusters, not being told how many there are.', function(){

		/* The function 'findClusters' should take a number, which is the maximum
		number of clusters it should search for.

		It should return an array of centroids, just like 'clusters', but
		the array should have anything from 1 to the input number of
		centroids in it, depending on the most natural way to split
		the data.

		It took me about 25 lines of code to do this, although that
		could have probably been minimized.
		*/
		//Each test trains on uniformly-random unit-square clumps centered at
		//the given bias points, then checks findClusters recovers the count.
		it('can determine the number of clusters when there are two, very separate', function(){
			var knn = new KMeans();
			var typeA = randomPoints(100,[1,1],[0,0]);
			var typeB = randomPoints(100,[1,1],[50,0]);
			var both = typeA.concat(typeB)
			knn.train(both);
			var res = knn.findClusters(10);
			expect(res.length).to.equal(2);
		});

		it('can determine the number of clusters when there are three, pretty separate', function(){
			var knn = new KMeans();
			var typeA = randomPoints(100,[1,1],[0,0]);
			var typeB = randomPoints(100,[1,1],[5,0]);
			var typeC = randomPoints(100,[1,1],[3,4]);
			var all = typeA.concat(typeB.concat(typeC));
			knn.train(all)
			var res = knn.findClusters(10);
			expect(res.length).to.equal(3);
		});

		it('can determine the number of clusters when there are four, fairly close', function(){
			var knn = new KMeans();
			var typeA = randomPoints(100,[1,1],[0,0]);
			var typeB = randomPoints(100,[1,1],[2,0]);
			var typeC = randomPoints(100,[1,1],[1,1]);
			var typeD = randomPoints(100,[1,1],[1,-1]);
			var all = typeA.concat(typeB.concat(typeC.concat(typeD)));
			knn.train(all)
			var res = knn.findClusters(10);
			expect(res.length).to.equal(4);
		});

		it('can determine the number of clusters when there are five, pretty close ones', function(){
			var knn = new KMeans();
			var typeA = randomPoints(100,[1,1],[0,0]);
			var typeB = randomPoints(100,[1,1],[2,0]);
			var typeC = randomPoints(100,[1,1],[0,2]);
			var typeD = randomPoints(100,[1,1],[-2,0]);
			var typeE = randomPoints(100,[1,1],[0,-2]);
			var all = typeA.concat(typeB.concat(typeC.concat(typeD.concat(typeE))));
			knn.train(all)
			var res = knn.findClusters(10);
			expect(res.length).to.equal(5);
		});

		it('can determine the number of clusters when there are four, very, very close ones', function(){
			var knn = new KMeans();
			var typeB = randomPoints(100,[1,1],[1,0]);
			var typeC = randomPoints(100,[1,1],[0,1]);
			var typeD = randomPoints(100,[1,1],[-1,0]);
			var typeE = randomPoints(100,[1,1],[0,-1]);
			var all = typeB.concat(typeC.concat(typeD.concat(typeE)));
			knn.train(all)
			var res = knn.findClusters(10);
			expect(res.length).to.equal(4);
		});

	});

	/* This last section is really quite hard.


	A lot of the ambiguity involved in the idea of a "cluster" comes in here,
	as well as the problem that k-means is ill-suited to detecting certain kinds of cluster.

	Anyhow, just do here what you did in the immediately prior section with the fake data.
	*/
	xdescribe('it can determine the number of clusters in the MNIST data as well', function(){

		it('can determine the number of clusters when there are two characters', function(done){
			//Fifteen second timeout...
			this.timeout(15000);
			var knn = new KMeans();
			var someElements = mnist.zeroAndOne().map(function(n){return n[0]}).slice(0,100);
			knn.train(someElements);
			var res = knn.findClusters(8);
			expect(res.length).to.equal(2);
			writer.exportGrouped(res, someElements, done, "clusterCountedZeroAndOne");
		});

		it('can determine the number of clusters when there are three characters', function(done){
			//Fifteen second timeout. This can take a bit.
			this.timeout(15000);
			var knn = new KMeans();
			var someElements = mnist.zeroAndOneAndFive().map(function(n){return n[0]}).slice(0,100);
			knn.train(someElements);
			var res = knn.findClusters(8);
			expect(res.length).to.equal(3);
			writer.exportGrouped(res, someElements, done, "clusterCountedZeroAndOneAndFive");
		});

	});

});
486 |
487 |
--------------------------------------------------------------------------------
/003_reinforcement/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/js-ml-workshop/d121931df51cc42ad3e91736f0842f77c07f7387/003_reinforcement/.DS_Store
--------------------------------------------------------------------------------
/003_reinforcement/display.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/003_reinforcement/lib/misc.js:
--------------------------------------------------------------------------------
1 |
2 |
//Got this from http://kaisarcode.com/javascript-rotate
//Rotates a DOM element by `deg` degrees, setting the CSS3 transform,
//the vendor-prefixed variants, and the legacy IE matrix filter.
function kcRotate(elem, deg){
	var Dx;
	var Dy;
	var iecos;
	var iesin;
	var halfWidth;
	var halfHeight;
	var dummy;

	//degrees to radians
	var rad = deg * (Math.PI / 180);

	//get sine and cosine of rotation angle
	iecos = Math.cos(rad);
	iesin = Math.sin(rad);

	//get element's size
	halfWidth = elem.offsetWidth / 2;
	halfHeight = elem.offsetHeight / 2;

	//calculating position correction values (the IE matrix filter rotates
	//about the top-left corner, so the element must be translated back)
	Dx = -halfWidth * iecos + halfHeight * iesin + halfWidth;
	Dy = -halfWidth * iesin - halfHeight * iecos + halfHeight;

	//applying CSS3 rotation
	elem.style.transform = "rotate(" + rad + "rad)";

	//vendor prefixed rotations
	elem.style.mozTransform = "rotate(" + rad + "rad)";
	elem.style.webkitTransform = "rotate(" + rad + "rad)";
	elem.style.OTransform = "rotate(" + rad + "rad)";
	elem.style.msTransform = "rotate(" + rad + "rad)";

	//rotation Matrix for IExplorer
	elem.style.filter = "progid:DXImageTransform.Microsoft.Matrix(M11=" + iecos + ", M12=" + -iesin + ", M21=" + iesin + ", M22=" + iecos + ", Dx=" + Dx + ", Dy=" + Dy + ", SizingMethod=auto expand)";
	elem.style.msFilter = "progid:DXImageTransform.Microsoft.Matrix(M11=" + iecos + ", M12=" + -iesin + ", M21=" + iesin + ", M22=" + iecos + ", Dx=" + Dx + ", Dy=" + Dy + ", SizingMethod=auto expand)";

	//Fixing black box issue on IE9.
	//NOTE(review): with innerHTML set to '', getElementsByTagName("br") can
	//never find an element, so this branch is dead -- the original markup
	//(presumably a conditional comment containing a <br>) appears to have
	//been lost; TODO restore it from the upstream snippet.
	dummy = document.createElement("div");
	dummy.innerHTML = '';
	if (dummy.getElementsByTagName("br").length == 1) elem.style.filter = "none";
	//BUGFIX: `delete dummy` was a SyntaxError in strict mode (delete of an
	//unqualified identifier) and a silent no-op otherwise; drop the
	//reference instead.
	dummy = null;
}
--------------------------------------------------------------------------------
/003_reinforcement/lib/rl_run.js:
--------------------------------------------------------------------------------
(function(){

	//Builds a fresh game state for one episode of the cart/stick balancing
	//game. `loc` is the cart's position along the track, `ang` the stick's
	//angle in degrees; both have matching speed components.
	var setInitialSettings = function(){
		//TODO: Add customization of settings.
		return {
			player: {
				loc: 5,
				locSpeed: 0,
				//Start with a small random tilt so episodes differ.
				ang: ((Math.random() - 0.5) / 10),
				angSpeed: 0
			},

			world: {
				length: 10,
				gravity: .4,
				friction: 0.99,
				movePower: .05,
				rotateSize: 10,
				stickLength: 100
			},

			dev: {
				logging: false,
				verboseLogging: false
			}
		}
	}

	//This is supposed to rule all physical events. It doesn't have
	//anything to do with victory conditions, though, and things like that.
	//Mutates `state` in place and returns it.
	var physics = function(state, moves){

		//Player-controlled stuff. Pushing the cart also torques the stick
		//in the opposite direction.
		state.player.locSpeed = state.player.locSpeed - ((moves.left) ? state.world.movePower : 0);
		state.player.locSpeed = state.player.locSpeed + ((moves.right) ? state.world.movePower : 0);
		state.player.angSpeed = state.player.angSpeed + ((moves.left) ? state.world.movePower : 0) * state.world.rotateSize;
		state.player.angSpeed = state.player.angSpeed - ((moves.right) ? state.world.movePower : 0) * state.world.rotateSize;


		//Environment-controlled stuff.
		//Update locations -- movement
		state.player.loc = state.player.loc + state.player.locSpeed;
		state.player.ang = state.player.ang + state.player.angSpeed;
		//Friction
		state.player.angSpeed = state.player.angSpeed * state.world.friction;
		state.player.locSpeed = state.player.locSpeed * state.world.friction;
		//Gravity -- only influences the stick.
		state.player.angSpeed = state.player.angSpeed + Math.sin(state.player.ang / 180 * Math.PI) * state.world.gravity;

		return state;

	}

	//The three moves an agent may choose between each frame.
	var allPossibleMoves = [
		{left: false, right: false},
		{left: true, right: false},
		{left: false, right: true}
	];


	//Losing conditions: the episode ends when the cart runs off either end
	//of the track or the stick tips past +/-40 degrees.
	//BUGFIX: previously each condition invoked endCallback independently,
	//so a frame where both held fired the end-of-episode callback twice.
	var rules = function(state, endCallback){
		var offTrack = state.player.loc >= state.world.length || state.player.loc <= 0;
		var fell = state.player.ang <= -40 || state.player.ang > 40;
		if (offTrack || fell){
			state.dev.logging && console.log("Stick fell or ran off track.");
			endCallback(state);
		}
	}

	//Runs one episode in real time, one frame every 40ms, rendering each
	//frame through displayCallback. `cb` is forwarded to agent.end when
	//the episode finishes (the terminal reward is -10).
	var startGame = function(agent, displayCallback, cb){
		var state = setInitialSettings();
		var i = window.setInterval(function(){
			var moves = agent.decide([state.player.loc-state.world.length/2, state.player.ang, state.player.locSpeed, state.player.angSpeed], 1, allPossibleMoves);
			state = physics(state, moves);
			displayCallback(state);
			rules(state, function(state){
				agent.end([state.player.loc, state.player.ang], -10, cb);
				clearInterval(i);
			});
		},40)
	};

	//Runs one episode as fast as possible (used for training).
	//NOTE(review): this loops until `rules` ends the episode; an agent
	//that never loses would spin forever -- confirm that is acceptable.
	var fastGame = function(agent, displayCallback, cb){
		var state = setInitialSettings();
		var playing = true;
		while(playing){
			var moves = agent.decide([state.player.loc-state.world.length/2, state.player.ang, state.player.locSpeed, state.player.angSpeed], 1, allPossibleMoves);
			state = physics(state, moves);
			displayCallback(state);
			rules(state, function(state){
				agent.end([state.player.loc, state.player.ang],-10, cb);
				playing = false;
			});
		}
	};

	//Policy gets called every game-frame.
	//BUGFIX: previously the `cb` argument was dropped, so the game-over
	//callback supplied by callers (see rl_visible.js) never reached
	//agent.end.
	window['BalanceGameSlow'] = function(agent, displayCallback, cb){
		startGame(agent, displayCallback, cb);
	}

	window['BalanceGameFast'] = function(agent, displayCallback, cb){
		fastGame(agent, displayCallback, cb);
	}

})();
--------------------------------------------------------------------------------
/003_reinforcement/lib/rl_visible.js:
--------------------------------------------------------------------------------
1 |
//Wires the balancing-game UI: keyboard input for the human-playable game,
//DOM rendering of game state, and the three control buttons (human play,
//AI training loop, AI play).
var setUpControls = function(options){

	//Create agent which will be used everywhere in this closure.
	//BUGFIX: was assigned without `var`, leaking an implicit global
	//(and a ReferenceError under strict mode).
	var agent = new RLAgent()

	//Only should be done once. Arrow keys queue up the next move for the
	//human-playable game.
	var move = { left: false, right: false }
	$(document).keydown(function(e){
		if ((e.keyCode || e.which) == 37){move.left = true; move.right=false; }
		if ((e.keyCode || e.which) == 39){move.right = true; move.left=false; }
	});

	//Maps game-state coordinates onto CSS pixel offsets for the 500px-wide
	//play area.
	var displayInfo = {
		cart:{
			left: function(stateLeft, worldSize){return (500-100)*stateLeft/worldSize + 'px'; },
		},
		stick:{
			left: function(stateLeft, worldSize){return 50+(500-100)*stateLeft/worldSize + 'px';},
			rotate: function(radians){ return radians * 180 / Math.PI; }
		}
	}

	//Renders one frame of the game into the DOM.
	var visibleGame = function(state){
		$('#cart').css('left', displayInfo.cart.left(state.player.loc, state.world.length));
		$('#stick').css('left', displayInfo.stick.left(state.player.loc, state.world.length));
		var stick = document.getElementById('stick');
		kcRotate(stick, state.player.ang);
	}

	//Starts a human playable game.
	var startHumanPlayable = function(){
		var humanAgent = {
			end: function(state, reward){},
			//Returns whatever move the arrow keys queued, then clears it.
			decide: function(state, reward){
				var temp = { left: move.left, right: move.right };
				move.left = false;
				move.right = false;
				return temp;
			}
		}
		BalanceGameSlow(humanAgent, visibleGame, function(){});
	}

	//Plays one visible game with the (possibly trained) AI agent.
	var startAIGame = function(){
		BalanceGameSlow(agent, visibleGame, function(){});
	}

	//Toggles a background training loop: repeatedly plays fast invisible
	//games, updating the on-screen episode counter after each one.
	var aiPlaying = false;
	var timesTrained = 0;
	var startAITraining = function(){
		if (aiPlaying == false){
			aiPlaying = true;
			var internal = function(){
				BalanceGameFast(
					agent,
					function(){},
					function(){
						if (aiPlaying) {
							timesTrained = timesTrained + 1;
							$('#aiNum').html(timesTrained.toString());
							//Brief pause keeps the UI responsive between episodes.
							window.setTimeout(internal, 5)
						}
					}
				)};
			internal();
		}else{
			aiPlaying = false;
		}
	}

	$('#humanPlay').click(function(){ startHumanPlayable(); })
	$('#aiTrain').click(function(){ startAITraining(); });
	$('#aiPlay').click(function(){ startAIGame(); });

};
77 |
78 |
//Wire everything up once the DOM is ready.
$(document).ready(function(){
	setUpControls();
});




//BalanceGameFast({}, userControlledPolicy, function(){points++}, endCallback);
--------------------------------------------------------------------------------
/003_reinforcement/lib/style.css:
--------------------------------------------------------------------------------
/* The cart that slides along the track. */
#cart{
	position: absolute;
	width: 100px;
	height: 25px;
	left: 200px;
	top: 225px;
	background: #C55;
}

/* The balancing stick; rotated from script (kcRotate). */
#stick{
	position: absolute;
	width: 4px;
	height: 350px;
	left: 248px;
	top: 50px;
	background: #BADA55;
}

/* Play-area backdrop, drawn behind cart and stick. */
#background{
	position: absolute;
	width: 500px;
	height: 250px;
	left: 0px;
	top: 0px;
	z-index: -100;
	overflow: hidden;
	background: #D1E5E1;
}

/* Control panel to the right of the play area. */
#buttons{
	position: absolute;
	width: 500px;
	height: 350px;
	left: 520px;
	top: 0px;
}
--------------------------------------------------------------------------------
/003_reinforcement/rl.js:
--------------------------------------------------------------------------------
1 |
2 |
//The task is to define the RL_Agent class.

/**
 * Reinforcement-learning agent for the balancing game.
 *
 * Instance state:
 *   epsilon      - exploration rate (probability of a non-greedy move).
 *   newEpisode   - true when the next decide() call starts a fresh episode.
 *   actionValues - map from state+move token to estimated action value.
 *   returns      - per-token bookkeeping used to average returns.
 */
function RLAgent(){
	//BUGFIX: removed unused `var self = this;` local.
	this.epsilon = 0.1;
	this.newEpisode = true;
	this.actionValues = {};
	this.returns = {};
}

//Stub: always picks the first available move. To be replaced by the
//workshop participant with an epsilon-greedy policy.
RLAgent.prototype.decide = function(state, reward, allPossibleMoves){
	return allPossibleMoves[0];
}

//Stub: ends an episode. Must invoke `cb` (when given) so the game loop
//can continue.
RLAgent.prototype.end = function(state, reward, cb){
	cb && cb();
}


//Guard so this file also loads in the browser, where CommonJS's
//`module` does not exist.
var module = module || {};
module.exports = RLAgent;
25 |
26 |
--------------------------------------------------------------------------------
/003_reinforcement/rl_specs.js:
--------------------------------------------------------------------------------
var rl = require('./rl');
var expect = require('chai').expect;

describe('All tests for the reinforcement learning agent', function(){

	describe('Basic helper functions', function(){

		//Same as elsewhere.
		it('max function works', function(){
			var max = rl.prototype._max;
			expect(typeof max).to.equal('function');
			//Usual stuff
			expect(max([0,1,2,3,4,5,6], function(n, index){return n;})).to.equal(6);
			expect(max([0,1,2,3,4,5,6], function(n, index){return -n;})).to.equal(0);
			expect(max(['a','sas','asdasd','ssd'], function(n){return n.length;})).to.equal('asdasd');
			//With index
			expect(max([0,1,2,3,4,5,6], function(n, index){return -index;})).to.equal(0);
			expect(max([10,1,2,3,4,5,6], function(n, index){return -index;})).to.equal(10);
			expect(max([7,1,2,6,4,3,2], function(n, index){return index+n;})).to.equal(6);
		});

		/* _discretize is necessary because, as mentioned in the workshop, the ML
		   agent cannot learn unless there are a finite number of states, so
		   it can return to them again and again. Discretize takes an array
		   of continuous numbers, representing a state, and returns an array
		   which has had those numbers snapped onto one of a set of close
		   integers.

		   It takes as input one or two arrays.

		   In the case that it takes one array, it returns that array with
		   each element rounded to the nearest integer.

		   In the case that it takes two arrays, it should return an array
		   with the n-th element of the first adjusted to the nearest multiple
		   of the n-th element of the second.
		*/
		it('_discretize function works', function(){
			var d = rl.prototype._discretize;
			expect(typeof d).to.equal('function');
			expect(d([1.1,1.2,1.8,2])).to.eql([1,1,2,2])
			expect(d([1.01,1.2,1.8,2.9])).to.eql([1,1,2,3])
			expect(d([1.01,1.2,1.8,2.9],[10,10,10,10])).to.eql([0,0,0,0]);
			expect(d([0.99,0.98,1.8,3.9],[2,2,2,2])).to.eql([0,0,2,4]);
		});

		/* _trim is also necessary because the ML agent cannot learn unless
		   there are a finite number of states. While _discretize rounds numbers
		   in an array to particular integers, _trim removes the extreme values from
		   the numbers in an array. After applying both _trim and _discretize to an array,
		   one can only get back a finite set of array values.

		   Trim takes two arrays. It returns an array, each element of which consists
		   of the n-th value of the first array clamped between the absolute value
		   and the negative absolute value of the n-th element of the second.
		*/
		it('_trim function works', function(){
			var d = rl.prototype._trim;
			expect(typeof d).to.equal('function');
			expect(d([1,2,3,4],[1,1,1,1])).to.eql([1,1,1,1]);
			//BUGFIX: dropped a stray second argument that was previously
			//passed to .eql (it was silently treated as an assertion message).
			expect(d([0,1,2,-1,2],[1,1,1,1,1])).to.eql([0,1,1,-1,1]);
			expect(d([-3,-2,-1,0,1,2,3],[2,2,2,2,2,2,2])).to.eql([-2,-2,-1,0,1,2,2]);
		});

		/* As mentioned in the workshop, a big part of reinforcement learning is figuring
		   out how to estimate the action-value function for a policy. The easiest way to store
		   action-value estimates in javascript is in an object, with a token for each state-action
		   pair as the key for the items in the object.

		   _tokenize is a function that takes a state and a move, and returns a single token
		   used to identify this pair in the object defining the action-value function.

		   It should use _trim, _discretize, and toString to do this.

		   There aren't any real tests for this, because a lot of the tweaking of the algorithm
		   can go into how coarse of a discretization you want to use. Keeping in mind that the
		   state function looks like [location, angle, locationSpeed, angularSpeed], I've had a
		   lot of success using small rounding values (1) for the first two while using enormous
		   rounding values (1000) for the last two. This effectively reduces the dimensionality of the
		   state-space from 4d to 2d; while this is a loss of information, it means that it is much
		   easier to search through the entirety of the state space.
		*/
		it('_tokenize function works', function(){
			var inst = new rl();
			expect(typeof inst._tokenize).to.equal('function');
		})


		/*
		   The basic idea of _chooseBestMove is that you pass it (1) a state (an array of real numbers),
		   (2) an array of all possible moves, and (3) an object which has estimated action-values
		   for particular states. It will return the index of the best move in (2).

		   In particular, the state will be an array with
		   [location (position from -5,5), angle (-40 to 40), linearSpeed (smaller), and angularSpeed (smaller)]
		   The content of allPossibleMoves doesn't actually matter.
		   Action values is an object where each key is a tokenization of a state + a move from that state,
		   and the value for each key is the average return after making that move in that state.

		   It will need to round (state) to a discrete value before using it to access the previous
		   action-value in actionValues.
		*/
		it('_chooseBestMove function works', function(){
			//Is a function
			var instance = new rl();
			expect(typeof instance._chooseBestMove).to.equal('function');
		});

		it('_initalizeEpisode exists', function(){
			var instance = new rl();
			//BUGFIX: previously asserted on _chooseBestMove (copy-paste);
			//now checks the function this test names. (The spelling
			//'_initalizeEpisode' is kept as-is -- presumably it matches the
			//expected implementation; verify.)
			expect(typeof instance._initalizeEpisode).to.equal('function');
		})

		it('_continueEpisode exists', function(){
			var instance = new rl();
			//BUGFIX: previously asserted on _chooseBestMove (copy-paste).
			expect(typeof instance._continueEpisode).to.equal('function');
		})

	});
	/* Finally, to the necessary functions.*/



	/* When created, an instance of RLAgent should have a few variables that will be
	   exceedingly useful later on.
	*/

	it('has necessary variables', function(){
		var inst = new rl();
		expect(inst.hasOwnProperty('epsilon')).to.equal(true);
		expect(inst.hasOwnProperty('newEpisode')).to.equal(true);
		expect(inst.hasOwnProperty('actionValues')).to.equal(true);
		expect(inst.hasOwnProperty('returns')).to.equal(true);
	});


	/*
	   The function 'decide' is called every time-step something occurs in the environment, except for the last
	   step in the episode, which is called with 'end' instead.

	   'decide' will be called with the following variables: state, reward, and allPossibleMoves.

	   state: This is an array which summarizes the state of the cart-with-pole system.
	   The variables in it are [location, angularLocation, velocity, angularVelocity]
	   These variables are not integers, and will need to be discretized, as mentioned above.

	   reward: This is a number. The reward will be 1 for every frame where the stick does not fall over
	   and the cart does not run off the edge. The reward will be 0 for the last frame, where this does occur.

	   allPossibleMoves: This will be an array containing all possible moves. Decide must return
	   a value from this array.

	   The function "decide" will need to initialize certain variables if this is the first step of a training episode.
	   It will also need to store the state and reward of each step so that the end function can alter the
	   action-value function in accord with them.
	*/
	it('has a decide function', function(){
		var m = new rl();
		expect(typeof m.decide).to.equal('function')
	});

	/* The function 'end' is called similarly to 'decide': state, reward, and a callback.

	   The state and the reward are as before. The callback must be called when 'end' is done,
	   or else things will break.

	   The only difference is that it is the last step in an episode. So it needs to count up the
	   rewards following the first occurrence of each state, incorporate them into the averages for
	   the action-value function, and reset the agent's bookkeeping for the next episode.
	*/
	it('has an end function', function(){
		var m = new rl();
		expect(typeof m.end).to.equal('function')
	});

})
--------------------------------------------------------------------------------
/lib/mnist_reader.js:
--------------------------------------------------------------------------------
1 | var fs = require('fs');
2 |
3 | module.exports = {
4 |
5 | allElements: function(){
6 | var flt = JSON.parse(fs.readFileSync('../lib/small_mnist_data.json').toString())
7 | .map(function(n){ return [n[0], n[1].indexOf(1)]; });
8 | return flt;
9 | },
10 |
11 | zeroAndOne: function(){
12 | var flt = JSON.parse(fs.readFileSync('../lib/small_mnist_data.json').toString())
13 | .filter(function(n){ return n[1][1] == 1 || n[1][0] == 1 } )
14 | .map(function(n){ return [n[0], n[1].indexOf(1)]; });;
15 | return flt;
16 | },
17 |
18 | fiveAndNine: function(){
19 | var flt = JSON.parse(fs.readFileSync('../lib/small_mnist_data.json').toString())
20 | .filter(function(n){ return n[1][5] == 1 || n[1][9] == 1} )
21 | .map(function(n){ return [n[0], n[1].indexOf(1)]; });;
22 | return flt;
23 | },
24 |
25 | fourAndSixAndEight: function(){
26 | var flt = JSON.parse(fs.readFileSync('../lib/small_mnist_data.json').toString())
27 | .filter(function(n){ return n[1][4] == 1 || n[1][6] == 1 || n[1][8] == 1} )
28 | .map(function(n){ return [n[0], n[1].indexOf(1)]; });;
29 | return flt;
30 | },
31 |
32 | zeroAndOneAndFive: function(){
33 | var flt = JSON.parse(fs.readFileSync('../lib/small_mnist_data.json').toString())
34 | .filter(function(n){ return n[1][0] == 1 || n[1][1] == 1 || n[1][5] == 1} )
35 | .map(function(n){ return [n[0], n[1].indexOf(1)]; });;
36 | return flt;
37 | },
38 |
39 | }
--------------------------------------------------------------------------------
/lib/rand.js:
--------------------------------------------------------------------------------
1 | module.exports = function(numbers, range, bias){
2 | var temp = [];
3 | for (var x = 0; x < numbers; x++){
4 | var point = [];
5 | for (var y = 0; y < range.length; y++){
6 | point.push(range[y] * Math.random() + bias[y]);
7 | }
8 | temp.push(point);
9 | }
10 | return temp;
11 | }
--------------------------------------------------------------------------------
/lib/writer.js:
--------------------------------------------------------------------------------
1 | var lwip = require('lwip');
2 |
//Maps a grayscale intensity onto an equal-channel {r, g, b} pixel for lwip.
function toCol(val){
	var level = Math.round(val);
	return { r: level, g: level, b: level };
}
10 |
//Euclidean distance between two equal-length numeric vectors.
function distance(one, two){
	var sumOfSquares = 0;
	for (var i = 0; i < one.length; i++){
		sumOfSquares += Math.pow(one[i] - two[i], 2);
	}
	return Math.sqrt(sumOfSquares);
}
14 |
module.exports = {
	//Writes ten GIFs ('classifiedAs0.gif' .. 'classifiedAs9.gif'), one per
	//digit. Each GIF is an 11x11-tile canvas (28px tiles) showing up to 100
	//of the images classified as that digit. `classified` is an array of
	//[pixelArray, label]; `done` is called after all ten files are written.
	exportClassified: function(classified, done){
		var n = 0;	//count of files written so far, shared across callbacks
		var writeImages = function(currentSetOfImages, i){
			lwip.create(28*10+28,28*10+28, function(err, image){
				var batch = image.batch();
				if(err){return console.log("Something went wrong with image export: ", err);}
				var index = 0;	//next image to place in the 10x10 grid
				for(var x = 0; x < 10; x++){
					for(var y = 0; y < 10; y++){
						//Cells beyond the supplied images are left blank.
						if (index >= currentSetOfImages.length){
						}else{
							var currentImage = currentSetOfImages[index];
							var pixel = 0;	//walks the flat 28*28 pixel array
							for(var xx = 0; xx < 28; xx++){
								for(var yy = 0; yy < 28; yy++){
									batch.setPixel(x*28+xx, y*28+yy, toCol(currentImage[pixel]));
									pixel++;
								}
							}
							index++;
						}

					}
				}
				var title = 'classifiedAs'+String(i)+'.gif'
				batch.writeFile(title, function(err){
					if(err){console.log("Something went wrong with the image export in the final stages: ", err);}
					n++;
					//Signal completion once all ten digit files are out.
					if (n > 9){ done();}
				})
			});
		}

		for(var i = 0; i < 10; i++){
			var currentSetOfImages = classified.filter(function(n){return n[1] == i}).map(function(n){return n[0];});
			console.log("There are " , currentSetOfImages.length , " images ostensibly of " , i)
			writeImages(currentSetOfImages, i);
		}
	},

	//Writes one GIF per centroid, each showing up to 100 of the training
	//images nearest that centroid. `centroids` come from KMeans.clusters,
	//`allData` are the raw pixel arrays, `str` prefixes the filenames, and
	//`done` is called after the last file is written.
	exportGrouped: function(centroids, allData, done, str){

		var n = 0;	//count of files written so far
		var writeImages = function(currentSetOfImages, i){
			lwip.create(28*10+28,28*10+28, function(err, image){
				var batch = image.batch();
				if(err){return console.log("Something went wrong with image export: ", err);}
				var index = 0;
				for(var x = 0; x < 10; x++){
					for(var y = 0; y < 10; y++){
						//Cells beyond the supplied images are left blank.
						if (index >= currentSetOfImages.length){
						}else{
							var currentImage = currentSetOfImages[index];
							var pixel = 0;
							for(var xx = 0; xx < 28; xx++){
								for(var yy = 0; yy < 28; yy++){
									batch.setPixel(x*28+xx, y*28+yy, toCol(currentImage[pixel]));
									pixel++;
								}
							}
							index++;
						}

					}
				}
				var title = str + '_groupedAs'+String(i+1)+'Of'+ centroids.length +'.gif'
				batch.writeFile(title, function(err){
					if(err){console.log("Something went wrong with the image export in the final stages: ", err);}
					n++;
					if (n >= centroids.length){ done();}
				})
			});
		}


		//Partition allData by nearest centroid: belongs[k] collects the
		//vectors closest to centroids[k].
		//NOTE(review): the inner `var index` deliberately shadows the reduce
		//callback's unused `index` parameter -- confusing but harmless here.
		var belongs = allData.reduce(function(belongs, vector, index){
			var index = centroids.reduce(function(old, centroid, inner_index){
				var dist = distance(centroid, vector)
				return (old.distance < dist) ? old : {index: inner_index, distance: dist};
			}, {index: 0, distance: distance(centroids[0], vector) }).index;
			belongs[index].push(vector);
			return belongs;
		}, centroids.map(function(){return []}))

		for(var i = 0; i < centroids.length; i++){
			console.log("There are " , belongs[i].length , " images in group " , i)
			writeImages(belongs[i], i);
		}

	}
};
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "ml_workshop",
3 | "version": "0.0.1",
4 | "description": "A simple machine learning workshop.",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "mocha tests"
8 | },
9 | "author": "James Tillman ",
10 | "license": "ISC",
11 | "dependencies": {
12 | "chai": "^3.0.0",
13 | "lwip": "0.0.9",
14 | "mocha": "^2.4.5"
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # A Gentle Introduction to Machine Learning
2 |
3 | ## Overview
4 |
5 | ### Introduction to the Introduction
6 |
7 | The purpose of this workshop is to give you a broad, accurate, although somewhat cursory understanding of machine learning.
8 |
9 | More particularly, it aims to help you understand and program some of the common algorithms used in machine learning. It aims to guide you through what is involved in training these algorithms and verifying this training. And it aims to do all this over the subfields of supervised machine learning, unsupervised machine learning, and reinforcement learning.
10 |
11 | If none of that made any sense, that's ok. Part of the point of the workshop is to explain these and other terms. I'll introduce them slowly from now on.
12 |
13 | Machine Learning is generally a very mathematical field, which can make it somewhat intimidating. This introduction tries to lower that mathematical barrier by assuming only that the student has (1) a good understanding of Javascript and a (2) decently good knowledge of high school algebra. I will explain some terms from linear algebra before introducing particular algorithms, but these are all quite easy to grasp.
14 |
15 | Ok. So what is machine learning?
16 |
17 | ### What is Machine Learning?
18 |
19 | Let's contrast the process of coding and using a machine learning algorithm with the process of coding and using a non-machine-learning algorithm.
20 |
21 | Suppose you wished to write a non-machine learning algorithm that could detect when an image has a face in it. You might come up with an idea for a program that searches for symetrical dark spots in the image, with a brighter spot above them and two brighter spots below them--that is, two eyes beneath a forehead and above cheeks. (This is the basis of facial detection with [Haar-like features](https://en.wikipedia.org/wiki/Haar-like_features).) You might try out the algorithm on some images, and tweak the settings to make it work better. You could go back and forth between algorithm and data a few times.
22 |
23 | But then, when you were satisfied with it, you could use the function in your program and you would be done. If you had found the algorithm online, you could have just used it in your program without doing anything else.
24 |
25 | The same task could be done by machine learning. In this case, you'd also need to begin with an algorithm. But instead of directly writing an algorithm that detects a face, you would write an algorithm that learns from experience. After writing such an algorithm--or just choosing one from an ML (machine learning) library--you would then train it. To train it, you would show it hundreds, or thousands, or millions of images with faces and images without faces, each correspondingly labelled as having or not having a face. The algorithm would learn how to classify images as having or not having faces from this training data. You would probably go back and forth between tweaking the algorithm and its settings and tweaking the training data.
26 |
27 | Then, after writing the algorithm and training it, you would be able to use it in your program. Even if you had found the algorithm online, you would still have needed to train it, unless you found an algorithm pre-trained to recognize faces.
28 |
29 | So machine learning adds some stages to the process of going from nothing to a working product. Non-ML algorithms are ready to go from the start, once the algorithm is done; ML algorithms need to be trained before they can be used.
30 |
31 | ### A More Formal Definition of ML
32 |
33 | Here's a widely quoted, more formal definition of machine learning:
34 |
35 | > A computer program is said to learn from experience E with respect to some class of tasks T and performance measure P, if its performance at tasks in T, as measured by P, improves with experience E.
36 |
37 | Or, to again put things more colloquially, a program using a machine learning algorithm produces measurably better results the more experience it gains.
38 |
39 | ### Kinds of Machine Learning Algorithm
40 |
41 | Machine learning has broad applicability. It's used for computer vision, stock trading, self-driving cars, voice recognition, facial recognition, bioengineering, and more. So of course the field and the algorithms in it can be divided into further sub-categories.
42 |
43 | The main way that machine learning algorithms are divided is by learning method. To put this another way, the main way that ML algorithms are categorized is according to the kind of experience they learn from.
44 |
45 | There are other ways to categorize ML algorithms--according to their output, for instance. But classification according to the kind of experience from which ML algorithms learn is probably the most fundamental.
46 |
47 | When divided this way, there are three main categories of ML algorithm:
48 | 1. **Supervised Learning**
49 | 2. **Unsupervised Learning**
50 | 3. **Reinforcement Learning.**
51 |
52 | Each of these categories presents distinct opportunities and challenges; the workshop will be built around implementing one of each of these. So it's worth taking a little time to understand why they are distinct from each other.
53 |
54 | ### Supervised Learning
55 |
56 | In supervised learning, the experience from which the algorithm learns is a set of paired example inputs and desired outputs.
57 |
58 | The face-detection algorithm discussed above would be an example of supervised learning. In it, the data that the algorithm trains on is a set of images together with a value indicating whether there is a face in the image or not.
59 |
60 | Learning to recognize hand-written letters would be another example of supervised learning. The training data in that case would be a set of images of individual hand-written characters, labelled with the correct character. The algorithm would learn from this to correctly label new images of hand-written characters. This is the problem we will work on in the first section of the workshop.
61 |
62 | Both of the above are instances of machine learning that deals with *classification* problems. In such problems, the input is some kind of data and the output is a label identifying that data as belonging to a particular type. Supervised learning can also learn to solve *regression* problems. A regression problem is one in which the desired output, instead of being a label, is a continuous value.
63 |
64 | The recently-popular [Microsoft program](http://how-old.net/#) which tried to identify someone's age from a photo is an example of an attempt to solve a regression problem; the image is the input, and a continuous value is the output. Another example might be a program that predicts stock prices from previous stock prices. We won't be learning any regression algorithms, but it's important to know they exist.
65 |
66 | K-nearest neighbors, support vector machines, naive Bayes, neural networks, and logistic regression are all very popular machine learning algorithms used for supervised learning problems.
67 |
68 | ### Unsupervised Learning
69 |
70 | As the name indicates, the experience from which an algorithm learns in unsupervised learning is a set of example inputs--without any desired outputs attached to them.
71 |
72 | The question that probably comes to mind is "What can one learn from an unlabeled set of data?" It might seem impossible. But it is nevertheless possible to learn to characterize the structure of such data in many interesting ways.
73 |
74 | One thing one can do with unlabelled data is cluster analysis. Suppose you were given a dataset with the heights, weights, shoe-sizes, 500m and 50m sprint times, and maximum bench-press weights of some college athletes. You might find that there would be several different clusters of data points. There might be a cluster of people who weighed more and had large bench-press weights, which might indicate the football players. There might be another cluster of data around people with good 500m sprint times and lower weights, which indicates the track-and-field athletes. And so on and so forth. Trying to count and locate such clusters can be very interesting, and is one of the things that unsupervised learning algorithms can do.
75 |
76 | Suppose you were to feed a bunch of writing from an unknown alphabet (such as the supposed alphabet of the [Voynich manuscript](https://en.wikipedia.org/wiki/Voynich_manuscript)) into an unsupervised learning algorithm. A good algorithm could figure out how many characters there were in the alphabet, and tell you what character belonged to each kind.
77 |
78 | There are other things that can be done with unsupervised algorithms, but for now we'll stop at clustering. K-means, principal component analysis, and neural networks can be used for unsupervised learning.
79 |
80 | ### Reinforcement Learning
81 |
82 | Reinforcement learning is significantly different from either supervised or unsupervised learning.
83 |
84 | In reinforcement learning, the algorithm you write controls an agent inside some particular environment, which gets feedback in the form of a reward or punishment. The agent then learns to act so as to maximize the reward it receives. Let me give an example from outside of machine learning to help you understand this.
85 |
86 | Once some animal intelligence researchers decided to try to teach a group of dolphins to do [novel tricks](http://www.theguardian.com/science/2003/jul/03/research.science). To do this, they began to reward dolphins with fish whenever the dolphin did some trick that they had not done before during the training session. At first the dolphins recieved many fish, as they ran through their initial repertoire of tricks. Then they slowed down, and seemed to get frustrated because they could not figure out what to do to receive a reward. Finally, they seemed to hit on the idea--and immediately they began to do many things they had never done before, so they could get the reward.
87 |
88 | The dolphins are like the agent controlled by your machine learning algorithm. The fish are like rewards the environment doles out. The problem, then, is for your algorithm to figure out what actions and what sequences of actions will result in the greatest reward--even though the algorithm is never explicitly told this. It may be the case that actions with a great short-term reward result in long-term losses, which is another difficulty with this kind of learning. As in supervised learing, there's a notion of the environment telling you what is right and what is wrong; unlike in supervised learning, though, the signals for right and wrong aren't tied to any particular input or output.
89 |
90 | Reinforcement learning is one of the most complex and interesting types of machine learning, so I'll save any further discussion of it until the section devoted to it. Temporal difference learning, dynamic programming, and Monte-Carlo techniques are different methods of reinforcement learning.
91 |
92 | ## Prelude: Linear Algebra
93 |
94 | ### Introduction
95 |
96 | Machine learning often depends on linear algebra. The following will give you an extremely basic, theoretically barren, coding-oriented overview of the linear algebra necessary to get through the next few lessons.
97 |
98 | ### Vectors -- Introduction
99 |
100 | Think of a vector as a bunch of numbers that indicate a position or direction. The easiest way to represent a vector in Javascript is as an array of numbers.
101 |
102 | A one-dimensional vector would be a single number that indicates position or direction along a single dimension, like a number line. In writing, you could indicate that the variable was a single-dimensional vector by writing **A** ∈ R1. This means simply that **A** is a vector composed of one real number. Switching to Javascript array notation: [5] would indicate a position 5 units to the right of the origin of a number line, or perhaps a velocity of [5] from anywhere within the number line; a vector of [-2] would indicate a position 2 units to the left, or perhaps a velocity of [-2] from anywhere on the number line.
103 |
104 | A three-dimensional vector, similarly, would indicate position or velocity in three dimensions, such as along an X, Y, and Z location. In writing, you could indicate that the variable **B** was a three-dimensional vector by writing **B** ∈ R3. Switching to Javascript, again: The vector [0,0,0] would indicate something at the center of the coordinate system, or something completely motionless. The vector [0,5,2] would indicate something zero units along the x-axis, five units along the y-axis, and two units along the z-axis. Or it could represent motion in the corresponding direction.
105 |
106 | A three-dimensonal vector belongs in a three-dimensional space; a one-dimensional vector in a one-dimensional space; and, as we will see, an n-dimensional vector in an n-dimensional space.
107 |
108 | ### Vectors -- Positions and Directions
109 |
110 | Vectors can be understood to represent (among other things) positions or velocities.
111 |
112 | When thinking of them as indicating positions, it is natural to picture their base as sitting at the origin of the n-dimensional space, and their point at the location indicated by the vector itself. On a two-dimensional plane, you could draw the vector [3,5] as an arrow reaching from [0,0] to [3,5]. This corresponds roughly with the way of thinking of vectors as positions.
113 |
114 | Alternately, you could think of a vector as having its base at some other location, and its point stretching to a location elsewhere. So you could draw the vector [3,5] as a line reaching from [10,10] to [13,15]. This corresponds more with the way of thinking of vectors as velocity.
115 |
116 | Either way makes sense under different contexts--neither is more right than another. For our purposes, though, vectors will generally be taken to siginify positions in n-dimensional space.
117 |
118 | ### Vectors -- In Higher Dimensions
119 |
120 | Vectors can exist in four, ten, or a million dimensions; like vectors of smaller dimensionality, these can be thought of as indicating position in a higher-dimensional space.
121 |
122 | This sometimes causes difficulty, because people think they need to try to visualize, or internally picture locations in a million-dimensional space. This is, of course, impossible.
123 |
124 | But we are related to mathematical objects rather as the medieval theologians said we were related to immaterial things: even if you cannot _picture_ them, you can still conceive of them and think about them. Don't worry about trying to visualize a vector in a million-dimensional space.
125 |
126 | All the _operations_ for vectors in a lower-dimensional space still apply to vectors in a higher-dimensional space, and so as long as you keep the operations straight you can still understand what is going on. Whether you add, substract, find the norms for, or find hyperplanes defined by three, ten, or million dimensional vectors, all these operations are still defined the same way.
127 |
128 | So what are those operations?
129 |
130 | ### Vector Addition
131 |
132 | Vector addition allows you to add two vectors of equal dimensionality--that is, vectors that have the same number of numbers in them. To do this you just add the first elements of each together, the second elements of each together, and so on, until you come up with a new vector composed of the sums of the corresponding elements of the first two.
133 |
134 | Vector addition is not well-defined for vectors of different lengths -- it is meaningless to add a three-dimensional vector to a five-dimensional vector.
135 |
136 | If we treat vectors as arrays of numbers, the following code adds two vectors.
137 |
138 | function vectorAdd(arrOne, arrTwo){
139 | return arrOne.map(function(_,index){return arrOne[index] + arrTwo[index]});
140 | }
141 |
142 | Geometrically, adding vector **A** and vector **B** could be thought of as moving from the start of **A** to the end of **A**, then moving **B** units from the end of **A**. [1,1] added to [2,5] makes [3,6], which is the same location you would end up at if you started at [0,0], moved [1,1] units, then moved another [2,5] units.
143 |
144 | ### Vector Subtraction
145 |
146 | Vector subtraction works pretty much the same way as vector addition. When you subtract vector **A** from vector **B**, you make a vector whose first element is the first element of **B** less the first element of **A**, whose second element is the second element of **B** less the second element of **A**, and so on. Again, subtraction is only well-defined for vectors of the same length.
147 |
148 | Geometrically, this gives you a direction pointing from the position of the subtracted vector to the position of the vector subtracted from. So, for instance, if you subtract [3] from [1], you get the vector [-2], which points from [3] to [1]. If you subtract, say, [5,5] from [5,-5], then you get [0,-10], which points from [5,5] to [5,-5].
149 |
150 | This code would subtract two vectors, again assuming we are representing vectors as arrays of numbers:
151 |
152 | function vectorSub(arrOne, arrTwo){
153 | return arrOne.map(function(_,index){return arrOne[index] - arrTwo[index]});
154 | }
155 |
156 | ### The Euclidean Norm
157 |
158 | There are two more thing that you need to know about vectors before we can get to supervised learning.
159 |
160 | The first is what the Euclidean norm of a vector is.
161 |
162 | There are many different **norms** for measuring the length of vectors; a norm is generally speaking a function that assigns a positive length to any vector in any dimensionality. Each norm takes a vector and returns a positive number. The one that we'll use is called the **Euclidean Norm**.
163 |
164 | To produce the Euclidean norm for a vector, take the square root of the sums of the squares of the elements.
165 |
166 | So, for the vector [3,4], the Euclidean Norm is 5:
167 |
168 | Math.sqrt(3*3+4*4) //returns 5
169 |
170 | This formula should be familiar to anyone who has done geometry. The Euclidean Norm is also defined for arbitrarily-dimensioned vectors, however. The following returns the Euclidean norm for [1,1,1,1]:
171 |
172 | Math.sqrt(1*1 + 1*1 + 1*1 + 1*1); //Returns two
173 |
174 | The following Javascript code would return the Euclidean norm for any vector passed in to it.
175 |
176 | function euclideanNorm(arr){
177 | return Math.sqrt( arr.reduce(function(old, n){return old + n*n },0) )
178 | }
179 |
180 | ### Distance
181 |
182 | The second thing is to know how you can also use the Euclidean norm to find the distance between the points indicated by two vectors, in any dimensionality.
183 |
184 | function dist(vectorOne, vectorTwo){
185 | return euclideanNorm(vectorSub(vectorOne, vectorTwo));
186 | }
187 |
188 | It should now be clear why you can do this. Remember, this distance is well defined for vectors with an arbitrarily large dimensionality as well as vectors in "normal" two and three dimensional spaces.
189 |
190 | ## Supervised Learning
191 |
192 | ### Introduction
193 |
194 | In supervised learning, you'll recall, the training data consists of possible inputs paired with the desired output for that input. From this training data, the algorithm attempts to generalize so that it can predict the correct output when it recieves input it has not seen before.
195 |
196 | More concretely, a supervised learning algorithm might recieve images with cats in them and other images without cats in them, labelled as having cats in them and not having cats in them. And from this training data it should be able to figure out whether a new image that it has never seen before has a cat in it or not.
197 |
198 | The input, in the algorithm that we will consider, will be a vector of arbitrary length, standing for a specific location in N-dimensional space. (This is a fairly standard way of representing input to a ML algorithm.) The output will be a number, indicating a classification.
199 |
200 | ### K-Nearest-Neighbors
201 |
202 | K-nearest neighbors is the algorithm we will use to classify vectors of arbitrary length in N-dimensional space.
203 |
204 | K-nearest neighbors is very simple. The "k" in k-nearest-neighbors stands for an arbitrary constant such as 3 or 7, which will be set whenever the algorithm runs. To explain how it works, I'll give an example of it in action.
205 |
206 | Suppose that we want to be able to tell whether a particular student got into college on the strength of a football scholarship or a track-and-field scholarship from that student's weight and height. To train an algorithm to determine this, we have gathered data for 100 athletes, each labelled by scholarship type and with weight and height information. You could represent this in Javascript with an array like this:
207 |
208 | var trainingData = [
209 | [[220,72],0],
210 | [[160,69],1],
211 | [[250,74],0],
212 | [[150,65],1],
213 | ...
214 | ];
215 |
216 | The first element of each element in the array is the vector giving the weight in pounds and the height in inches of the person in question; the second element is 0 if they are a football player and 1 if they are a track-and-field athlete.
217 |
218 | Each column in a set of training data such as this is referred to as a **feature**. There are two features in the above set of data; the first feature is given in pounds, and the second feature is given in inches.
219 |
220 | Now, having been given this training data, I want to find out which class a new athlete belongs to. I have his stats:
221 |
222 | var unknownAthlete = [230,70];
223 |
224 | As the first step of the k-nearest-neighbors algorithm, I would find the distances between the point defined by this vector and the points defined by every vector in the training data. To find the distance between the first training point and the point to be classified, first find the difference between the first pair of features (230-220) and square it. Find the diffence between the second pair of features (70-72) and square it. Add the two amounts, and find the square root; this is the distance between the first training point and the new point. Do the same thing for all the other training points.
225 |
226 | After finding the distance between the athlete we are trying to classify and all the training data for other atheletes, we could sort them from closest to furthest away. And finally we could then look at the _k_ closest athletes. We would then count up all the classifications among these _k_ nearest athletes. The most common of these we would choose as the classification most likely for the athlete in question.
227 |
228 | ### K-Nearest-Neighbors, continued
229 |
230 | Here's the algorithm, put a bit more--although still not very--formally:
231 |
232 | 1. When training the algorithm, the algorithm simply stores all of the training vectors it receives together with their classification. It applies no processing to them at all.
233 | 2. On being given a point to classify, the algorithm finds the distance (the Euclidean norm) between that point and every one of the stored training points.
234 | 3. The algorithm then looks at the classification of the *k* closest training points. It judges the point that it is attempting to classify to be of the class that is most common among these *k* nearest neighbors.
235 |
236 | This algorithm is fairly simple, but can get surprisingly good results.
237 |
238 | You may use this algorithm as a _binary classifier_--that is, as a classifier that chooses between only two different classes. When doing this, it is common for people to use an odd number for the value of _k_. This makes it impossible for there to be a tie. When using the algorithm to classify more than two classes, you can make it choose randomly between tied classes or use some other ad-hoc measure to resolve conflict.
239 |
240 | ### Coding
241 |
242 | If you look in the folder 001_knn, you'll find the test specs for k-nearest-neighbors as well as a mostly-empty file in which you can implement it.
243 |
244 | Before testing, be sure to have npm installed in the top-level folder, and to have mocha installed globally. Run "mocha knn_specs.js" from the command line to test it.
245 |
246 | There are more specific instructions and advice for how to complete it inside the specs.
247 |
248 | You'll have a chance to see how well k-nearest neighbors does against some randomly-generated data. And you'll also have a chance to see how well k-nearest neighbors does when trained against the MNIST data.
249 |
250 | Put briefly, the MNIST data is a collection of 60,000 labelled, 28 by 28 images of hand-written digits from 0 to 9. Seeing how well an algorithm does against the MNIST data is a fairly standard way of testing out different supervised ML techniques.
251 |
252 | I've already converted a subset of the MNIST data into a format easily accessible in Javascript: each image is represented as an array of 784 numbers, each standing for a pixel in the image. Each number has a value from 0 to 255, depending on whether the corresponding pixel is black or white in the MNIST data.
253 |
254 | When you uncomment tests for the MNIST data, a sub-program will create a set of images in 001_knn; each will display the images that your knn algorithm classed as 0s, or as 1s, or as 2s, and so on.
255 |
256 | ### Problems with K-Nearest-Neighbors
257 |
258 | There are a few problems with k-nearest neighbors as we've implemented it.
259 |
260 | One problem is that the runtime for each prediction increases proportionately with the amount of training data put in. This is very problematic. Generally speaking, runtime should be faster than training, but in k-nearest-neighbors training is nearly instantaneous while runtime grows with the amount of training data.
261 |
262 | Some algorithms get rid of unnecessary training points to try to mitigate this problem. Read what Wikipedia has to say about the [Hart Algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm#Data_reduction) to see one algorithm that does this.
263 |
264 | ## Problems with K-Nearest Neighbors, continued
265 |
266 | Another potential problem with k-nearest neighbors, as we've implemented it, has to do with data scaling.
267 |
268 | In the example above with athletes, imagine if the heights of the athletes were given in miles rather than in inches. If this were the case, then all of the heights would be very small--and so the amount that the heights contribute to the distances in KNN would be very small. It will turn out that height is basically ignored entirely by KNN in such a case.
269 |
270 | But we obviously don't want the results of the KNN algorithm to depend on the units of the data fed into it. To avoid this problem, one thing you can do is **normalize** features. When you normalize the data, you scale it so that the smallest value of any set of features is 0 and the greatest value of any set of features is 1, and every other value for that feature is something in between. This means that, no matter how much variance there is naturally in different parameters in the training data, each parameter in the training data will count equally.
271 |
272 | ### Extra Credit / Other Algorithms
273 |
274 | 1. Implement the Hart Algorithm in your KNN object. More specifically, implement a function which, when run, will remove all superfluous data points given the current training data and current k-size in the algorithm. When reading the Wikipedia summary, remember that different points will remain relevant for different k-numbers, so the algorithm has to take that into account.
275 | 2. Implement a normalize function in your KNN object. When run, it should scale each feature in the training data so that the largest vector is 1 and the smallest is 0.
276 |
277 | ## Unsupervised Learning
278 |
279 | ### Introduction
280 |
281 | In unsupervised learning, the training data for the ML algorithm is not paired with some kind of correct output. The goal is to try to find structure in this data despite this lack.
282 |
283 | One of the ways to try to find structure in data like this is to cluster it. Suppose you had never seen an armadillo, a giraffe, or a manatee. If I were to give you a thousand photos of these three animals, all mixed together without any labelling, you still would be able to tell that the photos were of three kinds of things. You would also be able to group each with its kind. Clustering ML algorithms aim to do this kind of thing.
284 |
285 | The only kind of unsupervised learning we'll be doing is clustering. Unsupervised learning algorithms can do things other than cluster, however.
286 |
287 | For instance, others can do latent variable analysis. In latent variable analysis, the ML algorithm attempts to determine if features in the training data can be summarized as the result of other, unseen, and fewer features. If so, then the values for each of the visible features should be equally well summarized by a set of values for other, invisible features. This both allows one to find possible causal factors for the data and to decrease the dimensionality of the data.
288 |
289 | ### Introduction to Clustering
290 |
291 | There are two distinct clustering tasks:
292 |
293 | 1. Given a particular number of clusters to find in a set of data, find where these clusters are located. For instance, being told that there are three kinds of letters in a set of images of human writing, group these images into three groups.
294 | 2. Given a particular set of data, find how many clusters are contained in it. For instance, being told there are some number of letters in a set of images of human writing, determine how many letters there are in it.
295 |
296 | The second task usually involves the first; trying to find how many clusters are contained in a particular set of data would probably involve figuring out where these clusters are located. The first is the task we'll focus on, though. So we'll start with k-means clustering, which is a method of solving the first task.
297 |
298 | As an aside, you should note that the notion of "cluster" is vague. The notion of what a "good" cluster is, is correspondingly vague; the task of counting how many clusters are in a given set of data is therefore also vague. Suppose you had a data-set of 1000 hand-written characters: every character had been written either by Bob or Alice, and every character was either an "A" or a "D." A's differ from D's in systematic ways--but the characters written by Bob probably differ from the characters written by Alice in systematic ways as well. So should a clustering algorithm group the characters into two groups or into four? It depends on the purpose of the algorithm; there is no universally right answer, only a right answer relative to a situation. Such difficulties are one of the reasons that we're focusing on the first and not the second task.
299 |
300 | ### K-Means -- The Goal
301 |
302 | The goal of k-means is to group the data, a set of n-dimensional vectors, into _k_ clusters that satisfy a particular property.
303 |
304 | Each cluster is defined by one n-dimensional vector--so there are k cluster-defining vectors. Every point in the data belongs to the cluster-defining vector to which it is closest. (If you're familiar with the concept of [Vornoi Cells](https://en.wikipedia.org/wiki/Voronoi_diagram), k-means partitions space into n-dimensional voronoi cells.)
305 |
306 | As I mentioned, however, these cluster-defining vectors should be arranged to satisfy a particular property.
307 |
308 | Informally, the _k_ cluster-defining vectors should be arranged so that they are very close to the data points in their cluster. More formally, the cluster-defining vectors should be arranged to minimize the squared distance from each data point to the vector to which it belongs.
309 |
310 | Let me put all of that into one sentence: A set of data perfectly clustered according to k-means would have _k_ cluster-defining vectors placed such that the sum of the squared distances from each data point to the cluster-defining vector to which it belongs would be the smallest possible amount.
311 |
312 | You'll note that this defines k-means in terms of a goal, however. A goal is not the same as an algorithm. What would be some means of _finding_ these cluster-defining vectors, or at least finding vectors close to them?
313 |
314 | ### K-Means -- Lloyd's Algorithm
315 |
316 | It is common in machine learning to attempt to approximate an ideal solution with a rougher algorithm.
317 |
318 | Lloyd's Algorithm is a way to come to an approximation of the above ideal solution. It is by far the most common way of attempting to come to such an approximation, though, to the extent that often it is simply called the k-means algorithm.
319 |
320 | This is the basic idea behind the algorithm.
321 |
322 | 1. Randomly choose k cluster-defining vectors within the space of the data, where k is the pre-determined number of clusters whose centers must be found. (One way of doing this is to choose random points from the training data as starting locations.)
323 | 2. Assign each data vector to the cluster-defining vector to which it is closest.
324 | 3. Shift each of the k cluster-defining vectors so they are located in the midpoint of the data assigned to them. (A vector located at the average location of a set of other vectors is a "centroid." So another way to say this is to shift each of the k vectors so it is the centroid of the data that belongs to it.)
325 | 4. If none of the centroids shifted, or all of them shifted less than a particular amount, return those vectors as the cluster-defining vectors for the data. Otherwise, go back to step 2 and repeat.
326 |
327 | As stated, the vectors returned from the algorithm are known as "centroids." In mathematics, a centroid occupies the average or mean position of all of the shapes around it in the region, just as the centroids above occupy the average or mean position of all the shapes to which they are closest.
328 |
329 | This might be a bit difficult to understand. If you would like a visual representation, [Wikipedia](https://en.wikipedia.org/wiki/K-means_clustering#Standard_algorithm) has a useful visual explanation of this sequence.
330 |
331 | ### K-Means - Local Minima
332 |
333 | When the centroids have stopped moving, then Lloyd's algorithm has completed. However, this does not mean that the algorithm has found the clustering which best minimizes the sum of the squared distances. Or, to put this another way -- Lloyd's algorithm does not always produce the goal k-means sets forth. It produces an approximation, and often a bad approximation.
334 |
335 | Consider a dataset that consists of eight points, portrayed like this in beautiful ASCII art:
336 |
337 | .. ..
338 |
339 | .. ..
340 |
341 | Suppose two centroids randomly chosen were placed something like this.
342 |
343 | .. C ..
344 |
345 | .. C ..
346 |
347 | As it happens, they will both remain in place when k-means runs. The top centroid is closest to the top four points, and at their average location, just as the bottom centroid is closest to the bottom four points, and at their average location. So Lloyd's algorithm will keep them in place, and return them as cluster-defining vectors for the data.
348 |
349 | Even so, a better location for each centroid would clearly be as follows.
350 |
351 | .. ..
352 | C C
353 | .. ..
354 |
355 | The clustering here is much tighter. The sum of the squared distances of each point to the centroid to which it belongs is much less. Nevertheless, there is no guarantee the k-means algorithm will come to this globally best solution.
356 |
357 | Generally speaking, a poor solution that an algorithm can get "stuck" at while attempting to make incremental improvements is called a **local minima** or **local maxima**. It makes sense to call it local minima right now, because we're trying to minimize the sum of squared distances, but the two ideas are otherwise the same.
358 |
359 | Here's another instance of k-means converging on a local minima. The initial locations are on the left; the final convergence is on the right. You will note that although there are four clusters of points, the four clusters converged on by k-means are by no means the best possible clusters.
360 |
361 | 
362 |
363 | ### Local Minima, continued
364 |
365 | The problems caused by local minima (or local maxima) are not unique to k-means or unsupervised learning. They happen in every field of machine learning.
366 |
367 | Many algorithms work by slowly shifting their parameters towards a better solution. Just as k-means shifts centroids from location to location in an attempt to minimize the sum of squared distances, other ML algorithms shift their parameters about in an attempt to minimize or maximize some other amount. Sometimes, however, the algorithm finds itself with parameters such that any small shift makes the solution worse--even though this particular set of parameters is not the absolute best. This is a local minima or maxima.
368 |
369 | Here's an analogy for a local maxima. Suppose you're trying to maximize the amount of money that you earn. You could work harder at your current job, which is in retail. You could read books on persuasion to maximize the comission that you make on sales. You could work more hours. But after you've done all these things, you might find that you still do not earn nearly as much as your friend, Bob, who is a software engineer. If you wish to make more money, you might have to change by more than small shifts; you might have to make a career change, and earn less than you currently do for a while.
370 |
371 | Local minima or maxima in ML algorithms are like that. These algorithms fail to find the best overall solution because they get stuck in acceptable local solutions, where any small change in any direction results in the situation becoming worse. And because these algorithms work only by making small changes, they get stuck there.
372 |
373 | Ways of dealing with local minima and maxima constitute an entire field. Different ways are available depending on the algorithm in question.
374 |
375 | One way to try to deal with local minima in k-means is to run Lloyd's algorithm several times; each time, start it with a different randomized set of centroids. Each time you run it, save the solution. After running it many times, you could choose from these saved solutions the solution which has the smallest sum of squared distances from the data points to the centroids. There's a good chance this is not the globally best solution, but because it is the best of a sample, it is probably ok.
376 |
377 | ### Coding and Extra Credit
378 |
379 | If you open the folder 002_kmeans, you'll see a spec file defining characteristics for k-means and an empty file in which to build the k-means constructor.
380 |
381 | As before, the spec file can be run by typing "mocha kmeans_specs.js" at the command line. More detailed instructions for what to do are included in the spec file itself. You will need to read these comments.
382 |
383 | When fully uncommented, the specs will also make images of the groups that the algorithm comes up with.
384 |
385 | There is also an extended extra-credit section to the spec file. Feel free to try to do it, but don't be discouraged if you find it difficult.
386 |
387 | ### Problems
388 |
389 | K-means will not locate clusters correctly in many cases.
390 |
391 | Consider what happens in two dimensions, when there is an enormous circular set of data points, with a smaller, tighter, and also circular set of datapoints just a small distance from its edge, like a planet skimming the surface of a red giant. If you were to place one centroid in the center of the small set, and another centroid in the center of the large set, the half-way point between them would be deep within the larger set of data. So in such a case, k-means will classify a large part of the large set as belonging to the small set, erroneously.
392 |
393 | To put the above paragraph more briefly, k-means must always mark the border between centroids at the half-way mark. Sometimes this does not reflect the actual borders between clusters. This is problematic.
394 |
395 | K-means also has difficulty with any data which is not clustered roughly spherically. If data should cluster in a long narrow crescent, sweeping through 2d, 3d, or 784d space, k-means will not identify it as a single cluster, because clusters always consist of those points closer to a single centroid than to any other centroid. So this, too, is a significant problem with k-means. Nonspherical clusters will result in many different elements being misclassified.
396 |
397 | There are, of course, many other methods of clustering, which evade some of these problems. DBSCAN, OPTICS, and hierarchical clustering are examples of such clustering methods.
398 |
399 | ## Reinforcement Learning
400 |
401 | ### Introduction
402 |
403 | In reinforcement learning, the ML algorithm learns how to achieve goals by interacting with an environment.
404 |
405 | This task is very different than either of the former two kinds of machine learning. So we will approach the ideas involved in this gradually. Throughout it all, though, it is good to keep in mind the essence of the task: Reinforcement learning is about exploring an environment to figure out how to achieve particular goals.
406 |
407 | I'll begin by describing a simplified learning problem, which differs from the full reinforcement learning problem in that it does not involve multiple states. After explaining an algorithm that solves it, I'll move to the full reinforcement learning problem.
408 |
409 | ### N-Armed Bandit
410 |
411 | Imagine a one-armed bandit: a casino-style gambling machine, the kind with the spinning wheels of symbols.
412 |
413 | Suppose that instead of having a single arm it has _n_ arms. Each of these arms would return some particular reward on average. The first arm, for instance, might average 5 on each pull, while the second arm might average 10, and so on. Of course, each arm only returns these rewards on average: Different individual pulls on the first arm might return 0, or 30, or 0.5, or some other number.
414 |
415 | (Suppose also, contrary to fact, that it is possible to win a positive amount on average while playing against the machine.)
416 |
417 | You do not know what any arm averages when you start playing the _n_ armed bandit. Your goal, however, is to maximize the reward you receive over some finite number of pulls--say, a thousand pulls.
418 |
419 | If you knew which arm averaged the most over time, then you could simply use that arm each time to maximize your reward over time. So you probably want to try to determine which arm returns the most on average. Furthermore, the more sure you are about which arm gives the highest average reward, the more sure you can be that you are pulling the arm most likely to maximize your reward over time.
420 |
421 | The only way to be sure about which arm gives the highest average reward, however, is to try each arm. The only way to be very sure is to try each arm a great deal. The more time you spend trying each arm, however, the less time you have to use whatever arm you currently judge best. So the more time you take trying to be sure about which arm is the best, the less time you have to pull the arm you currently judge to be best.
422 |
423 | This is the explore / exploit tradeoff. If you want to use the knowledge you have, you are generally giving up on a chance to gain new knowledge. If you want to gain new knowledge, you are generally giving up a chance to use the knowledge you have. This tradeoff is fundamental to reinforcement learning, to the n-armed bandit problem, and many real-life problems.
424 |
425 | A doctor who has several experimental treatments for a potentially-fatal disease, for instance, is faced with the same basic difficulty. Will he use the most promising treatment from the start, and thereby risk that he is not using a much better treatment? Or will he try many different treatments, and accept that many people will not get the treatment he currently judges best? What should he do to best preserve life?
426 |
427 | ### ɛ-Greedy
428 |
429 | There are many ways to approach the n-armed bandit problem and the explore / exploit tradeoff, but I'm going to focus on a simple method: ɛ-greedy. That's pronounced "epsilon-greedy." (Epsilon is a Greek character typically used by mathematicians to indicate a small amount.)
430 |
431 | The ɛ-greedy algorithm is a slightly more complex version of a simple greedy algorithm. A greedy algorithm would simply maintain an estimate of which arm seems best, and always choose it. It would estimate which arm is best by looking at the average of the amounts each arm has returned so far.
432 |
433 | The ɛ-greedy algorithm does nearly the same thing, but with a few differences. Here's the algorithm:
434 |
435 | 1. Initialize starting guesses for the average value of each of the _n_ arms of the n-armed bandit.
436 | 2a. With probability (1-ɛ), choose whatever arm you estimate to have the highest average value, or choose randomly among those which are tied for the highest average value.
437 | 2b. Otherwise, with probability ɛ, choose any of the _n_ arms at random.
438 | 3. After either choice, update the average value for the arm that you picked.
439 | 4. Go back to 2, until you have run out of choices.
440 |
441 | In short, the ɛ-greedy algorithm is just like a greedy algorithm most of the time: with probability (1-ɛ), it simply chooses whatever arm currently appears to it to be best. But some of the time, it will also choose a random action. So it usually exploits, but with probability ɛ explores. This means that over an (infinite) amount of time, its estimate for the average value returned by each arm must converge to the actual value.
442 |
443 | So much for the _n_ armed bandit. Let's move on to the full reinforcement learning problem.
444 |
445 | ### Reinforcement Learning -- Introduction
446 |
447 | The _n_ armed bandit is an example of a _non-associative_ learning problem. It is non-associative because good and bad actions are not associated with any particular situation or state of the world. An action with low expected reward will always have low expected reward, and an action with a high expected reward always will have a high expected reward. The full reinforcement learning problem, however, is _associative_. The world or the agent in the world can be in different states, and actions that are likely to result in high returns in some states will not in other states. Steering right while driving is sometimes a good move, and sometimes not; it depends on what kind of a situation you're in.
448 |
449 | In a full reinforcement learning problem, the thing learning and making decisions, which is controlled by the algorithm, is called the _agent_. The thing it interacts with is called the _environment_. Agent and environment interact over the course of a series of discrete time steps. The series terminates when the training episode ends, although there are usually many training episodes.
450 |
451 | (Reinforcement learning can be divided into both _episodic_ and _continuous_ learning, but to simplify matters I'm going to act as if all learning took place in episodes. Episodes are like games in chess; everything about the environment gets reset after an episode ends, although the agent can still retain knowledge that it has acquired.)
452 |
453 | ### Reinforcement Learning -- Terminology
454 |
455 | As stated, agent and environment interact over a series of discrete time steps. These steps are usually denominated by a lower-case 't'.
456 |
457 | In each time step during the episode, the agent receives some representation of the environment's _state_. This is usually indexed by the time-step that the agent receives it, and so is called st. On the basis of this state, the agent selects an _action_ which is similarly denominated as at. One time step later, the agent receives a new state st+1 with reward rt+1; it decides to perform action at+1, and so on.
458 |
459 | There are a few constraints placed on these variables. The reward is always a single real number. The goal of the agent, of course, is to maximize the reward received over time, not the reward received in any particular time-step. Machine learning agents therefore will try to create a _value function_ that helps them estimate the cumulative reward that will follow any particular state, or any particular action in any particular state, and thereby helps them choose actions likely to result in great reward over time.
460 |
461 | Another assumption many algorithms make is that there are a finite number of states. In cases where the raw state is defined by real numbers, of course, there could easily be an infinite number of states--there is room for infinite real numbers between 0 and 1. So in many cases the state is made discrete (and potentially finite) by rounding each number defining it to the nearest integer, or tenth of an integer, or whatever is suitable for the application at hand. This is something that you'll do in the exercise.
462 |
463 | Here's an example of a reinforcement agent. Suppose our agent were a program meant to handle elevators in a building. The state could be an array, some of whose values are 1 or 0 depending on whether particular up / down buttons are pressed, and some of whose values indicate the current elevation and speeds of each elevator. The action selected by the agent could be lifting, lowering, or opening any elevator. The reward would be some kind of signal inversely related to the time between button-depression and door-opening at each building floor: it would be higher the shorter this period was. The agent would then aim to maximize the reward over time by lifting, lowering, or opening the elevators to minimize wait-time.
464 |
465 | ### Markov Property
466 |
467 | Let me talk about state for a bit more.
468 |
469 | A state signal, st, is said to satisfy the "Markov Property" when it compactly summarizes all relevant information from past states in the environment.
470 |
471 | What does it mean to "compactly summarize all relevant information"? Suppose I were to tell you the location of all the chess pieces on a board, and ask you what the best move was. Any further information about all the previous locations of these pieces at earlier game-states would be superfluous--it would be unnecessary for determining what the best move is now. So the state signal of the location of all the pieces of the board is Markov.
472 |
473 | Similarly, the location, velocity, and spin of a cannonball is Markov--after you know all these things about it, any further information about its past location, velocity, or spin is irrelevant to predicting its future.
474 |
475 | To put things in a mildly technical fashion, a state signal is Markov if the probability that st+1 = s' given st, is the same as the probability that st+1 = s' given st and any arbitrary set of st-1, st-2... st-n.
476 |
477 | A number of learning algorithms are guaranteed to converge to the best possible solution, given that the state signal they work from satisfies the Markov property. In fact, many if not most real-life state signals do *not* satisfy the Markov Property; the problem we will solve does not have a discrete state signal that does this--although the continuous state signal does. But the algorithms that are guaranteed to work with a Markov state-signal nevertheless also often work moderately well with a state-signal that is not Markov.
478 |
479 | Remember that if the state has continuous values, as the states will in the problem we'll be working with, then there are obviously an infinite number of different states possible. There are a few ways of dealing with this: We'll round each continuous value to a discrete value, so that there are a finite rather than infinite number of different states possible.
480 |
481 | ### Policies
482 |
483 | There are a few more terminological and conceptual things we need to get over before we can get to the algorithm.
484 |
485 | A _policy_ is a mapping from every possible state to an action. Another way to think of this is to say that a policy is a function that takes a state and returns an action. Every agent needs to have a policy, because every agent needs to be able to decide what to do on being given any particular state. A policy is usually denominated by the character 𝜋.
486 |
487 | Policies can be either _stochastic_ or _determinate_. A determinate policy always returns the same action on being given the same state. One writes a determinate policy as a function taking a single state, 𝜋(s), which then returns some action a. On the other hand, a stochastic policy will sometimes return some particular action from a state and sometimes return some other particular action on being given the same state. One can write a stochastic policy as a function taking two variables, 𝜋(s,a), which returns the probability that the policy will follow action a on being given state s.
488 |
489 | This may sound somewhat complex, but is really very simple. When we say a professor has a policy of penalizing five points for every day a paper is late, we're informally indicating how 𝜋(s) maps onto a set of actions for the professor. A policy for a learning agent does exactly the same thing. Agents which learn to drive cars have likely policies indicating that they should step on the brakes before red lights, and press the gas on seeing green lights.
490 |
491 | ### Action-Value Functions
492 |
493 | Some policies are better than other policies. That is, following some policies will lead to a greater cumulative reward from the environment, over time, than following other policies. More formally, one policy is better than another if following the better policy from any state results in rewards greater than or equal to those which occur from following the worse policy from any state.
494 |
495 | The policy of waking up when your alarm goes off is better, at least so far as rewards received from one's employer are concerned, than the policy of hitting snooze two dozen times.
496 |
497 | Each state has a different expected value, then, beneath different policies. Following a particular policy from one state might result in a total subsequent reward of 58, while following a different policy from that same state might result in a total subsequent reward of 1432.
498 |
499 | Similarly, taking some action from some state has a different expected value beneath different policies. Performing an action in one state beneath one policy might result in a total subsequent reward of 213, while performing the same action from the same state beneath another policy might result in a total subsequent reward of -231.
500 |
501 | Agents often try to estimate the _action-value function_, which gives this latter value for a policy--that is, the action-value function gives the cumulative expected reward which will follow from taking any particular action in any particular state for a particular policy. Terminologically, the action value function is written as Q𝜋(s,a), which is a bit less verbose. This stands for the expected total return beneath policy 𝜋 of making action a in state s. Trying to estimate this value for an arbitrary policy is central to trying to find the best overall policy.
502 |
503 | ### Monte Carlo Value Estimation
504 |
505 | Suppose we have some arbitrary policy 𝜋. Suppose furthermore it is a "soft" policy--that is, for every possible action in every possible state, there is a non-zero chance of that action being executed. To put this in another way, for a soft policy 𝜋(s,a) > 0 for all states and all actions possible from each state.
506 |
507 | Given such a policy, how can we determine the action-value function (Q𝜋(s,a)) relative to that policy? The problem of answering this question is known as the _prediction problem_ or the task of _policy evaluation_. Different reinforcement learning algorithms often differ chiefly by having different ways of answering this question. We will answer it with a Monte Carlo method, which has the advantage of not requiring you to already know the dynamics of the environment--that is, it does not require you to know the probability that acting in a particular way in a particular state will lead to some other state. Our technique does require that you have the ability to act repeatedly in the environment, however.
508 |
509 | Here's how it goes.
510 |
511 | Suppose that we run through a training episode using soft policy 𝜋 to make decisions. Suppose, furthermore, at each step in the episode we save (1) the state-action pair for that step and (2) the reward in that step. After the training episode concludes, we could run through each saved step from beginning to end. While going through the steps, every time we find a state-action pair that we had not seen until that point in the training episode, we could sum up the total rewards which occurred _after_ this occurrence of that state-action pair. This summed value could be pushed to an array specific to that state-action pair. This process could then be repeated through many training episodes.
512 |
513 | After each training episode, an average could be taken from the arrays specific to each state-action pair and this could be used to approximate Q𝜋(s,a) for that policy.
514 |
515 | Because the policy is soft, over time this procedure is guaranteed to visit every state-action pair. Each value which it pushes to the arrays specific to each state-action pair is the total cumulative reward following that state, over one single episode. As the number of episodes increases, the average of each of these arrays is then guaranteed to approach the action-value function Q𝜋(s,a) for that state and action for policy 𝜋. And this is a Monte-Carlo method of estimating the action-value function over the course of many training episodes.
516 |
517 | Let me write out the algorithm explicitly. I presuppose below that you have a policy 𝜋 such that 𝜋(s,a) > 0 for every state and every action.
518 |
519 | 1. Initialize a 'returns' object and an 'action-value' object.
520 | 2. For each training episode.
521 | 1. Initialize an array 'steps'.
522 | 2. Run through the episode to the end, while using policy 𝜋. For each time step, push the state-action pair and the reward for that time step to 'steps'.
523 | 3. Step through each of the steps saved after running through the episode. Every time you encounter a state-action pair for the first time, push the sum of the rewards following that state-action pair to the array in the object 'returns' specific to that state-action pair.
524 | 4. Let the properties in the 'action-value' object equal the average of the arrays beneath the corresponding properties in the 'returns' object.
525 | 3. Conclude after a set number of iterations, or after the values in the 'action-value' object cease changing more than a small amount.
526 |
527 | This will give you an approximation of the action-value function for all states beneath a set policy.
528 |
529 | ### Being Greedy Again
530 |
531 | The prior section explained how to estimate the state-action function for any soft policy by using Monte-Carlo methods. But it's not enough to estimate the action-value function for a policy. We need to be able to improve the policy as well.
532 |
533 | Suppose now that the specific policy 𝜋 which the learning agent is using, and for which it has estimated the action-value function, is the ɛ-greedy policy. The ɛ-greedy policy was introduced in the context of the _n_ armed bandit, so let me take a second to explain what this would mean.
534 |
535 | On each time step, the ɛ-greedy policy is given a particular state. It wishes to take the action which will result in the greatest value--so, in this context, this means that it will look at the values in the action-value function accessible from that state. To rephrase the prior sentence: It will examine the action-value (Q𝜋(s,a)) function values for (s, a1), (s,a2), and so on, until it has looked at the values of the action-value function for each of the possible actions from that state. It will then (with probability 1-ɛ) choose the action with the greatest expected cumulative reward. With probability ɛ, of course, it will choose any of the actions available to it randomly.
536 |
537 | This may seem a little confusing.
538 |
539 | The above presupposes, you'll notice, that there already is some kind of an action-value function that the ɛ-greedy policy is using. But the process of estimating the action-value function for a given policy, in the prior section, presupposes that there already is some policy for which the algorithm is finding the action-value function. This seems a little problematic. As the action-value function changes, it seems like the ɛ-greedy policy will change; and as the ɛ-greedy policy changes, it seems like the action value function relative to that policy will change as well.
540 |
541 | ### General Policy Iteration
542 |
543 | The aforementioned apparent problem is actually one instance of a general reinforcement-learning algorithm called general policy iteration.
544 |
545 | In general policy iteration, one starts with an arbitrary policy and an arbitrary action-value estimate for that policy. One then alternates improving (1) the action-value function relative to the arbitrary policy and (2) making the policy greedy relative to that action-value function. These two changes work against each other, in the sense that each provides a shifting target. But they work together because, in the end, the only stable position for either of them is (1) the optimal action-value function, which gives the value of each state beneath the optimal ɛ-greedy policy and (2) the optimal ɛ-greedy policy.
546 |
547 | ### Complete Monte Carlo Algorithm
548 |
549 | Let me explain the full algorithm.
550 |
551 | 1. Initialize the 'returns' object and the 'action-value' object. They can both be empty to start off with.
552 | 2. For each training episode.
553 | 1. Initialize an empty array, 'steps'
554 | 2. Run through the entire episode, while following the ɛ-greedy policy. (In cases where the 'action-value' object is empty for a particular state-action pair, fill it with a default value.) As you run through the episode, push the state-action pair that you follow and the reward particular to each state to the array 'steps'.
555 | 3. Step through each of the steps saved after running through the episode. Every time you encounter a state-action pair for the first time, push the sum of the rewards following that state-action pair to the array in the object 'returns' specific to that state-action pair.
556 | 4. Let the properties in the 'action-value' object equal the average of the arrays beneath the corresponding 'returns' properties.
557 | 3. Conclude after a set number of iterations, or after the values in the 'action-value' object cease to change more than a small amount.
558 |
559 | In effect, what this does is to start off with a policy that is random--the ɛ-greedy policy will initially just choose random actions, because its estimate for every move will simply be the default value. But those random actions which result in higher returns will soon begin to be chosen more often than those random actions which result in lower returns. So the actions will steadily become less random, although they will always remain random to such an extent that they never stop exploring entirely.
560 |
561 | ### Coding
562 |
563 | As before, you'll find a set of specs and a folder to modify in the folder specific to this section.
564 |
565 | Unlike before, passing the specs isn't really the point of this part of the exercise. There aren't as many, and passing them doesn't mean you've passed the exercise.
566 |
567 | Instead, if you open up "display.html," you'll find a small game. Click on the line that lets you play as a human, and see how easy / difficult it is. The basic idea is to balance a stick on top of a cart by moving the cart back and forth on a limited track. The episode ends when the stick falls beneath a certain angle or the cart runs into the edge. When training, your program will receive a reward of 1 for every step it does not bump into the end of the track or have the stick fall down, and a reward of 0 when this happens.
568 |
569 | The object, then, is to write a constructor function for an agent which will be trained repeatedly in the environment. Clicking the train line just runs your agent through the environment many times very quickly.
570 |
571 | This is difficult. One thing you should be aware of is that it is easiest to start off with an agent that ignores some of the information the state signal gives it. This is summarized more in the notes for the specs.
572 |
573 | ## Further Resources
574 |
575 | ### Resources
576 |
577 | This was only a very, very superficial introduction to machine learning, which has left out vast quantities of the deep mathematical roots of the subject.
578 |
579 | Here are a few things to follow up with if you're interested in more.
580 |
581 | The first thing would be to gain a really good knowledge of statistics, linear algebra, and calculus. None of the below absolutely require these things, but they're going to be extremely useful for anyone studying ML.
582 |
583 | One thing that this tutorial has not covered at all was neural networks. You can find a really superb and exceedingly gentle introduction to neural networks at [this](http://neuralnetworksanddeeplearning.com/index.html) website. It uses Python rather than Javascript, but if you want to do Machine Learning you're going to have to learn something other than Javascript anyway; Python is one of several languages that a fair amount of ML work is done in.
584 |
585 | Wikipedia is actually a great resource for ML algorithms. Using Wikipedia to implement a perceptron, a naive Bayesian classifier, or some similar supervised learning algorithm would be a good exercise which should be within the range of people who have completed this tutorial.
586 |
587 | [Udacity](https://www.udacity.com) has several machine learning and artificial intelligence courses. Several of them focus too much on using libraries and not sufficiently on comprehension, to my mind, but it is nevertheless a very good resource. They offer a data science nanodegree, which should go into some of these subjects at least moderately deeply. Other MOOCs are supposed to be good as well, although I haven't had time to try them.
588 |
589 | One thing to be aware of is, as far as I can tell, most machine learning seems to focus on supervised and unsupervised learning; reinforcement learning does not fit as well within the "big data analysis" paradigm which seems to dominate machine learning classes. So it seems unlikely that you'll learn reinforcement learning techniques unless you consciously set forth to learn them, unless I am mistaken. Sutton and Barto's "Reinforcement Learning: An Introduction" has been an absolutely invaluable source to me while writing this tutorial, and I highly recommend it as a basic introduction to the topic.
590 |
591 | Finally, if you're interested in artificial intelligence generally, Russell and Norvig's "Artificial Intelligence: A Modern Approach" is probably the place you want to start.
592 |
593 |
--------------------------------------------------------------------------------