├── Readme.md
└── tsne.js
/Readme.md:
--------------------------------------------------------------------------------

# tSNEJS

tSNEJS is an implementation of the t-SNE visualization algorithm in JavaScript.

t-SNE is a visualization algorithm that embeds points in 2 or 3 dimensions. If you have some data and can measure their pairwise dissimilarities, t-SNE can help you identify clusters in your data. See the example below.

## Online demo
The main project website has a [live example](http://cs.stanford.edu/people/karpathy/tsnejs/) and more description.

There is also the [t-SNE CSV demo](http://cs.stanford.edu/people/karpathy/tsnejs/csvdemo.html), which lets you simply paste CSV data into a textbox; tSNEJS then computes and visualizes the embedding on the fly (no coding needed).

## Research Paper
The algorithm was originally described in this paper:

L.J.P. van der Maaten and G.E. Hinton.
Visualizing High-Dimensional Data Using t-SNE. Journal of Machine Learning Research
9(Nov):2579-2605, 2008.

You can find the PDF [here](http://jmlr.csail.mit.edu/papers/volume9/vandermaaten08a/vandermaaten08a.pdf).

## Example code
Import tsne.js into your document: `<script src="tsne.js"></script>`
And then here is some example code:

```javascript

var opt = {};
opt.epsilon = 10; // epsilon is the learning rate (10 = default)
opt.perplexity = 30; // roughly how many neighbors each point influences (30 = default)
opt.dim = 2; // dimensionality of the embedding (2 = default)

var tsne = new tsnejs.tSNE(opt); // create a tSNE instance

// initialize data. Here we have 3 points and example pairwise dissimilarities (symmetric, zero diagonal)
var dists = [[0.0, 0.1, 0.2], [0.1, 0.0, 0.3], [0.2, 0.3, 0.0]];
tsne.initDataDist(dists);

for(var k = 0; k < 500; k++) {
  tsne.step(); // every time you call this, the solution gets better
}

var Y = tsne.getSolution(); // Y is an array of 2-D points that you can plot
```
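
In the browser you may want to interleave optimization and rendering instead of running all the steps up front. Here is a minimal sketch, assuming a `drawEmbedding` plotting function of your own (it is not part of tSNEJS):

```javascript
// sketch: animate the optimization in the browser
// drawEmbedding(Y) is a hypothetical user-supplied function that plots an array of [x, y] points
function runStep() {
  tsne.step();                    // one gradient step
  var Y = tsne.getSolution();     // current embedding coordinates
  drawEmbedding(Y);               // redraw with the updated coordinates
  requestAnimationFrame(runStep); // keep iterating without blocking the page
}
requestAnimationFrame(runStep);
```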

The data can also be passed to tSNEJS as a set of high-dimensional points using the `tsne.initDataRaw(X)` function, where X is an array of arrays (the high-dimensional points that need to be embedded). The library computes pairwise distances between these points, converts them to probabilities with a Gaussian kernel, and then finds the embedding.
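
For example, here is a small sketch with three made-up 5-dimensional points (the values are illustrative only):

```javascript
// each row of X is one high-dimensional data point
var X = [
  [0.1, 0.9, 0.3, 0.0, 0.2],
  [0.8, 0.1, 0.4, 0.5, 0.9],
  [0.2, 0.8, 0.3, 0.1, 0.3]
];
tsne.initDataRaw(X); // builds the P matrix from pairwise distances and the Gaussian kernel

for(var k = 0; k < 500; k++) {
  tsne.step();
}
var Y = tsne.getSolution(); // one 2-D point per row of X
```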

## Web Demos
There are two web interfaces to this library that we are aware of:

- By Andrej, [here](http://cs.stanford.edu/people/karpathy/tsnejs/csvdemo.html).
- By Laurens, [here](http://homepage.tudelft.nl/19j49/tsnejs/), which takes data in a different format and can also use Google Spreadsheet input.

## About
Send questions to [@karpathy](https://twitter.com/karpathy).

## License

MIT

--------------------------------------------------------------------------------
/tsne.js:
--------------------------------------------------------------------------------
// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };

(function(global) {
  "use strict";

  // utility function
  var assert = function(condition, message) {
    if (!condition) { throw message || "Assertion failed"; }
  }

  // syntax sugar
  var getopt = function(opt, field, defaultval) {
    if(opt.hasOwnProperty(field)) {
      return opt[field];
    } else {
      return defaultval;
    }
  }

  // return 0 mean unit standard deviation random number
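  // (Marsaglia polar method; the second deviate is cached in v_val and returned on the next call)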
  var return_v = false;
  var v_val = 0.0;
  var gaussRandom = function() {
    if(return_v) {
      return_v = false;
      return v_val;
    }
    var u = 2*Math.random()-1;
    var v = 2*Math.random()-1;
    var r = u*u + v*v;
    if(r == 0 || r > 1) return gaussRandom();
    var c = Math.sqrt(-2*Math.log(r)/r);
    v_val = v*c; // cache this for next function call for efficiency
    return_v = true;
    return u*c;
  }

  // return random normal number
  var randn = function(mu, std){ return mu+gaussRandom()*std; }

  // utility that creates contiguous vector of zeros of size n
  var zeros = function(n) {
    if(typeof(n)==='undefined' || isNaN(n)) { return []; }
    if(typeof ArrayBuffer === 'undefined') {
      // lacking browser support
      var arr = new Array(n);
      for(var i=0;i<n;i++) { arr[i] = 0; }
  // ...
        if(pj > 1e-7) Hhere -= pj * Math.log(pj);
      }
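
      // beta is the precision (1/(2*sigma^2)) of the Gaussian for this data point; the binary
      // search below adjusts it until the row entropy Hhere matches the target entropy Htarget
      // (the log of the desired perplexity)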
      // adjust beta based on result
      if(Hhere > Htarget) {
        // entropy was too high (distribution too diffuse)
        // so we need to increase the precision for more peaky distribution
        betamin = beta; // move up the bounds
        if(betamax === Infinity) { beta = beta * 2; }
        else { beta = (beta + betamax) / 2; }

      } else {
        // converse case. make distribution less peaky
        betamax = beta;
        if(betamin === -Infinity) { beta = beta / 2; }
        else { beta = (beta + betamin) / 2; }
      }

      // stopping conditions: too many tries or got a good precision
      num++;
      if(Math.abs(Hhere - Htarget) < tol) { done = true; }
      if(num >= maxtries) { done = true; }
    }

    // console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
    // copy over the final prow to P at row i
    for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }
  // ...
  var sign = function(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }

  var tSNE = function(opt) {
    var opt = opt || {};
    this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
    this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
    this.epsilon = getopt(opt, "epsilon", 10); // learning rate

    this.iter = 0;
  }

  tSNE.prototype = {

    // this function takes a set of high-dimensional points
    // and creates matrix P from them using gaussian kernel
    initDataRaw: function(X) {
      var N = X.length;
      var D = X[0].length;
      assert(N > 0, " X is empty? You must have some data!");
      assert(D > 0, " X[0] is empty? Where is the data?");
      var dists = xtod(X); // convert X to pairwise distances
      this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
      this.N = N; // back up the size of the dataset
      this.initSolution(); // refresh this
    },

    // this function takes a given distance matrix and creates
    // matrix P from it.
    // D is assumed to be provided as a list of lists, and should be symmetric
    initDataDist: function(D) {
      var N = D.length;
      assert(N > 0, " X is empty? You must have some data!");
      // convert D to a (fast) typed array version
      var dists = zeros(N * N); // allocate contiguous array
      for(var i=0;i<N;i++) {