├── .DS_Store
├── Readme.md
└── tsne.js

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karpathy/tsnejs/13ece5d7e751a5180a59ca396013e800e1059c20/.DS_Store
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------

# tSNEJS

tSNEJS is an implementation of the t-SNE visualization algorithm in JavaScript.

t-SNE is a visualization algorithm that embeds things in 2 or 3 dimensions. If you have some data and can measure the pairwise differences between items, t-SNE can help you identify clusters in your data. See the example below.

## Online demo
The main project website has a [live example](http://cs.stanford.edu/people/karpathy/tsnejs/) and more description.

There is also the [t-SNE CSV demo](http://cs.stanford.edu/people/karpathy/tsnejs/csvdemo.html), which lets you simply paste CSV data into a textbox; tSNEJS computes and visualizes the embedding on the fly (no coding needed).

## Research Paper
The algorithm was originally described in this paper:

L.J.P. van der Maaten and G.E. Hinton.
Visualizing High-Dimensional Data Using t-SNE. Journal of Machine Learning Research
9(Nov):2579-2605, 2008.

You can find the PDF [here](http://jmlr.csail.mit.edu/papers/volume9/vandermaaten08a/vandermaaten08a.pdf).

## Example code
Import tsne.js into your document: `<script type="text/javascript" src="tsne.js"></script>`

And then here is some example code:

```javascript
var opt = {}
opt.epsilon = 10; // epsilon is learning rate (10 = default)
opt.perplexity = 30; // roughly how many neighbors each point influences (30 = default)
opt.dim = 2; // dimensionality of the embedding (2 = default)

var tsne = new tsnejs.tSNE(opt); // create a tSNE instance

// initialize data. Here we have 3 points and some example pairwise dissimilarities.
// The matrix should be symmetric, with zeros on the diagonal.
var dists = [[0.0, 0.1, 0.2], [0.1, 0.0, 0.3], [0.2, 0.3, 0.0]];
tsne.initDataDist(dists);

for(var k = 0; k < 500; k++) {
  tsne.step(); // every time you call this, solution gets better
}

var Y = tsne.getSolution(); // Y is an array of 2-D points that you can plot
```

The data can also be passed to tSNEJS as a set of high-dimensional points using the `tsne.initDataRaw(X)` function, where X is an array of arrays (the high-dimensional points that need to be embedded). The library computes the pairwise distances between these points, applies a Gaussian kernel, and then finds the appropriate embedding.
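
For example, here is a minimal sketch of that path (the 100 random 10-dimensional points below are made up purely for illustration):

```javascript
// minimal sketch of the initDataRaw path; the random 10-D points are illustrative only
var opt = {epsilon: 10, perplexity: 30, dim: 2};
var tsne = new tsnejs.tSNE(opt);

// X is an array of arrays: N high-dimensional points (here 100 points in 10-D)
var X = [];
for(var i = 0; i < 100; i++) {
  var point = [];
  for(var d = 0; d < 10; d++) { point.push(Math.random()); }
  X.push(point);
}

tsne.initDataRaw(X); // pairwise distances and the matrix P are computed internally

for(var k = 0; k < 500; k++) {
  tsne.step(); // gradient step, exactly as in the distance-matrix example above
}

var Y = tsne.getSolution(); // 100 points embedded in 2-D, ready to plot
```

As a rule of thumb, keep the perplexity well below the number of points, since it is roughly the number of neighbors each point influences.
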

## Web Demos
There are two web interfaces to this library that we are aware of:

- By Andrej, [here](http://cs.stanford.edu/people/karpathy/tsnejs/csvdemo.html).
- By Laurens, [here](http://homepage.tudelft.nl/19j49/tsnejs/), which takes data in a different format and can also use Google Spreadsheet input.

## About
Send questions to [@karpathy](https://twitter.com/karpathy).

## License

MIT

--------------------------------------------------------------------------------
/tsne.js:
--------------------------------------------------------------------------------
// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };

(function(global) {
  "use strict";

  // utility function
  var assert = function(condition, message) {
    if (!condition) { throw message || "Assertion failed"; }
  }

  // syntax sugar
  var getopt = function(opt, field, defaultval) {
    if(opt.hasOwnProperty(field)) {
      return opt[field];
    } else {
      return defaultval;
    }
  }

  // return 0 mean unit standard deviation random number
  var return_v = false;
  var v_val = 0.0;
  var gaussRandom = function() {
    if(return_v) {
      return_v = false;
      return v_val;
    }
    var u = 2*Math.random()-1;
    var v = 2*Math.random()-1;
    var r = u*u + v*v;
    if(r == 0 || r > 1) return gaussRandom();
    var c = Math.sqrt(-2*Math.log(r)/r);
    v_val = v*c; // cache this for next function call for efficiency
    return_v = true;
    return u*c;
  }

  // return random normal number
  var randn = function(mu, std){ return mu+gaussRandom()*std; }

  // utility that creates a contiguous vector of zeros of size n
  var zeros = function(n) {
    if(typeof(n)==='undefined' || isNaN(n)) { return []; }
    if(typeof ArrayBuffer === 'undefined') {
      // lacking browser support
      var arr = new Array(n);
      for(var i=0;i<n;i++) { arr[i] = 0; }
      return arr;
    } else {
      return new Float64Array(n); // typed arrays are faster
    }
  }

  // compute squared L2 distance between two vectors
  var L2 = function(x1, x2) {
    var D = x1.length;
    var d = 0;
    for(var i=0;i<D;i++) {
      var x1i = x1[i];
      var x2i = x2[i];
      d += (x1i-x2i)*(x1i-x2i);
    }
    return d;
  }

  // compute pairwise distances between all rows of X,
  // returned as a flattened N*N array
  var xtod = function(X) {
    var N = X.length;
    var dist = zeros(N * N); // allocate contiguous array
    for(var i=0;i<N;i++) {
      for(var j=i+1;j<N;j++) {
        var d = L2(X[i], X[j]);
        dist[i*N+j] = d;
        dist[j*N+i] = d;
      }
    }
    return dist;
  }

  // given a flattened NxN distance matrix D, compute the symmetrized affinity matrix P.
  // For every point, a binary search over the Gaussian precision beta finds the
  // bandwidth whose entropy matches the target log(perplexity).
  var d2p = function(D, perplexity, tol) {
    var Nf = Math.sqrt(D.length); // this better be an integer
    var N = Math.floor(Nf);
    assert(N === Nf, "D should have square number of elements.");
    var Htarget = Math.log(perplexity); // target entropy of the distribution
    var P = zeros(N * N); // temporary probability matrix
    var prow = zeros(N); // a temporary storage compartment

    for(var i=0;i<N;i++) {
      var betamin = -Infinity;
      var betamax = Infinity;
      var beta = 1; // initial value of precision
      var done = false;
      var maxtries = 50;
      var num = 0;

      // perform binary search to find a suitable precision beta
      // so that the entropy of the distribution is appropriate
      while(!done) {

        // compute the kernel row with the current precision beta
        var psum = 0.0;
        for(var j=0;j<N;j++) {
          var pj = Math.exp(- D[i*N+j] * beta);
          if(i===j) { pj = 0; } // we don't care about diagonals
          prow[j] = pj;
          psum += pj;
        }
        // normalize the row and compute its entropy
        var Hhere = 0.0;
        for(var j=0;j<N;j++) {
          var pj = prow[j] / psum;
          prow[j] = pj;
          if(pj > 1e-7) Hhere -= pj * Math.log(pj);
        }

        // adjust beta based on result
        if(Hhere > Htarget) {
          // entropy was too high (distribution too diffuse)
          // so we need to increase the precision for more peaky distribution
          betamin = beta; // move up the bounds
          if(betamax === Infinity) { beta = beta * 2; }
          else { beta = (beta + betamax) / 2; }

        } else {
          // converse case. make distribution less peaky
          betamax = beta;
          if(betamin === -Infinity) { beta = beta / 2; }
          else { beta = (beta + betamin) / 2; }
        }

        // stopping conditions: too many tries or got a good precision
        num++;
        if(Math.abs(Hhere - Htarget) < tol) { done = true; }
        if(num >= maxtries) { done = true; }
      }

      // console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
      // copy over the final prow to P at row i
      for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }

    } // end loop over examples i

    // symmetrize P and normalize it to sum to 1 over all ij
    var Pout = zeros(N * N);
    var N2 = N*2;
    for(var i=0;i<N;i++) {
      for(var j=0;j<N;j++) {
        Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i]) / N2, 1e-100);
      }
    }
    return Pout;
  }

  // helper function
  var sign = function(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }

  var tSNE = function(opt) {
    var opt = opt || {};
    this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
    this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
    this.epsilon = getopt(opt, "epsilon", 10); // learning rate

    this.iter = 0;
  }

  tSNE.prototype = {

    // this function takes a set of high-dimensional points
    // and creates matrix P from them using gaussian kernel
    initDataRaw: function(X) {
      var N = X.length;
      var D = X[0].length;
      assert(N > 0, " X is empty? You must have some data!");
      assert(D > 0, " X[0] is empty? Where is the data?");
      var dists = xtod(X); // convert X to pairwise distances
      this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
      this.N = N; // back up the size of the dataset
      this.initSolution(); // refresh this
    },

    // this function takes a given distance matrix and creates
    // matrix P from it.
    // D is assumed to be provided as a list of lists, and should be symmetric
    initDataDist: function(D) {
      var N = D.length;
      assert(N > 0, " D is empty? You must have some data!");
      // convert D to a (fast) typed array version
      var dists = zeros(N * N); // allocate contiguous array
      for(var i=0;i<N;i++) {
        for(var j=i+1;j<N;j++) {
          var d = D[i][j];
          dists[i*N+j] = d;
          dists[j*N+i] = d;
        }
      }
      this.P = d2p(dists, this.perplexity, 1e-4);
      this.N = N;
      this.initSolution(); // refresh this
    },