├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── package.json
├── public
├── MaterialIcons-Regular.eot
├── MaterialIcons-Regular.ijmap
├── MaterialIcons-Regular.svg
├── MaterialIcons-Regular.ttf
├── MaterialIcons-Regular.woff
├── MaterialIcons-Regular.woff2
├── favicon.png
├── fmnist.json
├── fmnist_spritesheet.png
├── fmnist_tsne_vs_umap.json
├── global.css
├── hyperparameters_encoded.json
├── mammoth_10k_encoded.json
├── mammoth_3d.json
├── mammoth_tsne_encoded.json
├── materialicons.base64
├── scatter-gl.min.js
├── share.png
├── spritesheet.png
├── supplement-bundle.css
├── supplement-bundle.css.map
└── toy_comparison_encoded.json
├── raw_data
├── mammoth_3d.json
├── mammoth_3d_50k.json
├── mammoth_tsne.json
└── mammoth_umap.json
├── rollup.config.js
├── scripts
├── compress_mammoth.js
├── compress_preprocessed.js
└── deploy.js
├── src
├── article
│ ├── Article.svx
│ ├── Figure.svelte
│ ├── FminstLabel.svelte
│ ├── Layout.svelte
│ ├── Spacer.svelte
│ └── Supplement.svx
├── global.css
├── index.html
├── main.js
├── shared
│ ├── components
│ │ └── Slider.svelte
│ └── js
│ │ ├── generators.js
│ │ ├── parameters.js
│ │ ├── parse-binary.js
│ │ ├── random.js
│ │ ├── run-demo.js
│ │ ├── sine.js
│ │ ├── test-binary.js
│ │ ├── toy-configs.js
│ │ ├── tsne.js
│ │ └── visualize.js
├── supplement.js
└── visualizations
│ ├── cech_visualization
│ ├── Figure.svelte
│ ├── components
│ │ └── Visualization.svelte
│ └── main.js
│ ├── fmnist_visualization
│ ├── Figure.svelte
│ ├── components
│ │ ├── Visualization.svelte
│ │ └── VisualizationSideBySide.svelte
│ ├── js
│ │ └── load-data.js
│ └── main.js
│ ├── hyperparameters_visualization
│ ├── Figure.svelte
│ ├── components
│ │ ├── Preview.svelte
│ │ └── Visualization.svelte
│ ├── js
│ │ ├── load-data.js
│ │ ├── preprocess.js
│ │ └── preprocessed.json
│ └── main.js
│ ├── mammoth_visualization
│ ├── FigureTsne.svelte
│ ├── FigureUmap.svelte
│ ├── components
│ │ ├── Projection2d.svelte
│ │ ├── Projection3d.svelte
│ │ ├── Tsne2d.svelte
│ │ ├── Umap2d.svelte
│ │ ├── VisualizationTsne.svelte
│ │ └── VisualizationUmap.svelte
│ ├── js
│ │ ├── colors.js
│ │ ├── load-data.js
│ │ ├── render.js
│ │ ├── times.js
│ │ └── tween.js
│ ├── tsne.js
│ └── umap.js
│ ├── toy_comparison_visualization
│ ├── Figure.svelte
│ ├── components
│ │ ├── Preview.svelte
│ │ └── Visualization.svelte
│ ├── js
│ │ ├── demos.js
│ │ ├── load_data.js
│ │ ├── preprocess.js
│ │ └── preprocessed.json
│ └── main.js
│ └── toy_visualization
│ ├── Figure.svelte
│ ├── components
│ ├── Parameter.svelte
│ ├── Preview.svelte
│ └── Visualization.svelte
│ └── main.js
└── yarn.lock
/.gitignore:
--------------------------------------------------------------------------------
1 | stats.html
2 | .DS_Store
3 | node_modules
4 | public/bundle.*
5 | public/bundle-supplement.*
6 | public/*.html
7 |
8 | stats.html
9 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Understanding UMAP
2 |
3 | Dimensionality reduction is a powerful tool for machine learning practitioners to visualize and understand large, high dimensional datasets. One of the most widely used techniques for visualization is [t-SNE](https://lvdmaaten.github.io/tsne/), but its performance suffers with large datasets and using it correctly can be [challenging](https://distill.pub/2016/misread-tsne/).
4 |
5 | [UMAP](https://github.com/lmcinnes/umap) is a new technique by McInnes et al. that offers a number of advantages over t-SNE, most notably increased speed and better preservation of the data's global structure. In this article, we'll take a look at the theory behind UMAP in order to better understand how the algorithm works, how to use it effectively, and how its performance compares with t-SNE.
6 |
7 | ```bash
8 | yarn
9 | yarn dev
10 | ```
11 |
12 | #### Publishing to GitHub Pages
13 |
14 | ```bash
15 | yarn pub
16 | ```
17 |
18 | #### To develop figures individually
19 |
20 | ```bash
21 | yarn dev:cech
22 | yarn dev:hyperparameters
23 | yarn dev:mammoth-umap
24 | yarn dev:mammoth-tsne
25 | yarn dev:supplement
26 | yarn dev:toy
27 | yarn dev:toy_comparison
28 | ```
29 |
30 | #### Data preprocessing
31 |
32 | For the mammoth figures, the [raw 3D data](https://github.com/MNoichl/UMAP-examples-mammoth-/blob/master/mammoth_a.csv) was downsampled to 50,000 points before being projected with UMAP / t-SNE. These 50,000 points were then randomly subsampled to 10,000 points in order to minimize the payload size.
33 |
34 | _Understanding UMAP_ uses a few tricks to make the data payloads for some of the interactive figures small enough to download in a reasonable time. The `mammoth` figures use a 10-bit encoding scheme to compress the 10,000 data points into a significantly smaller payload. The `hyperparameters` and `toy_comparison` figures precompute UMAP embeddings for all of their different combinations, then use the same 10-bit encoding scheme to compress the data.
35 |
36 | ```bash
37 | yarn preprocess:hyperparameters
38 | yarn preprocess:mammoth
39 | yarn preprocess:toy_comparison
40 | ```
41 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "umap_article",
3 | "version": "0.0.1",
4 | "devDependencies": {
5 | "@babel/core": "^7.6.0",
6 | "@babel/node": "^7.6.1",
7 | "@rollup/plugin-replace": "^2.2.0",
8 | "atob": "^2.1.2",
9 | "btoa": "^1.2.1",
10 | "d3": "^5.12.0",
11 | "esm": "^3.2.25",
12 | "mdsvex": "^0.4.0",
13 | "npm-run-all": "^4.1.5",
14 | "rollup": "^1.12.0",
15 | "rollup-plugin-commonjs": "^10.0.0",
16 | "rollup-plugin-json": "^4.0.0",
17 | "rollup-plugin-livereload": "^1.0.0",
18 | "rollup-plugin-node-resolve": "^5.2.0",
19 | "rollup-plugin-postcss": "^2.0.3",
20 | "rollup-plugin-static-site": "^0.1.0",
21 | "rollup-plugin-svelte": "^5.0.3",
22 | "rollup-plugin-terser": "^4.0.4",
23 | "rollup-plugin-visualizer": "^2.6.0",
24 | "sirv-cli": "^0.4.4",
25 | "svelte": "^3.0.0"
26 | },
27 | "dependencies": {
28 | "canvas": "^2.6.0",
29 | "color": "^3.1.2",
30 | "d3-scale": "^3.1.0",
31 | "gh-pages": "^2.1.1",
32 | "kd-tree-javascript": "^1.0.3",
33 | "lodash": "^4.17.15",
34 | "umap-js": "^1.3.1"
35 | },
36 | "scripts": {
37 | "autobuild": "node node_modules/rollup/dist/bin/rollup -c -w",
38 | "build": "run-s build:main build:supplement",
39 | "build:main": "node node_modules/rollup/dist/bin/rollup -c --environment build:main",
40 | "build:supplement": "node node_modules/rollup/dist/bin/rollup -c --environment build:supplement",
41 | "compress": "yarn preprocess:mammoth && node -r esm scripts/compress_preprocessed.js",
42 | "deploy": "URL_PREFIX=https://pair-code.github.io/understanding-umap run-s build deploy:script",
43 | "deploy:script": "node scripts/deploy.js",
44 | "dev": "run-p start:dev autobuild",
45 | "dev:cech": "FIGURE=cech run-p dev",
46 | "dev:distancefn": "FIGURE=distancefn run-p dev",
47 | "dev:fmnist": "FIGURE=fmnist run-p dev",
48 | "dev:hyperparameters": "FIGURE=hyperparameters run-p dev",
49 | "dev:mammoth-umap": "FIGURE=mammoth-umap run-p dev",
50 | "dev:mammoth-tsne": "FIGURE=mammoth-tsne run-p dev",
51 | "dev:supplement": "FIGURE=supplement run-p dev",
52 | "dev:toy": "FIGURE=toy run-p dev",
53 | "dev:toy_comparison": "FIGURE=toy_comparison run-p dev",
54 | "preprocess": "run-s preprocess:mammoth preprocess:hyperparameters preprocess:toy_comparison compress",
55 | "preprocess:toy_comparison": "node -r esm src/visualizations/toy_comparison_visualization/js/preprocess.js",
56 | "preprocess:hyperparameters": "node -r esm src/visualizations/hyperparameters_visualization/js/preprocess.js",
57 | "preprocess:mammoth": "node -r esm scripts/compress_mammoth.js",
58 | "start": "sirv public --single",
59 | "start:dev": "sirv public --single --dev",
60 | "pub": "URL_PREFIX=https://pair-code.github.io/understanding-umap yarn build && node scripts/deploy.js"
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/public/MaterialIcons-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PAIR-code/understanding-umap/1ff6f3a1aa3c036b7f4153862c5d95186dddb2f4/public/MaterialIcons-Regular.eot
--------------------------------------------------------------------------------
/public/MaterialIcons-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PAIR-code/understanding-umap/1ff6f3a1aa3c036b7f4153862c5d95186dddb2f4/public/MaterialIcons-Regular.ttf
--------------------------------------------------------------------------------
/public/MaterialIcons-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PAIR-code/understanding-umap/1ff6f3a1aa3c036b7f4153862c5d95186dddb2f4/public/MaterialIcons-Regular.woff
--------------------------------------------------------------------------------
/public/MaterialIcons-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PAIR-code/understanding-umap/1ff6f3a1aa3c036b7f4153862c5d95186dddb2f4/public/MaterialIcons-Regular.woff2
--------------------------------------------------------------------------------
/public/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PAIR-code/understanding-umap/1ff6f3a1aa3c036b7f4153862c5d95186dddb2f4/public/favicon.png
--------------------------------------------------------------------------------
/public/fmnist_spritesheet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PAIR-code/understanding-umap/1ff6f3a1aa3c036b7f4153862c5d95186dddb2f4/public/fmnist_spritesheet.png
--------------------------------------------------------------------------------
/public/global.css:
--------------------------------------------------------------------------------
1 | html,
2 | body {
3 | position: relative;
4 | width: 100%;
5 | height: 100%;
6 | }
7 |
8 | body {
9 | color: #333;
10 | margin: 0;
11 | padding: 8px;
12 | box-sizing: border-box;
13 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
14 | Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
15 | }
16 |
17 | a {
18 | color: rgb(0, 100, 200);
19 | text-decoration: none;
20 | }
21 |
22 | a:hover {
23 | text-decoration: underline;
24 | }
25 |
26 | a:visited {
27 | color: rgb(0, 80, 160);
28 | }
29 |
30 | p {
31 | font-size: 1.2rem;
32 | line-height: 1.6em;
33 | }
34 |
35 | label {
36 | display: block;
37 | }
38 |
39 | input,
40 | button,
41 | select,
42 | textarea {
43 | font-family: inherit;
44 | font-size: inherit;
45 | padding: 0.4em;
46 | margin: 0 0 0.5em 0;
47 | box-sizing: border-box;
48 | border: 1px solid #ccc;
49 | border-radius: 2px;
50 | }
51 |
52 | input:disabled {
53 | color: #ccc;
54 | }
55 |
56 | input[type="range"] {
57 | height: 0;
58 | }
59 |
60 | code {
61 | font-family: SFMono-Regular, Consolas, Liberation Mono, Menlo, monospace;
62 | font-style: normal;
63 | padding: 0.2em 0.4em;
64 | margin: 0;
65 | font-size: 85%;
66 | background-color: rgba(27, 31, 35, 0.05);
67 | border-radius: 3px;
68 | }
69 |
70 | .figure-number {
71 | font-weight: 600;
72 | }
73 |
74 | button {
75 | color: #333;
76 | background-color: #f4f4f4;
77 | outline: none;
78 | }
79 |
80 | button:active {
81 | background-color: #ddd;
82 | }
83 |
84 | button:focus {
85 | border-color: #666;
86 | }
87 |
88 | .article-title {
89 | font-size: 3em;
90 | margin-bottom: 0;
91 | }
92 |
93 | .acknowledgements p {
94 | font-size: 1rem;
95 | line-height: 1.4rem;
96 | }
97 |
98 | h2 {
99 | margin-top: 3em;
100 | }
101 |
102 | @font-face {
103 | font-family: "Material Icons";
104 | font-style: normal;
105 | font-weight: 400;
106 | src: url(MaterialIcons-Regular.eot); /* For IE6-8 */
107 | src: local("Material Icons"), local("MaterialIcons-Regular"),
108 | url(MaterialIcons-Regular.woff2) format("woff2"),
109 | url(MaterialIcons-Regular.woff) format("woff"),
110 | url(MaterialIcons-Regular.ttf) format("truetype");
111 | }
112 |
113 | .material-icons {
114 | font-family: "Material Icons";
115 | font-weight: normal;
116 | font-style: normal;
117 | font-size: 24px; /* Preferred icon size */
118 | display: inline-block;
119 | line-height: 1;
120 | text-transform: none;
121 | letter-spacing: normal;
122 | word-wrap: normal;
123 | white-space: nowrap;
124 | direction: ltr;
125 |
126 | /* Support for all WebKit browsers. */
127 | -webkit-font-smoothing: antialiased;
128 | /* Support for Safari and Chrome. */
129 | text-rendering: optimizeLegibility;
130 |
131 | /* Support for Firefox. */
132 | -moz-osx-font-smoothing: grayscale;
133 |
134 | /* Support for IE. */
135 | font-feature-settings: "liga";
136 | }
137 |
--------------------------------------------------------------------------------
/public/share.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PAIR-code/understanding-umap/1ff6f3a1aa3c036b7f4153862c5d95186dddb2f4/public/share.png
--------------------------------------------------------------------------------
/public/spritesheet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PAIR-code/understanding-umap/1ff6f3a1aa3c036b7f4153862c5d95186dddb2f4/public/spritesheet.png
--------------------------------------------------------------------------------
/public/supplement-bundle.css:
--------------------------------------------------------------------------------
1 | figure.svelte-1l13q0g{width:110%;margin-left:-5%;display:flex;flex-direction:column;justify-content:center;align-items:center;margin-top:18px}.figure-container.svelte-1l13q0g{max-width:1000px;width:100%}caption.svelte-1l13q0g{margin:20px 0;font-style:italic;width:80%;font-size:13px}
2 | div.spacer.svelte-6euh3r{width:100%}
3 | section.svelte-1t9ulqu{display:flex;flex-direction:column;align-items:center}.container.svelte-1t9ulqu{width:800px}
4 | canvas.svelte-rm2utp{background-color:#fff;width:100%;margin-bottom:20px}.container.svelte-rm2utp{width:100%;height:100%;display:flex;flex-direction:column;align-items:center;position:relative}.controls.svelte-rm2utp{display:flex;flex-direction:row;justify-content:center;align-items:center;margin-top:6px}label.svelte-rm2utp{margin-right:10px;width:200px}.refresh.svelte-rm2utp{position:absolute;bottom:0;left:0;cursor:pointer}.refresh.svelte-rm2utp:hover{color:#333}
5 | input[type="range"].svelte-fa559t{-webkit-appearance:none;width:100%;background:transparent;border:none;margin:0}input[type="range"].svelte-fa559t::-webkit-slider-thumb{-webkit-appearance:none}input[type="range"].svelte-fa559t:focus{outline:none}input[type="range"].svelte-fa559t::-ms-track{width:100%;cursor:pointer;background:transparent;border-color:transparent;color:transparent}input[type="range"].svelte-fa559t::-webkit-slider-thumb{-webkit-appearance:none;height:14px;width:14px;border-radius:50%;background:steelblue;cursor:pointer;margin-top:-6px}input[type="range"].svelte-fa559t::-moz-range-thumb{height:14px;width:14px;border-radius:50%;background:steelblue;cursor:pointer;border:none}input[type="range"].svelte-fa559t::-ms-thumb{height:14px;width:14px;border-radius:50%;background:grey;cursor:pointer}input[type="range"].svelte-fa559t::-webkit-slider-runnable-track{width:100%;height:3px;cursor:pointer;background:rgba(0, 0, 0, 0.1);border-radius:2px;border:none}input[type="range"].svelte-fa559t:focus::-webkit-slider-runnable-track{background:rgba(0, 0, 0, 0.15)}input[type="range"].svelte-fa559t::-moz-range-track{width:100%;height:3px;cursor:pointer;background:rgba(0, 0, 0, 0.1);border-radius:2px;border:none}input[type="range"].svelte-fa559t::-ms-track{width:100%;height:3px;cursor:pointer;background:rgba(0, 0, 0, 0.1);border-radius:2px;border:none}input[type="range"].svelte-fa559t::-ms-fill-lower{background:rgba(0, 0, 0, 0.1)}input[type="range"].svelte-fa559t:focus::-ms-fill-lower{background:rgba(0, 0, 0, 0.1)}input[type="range"].svelte-fa559t::-ms-fill-upper{background:rgba(0, 0, 0, 0.1)}input[type="range"].svelte-fa559t:focus::-ms-fill-upper{background:rgba(0, 0, 0, 0.1)}
6 |
7 | /*# sourceMappingURL=supplement-bundle.css.map */
--------------------------------------------------------------------------------
/rollup.config.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | import svelte from "rollup-plugin-svelte";
17 | import resolve from "rollup-plugin-node-resolve";
18 | import commonjs from "rollup-plugin-commonjs";
19 | import json from "rollup-plugin-json";
20 | import livereload from "rollup-plugin-livereload";
21 | import { terser } from "rollup-plugin-terser";
22 | import staticSite from "rollup-plugin-static-site";
23 | import replace from "@rollup/plugin-replace";
24 | import postcss from "rollup-plugin-postcss";
25 | import { mdsvex } from "mdsvex";
26 |
// Build switches, all read from the environment.
const production = !process.env.ROLLUP_WATCH; // not watching => production
const urlPrefix = process.env.URL_PREFIX || "";
const build = process.env.build || "main"; // note: lower-case variable name
const figure = process.env.FIGURE;

// Entry module for each standalone figure that FIGURE can select.
const figureEntryPoints = new Map([
  ["cech", "src/visualizations/cech_visualization/main.js"],
  ["fmnist", "src/visualizations/fmnist_visualization/main.js"],
  ["hyperparameters", "src/visualizations/hyperparameters_visualization/main.js"],
  ["mammoth-umap", "src/visualizations/mammoth_visualization/umap.js"],
  ["mammoth-tsne", "src/visualizations/mammoth_visualization/tsne.js"],
  ["supplement", "src/supplement.js"],
  ["toy_comparison", "src/visualizations/toy_comparison_visualization/main.js"],
  ["toy", "src/visualizations/toy_visualization/main.js"]
]);

// An unset or unrecognised FIGURE builds the full article.
let input = figureEntryPoints.get(figure) || "src/main.js";

// Output settings for the default ("main") build.
let outputTitle = "Understanding UMAP";
let outputCss = "public/bundle.css";
let outputBundle = "public/bundle.js";
let outputHtml = "index.html";

// The supplement build has its own entry module, bundle names and page title.
// Any other build value keeps the defaults.
if (build === "supplement") {
  input = "src/supplement.js";
  outputCss = "public/bundle-supplement.css";
  outputBundle = "public/bundle-supplement.js";
  outputHtml = "supplement.html";
  outputTitle = "Understanding UMAP: A deeper dive into UMAP theory";
}
65 |
/**
 * Turns a build output path into the URL it is served from by swapping the
 * first "public/" occurrence for the configured URL prefix plus a slash.
 * Paths that do not contain "public/" are returned unchanged.
 */
function processAssetName(str) {
  const servedRoot = `${urlPrefix}/`;
  return str.replace("public/", servedRoot);
}
69 |
// Global variable name under which each external library is expected at
// runtime (the libraries themselves are listed in `external` below).
const externalGlobals = {
  d3: "d3",
  three: "THREE",
  "scatter-gl": "ScatterGL"
};

// Fills in the placeholders of the HTML shell for the current build. Each
// placeholder is substituted once (first occurrence only).
function renderTemplate(templateStr, templateData) {
  const cssUrl = processAssetName(outputCss);
  const jsUrl = processAssetName(outputBundle);
  return templateStr
    .replace("__TITLE__", outputTitle)
    .replace("__URL_PREFIX__", urlPrefix)
    .replace("__CSS_BUNDLE__", cssUrl)
    .replace("__JS_BUNDLE__", jsUrl);
}

export default {
  external: ["d3", "scatter-gl", "three"],
  input,
  output: {
    sourcemap: true,
    format: "iife",
    name: "app",
    file: outputBundle,
    globals: externalGlobals
  },
  plugins: [
    svelte({
      extensions: [".svelte", ".svx"],
      // run-time checks are only enabled outside production
      dev: !production,
      // component CSS is written to its own file instead of being inlined
      css: css => {
        css.write(outputCss);
      },
      // .svx documents go through mdsvex before the Svelte compiler
      preprocess: mdsvex({
        extension: ".svx"
      })
    }),
    postcss({
      extract: "public/global.css"
    }),
    replace({ __URL_PREFIX__: urlPrefix }),
    staticSite({
      dir: "public",
      filename: outputHtml,
      template: {
        path: "src/index.html",
        func: renderTemplate
      }
    }),

    // Plugins needed for dependencies installed from npm; see
    // https://github.com/rollup/rollup-plugin-commonjs
    resolve({
      browser: true,
      dedupe: importee =>
        importee === "svelte" || importee.startsWith("svelte/")
    }),
    commonjs(),
    json(),

    // Outside production, watch `public` and refresh the browser on changes
    !production && livereload("public"),

    // Production builds (npm run build rather than npm run dev) are minified
    production && terser()
  ]
};
139 |
--------------------------------------------------------------------------------
/scripts/compress_mammoth.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | const fs = require("fs");
17 | const path = require("path");
18 | const btoa = require("btoa");
19 |
20 | const PUBLIC_DIR = "../public/";
21 |
22 | const { encode, toString } = require("../src/shared/js/parse-binary");
23 | const {
24 | TSNE_MAMMOTH_PERPLEXITIES,
25 | N_BITS_MAMMOTH
26 | } = require("../src/shared/js/parameters");
27 | const mammothData = require("../raw_data/mammoth_umap.json");
28 | const mammothTSNE = require("../raw_data/mammoth_tsne.json");
29 | const mammoth3d = mammothData["3d"];
30 |
// Pair every label with the position it came from, then order the pairs by
// label value so the original indices can be visited label by label.
const labels = mammothData.labels;
const labelIndices = labels.map((label, index) => ({ label, index }));
labelIndices.sort((left, right) => left.label - right.label);

// Number of points carrying each label.
const labelCounts = {};
for (const { label } of labelIndices) {
  labelCounts[label] = (labelCounts[label] || 0) + 1;
}
43 |
// Encode every UMAP projection: points are emitted in label-sorted order as a
// flat [x0, y0, x1, y1, ...] list, bit-packed, then base64-encoded.
const encodedUMAP = {};
for (const key of Object.keys(mammothData.projections)) {
  const projection = mammothData.projections[key];

  const flatCoords = [];
  for (const { index } of labelIndices) {
    const point = projection[index];
    flatCoords.push(point[0], point[1]);
  }

  const packed = encode(flatCoords, N_BITS_MAMMOTH);
  encodedUMAP[key] = btoa(toString(packed));
}
58 |
// Put the 3D points in the same label-sorted order as `labelIndices`. Each
// point's components are emitted as [second, third, first].
const mammoth3DSorted = labelIndices.map(({ index }) => {
  const point = mammoth3d[index];
  return [point[1], point[2], point[0]];
});
65 |
// Encode one t-SNE projection per configured perplexity, looked up under the
// key `p=<perplexity>`, using the same layout as the UMAP projections.
const encodedTSNE = {};
for (const perplexity of TSNE_MAMMOTH_PERPLEXITIES) {
  const key = `p=${perplexity}`;
  const projection = mammothTSNE.projections[key];

  const flatCoords = [];
  for (const { index } of labelIndices) {
    const point = projection[index];
    flatCoords.push(point[0], point[1]);
  }

  const packed = encode(flatCoords, N_BITS_MAMMOTH);
  encodedTSNE[key] = btoa(toString(packed));
}
81 |
// Per-label point counts, listed in ascending label order. Despite the name,
// the values are counts rather than running offsets.
const labelOffsets = Object.keys(labelCounts)
  .sort((a, b) => a - b)
  .map(label => labelCounts[label]);
88 |
// Serialises `payload` as JSON into a file of the public directory.
const writePublicJson = (filename, payload) => {
  const target = path.join(__dirname, PUBLIC_DIR, filename);
  fs.writeFileSync(target, JSON.stringify(payload));
};

// The UMAP file bundles the encoded projections with the per-label counts.
const encodedUMAPOutput = {
  projections: encodedUMAP,
  labelOffsets
};

writePublicJson("mammoth_10k_encoded.json", encodedUMAPOutput);
writePublicJson("mammoth_3d.json", mammoth3DSorted);
writePublicJson("mammoth_tsne_encoded.json", encodedTSNE);
106 |
--------------------------------------------------------------------------------
/scripts/compress_preprocessed.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | const fs = require("fs");
17 | const path = require("path");
18 | const _ = require("lodash");
19 | const btoa = require("btoa");
20 |
21 | const PUBLIC_DIR = "../public/";
22 |
23 | const { encode, toString } = require("../src/shared/js/parse-binary");
24 |
25 | const hyperparametersData = require("../src/visualizations/hyperparameters_visualization/js/preprocessed.json");
26 | const toyComparisonData = require("../src/visualizations/toy_comparison_visualization/js/preprocessed.json");
27 |
28 | const {
29 | N_BITS_HYPERPARAMETERS,
30 | N_BITS_TOY_COMPARISON
31 | } = require("../src/shared/js/parameters");
32 |
/**
 * Quantizes a list of numbers onto the integers 0 .. 2^nBits - 1.
 *
 * The reported range always contains 0, because both bounds start at 0 and
 * are only widened by the data.
 *
 * @param {number[]} numbers - values of one dimension
 * @param {number} nBits - bit width of the quantized values
 * @returns {{normalized: number[], range: {min: number, max: number}}}
 *   the quantized values and the range needed to map them back
 */
function _normalize(numbers, nBits) {
  let min = 0;
  let max = 0;
  for (let i = 0; i < numbers.length; i++) {
    const number = numbers[i];
    min = number < min ? number : min;
    max = number > max ? number : max;
  }
  const scale = 2 ** nBits - 1;
  const range = max - min;
  const normalized = numbers.map(number => {
    // A zero-width range (every value is 0) would divide by zero and produce
    // NaN; such values all sit at the bottom of the range.
    if (range === 0) return 0;
    return Math.floor(((number - min) / range) * scale);
  });
  return { normalized, range: { min, max } };
}
48 |
/**
 * Quantizes a list of points dimension by dimension.
 *
 * Every dimension is normalized independently with `_normalize`, so each one
 * gets its own range. The dimensionality is taken from the first point.
 *
 * @param {number[][]} points - points that all share the same dimensionality
 * @param {number} nBits - bit width of the quantized coordinates
 * @returns {{normalized: number[][], ranges: {min: number, max: number}[]}}
 *   the quantized points (same order as the input) and one range per dimension
 */
function normalize(points, nBits) {
  // Without a first point there is no dimensionality to read.
  if (points.length === 0) return { normalized: [], ranges: [] };

  const nDimensions = points[0].length;
  const columns = [];
  const ranges = [];

  // Normalize each coordinate column on its own.
  for (let dim = 0; dim < nDimensions; dim++) {
    const column = points.map(point => point[dim]);
    const { normalized, range } = _normalize(column, nBits);
    columns.push(normalized);
    ranges.push(range);
  }

  // Stitch the columns back together into per-point coordinate lists.
  const zipped = points.map((_point, row) =>
    columns.map(column => column[row])
  );
  return { normalized: zipped, ranges };
}
70 |
// Encode the hyperparameter visualization projections. Each input entry maps
// a key to a list of points; the output mirrors that shape, with every
// projection replaced by its quantized, bit-packed, base64-encoded form plus
// the metadata recorded alongside it.
const encodedHyperparameters = hyperparametersData.map(entry => {
  const encodedEntry = {};
  for (const key of Object.keys(entry)) {
    const projection = entry[key];

    const { normalized, ranges } = normalize(
      projection,
      N_BITS_HYPERPARAMETERS
    );
    const packed = encode(_.flatten(normalized), N_BITS_HYPERPARAMETERS);

    encodedEntry[key] = {
      data: btoa(toString(packed)),
      nDimensions: 2,
      length: projection.length,
      ranges
    };
  }
  return encodedEntry;
});
93 |
// Encode the toy comparison projections. Each input entry holds one
// dictionary of projections per top-level key; every output entry starts
// with empty `umap` and `tsne` slots that are filled from the input.
const encodedToyComparison = toyComparisonData.map(entry => {
  const encodedEntry = { umap: {}, tsne: {} };

  for (const umapOrTsne of Object.keys(entry)) {
    const projections = entry[umapOrTsne];

    for (const key of Object.keys(projections)) {
      const projection = projections[key];

      const { normalized, ranges } = normalize(
        projection,
        N_BITS_TOY_COMPARISON
      );
      const packed = encode(_.flatten(normalized), N_BITS_TOY_COMPARISON);

      encodedEntry[umapOrTsne][key] = {
        data: btoa(toString(packed)),
        nDimensions: 2,
        length: projection.length,
        ranges
      };
    }
  }

  return encodedEntry;
});
122 |
// Write each encoded dataset as JSON into the public directory.
const encodedOutputs = [
  ["hyperparameters_encoded.json", encodedHyperparameters],
  ["toy_comparison_encoded.json", encodedToyComparison]
];
for (const [filename, payload] of encodedOutputs) {
  fs.writeFileSync(
    path.join(__dirname, PUBLIC_DIR, filename),
    JSON.stringify(payload)
  );
}
131 |
--------------------------------------------------------------------------------
/scripts/deploy.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | const ghPages = require("gh-pages");
17 |
// Publish the contents of `public`. The callback receives an error when
// publishing fails, so success is only reported when none was passed.
ghPages.publish("public", err => {
  if (err) {
    console.error("Failed to publish to github pages:", err);
    // Make the failure visible to whatever invoked the script.
    process.exitCode = 1;
    return;
  }
  console.log("🚀 published to github pages!");
});
21 |
--------------------------------------------------------------------------------
/src/article/Figure.svelte:
--------------------------------------------------------------------------------
1 |
17 |
18 |
53 |
54 |
55 |
27 |
--------------------------------------------------------------------------------
/src/article/Supplement.svx:
--------------------------------------------------------------------------------
1 | ---
2 | title: A deeper dive into UMAP theory
3 | layout: ./Layout.svelte
4 | ---
5 |
6 |
20 |
21 | ```js exec
22 | import Figure from "./Figure.svelte";
23 | import Spacer from "./Spacer.svelte";
24 | import FmnistLabel from "./FminstLabel.svelte";
25 |
26 | import CechVisualization from "../visualizations/cech_visualization/components/Visualization.svelte";
27 | ```
28 |
29 |
A deeper dive into UMAP theory
30 |
31 | _Andy Coenen, Adam Pearce ([Google PAIR](https://ai.google/research/teams/brain/pair))_
32 |
33 |
34 |
35 | The following explanation offers a rather high-level explanation of the theory behind UMAP, following up on the even simpler overview found in [Understanding UMAP](__URL_PREFIX__/index.html). Those interested in getting the full picture are encouraged to read UMAP's [excellent documentation](https://umap-learn.readthedocs.io/en/latest/how_umap_works.html).
36 |
37 | Most dimensionality reduction algorithms fit into either one of two broad categories: Matrix factorization (such as PCA) or Graph layout (such as t-SNE). At its core, UMAP is a graph layout algorithm, very similar to t-SNE, but with a number of key theoretical underpinnings that give the algorithm a more solid footing.
38 |
39 | In its simplest sense, the UMAP algorithm consists of two steps: construction of a graph in high dimensions followed by an optimization step to find the most similar graph in lower dimensions. In order to achieve this goal, the algorithm relies on a number of insights from algebraic topology and Riemannian geometry. Despite the intimidating mathematics, the intuitions behind the core principles are actually quite simple: UMAP essentially constructs a weighted graph from the high dimensional data, with edge strength representing how “close” a given point is to another, then projects this graph down to a lower dimensionality. The advanced mathematics gives UMAP a solid footing with which to handle the challenges of doing this in high dimensions with real data.
40 |
41 | In order to construct the initial high-dimensional graph, UMAP relies on constructing what’s known as a **Čech complex**, which is a way of representing a topology combinatorially (using sets rather than continuous geometry). In order to get there, we’ll use a basic building block called a **simplex**. Geometrically, a simplex is a k-dimensional object formed by connecting k + 1 points - for example, a 0-simplex is a point, a 1-simplex is a line, and a 2-simplex is a triangle. By thinking of our data as a set of simplices, we can capture a representation of the topology, and by combining those simplices in a specific way to form a Čech complex, we get some theoretical guarantees about how well it represents the topology.
42 |
43 |
44 |
45 |
46 | Figure 3: Adjust the slider to extend a radius outwards from each point, computed by the distance to its nth nearest neighbor. Notice that past the intersection with the first neighbor, the radius begins to get fuzzy, with subsequent connections appearing with less weight.
47 |
48 |
49 |
50 | We begin by considering each point in our data as a sample from a continuous, high-dimensional shape (our topology). We can think of each point as a 0-simplex. By extending out from each point some radius r, and connecting points that overlap, we can construct sets of 1-, 2-, and higher-dimensional simplices. This simplicial complex does a reasonable job of approximating the fundamental topology of the dataset, explained by the [Nerve Theorem](https://en.wikipedia.org/wiki/Nerve_of_a_covering). It turns out that the bulk of the work of representing the topology is actually being done by the 0- and 1-simplices, which constitute what's known as a **Vietoris-Rips Complex**. Most importantly, the **Vietoris-Rips Complex** is much easier to work with computationally, especially for large datasets. By considering just the 0- and 1-simplices, we’ve effectively just constructed a graph, which can be readily projected into a lower-dimensional analogue.
51 |
52 | Unfortunately, real-world high-dimensional data presents a problem that UMAP needs to overcome - picking the right-sized radius. Too small a radius and we’ll tend towards isolated, local clusters of points. Too large, and everything becomes connected. This problem is exacerbated by the **curse of dimensionality**, where distances between points become increasingly similar in higher dimensions. UMAP solves this problem in a clever way: Rather than using a fixed radius, UMAP uses a variable radius determined for each point based on the distance to its kth nearest neighbor. Within this local radius, connectedness is then made “fuzzy” by making each connection a probability, with further points less likely to be connected. Since we don’t want any points to be completely isolated, a constraint is added that every point must be connected to at least its closest neighboring point. The final output of this process is a weighted graph, with edge weights representing the likelihood that two points are “connected” in our high-dimensional manifold.
53 |
54 | Note that since each point’s local notion of distance may be different than its neighbors’, we must resolve whether two points are connected based on potentially different directed edge weights. UMAP reconciles this inconsistency by computing the probability that at least one of the edges exists.
55 |
56 | Once the final, fuzzy simplicial complex is constructed, UMAP projects the data into lower dimensions essentially via a force-directed graph layout algorithm. This optimization step is actually very similar to t-SNE, but by jumping through the theoretical hoops while constructing our initial simplicial complex, UMAP is able to accelerate the optimization and preserve much more global structure than t-SNE.
57 |
--------------------------------------------------------------------------------
/src/global.css:
--------------------------------------------------------------------------------
/* Page box: the root elements fill the viewport. */
html,
body {
  position: relative;
  width: 100%;
  height: 100%;
}

/* Default text colour, spacing and system font stack. */
body {
  color: #333;
  margin: 0;
  padding: 8px;
  box-sizing: border-box;
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
    Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
}

/* Links: underlined only on hover, slightly darker once visited. */
a {
  color: rgb(0, 100, 200);
  text-decoration: none;
}

a:hover {
  text-decoration: underline;
}

a:visited {
  color: rgb(0, 80, 160);
}

/* Body copy. */
p {
  font-size: 1.2rem;
  line-height: 1.6em;
}

/* Form controls inherit the page font and share one border treatment. */
label {
  display: block;
}

input,
button,
select,
textarea {
  font-family: inherit;
  font-size: inherit;
  padding: 0.4em;
  margin: 0 0 0.5em 0;
  box-sizing: border-box;
  border: 1px solid #ccc;
  border-radius: 2px;
}

input:disabled {
  color: #ccc;
}

input[type="range"] {
  height: 0;
}

/* Inline code: monospace on a faint grey background. */
code {
  font-family: SFMono-Regular, Consolas, Liberation Mono, Menlo, monospace;
  font-style: normal;
  padding: 0.2em 0.4em;
  margin: 0;
  font-size: 85%;
  background-color: rgba(27, 31, 35, 0.05);
  border-radius: 3px;
}

.figure-number {
  font-weight: 600;
}

/* Buttons: the default focus outline is removed; focus is shown through the
   darker border colour instead. */
button {
  color: #333;
  background-color: #f4f4f4;
  outline: none;
}

button:active {
  background-color: #ddd;
}

button:focus {
  border-color: #666;
}

/* Article-specific typography. */
.article-title {
  font-size: 3em;
  margin-bottom: 0;
}

.acknowledgements p {
  font-size: 1rem;
  line-height: 1.4rem;
}

h2 {
  margin-top: 3em;
}
101 |
/* Material Icons font: a locally installed copy is preferred, otherwise the
   font files referenced by relative URL are used. */
@font-face {
  font-family: "Material Icons";
  font-style: normal;
  font-weight: 400;
  src: url(MaterialIcons-Regular.eot); /* For IE6-8 */
  src: local("Material Icons"), local("MaterialIcons-Regular"),
    url(MaterialIcons-Regular.woff2) format("woff2"),
    url(MaterialIcons-Regular.woff) format("woff"),
    url(MaterialIcons-Regular.ttf) format("truetype");
}

/* Elements with this class render their text in the icon font; the "liga"
   feature setting below enables the font's ligatures. */
.material-icons {
  font-family: "Material Icons";
  font-weight: normal;
  font-style: normal;
  font-size: 24px; /* Preferred icon size */
  display: inline-block;
  line-height: 1;
  text-transform: none;
  letter-spacing: normal;
  word-wrap: normal;
  white-space: nowrap;
  direction: ltr;

  /* Support for all WebKit browsers. */
  -webkit-font-smoothing: antialiased;
  /* Support for Safari and Chrome. */
  text-rendering: optimizeLegibility;

  /* Support for Firefox. */
  -moz-osx-font-smoothing: grayscale;

  /* Support for IE. */
  font-feature-settings: "liga";
}
137 |
--------------------------------------------------------------------------------
/src/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Understanding UMAP
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/src/main.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | import Article from "./article/Article.svx";
17 | import "./global.css";
18 |
// Mount the article component directly into the document body and expose
// the instance as the module's default export.
const article = new Article({ target: document.body });

export default article;
22 |
--------------------------------------------------------------------------------
/src/shared/components/Slider.svelte:
--------------------------------------------------------------------------------
1 |
27 |
28 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/src/shared/js/parameters.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
// Perplexity values of the mammoth t-SNE projections. The mammoth compression
// script looks up one projection per entry under the key `p=<value>`.
export const TSNE_MAMMOTH_PERPLEXITIES = [
  5,
  15,
  30,
  50,
  100,
  200,
  500,
  1000,
  2000
];

// Bit widths passed to the binary encoder (its `nBits` argument) for each
// dataset.
export const N_BITS_HYPERPARAMETERS = 8;
export const N_BITS_TOY_COMPARISON = 8;
export const N_BITS_MAMMOTH = 10;
31 |
--------------------------------------------------------------------------------
/src/shared/js/parse-binary.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
/**
 * Fixed-size bit set stored in 32-bit words.
 *
 * Bit `n` lives in word `floor(n / 32)` at bit position `n % 32`. Note that
 * `length` is the number of backing words, not the number of bits.
 */
class BitArray {
  /** @param {number} length - number of bits to make room for */
  constructor(length) {
    const wordCount = Math.ceil(length / 32);
    this.backingArray = new Uint32Array(wordCount);
    this.length = wordCount;
  }

  /** Returns true when bit `n` is set. */
  get(n) {
    const word = this.backingArray[(n / 32) | 0];
    const mask = 1 << n % 32;
    return (word & mask) !== 0;
  }

  /** Sets bit `n`. */
  on(n) {
    const slot = (n / 32) | 0;
    this.backingArray[slot] = this.backingArray[slot] | (1 << n % 32);
  }

  /** Clears bit `n`. */
  off(n) {
    const slot = (n / 32) | 0;
    this.backingArray[slot] = this.backingArray[slot] & ~(1 << n % 32);
  }

  /** Flips bit `n`. */
  toggle(n) {
    const slot = (n / 32) | 0;
    this.backingArray[slot] = this.backingArray[slot] ^ (1 << n % 32);
  }
}
35 |
/**
 * Packs integers into a byte array using `nBits` bits per value.
 *
 * Values are laid out back to back, least-significant bit first, and the
 * resulting bit stream is cut into bytes (again least-significant bit first).
 * Bits of a value above `nBits` are dropped.
 *
 * @param {number[]} numbers - values to pack
 * @param {number} nBits - bits stored per value
 * @returns {Uint8Array} the packed bytes
 */
module.exports.encode = function encode(numbers, nBits) {
  const totalBits = numbers.length * nBits;
  const bits = new BitArray(totalBits);

  // Copy the low nBits of every value into the shared bit stream.
  for (let valueIndex = 0; valueIndex < numbers.length; valueIndex++) {
    const value = numbers[valueIndex];
    for (let bit = 0; bit < nBits; bit++) {
      const isSet = (value & (2 ** bit)) !== 0;
      if (isSet) bits.on(valueIndex * nBits + bit);
    }
  }

  // Regroup the bit stream into bytes.
  const bytes = new Uint8Array(Math.ceil(totalBits / 8));
  for (let byteIndex = 0; byteIndex < bytes.length; byteIndex++) {
    let byte = 0;
    for (let bit = 0; bit < 8; bit++) {
      if (bits.get(byteIndex * 8 + bit)) byte += 2 ** bit;
    }
    bytes[byteIndex] = byte;
  }
  return bytes;
};
61 |
/**
 * Unpacks `count` integers of `nBits` bits each from a byte array; the
 * inverse of the bit layout written by `encode`.
 *
 * @param {Uint8Array} byteArray - packed bytes
 * @param {number} count - number of values to read
 * @param {number} nBits - bits stored per value
 * @returns {number[]} the decoded values
 */
module.exports.decode = function(byteArray, count, nBits) {
  const bits = new BitArray(count * nBits);

  // Expand the bytes into the bit stream, least-significant bit first.
  for (let byteIndex = 0; byteIndex < byteArray.length; byteIndex++) {
    const byte = byteArray[byteIndex];
    for (let bit = 0; bit < 8; bit++) {
      if (byte & (2 ** bit)) bits.on(byteIndex * 8 + bit);
    }
  }

  // Reassemble each value from its nBits consecutive bits.
  const values = [];
  for (let valueIndex = 0; valueIndex < count; valueIndex++) {
    const firstBit = valueIndex * nBits;
    let value = 0;
    for (let bit = 0; bit < nBits; bit++) {
      if (bits.get(firstBit + bit)) value += 2 ** bit;
    }
    values.push(value);
  }
  return values;
};
84 |
/**
 * Converts an array of character codes (e.g. the bytes returned by `encode`)
 * into a string with one character per element.
 *
 * The conversion runs in chunks: passing a very large array to
 * `String.fromCharCode.apply` in one go can exceed the engine's limit on the
 * number of function arguments and throw a RangeError.
 *
 * @param {ArrayLike<number>} arr - character codes
 * @returns {string} the resulting string ("" for an empty or missing array)
 */
module.exports.toString = function toString(arr) {
  if (arr == null) return "";

  const CHUNK_SIZE = 0x8000;
  let result = "";
  for (let start = 0; start < arr.length; start += CHUNK_SIZE) {
    // Array.prototype.slice works for plain arrays and typed arrays alike.
    const chunk = Array.prototype.slice.call(arr, start, start + CHUNK_SIZE);
    result += String.fromCharCode.apply(null, chunk);
  }
  return result;
};
88 |
/**
 * Converts a string into a Uint8Array holding the character code of each
 * iterated character (the first code unit of each code point).
 *
 * @param {string} str - string to convert
 * @returns {Uint8Array} one element per character
 */
module.exports.fromString = function fromString(str) {
  const toCharCode = character => character.charCodeAt(0);
  return Uint8Array.from(str, toCharCode);
};
92 |
--------------------------------------------------------------------------------
/src/shared/js/random.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
/**
 * Small seeded pseudo-random generator: every step multiplies the state by
 * 16807 and reduces it modulo 2147483647, so a given seed always yields the
 * same sequence.
 */
export class Random {
  /** @param {number} seed - starting state; non-positive remainders are shifted up */
  constructor(seed) {
    const remainder = seed % 2147483647;
    this._seed = remainder <= 0 ? remainder + 2147483646 : remainder;
  }

  /** Advances the generator and returns the new state. */
  next() {
    this._seed = (this._seed * 16807) % 2147483647;
    return this._seed;
  }

  /**
   * Advances the generator and scales the new state (1 to 2147483646
   * inclusive) down by one and divides by 2147483646. Both parameters are
   * accepted but unused.
   */
  nextFloat(opt_minOrMax, opt_max) {
    const state = this.next();
    return (state - 1) / 2147483646;
  }
}
29 |
--------------------------------------------------------------------------------
/src/shared/js/run-demo.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | import { visualize } from "./visualize";
17 | import { UMAP } from "umap-js";
18 | import { Point } from "./generators";
19 |
// Number of epochs the demo's UMAP instance is configured with by default.
export const N_EPOCHS = 400;

// Maps the current step count to the delay (in milliseconds) waited before
// the next frame is requested: 60ms at the start, falling to 0 by step 200.
// The domain and range have the same number of stops, and the scale is
// clamped so that steps past 200 yield 0 rather than a negative delay.
const timescale = d3
  .scaleLinear()
  .domain([0, 20, 50, 100, 200])
  .range([60, 30, 20, 10, 0])
  .clamp(true);
26 |
/**
 * Runs an animated UMAP projection of `points` and renders it repeatedly.
 *
 * Each iteration advances UMAP by a fixed chunk of steps, reports the step
 * count through `stepCb`, renders the current embedding with `visualize`,
 * and schedules the next iteration after a delay taken from `timescale`.
 * The first iteration runs immediately.
 *
 * @param {Array} points - input points; each provides `coords` and `color`
 * @param {*} canvas - render target handed to `visualize`
 * @param {Object} options - UMAP options; override the default `nEpochs`
 * @param {function(number)} stepCb - called with the step count after each chunk
 * @returns {Object} controller with `pause`, `unpause`, `paused` and `destroy`
 */
export function runDemo(points, canvas, options, stepCb) {
  const demo = {};
  const chunk = 5; // UMAP steps performed per rendered frame
  let paused = false;
  let step = 0;
  let frameId;
  let timeoutId;

  const data = points.map(point => point.coords);
  const umap = new UMAP({
    nEpochs: N_EPOCHS,
    ...options
  });
  umap.initializeFit(data);

  function iterate() {
    if (paused) return;

    for (let k = 0; k < chunk; k++) {
      umap.step();
      ++step;
    }

    // inform the caller about the current step
    stepCb(step);

    // render the current embedding, carrying over each point's color
    const solution = umap.getEmbedding().map((coords, i) => {
      return new Point(coords, points[i].color);
    });
    visualize(solution, canvas, "");

    // The callback may have paused the demo; scheduling another iteration
    // now would leave a stray timer behind for a later unpause.
    if (paused) return;

    timeoutId = setTimeout(function() {
      timeoutId = undefined;
      frameId = window.requestAnimationFrame(iterate);
    }, timescale(step));
  }

  demo.pause = function() {
    if (paused) return; // already paused
    paused = true;
    // Cancel both stages of the pending iteration. Leaving the timeout alive
    // would start a second, concurrent loop after a later unpause().
    clearTimeout(timeoutId);
    timeoutId = undefined;
    window.cancelAnimationFrame(frameId);
  };
  demo.unpause = function() {
    if (!paused) return; // already unpaused
    paused = false;
    iterate();
  };
  demo.paused = function() {
    return paused;
  };
  demo.destroy = function() {
    demo.pause();
  };
  iterate();
  return demo;
}
93 |
--------------------------------------------------------------------------------
/src/shared/js/sine.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | import { Random } from "./random";
17 |
// Fixed seed for the module-wide generator shared by every helper below.
// NOTE(review): presumably Random is deterministic for a given seed, which
// would make the generated point sets reproducible — confirm in ./random.
const SINE_SEED = 112;
const random = new Random(SINE_SEED);
/**
 * Clamps `n` so that it is at most `max` and at least `min`.
 * The upper bound is applied first, so `min` wins if the bounds cross.
 *
 * @param {number} n Value to clamp.
 * @param {number} max Upper bound.
 * @param {number} [min=0] Lower bound.
 * @returns {number} The clamped value.
 */
const constrain = (n, max, min = 0) => {
  const capped = Math.min(n, max);
  return Math.max(capped, min);
};
/**
 * Draws one value from the shared generator, scales it by `x` and floors it.
 * NOTE(review): the result is an integer in [0, x) only if nextFloat() yields
 * values in [0, 1) — confirm in ./random.
 *
 * @param {number} x Scale factor applied to the drawn value.
 * @returns {number} The floored, scaled draw.
 */
const getRandom = x => {
  const fraction = random.nextFloat();
  return Math.floor(x * fraction);
};
/**
 * Builds a point whose coordinates are drawn independently with getRandom.
 *
 * The original version read `width` and `height` as free variables that are
 * not defined anywhere in this module, so every call threw a ReferenceError.
 * They are now explicit parameters, matching getSinePoint's signature style.
 *
 * @param {number} width Scale passed to getRandom for the x coordinate.
 * @param {number} height Scale passed to getRandom for the y coordinate.
 * @returns {{x: number, y: number}} The generated point.
 */
const getRandomPoint = (width, height) => ({
  x: getRandom(width),
  y: getRandom(height)
});
/**
 * Generates one noisy point along a sine curve spanning three full periods
 * across `width`, centered vertically at `height / 2` with amplitude
 * `height / 4`.
 *
 * A vertical jitter drawn via getRandom(height / 5) is added with a sign
 * chosen by a further draw, and both coordinates are finally clamped so they
 * lie between `pointRadius` and the dimension minus `pointRadius`.
 *
 * The three draws from the shared generator happen in a fixed order
 * (x, jitter magnitude, jitter sign).
 *
 * @param {number} width Horizontal extent.
 * @param {number} height Vertical extent.
 * @param {number} pointRadius Margin kept from each edge.
 * @returns {{x: number, y: number}} The generated point.
 */
const getSinePoint = (width, height, pointRadius) => {
  const x = getRandom(width);
  const phase = (x / width) * 6 * Math.PI;
  const wave = (Math.sin(phase) * height) / 4 + height / 2;
  const jitter = getRandom(height / 5);
  const direction = random.nextFloat() > 0.5 ? 1 : -1;
  const y = wave + jitter * direction;

  const clampedX = constrain(x, width - pointRadius, pointRadius);
  const clampedY = constrain(y, height - pointRadius, pointRadius);
  return { x: clampedX, y: clampedY };
};
34 |
/**
 * Generates `nPoints` sine-shaped points inside the given dimensions.
 *
 * @param {number} nPoints Number of points to generate.
 * @param {{width: number, height: number}} dimensions Extent of the area.
 * @param {number} [pointRadius=5] Margin forwarded to getSinePoint.
 * @returns {Array<Object>} The points from getSinePoint, each extended with
 *     an `index` property holding its position in the returned array.
 */
export function makeSine(nPoints, dimensions, pointRadius = 5) {
  const { width, height } = dimensions;
  const result = [];

  let index = 0;
  while (index < nPoints) {
    const generated = getSinePoint(width, height, pointRadius);
    result.push({ ...generated, index });
    index += 1;
  }

  return result;
}
46 |
--------------------------------------------------------------------------------
/src/shared/js/test-binary.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | const { encode, decode } = require("./parse-binary");
17 |
// Round-trip smoke test for the bit-packing helpers: encode COUNT random
// N_BITS-wide integers, decode them again and print whether the decoded
// values match the input.
const N_BITS = 10;
const COUNT = 1000;

const numbers = [];
for (let i = 0; i < COUNT; i++) {
  numbers.push(Math.floor(Math.random() * 2 ** N_BITS));
}

const [encoded] = encode(numbers, N_BITS);
const [decoded] = decode(encoded, COUNT, N_BITS);

// Compare over the INPUT length. The previous loop was bounded by
// `encoded.length`, i.e. the size of the packed payload, which is unrelated
// to the number of values and could leave mismatches undetected.
let isEqual = numbers.length === decoded.length;
for (let i = 0; i < numbers.length; i++) {
  if (numbers[i] !== decoded[i]) {
    isEqual = false;
  }
}
console.log(isEqual);
36 |
--------------------------------------------------------------------------------
/src/shared/js/tsne.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
/**
 * Minimal assertion helper: does nothing when `condition` is truthy,
 * otherwise throws `message` (or the string "Assertion failed" when no
 * message is given). Note that the thrown value is the message itself, not
 * an Error object.
 *
 * @param {*} condition Value expected to be truthy.
 * @param {string=} message Value thrown on failure.
 */
var assert = function(condition, message) {
  if (condition) {
    return;
  }
  throw message || "Assertion failed";
};
22 |
/**
 * Reads an option with a fallback: returns `opt[field]` when `field` is an
 * own property of `opt` (even if its value is undefined), else `defaultval`.
 *
 * The own-property check goes through Object.prototype so that it also works
 * for prototype-less option bags (Object.create(null)) and for objects that
 * happen to shadow `hasOwnProperty`; calling `opt.hasOwnProperty` directly
 * threw a TypeError in both cases.
 *
 * @param {!Object} opt Options object.
 * @param {string} field Option name.
 * @param {*} defaultval Value returned when the option is absent.
 * @returns {*} The option value or the default.
 */
var getopt = function(opt, field, defaultval) {
  return Object.prototype.hasOwnProperty.call(opt, field)
    ? opt[field]
    : defaultval;
};
31 |
// State shared between consecutive gaussRandom calls: every successful
// sampling round yields two values, and the second one is parked here until
// the next call asks for it.
var return_v = false;
var v_val = 0.0;

/**
 * Returns a zero-mean, unit-standard-deviation random number (polar form of
 * the Box-Muller transform).
 *
 * Pairs (u, v) are drawn from `rng` until they fall strictly inside the unit
 * circle (excluding the origin). One transformed value is returned right
 * away, the other is cached and handed out by the following call without
 * consulting `rng`.
 *
 * @param {function(): number} rng Source of uniform numbers; the mapping
 *     `2 * rng() - 1` assumes values in [0, 1).
 * @returns {number} A normally distributed sample.
 */
var gaussRandom = function(rng) {
  if (return_v) {
    return_v = false;
    return v_val;
  }

  var u;
  var v;
  var r;
  do {
    u = 2 * rng() - 1;
    v = 2 * rng() - 1;
    r = u * u + v * v;
  } while (r === 0 || r > 1); // rejected: redraw both coordinates

  var c = Math.sqrt((-2 * Math.log(r)) / r);
  v_val = v * c; // keep the twin sample for the next call
  return_v = true;
  return u * c;
};
50 |
/**
 * Returns a normally distributed random number with mean `mu` and standard
 * deviation `std`, obtained by shifting and scaling one gaussRandom draw.
 *
 * @param {function(): number} rng Uniform source forwarded to gaussRandom.
 * @param {number} mu Mean.
 * @param {number} std Standard deviation.
 * @returns {number} The sample.
 */
var randn = function(rng, mu, std) {
  var unit = gaussRandom(rng);
  return mu + unit * std;
};
55 |
/**
 * Utility that creates a contiguous vector of `n` zeros.
 *
 * Returns an empty plain array when `n` is undefined or NaN. Otherwise it
 * returns a Float64Array when typed arrays are available (they are faster),
 * falling back to a zero-filled plain Array in environments without
 * ArrayBuffer support.
 *
 * @param {number=} n Requested length.
 * @returns {!Float64Array|!Array<number>} The zero vector.
 */
var zeros = function(n) {
  var sizeMissing = typeof n === "undefined" || isNaN(n);
  if (sizeMissing) {
    return [];
  }

  if (typeof ArrayBuffer !== "undefined") {
    return new Float64Array(n);
  }

  // lacking browser support for typed arrays
  var plain = new Array(n);
  for (var k = 0; k < n; k++) {
    plain[k] = 0;
  }
  return plain;
};
72 |
/**
 * Builds an n-by-d array of arrays whose entries are independent randn draws
 * with mean 0 and standard deviation 1e-4, filled row by row.
 *
 * @param {number} n Number of rows.
 * @param {number} d Number of columns.
 * @param {function(): number} rng Uniform source forwarded to randn.
 * @returns {!Array<!Array<number>>} The random matrix.
 */
var randn2d = function(n, d, rng) {
  var rows = [];
  var r = 0;
  while (r < n) {
    var row = [];
    var c = 0;
    while (c < d) {
      row.push(randn(rng, 0.0, 1e-4));
      c++;
    }
    rows.push(row);
    r++;
  }
  return rows;
};
85 |
/**
 * Builds an n-by-d array of arrays in which every entry is `s`.
 * Each row is a separate array, so rows can be mutated independently.
 *
 * @param {number} n Number of rows.
 * @param {number} d Number of columns.
 * @param {*} s Fill value.
 * @returns {!Array<!Array<*>>} The filled matrix.
 */
var arrayofs = function(n, d, s) {
  var rows = [];
  var r = 0;
  while (r < n) {
    var row = [];
    var c = 0;
    while (c < d) {
      row.push(s);
      c++;
    }
    rows.push(row);
    r++;
  }
  return rows;
};
98 |
/**
 * Computes the SQUARED Euclidean (L2) distance between two vectors: the sum
 * of squared coordinate differences, without taking the square root.
 * The number of coordinates compared is `x1.length`.
 *
 * @param {!Array<number>} x1 First vector.
 * @param {!Array<number>} x2 Second vector.
 * @returns {number} Sum of squared differences.
 */
var L2 = function(x1, x2) {
  var total = 0;
  for (var k = 0, len = x1.length; k < len; k++) {
    var delta = x1[k] - x2[k];
    total += delta * delta;
  }
  return total;
};
110 |
/**
 * Computes the pairwise L2() values between all vectors in X.
 *
 * The result is a flattened, row-major N*N matrix (N = X.length) allocated
 * with zeros(); entry [i * N + j] holds L2(X[i], X[j]). Only the upper
 * triangle is computed and mirrored, so the diagonal stays at zero.
 *
 * @param {!Array<!Array<number>>} X Input vectors.
 * @returns {!Float64Array|!Array<number>} Flattened symmetric matrix.
 */
var xtod = function(X) {
  var N = X.length;
  var dist = zeros(N * N); // allocate contiguous array
  for (var row = 0; row < N; row++) {
    var rowVec = X[row];
    for (var col = row + 1; col < N; col++) {
      var value = L2(rowVec, X[col]);
      dist[row * N + col] = value;
      dist[col * N + row] = value;
    }
  }
  return dist;
};
124 |
125 | // compute (p_{i|j} + p_{j|i})/(2n)
126 | var d2p = function(D, perplexity, tol) {
127 | var Nf = Math.sqrt(D.length); // this better be an integer
128 | var N = Math.floor(Nf);
129 | assert(N === Nf, "D should have square number of elements.");
130 | var Htarget = Math.log(perplexity); // target entropy of distribution
131 | var P = zeros(N * N); // temporary probability matrix
132 |
133 | var prow = zeros(N); // a temporary storage compartment
134 | for (var i = 0; i < N; i++) {
135 | var betamin = -Infinity;
136 | var betamax = Infinity;
137 | var beta = 1; // initial value of precision
138 | var done = false;
139 | var maxtries = 50;
140 |
141 | // perform binary search to find a suitable precision beta
142 | // so that the entropy of the distribution is appropriate
143 | var num = 0;
144 | while (!done) {
145 | //debugger;
146 |
147 | // compute entropy and kernel row with beta precision
148 | var psum = 0.0;
149 | for (var j = 0; j < N; j++) {
150 | var pj = Math.exp(-D[i * N + j] * beta);
151 | if (i === j) {
152 | pj = 0;
153 | } // we dont care about diagonals
154 | prow[j] = pj;
155 | psum += pj;
156 | }
157 | // normalize p and compute entropy
158 | var Hhere = 0.0;
159 | for (var j = 0; j < N; j++) {
160 | var pj = prow[j] / psum;
161 | prow[j] = pj;
162 | if (pj > 1e-7) Hhere -= pj * Math.log(pj);
163 | }
164 |
165 | // adjust beta based on result
166 | if (Hhere > Htarget) {
167 | // entropy was too high (distribution too diffuse)
168 | // so we need to increase the precision for more peaky distribution
169 | betamin = beta; // move up the bounds
170 | if (betamax === Infinity) {
171 | beta = beta * 2;
172 | } else {
173 | beta = (beta + betamax) / 2;
174 | }
175 | } else {
176 | // converse case. make distrubtion less peaky
177 | betamax = beta;
178 | if (betamin === -Infinity) {
179 | beta = beta / 2;
180 | } else {
181 | beta = (beta + betamin) / 2;
182 | }
183 | }
184 |
185 | // stopping conditions: too many tries or got a good precision
186 | num++;
187 | if (Math.abs(Hhere - Htarget) < tol) {
188 | done = true;
189 | }
190 | if (num >= maxtries) {
191 | done = true;
192 | }
193 | }
194 |
195 | // copy over the final prow to P at row i
196 | for (var j = 0; j < N; j++) {
197 | P[i * N + j] = prow[j];
198 | }
199 | } // end loop over examples i
200 |
201 | // symmetrize P and normalize it to sum to 1 over all ij
202 | var Pout = zeros(N * N);
203 | var N2 = N * 2;
204 | for (var i = 0; i < N; i++) {
205 | for (var j = 0; j < N; j++) {
206 | Pout[i * N + j] = Math.max((P[i * N + j] + P[j * N + i]) / N2, 1e-100);
207 | }
208 | }
209 |
210 | return Pout;
211 | };
212 |
/**
 * Returns 1 for positive numbers, -1 for negative numbers and 0 otherwise
 * (which includes both zeros and NaN).
 *
 * @param {number} x Value to inspect.
 * @returns {number} -1, 0 or 1.
 */
function sign(x) {
  if (x > 0) {
    return 1;
  }
  if (x < 0) {
    return -1;
  }
  return 0;
}
217 |
/**
 * t-SNE optimizer. Feed it data with initDataRaw() or initDataDist(), call
 * step() repeatedly, and read the embedding with getSolution().
 *
 * Recognized options (own properties of `opt`):
 *   perplexity (default 30), dim (output dimensionality, default 2),
 *   epsilon (learning rate, default 10), rng (uniform generator, default
 *   Math.random).
 *
 * @param {?Object} opt Options.
 * @constructor
 */
export const TSNE = function(opt) {
  opt = opt || {};
  this.perplexity = getopt(opt, "perplexity", 30);
  this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
  this.epsilon = getopt(opt, "epsilon", 10); // learning rate
  this.rng = getopt(opt, "rng", Math.random);

  this.iter = 0;
};

TSNE.prototype = {
  /**
   * Takes a set of high-dimensional points, computes their pairwise
   * distances with xtod() and derives the probability matrix P with d2p().
   * Resets the solution afterwards.
   *
   * @param {!Array<!Array<number>>} X Non-empty array of non-empty vectors.
   */
  initDataRaw: function(X) {
    var N = X.length;
    // Validate N before touching X[0]; otherwise an empty X fails with a
    // TypeError on `X[0].length` and the message below is never shown.
    assert(N > 0, " X is empty? You must have some data!");
    var D = X[0].length;
    assert(D > 0, " X[0] is empty? Where is the data?");
    var dists = xtod(X); // flattened pairwise distance matrix
    this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
    this.N = N; // back up the size of the dataset
    this.initSolution(); // refresh this
  },

  /**
   * Takes a flattened distance matrix and creates matrix P from it.
   *
   * @param {!Array<number>} D Distances, provided as an array of size N^2.
   */
  initDataDist: function(D) {
    var N = Math.sqrt(D.length);
    this.P = d2p(D, this.perplexity, 1e-4);
    this.N = N;
    this.initSolution(); // refresh this
  },

  /**
   * (Re)initializes the solution to small random values and resets the
   * per-coordinate gains, the momentum accumulator and the iteration count.
   */
  initSolution: function() {
    this.Y = randn2d(this.N, this.dim, this.rng); // the solution
    // step gains, to accelerate progress in unchanging directions
    this.gains = arrayofs(this.N, this.dim, 1.0);
    this.ystep = arrayofs(this.N, this.dim, 0.0); // momentum accumulator
    this.iter = 0;
  },

  /**
   * Returns the current solution. This is the live internal array, not a
   * copy, so later step() calls modify it in place.
   *
   * @returns {!Array<!Array<number>>} N rows of `dim` coordinates.
   */
  getSolution: function() {
    return this.Y;
  },

  /**
   * Performs a single step of optimization to improve the embedding:
   * gradient descent with per-coordinate adaptive gains and momentum,
   * followed by re-centering of the solution to zero mean.
   *
   * @returns {number} The cost evaluated before this step's update.
   */
  step: function() {
    this.iter += 1;
    var N = this.N;

    var cg = this.costGrad(this.Y); // evaluate gradient
    var cost = cg.cost;
    var grad = cg.grad;

    // momentum is lower during the first 250 iterations
    var momval = this.iter < 250 ? 0.5 : 0.8;

    // perform gradient step
    var ymean = zeros(this.dim);
    for (var i = 0; i < N; i++) {
      for (var d = 0; d < this.dim; d++) {
        var gid = grad[i][d];
        var sid = this.ystep[i][d];
        var gainid = this.gains[i][d];

        // compute gain update: shrink when gradient and previous step share
        // a sign, grow otherwise
        var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
        if (newgain < 0.01) newgain = 0.01; // clamp
        this.gains[i][d] = newgain; // store for next turn

        // compute momentum step direction
        var newsid = momval * sid - this.epsilon * newgain * gid;
        this.ystep[i][d] = newsid; // remember the step we took

        // step!
        this.Y[i][d] += newsid;

        ymean[d] += this.Y[i][d]; // accumulate mean so we can center later
      }
    }

    // reproject Y to be zero mean
    for (var i = 0; i < N; i++) {
      for (var d = 0; d < this.dim; d++) {
        this.Y[i][d] -= ymean[d] / N;
      }
    }

    return cost; // return current cost
  },

  /**
   * For debugging: logs, for every coordinate, the analytic gradient next
   * to a central-difference numerical estimate. The solution is restored
   * after each probe.
   */
  debugGrad: function() {
    var N = this.N;

    var cg = this.costGrad(this.Y); // evaluate gradient
    var grad = cg.grad;

    var e = 1e-5;
    for (var i = 0; i < N; i++) {
      for (var d = 0; d < this.dim; d++) {
        var yold = this.Y[i][d];

        this.Y[i][d] = yold + e;
        var cg0 = this.costGrad(this.Y);

        this.Y[i][d] = yold - e;
        var cg1 = this.costGrad(this.Y);
        var analytic = grad[i][d];
        var numerical = (cg0.cost - cg1.cost) / (2 * e);
        console.log(
          i +
            "," +
            d +
            ": gradcheck analytic: " +
            analytic +
            " vs. numerical: " +
            numerical
        );

        this.Y[i][d] = yold;
      }
    }
  },

  /**
   * Returns cost and gradient, given an arrangement.
   *
   * @param {!Array<!Array<number>>} Y Candidate embedding (N rows).
   * @returns {{cost: number, grad: !Array<!Array<number>>}} The cost and the
   *     per-point gradient.
   */
  costGrad: function(Y) {
    var N = this.N;
    var dim = this.dim; // dim of output space
    var P = this.P;

    // P is multiplied by 4 in the gradient during the first 100 iterations;
    // a trick that helps with local optima
    var pmul = this.iter < 100 ? 4 : 1;

    // compute current Q distribution, unnormalized first
    var Qu = zeros(N * N);
    var qsum = 0.0;
    for (var i = 0; i < N; i++) {
      for (var j = i + 1; j < N; j++) {
        var dsum = 0.0;
        for (var d = 0; d < dim; d++) {
          var dhere = Y[i][d] - Y[j][d];
          dsum += dhere * dhere;
        }
        var qu = 1.0 / (1.0 + dsum); // Student t-distribution
        Qu[i * N + j] = qu;
        Qu[j * N + i] = qu;
        qsum += 2 * qu;
      }
    }

    var cost = 0.0;
    var grad = [];
    for (var i = 0; i < N; i++) {
      var gsum = new Array(dim); // init grad for point i
      for (var d = 0; d < dim; d++) {
        gsum[d] = 0.0;
      }
      for (var j = 0; j < N; j++) {
        // normalized Q, floored so that the logarithm stays finite
        var normedProb = Math.max(Qu[i * N + j] / qsum, 1e-100);
        cost += -P[i * N + j] * Math.log(normedProb); // accumulate cost
        var premult = 4 * (pmul * P[i * N + j] - normedProb) * Qu[i * N + j];
        for (var d = 0; d < dim; d++) {
          gsum[d] += premult * (Y[i][d] - Y[j][d]);
        }
      }
      grad.push(gsum);
    }

    return { cost: cost, grad: grad };
  }
};
399 |
--------------------------------------------------------------------------------
/src/shared/js/visualize.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
// NOTE(review): not referenced anywhere in the visible part of this module.
const pointsCache = new Map();

/**
 * Runs a demo's generator and returns whatever it produces.
 *
 * @param {!Object} demo Demo description with a `generator` function and an
 *     `options` array whose entries carry a `start` value.
 * @param {Array=} params Generator arguments; when falsy, the `start` value
 *     of every option is used instead.
 * @returns {*} The generator's return value.
 */
export function getPoints(demo, params) {
  const args = params || demo.options.map(({ start }) => start);
  return demo.generator.apply(null, args);
}
24 |
// NOTE(review): not referenced anywhere in the visible part of this module.
const overrideCache = new Map();

/**
 * Runs a demo's optional `previewOverride` function.
 *
 * @param {!Object} demo Demo description; `options` entries carry a `start`
 *     value, `previewOverride` is optional.
 * @param {Array=} params Arguments for the override; when falsy, the `start`
 *     value of every option is used instead.
 * @returns {*} The override's return value, or null when the demo defines
 *     no override.
 */
export function getDemoPreviewOverride(demo, params) {
  const args = params || demo.options.map(({ start }) => start);
  return demo.previewOverride
    ? demo.previewOverride.apply(null, args)
    : null;
}
36 |
/**
 * Draws a filled and stroked circle using the context's current fill and
 * stroke styles.
 * TODO: replace with canvas blitting for web rendering
 *
 * @param {!Object} g 2D drawing context.
 * @param {number} x Center x.
 * @param {number} y Center y.
 * @param {number} r Radius.
 */
function circle(g, x, y, r) {
  const fullTurn = 2 * Math.PI;
  g.beginPath();
  g.arc(x, y, r, 0, fullTurn);
  g.fill();
  g.stroke();
}
45 |
/**
 * Visualize the given points with the given message.
 *
 * The canvas is cleared to white, the points are scaled uniformly so that
 * their larger x/y extent fills 90% of the smaller canvas side, centered,
 * and drawn as circles in each point's `color` (y axis pointing up).
 *
 * When the first point has more than two coordinates and `no3d` is not set,
 * the third coordinate is used as a depth cue: points are drawn in ascending
 * z order with a radius between 2 and 10. Otherwise every point has radius 4
 * and its pixel position is written back to `p.px` / `p.py`.
 *
 * Robustness: an empty `points` array only clears the canvas (and draws the
 * message) instead of throwing, and a point set with zero extent is drawn at
 * the canvas center instead of producing NaN coordinates from an infinite
 * scale.
 *
 * Relies on the global `d3` (extent, scaleLinear, ascending).
 *
 * @param {!Array<{coords: !Array<number>, color: string}>} points Points.
 * @param {!HTMLCanvasElement} canvas Target canvas.
 * @param {string} message Optional caption; skipped when falsy.
 * @param {boolean=} no3d If set, ignore the 3D cue for size.
 */
export function visualize(points, canvas, message, no3d) {
  var width = canvas.width;
  var height = canvas.height;
  var g = canvas.getContext("2d");
  g.fillStyle = "white";
  g.fillRect(0, 0, width, height);

  var n = points.length;
  if (n > 0) {
    var xExtent = d3.extent(points, function(p) {
      return p.coords[0];
    });
    var yExtent = d3.extent(points, function(p) {
      return p.coords[1];
    });

    var centerX = (xExtent[0] + xExtent[1]) / 2;
    var centerY = (yExtent[0] + yExtent[1]) / 2;
    var span = Math.max(xExtent[1] - xExtent[0], yExtent[1] - yExtent[0]);
    // Leave a little margin (0.9). A zero span (single point, or all points
    // identical) would give an infinite scale, so fall back to 1.
    var scale = span > 0 ? (Math.min(width, height) / span) * 0.9 : 1;

    g.strokeStyle = "rgba(255,255,255,.5)";
    var is3d = !no3d && points[0].coords.length > 2;
    var index = [];
    var zScale = null;
    if (is3d) {
      var zExtent = d3.extent(points, function(p) {
        return p.coords[2];
      });
      zScale = d3
        .scaleLinear()
        .domain(zExtent)
        .range([2, 10]);

      // draw in ascending z order
      for (var i = 0; i < n; i++) {
        index[i] = i;
      }
      index.sort(function(a, b) {
        return d3.ascending(points[a].coords[2], points[b].coords[2]);
      });
    }

    for (var i = 0; i < n; i++) {
      var p = is3d ? points[index[i]] : points[i];
      g.fillStyle = p.color;
      var x = (p.coords[0] - centerX) * scale + width / 2;
      var y = -(p.coords[1] - centerY) * scale + height / 2;
      var r = is3d ? zScale(p.coords[2]) : 4;
      circle(g, x, y, r);

      if (!is3d) {
        // remember the pixel position on the point itself
        p.px = x;
        p.py = y;
      }
    }
  }

  if (message) {
    g.fillStyle = "#000";
    g.font = "24pt Lato";
    g.fillText(message, 8, 34);
  }
}
107 |
--------------------------------------------------------------------------------
/src/supplement.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | import Supplement from "./article/Supplement.svx";
17 | import "./global.css";
18 |
// Instantiate the Supplement component imported above with document.body as
// its target and expose the instance as this module's default export.
const supplement = new Supplement({
  target: document.body
});

export default supplement;
22 |
--------------------------------------------------------------------------------
/src/visualizations/cech_visualization/Figure.svelte:
--------------------------------------------------------------------------------
1 |
20 |
21 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/src/visualizations/cech_visualization/components/Visualization.svelte:
--------------------------------------------------------------------------------
1 |
188 |
189 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 | refresh
248 |
249 |
250 |
--------------------------------------------------------------------------------
/src/visualizations/cech_visualization/main.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | import Figure from "./Figure.svelte";
17 |
// Instantiate the Figure component imported from ./Figure.svelte with
// document.body as its target and expose the instance as the default export.
const figure = new Figure({
  target: document.body
});

export default figure;
21 |
--------------------------------------------------------------------------------
/src/visualizations/fmnist_visualization/Figure.svelte:
--------------------------------------------------------------------------------
1 |
23 |
24 |
27 |
28 |
29 | {#if sideBySide}
30 |
31 | {:else}
32 |
33 | {/if}
34 |
35 |
--------------------------------------------------------------------------------
/src/visualizations/fmnist_visualization/components/Visualization.svelte:
--------------------------------------------------------------------------------
1 |
159 |
160 |
232 |
233 |
234 |
235 |
236 |
239 | UMAP
240 |
241 |
244 | t-SNE
245 |
246 |
247 |
248 | {#if isLoaded}
249 |
253 | All
254 |
255 | {#each data.labelNames as labelName, labelIndex}
256 |
302 |
--------------------------------------------------------------------------------
/src/visualizations/fmnist_visualization/js/load-data.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
// Display order of the labels, chosen to group similar categories.
// Position = new label index, value = original label index.
const reorderedLabels = [0, 6, 2, 4, 3, 5, 7, 9, 1, 8];

/**
 * Rewrites a loaded dataset in place so that it uses the reordered label
 * indices: every entry of `data.labels` is mapped from its original index to
 * its new one, and `data.labelNames` is permuted to match.
 *
 * @param {{labels: !Array<number>, labelNames: !Array<string>}} data Dataset.
 * @returns {!Object} The same `data` object, mutated.
 */
function process(data) {
  // original label index -> position in the reordered list
  const newIndexByOriginal = {};
  for (const [newIndex, oldIndex] of reorderedLabels.entries()) {
    newIndexByOriginal[oldIndex] = newIndex;
  }
  data.labels = data.labels.map(label => newIndexByOriginal[label]);

  // snapshot the names before overwriting the property
  const namesBefore = [...data.labelNames];
  data.labelNames = reorderedLabels.map(oldIndex => namesBefore[oldIndex]);
  return data;
}
34 |
/**
 * Fetches `fmnist_tsne_vs_umap.json` (relative to the current page), parses
 * it as JSON and returns the result of process() applied to it.
 *
 * @returns {!Promise<!Object>} The processed dataset.
 * @throws {Error} When the server answers with a non-2xx status. Without
 *     this check a 404/500 body would be handed to the JSON parser and
 *     surface as an unrelated syntax error.
 */
export async function loadData() {
  const res = await fetch(`fmnist_tsne_vs_umap.json`);
  if (!res.ok) {
    throw new Error(
      `Failed to load fmnist_tsne_vs_umap.json: ${res.status} ${res.statusText}`
    );
  }
  const json = await res.json();
  return process(json);
}
40 |
--------------------------------------------------------------------------------
/src/visualizations/fmnist_visualization/main.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | import Figure from "./Figure.svelte";
17 |
// Instantiate the Figure component imported from ./Figure.svelte with
// document.body as its target and expose the instance as the default export.
const figure = new Figure({
  target: document.body
});

export default figure;
21 |
--------------------------------------------------------------------------------
/src/visualizations/hyperparameters_visualization/Figure.svelte:
--------------------------------------------------------------------------------
1 |
20 |
21 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/src/visualizations/hyperparameters_visualization/components/Preview.svelte:
--------------------------------------------------------------------------------
1 |
67 |
68 |
146 |
147 |
306 |
--------------------------------------------------------------------------------
/src/visualizations/toy_visualization/main.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 Google LLC All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | import Figure from "./Figure.svelte";
17 |
// Instantiate the Figure component imported from ./Figure.svelte with
// document.body as its target and expose the instance as the default export.
const figure = new Figure({
  target: document.body
});

export default figure;
21 |
--------------------------------------------------------------------------------