├── .github
└── workflows
│ └── npm-publish.yaml
├── .gitignore
├── LICENSE
├── README.md
├── babel.config.js
├── build
└── hclust.min.js
├── package.json
├── scripts
├── build.sh
└── test.sh
├── src
└── hclust.js
└── test
├── chrispolis.hcluster.min.js
├── dataset-1.json
├── dataset-2.json
└── test.js
/.github/workflows/npm-publish.yaml:
--------------------------------------------------------------------------------
1 | name: npm-publish
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | npm-publish:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Checkout code
13 | uses: actions/checkout@v2
14 | - name: Set up Node
15 | uses: actions/setup-node@v1
16 | with:
17 | node-version: '12.x'
18 | - name: Publish
19 | if: github.ref == 'refs/heads/master'
20 | uses: mikeal/merge-release@master
21 | env:
22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
23 | NPM_AUTH_TOKEN: ${{ secrets.NPM_AUTH_TOKEN }}
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | yarn.lock
3 | yarn-error.log
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Greene Laboratory
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### hclust
2 | [Agglomerative hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) in JavaScript
3 |
4 | Inspired by the MIT-licensed [hcluster.js](https://github.com/cmpolis/hcluster.js) by [@ChrisPolis](https://twitter.com/chrispolis).
5 | [See the comparison of the two below](#comparison-with-hclusterjs).
6 |
7 | ---
8 |
9 | ### Usage
10 |
11 | #### Browser
12 |
13 | ```html
14 |
15 |
20 | ```
21 |
22 | #### Node
23 |
24 | `npm install @greenelab/hclust`
25 |
26 | or
27 |
28 | `yarn add @greenelab/hclust`
29 |
30 | then
31 |
32 | ```javascript
33 | import { clusterData } from '@greenelab/hclust';
34 | import { euclideanDistance } from '@greenelab/hclust';
35 | import { averageDistance } from '@greenelab/hclust';
36 | ```
37 |
38 | ---
39 |
40 | ### `clusterData({ data, key, distance, linkage, onProgress })`
41 |
42 | #### Parameters
43 |
44 | **`data`**
45 |
46 | The data you want to cluster, in the format:
47 |
48 | ```javascript
49 | [
50 | ...
51 | [ ... 1, 2, 3 ...],
52 | [ ... 1, 2, 3 ...],
53 | [ ... 1, 2, 3 ...],
54 | ...
55 | ]
56 | ```
57 |
58 | or if `key` parameter is specified:
59 |
60 | ```javascript
61 | [
62 | ...
63 | { someKey: [ ... 1, 2, 3 ...] },
64 | { someKey: [ ... 1, 2, 3 ...] },
65 | { someKey: [ ... 1, 2, 3 ...] },
66 | ...
67 | ]
68 | ```
69 |
70 | The entries in the outer array can be considered the `rows` and the entries within each `row` array can be considered the `cols`.
71 | Each `row` should have the same number of `cols`.
72 |
73 | *Default value:* `[]`
74 |
75 | **`key`**
76 |
77 | A `string` key to specify which values to extract from the `data` array.
78 | If omitted, `data` is assumed to be an array of arrays.
79 | If specified, `data` is assumed to be array of objects, each with a key that contains the values for that `row`.
80 |
81 | *Default value:* `''`
82 |
83 | **`distance`**
84 |
85 | A function to calculate the distance between two equal-dimension vectors, used in calculating the distance matrix, in the format:
86 |
87 | ```javascript
88 | function (arrayA, arrayB) { return someNumber; }
89 | ```
90 |
91 | The function receives two equal-length arrays of numbers (ints or floats) and should return a number (int or float).
92 |
93 | *Default value:* `euclideanDistance` from this `hclust` package
94 |
95 | **`linkage`**
96 |
97 | A function to calculate the distance between pairs of clusters based on a distance matrix, used in determining linkage criterion, in the format:
98 |
99 | ```javascript
100 | function (arrayA, arrayB, distanceMatrix) { return someNumber; }
101 | ```
102 |
103 | The function receives two sets of indexes and the distance matrix computed between each datum and every other datum.
104 | The function should return a number (int or float).
105 |
106 | *Default value:* `averageDistance` from this `hclust` package
107 |
108 | **`onProgress`**
109 |
110 | A function that is called several times throughout clustering, and is provided the current progress through the clustering, in the format:
111 |
112 | ```javascript
113 | function (progress) { }
114 | ```
115 |
116 | The function receives the percent progress between `0` and `1`.
117 |
118 | *Default value:* an internal function that `console.log`'s the progress
119 |
120 | **Note:** [`postMessage`](https://developer.mozilla.org/en-US/docs/Web/API/Worker/postMessage) is called in the same places as `onProgress`, if the script is running as a [web worker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers).
121 |
122 | #### Returns
123 |
124 | ```javascript
125 | const { clusters, distances, order, clustersGivenK } = clusterData(...);
126 | ```
127 |
128 | **`clusters`**
129 |
130 | The resulting cluster tree, in the format:
131 |
132 | ```javascript
133 | {
134 | indexes: [ ... Number, Number, Number ... ],
135 | height: Number,
136 | children: [ ... {}, {}, {} ... ]
137 | }
138 |
139 | ```
140 |
141 | **`distances`**
142 |
143 | The computed distance matrix, in the format:
144 |
145 | ```javascript
146 | [
147 | ...
148 | [ ... Number, Number, Number ...],
149 | [ ... Number, Number, Number ...],
150 | [ ... Number, Number, Number ...]
151 | ...
152 | ]
153 | ```
154 |
155 | **`order`**
156 |
157 | The new order of the data, in terms of original data array indexes, in the format:
158 |
159 | ```javascript
160 | [ ... Number, Number, Number ... ]
161 | ```
162 |
163 | Equivalent to `clusters.indexes` and `clustersGivenK[1][0]`.
164 |
165 | **`clustersGivenK`**
166 |
167 | A list of tree slices in terms of original data array indexes, where index = K, in the format:
168 |
169 | ```javascript
170 | [
171 | [], // K = 0
172 | [ [] ], // K = 1
173 | [ [], [] ], // K = 2
174 | [ [], [], [] ], // K = 3
175 | [ [], [], [], [] ], // K = 4
176 | [ [], [], [], [], [] ] // K = 5
177 | ...
178 | ]
179 | ```
180 |
181 | ---
182 |
183 | ### `euclideanDistance(arrayA, arrayB)`
184 |
185 | Calculates the [euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) between two equal-dimension vectors.
186 |
187 | ---
188 |
189 | ### `averageDistance(arrayA, arrayB, distanceMatrix)`
190 |
191 | Calculates the average distance between pairs of clusters based on a distance matrix.
192 |
193 | ---
194 |
195 | ### Comparison with [hcluster.js](https://github.com/cmpolis/hcluster.js)
196 |
197 | - This package does not duplicate items from the original dataset in the results.
198 | Results are given in terms of indexes, either with respect to the original dataset or the distance matrix.
199 | - This package uses more modern JavaScript syntaxes and practices to make the code cleaner and simpler.
200 | - This package provides an `onProgress` callback and calls `postMessage` for use in [web workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers).
201 | Because clustering can take a long time with large data sets, you may want to run it as a web worker so the browser doesn't freeze for a long time, and you may need a callback so you can give users visual feedback on its progress.
202 | - This package makes some performance optimizations, such as removing unnecessary loops through big sets.
203 | It has been tested on various OS's (Windows, Mac, Linux, iOS, Android), devices (desktop, laptop, mobile), browsers (Chrome, Firefox, Safari), contexts (main thread, web worker), and hosting locations (local, online).
204 | The results vary widely, and are likely sensitive to the specifics of hardware, cpu cores, browser implementation, etc.
205 | But in general, this package is more performant than `hcluster.js`, to varying degrees, and is always at least as performant on average.
206 | Chrome seems to see the most performance gains (up to 10x, when the row number is high), while Firefox seems to see no gains.
207 | - This package does not touch the input data object, whereas the `hcluster.js` package does.
208 | D3 often expects you to mutate data objects directly, which is now typically considered bad practice in JavaScript.
209 | Instead, this package returns the useful data from the clustering algorithm (including the distance matrix), and allows you to mutate or not mutate the data object depending on your needs.
210 | - This package leaves out the `minDistance` or `maxDistance` functions that are built into `hcluster.js`, because -- per [this reference](https://onlinelibrary.wiley.com/doi/abs/10.1002/9780470316801.ch5) -- they are not as effective as `averageDistance`.
211 |
212 | ---
213 |
214 | ### Making changes to the library
215 |
216 | 1. [Install Node](https://nodejs.org/en/download/)
217 | 2. [Install Yarn](https://classic.yarnpkg.com/en/docs/install)
218 | 3. Clone this repo and navigate to it in your command terminal
219 | 4. Run `yarn install` to install this package's dependencies
220 | 5. Make desired changes to `./src/hclust.js`
221 | 6. Run `yarn test` to automatically rebuild the library and run test suite
222 | 7. Run `yarn build` to just rebuild the library, and output the compiled contents to `./build/hclust.min.js`
223 | 8. Commit changes to repo if necessary. *Make sure to run the build command before committing; it won't happen automatically.*
224 |
225 | ---
226 |
227 | ### Similar libraries
228 |
229 | [cmpolis/hcluster.js](https://github.com/cmpolis/hcluster.js)
230 | [harthur/clustering](https://github.com/harthur/clustering)
231 | [mljs/hclust](https://github.com/mljs/hclust)
232 | [math-utils/hierarchical-clustering](https://github.com/math-utils/hierarchical-clustering)
233 |
234 | ---
235 |
236 | ### Further reading
237 |
238 | The [AGNES](https://onlinelibrary.wiley.com/doi/abs/10.1002/9780470316801.ch5) (AGglomerative NESting) method; continuously merge nodes that have the least dissimilarity.
239 |
--------------------------------------------------------------------------------
/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | presets: [
3 | [
4 | '@babel/preset-env',
5 | {
6 | targets: {
7 | node: 'current',
8 | },
9 | },
10 | ],
11 | ],
12 | };
13 |
--------------------------------------------------------------------------------
/build/hclust.min.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports.clusterData = exports.averageDistance = exports.euclideanDistance = void 0;
7 |
/**
 * Euclidean distance between two vectors.
 * Only the first min(a.length, b.length) components are compared.
 *
 * @param {number[]} a first vector
 * @param {number[]} b second vector
 * @returns {number} square root of the summed squared differences
 */
const euclideanDistance = (a, b) => {
  const dimensions = Math.min(a.length, b.length);
  let squaredTotal = 0;
  let position = 0;
  while (position < dimensions) {
    const delta = a[position] - b[position];
    squaredTotal += delta * delta;
    position += 1;
  }
  return Math.sqrt(squaredTotal);
};
// get average distance between sets of indexes, given distance matrix
17 |
18 |
19 | exports.euclideanDistance = euclideanDistance;
20 |
/**
 * Average of distances[a][b] over every pair (a, b) with a taken from setA
 * and b taken from setB.
 *
 * @param {number[]} setA indexes into the distance matrix
 * @param {number[]} setB indexes into the distance matrix
 * @param {number[][]} distances distance matrix
 * @returns {number} summed pair distances divided by both set lengths
 */
const averageDistance = (setA, setB, distances) => {
  let total = 0;
  for (const rowIndex of setA) {
    for (const colIndex of setB) {
      total += distances[rowIndex][colIndex];
    }
  }

  // divide in two steps, in this order, to keep the floating point result
  const perRow = total / setA.length;
  return perRow / setB.length;
};
// update progress by calling user onProgress and postMessage for web workers
30 |
31 |
32 | exports.averageDistance = averageDistance;
33 |
/**
 * Report overall progress to the caller and, inside a web worker, to the
 * thread that owns the worker.
 *
 * @param {number} stepNumber which step is running (0 or 1)
 * @param {number} stepProgress progress through that step
 * @param {*} onProgress callback; anything that is not a function is ignored
 */
const updateProgress = (stepNumber, stepProgress, onProgress) => {
  // currently only two distinct steps: computing distance matrix and
  // clustering, so each step contributes half of the total
  const fraction = stepNumber / 2 + stepProgress / 2;

  const hasCallback = typeof onProgress === 'function';
  if (hasCallback) {
    onProgress(fraction);
  }

  // only post a message when this script is being run as a web worker
  const inWorker =
    typeof WorkerGlobalScope !== 'undefined' &&
    self instanceof WorkerGlobalScope;
  if (inWorker) {
    postMessage(fraction);
  }
};
// default onProgress function. console logs progress
42 |
43 |
/**
 * Default progress callback: prints the progress as a percentage with one
 * decimal place.
 *
 * @param {number} progress fraction of the work done
 */
const logProgress = (progress) => {
  const percent = (progress * 100).toFixed(1);
  return console.log('Clustering: ', `${percent}%`);
};
// the main clustering function
45 |
46 |
/**
 * Agglomerative hierarchical clustering of `data`.
 *
 * Starts with every row in its own cluster and repeatedly merges the two
 * clusters with the smallest `linkage` value until one cluster remains.
 *
 * @param {Object} [options]
 * @param {Array} [options.data=[]] rows to cluster: arrays of numbers, or
 *   objects holding such an array under `key`
 * @param {string} [options.key=''] property to read each row's values from;
 *   when falsy the rows are used as they are
 * @param {Function} [options.distance=euclideanDistance] (rowA, rowB) => number
 * @param {Function} [options.linkage=averageDistance]
 *   (indexesA, indexesB, distances) => number
 * @param {Function} [options.onProgress=logProgress] progress callback;
 *   values that are not functions are ignored
 * @returns {{clusters: ?Object, distances: number[][], order: number[], clustersGivenK: number[][][]}}
 *   `clusters` is the root of the merge tree (null when `data` is empty),
 *   `distances` the row-by-row distance matrix, `order` the root's indexes,
 *   and `clustersGivenK[k]` the tree slice that has k clusters.
 */
const clusterData = ({
  data = [],
  key = '',
  distance = euclideanDistance,
  linkage = averageDistance,
  onProgress = logProgress
} = {}) => {
  // extract values from specified key, without reassigning the parameter
  const rows = key ? data.map(datum => datum[key]) : data;
  const total = rows.length;

  // nothing to cluster: there is no root cluster to report, so return an
  // empty result rather than failing on clusters[0]
  if (total === 0) {
    return { clusters: null, distances: [], order: [], clustersGivenK: [[]] };
  }

  // compute distance between each data point and every other data point
  // N x N matrix where N = total
  const distances = rows.map((datum, index) => {
    // a single row would give 0 / 0 = NaN, so report that step as complete
    const stepProgress = total > 1 ? index / (total - 1) : 1;
    updateProgress(0, stepProgress, onProgress);

    // get distance between datum and other datum
    return rows.map(otherDatum => distance(datum, otherDatum));
  });

  // initialize clusters to match data: one leaf per row
  const clusters = rows.map((datum, index) => ({
    height: 0,
    indexes: [index]
  }));

  // keep track of all tree slices, from N clusters down to 1
  const slices = [];

  for (let iteration = 0; iteration < total; iteration++) {
    updateProgress(1, (iteration + 1) / total, onProgress);

    // add current tree slice
    slices.push(clusters.map(cluster => cluster.indexes));

    // dont find clusters to merge when only one cluster left
    if (iteration >= total - 1) break;

    // start from the first pair so that, when no linkage value is below
    // Infinity (all NaN or Infinity), two distinct clusters are still merged
    let nearestDistance = Infinity;
    let nearestRow = 0;
    let nearestCol = 1;

    // upper triangular matrix of clusters
    for (let row = 0; row < clusters.length; row++) {
      for (let col = row + 1; col < clusters.length; col++) {
        const candidate = linkage(
          clusters[row].indexes,
          clusters[col].indexes,
          distances
        );
        if (candidate < nearestDistance) {
          nearestDistance = candidate;
          nearestRow = row;
          nearestCol = col;
        }
      }
    }

    // merge nearestRow and nearestCol clusters together
    const merged = {
      indexes: [...clusters[nearestRow].indexes, ...clusters[nearestCol].indexes],
      height: nearestDistance,
      children: [clusters[nearestRow], clusters[nearestCol]]
    };

    // nearestCol is always the higher index; splice it first so the second
    // splice is not affected
    clusters.splice(nearestCol, 1);
    clusters.splice(nearestRow, 1);

    // add new merged cluster
    clusters.push(merged);
  }

  const [root] = clusters;

  // tree slices are returned in an array where index = k
  return {
    clusters: root,
    distances,
    order: root.indexes,
    clustersGivenK: [[], ...slices.reverse()]
  };
};
119 |
120 | exports.clusterData = clusterData;
121 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@greenelab/hclust",
3 | "version": "0.0.0-dev",
4 | "description": "Agglomerative hierarchical clustering in JavaScript",
5 | "keywords": [
6 | "hierarchy",
7 | "hierarchical",
8 | "cluster",
9 | "clustering",
10 | "agglomerative",
11 | "data",
12 | "tree"
13 | ],
14 | "author": "Vincent Rubinetti",
15 | "license": "MIT",
16 | "repository": "git+https://github.com/greenelab/hclust.git",
17 | "main": "./build/hclust.min.js",
18 | "module": "./build/hclust.min.js",
19 | "scripts": {
20 | "test": "bash ./scripts/build.sh && bash ./scripts/test.sh",
21 | "build": "bash ./scripts/build.sh"
22 | },
23 | "devDependencies": {
24 | "@babel/cli": "^7.8.4",
25 | "@babel/core": "^7.9.0",
26 | "@babel/preset-env": "^7.9.0",
27 | "babel-jest": "^25.1.0",
28 | "babel-preset-minify": "^0.5.1",
29 | "jest": "^25.1.0"
30 | },
31 | "browserslist": "> 0.1%, not dead"
32 | }
33 |
--------------------------------------------------------------------------------
/scripts/build.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Rebuild the library from scratch: remove the old build directory, recreate
# it, and compile ./src/hclust.js into ./build/hclust.min.js with Babel.
# Paths are relative to the directory the script is started from.

# stop at the first failing step instead of carrying on with a broken build
set -euo pipefail

rm -rf build
mkdir build
npx babel ./src/hclust.js --out-file ./build/hclust.min.js
4 |
--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run the test suite in ./test/test.js with Jest.
# Jest is resolved through npx, like Babel in build.sh, so the script also
# works when node_modules/.bin is not on PATH.
set -euo pipefail

npx jest ./test/test.js --notify
2 |
--------------------------------------------------------------------------------
/src/hclust.js:
--------------------------------------------------------------------------------
/**
 * Euclidean distance between two vectors.
 * Only the first min(a.length, b.length) components are compared.
 *
 * @param {number[]} a first vector
 * @param {number[]} b second vector
 * @returns {number} square root of the summed squared differences
 */
export const euclideanDistance = (a, b) => {
  const dimensions = Math.min(a.length, b.length);
  let squaredTotal = 0;
  let position = 0;
  while (position < dimensions) {
    const delta = a[position] - b[position];
    squaredTotal += delta * delta;
    position += 1;
  }
  return Math.sqrt(squaredTotal);
};
9 |
/**
 * Average of distances[a][b] over every pair (a, b) with a taken from setA
 * and b taken from setB.
 *
 * @param {number[]} setA indexes into the distance matrix
 * @param {number[]} setB indexes into the distance matrix
 * @param {number[][]} distances distance matrix
 * @returns {number} summed pair distances divided by both set lengths
 */
export const averageDistance = (setA, setB, distances) => {
  let total = 0;
  for (const rowIndex of setA) {
    for (const colIndex of setB) {
      total += distances[rowIndex][colIndex];
    }
  }

  // divide in two steps, in this order, to keep the floating point result
  const perRow = total / setA.length;
  return perRow / setB.length;
};
20 |
/**
 * Report overall progress to the caller and, inside a web worker, to the
 * thread that owns the worker.
 *
 * @param {number} stepNumber which step is running (0 or 1)
 * @param {number} stepProgress progress through that step
 * @param {*} onProgress callback; anything that is not a function is ignored
 */
const updateProgress = (stepNumber, stepProgress, onProgress) => {
  // currently only two distinct steps: computing distance matrix and
  // clustering, so each step contributes half of the total
  const fraction = stepNumber / 2 + stepProgress / 2;

  const hasCallback = typeof onProgress === 'function';
  if (hasCallback) {
    onProgress(fraction);
  }

  // only post a message when this script is being run as a web worker
  const inWorker =
    typeof WorkerGlobalScope !== 'undefined' &&
    self instanceof WorkerGlobalScope;
  if (inWorker) {
    postMessage(fraction);
  }
};
37 |
/**
 * Default progress callback: prints the progress as a percentage with one
 * decimal place.
 *
 * @param {number} progress fraction of the work done
 */
const logProgress = (progress) => {
  const percent = (progress * 100).toFixed(1);
  return console.log('Clustering: ', `${percent}%`);
};
41 |
/**
 * Agglomerative hierarchical clustering of `data`.
 *
 * Starts with every row in its own cluster and repeatedly merges the two
 * clusters with the smallest `linkage` value until one cluster remains.
 *
 * @param {Object} [options]
 * @param {Array} [options.data=[]] rows to cluster: arrays of numbers, or
 *   objects holding such an array under `key`
 * @param {string} [options.key=''] property to read each row's values from;
 *   when falsy the rows are used as they are
 * @param {Function} [options.distance=euclideanDistance] (rowA, rowB) => number
 * @param {Function} [options.linkage=averageDistance]
 *   (indexesA, indexesB, distances) => number
 * @param {Function} [options.onProgress=logProgress] progress callback;
 *   values that are not functions are ignored
 * @returns {{clusters: ?Object, distances: number[][], order: number[], clustersGivenK: number[][][]}}
 *   `clusters` is the root of the merge tree (null when `data` is empty),
 *   `distances` the row-by-row distance matrix, `order` the root's indexes,
 *   and `clustersGivenK[k]` the tree slice that has k clusters.
 */
export const clusterData = ({
  data = [],
  key = '',
  distance = euclideanDistance,
  linkage = averageDistance,
  onProgress = logProgress
} = {}) => {
  // extract values from specified key, without reassigning the parameter
  const rows = key ? data.map((datum) => datum[key]) : data;
  const total = rows.length;

  // nothing to cluster: there is no root cluster to report, so return an
  // empty result rather than failing on clusters[0]
  if (total === 0) {
    return { clusters: null, distances: [], order: [], clustersGivenK: [[]] };
  }

  // compute distance between each data point and every other data point
  // N x N matrix where N = total
  const distances = rows.map((datum, index) => {
    // a single row would give 0 / 0 = NaN, so report that step as complete
    const stepProgress = total > 1 ? index / (total - 1) : 1;
    updateProgress(0, stepProgress, onProgress);

    // get distance between datum and other datum
    return rows.map((otherDatum) => distance(datum, otherDatum));
  });

  // initialize clusters to match data: one leaf per row
  const clusters = rows.map((datum, index) => ({
    height: 0,
    indexes: [index]
  }));

  // keep track of all tree slices, from N clusters down to 1
  const slices = [];

  for (let iteration = 0; iteration < total; iteration++) {
    updateProgress(1, (iteration + 1) / total, onProgress);

    // add current tree slice
    slices.push(clusters.map((cluster) => cluster.indexes));

    // dont find clusters to merge when only one cluster left
    if (iteration >= total - 1) break;

    // start from the first pair so that, when no linkage value is below
    // Infinity (all NaN or Infinity), two distinct clusters are still merged
    let nearestDistance = Infinity;
    let nearestRow = 0;
    let nearestCol = 1;

    // upper triangular matrix of clusters
    for (let row = 0; row < clusters.length; row++) {
      for (let col = row + 1; col < clusters.length; col++) {
        const candidate = linkage(
          clusters[row].indexes,
          clusters[col].indexes,
          distances
        );
        if (candidate < nearestDistance) {
          nearestDistance = candidate;
          nearestRow = row;
          nearestCol = col;
        }
      }
    }

    // merge nearestRow and nearestCol clusters together
    const merged = {
      indexes: [
        ...clusters[nearestRow].indexes,
        ...clusters[nearestCol].indexes
      ],
      height: nearestDistance,
      children: [clusters[nearestRow], clusters[nearestCol]]
    };

    // nearestCol is always the higher index; splice it first so the second
    // splice is not affected
    clusters.splice(nearestCol, 1);
    clusters.splice(nearestRow, 1);

    // add new merged cluster
    clusters.push(merged);
  }

  const [root] = clusters;

  // tree slices are returned in an array where index = k
  return {
    clusters: root,
    distances,
    order: root.indexes,
    clustersGivenK: [[], ...slices.reverse()]
  };
};
136 |
--------------------------------------------------------------------------------
/test/chrispolis.hcluster.min.js:
--------------------------------------------------------------------------------
1 | (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.hcluster=f()}})(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;odata.length)throw new Error("n must be less than the size of the dataset");return clustersGivenK[data.length-n].map(function(indexes){return indexes.map(function(ndx){return data[ndx]})})};clust._squareMatrixPairs=function(n){var pairs=[];for(var row=0;row {
8 | expect(hcluster).toBeDefined();
9 | });
10 |
// smoke test: the clusterData import from this package resolved to something
test('can import hclust', () => {
  const imported = clusterData;
  expect(imported).toBeDefined();
});
14 |
// Clusters `dataset` with this package and with hcluster.js, then checks that
// both give the same leaf order and the same 10-cluster slice. Results are
// compared in terms of the value stored under `idKey` on each datum.
const expectSameResultsAsHcluster = (dataset, idKey) => {
  // translate a list of dataset indexes into a list of ids
  const toIds = (indexes) => indexes.map((index) => dataset[index][idKey]);

  // get received results from this package (run first, before hcluster.js
  // is handed the dataset)
  const resultsB = clusterData({
    data: dataset,
    key: 'value',
    onProgress: null
  });
  const orderB = toIds(resultsB.order);
  const sliceB = resultsB.clustersGivenK[10].map((cluster) => toIds(cluster));

  // get "expected" results from hcluster.js
  const resultsA = hcluster()
    .distance('euclidean')
    .linkage('avg')
    .posKey('value')
    .data(dataset);
  const orderA = resultsA.orderedNodes().map((node) => node[idKey]);
  const sliceA = resultsA
    .getClusters(10)
    .map((cluster) => cluster.map((node) => node[idKey]));

  console.log('Expected order:', orderA, 'Received order:', orderB);
  console.log('Expected slice:', sliceA, 'Received slice:', sliceB);

  expect(orderB).toStrictEqual(orderA);
  expect(sliceB).toStrictEqual(sliceA);
};

// dataset 1 is identified by sample name/id
test('test dataset 1', () => {
  expectSameResultsAsHcluster(dataset1, 'sample');
});

// dataset 2 is identified by signature name/id
test('test dataset 2', () => {
  expectSameResultsAsHcluster(dataset2, 'signature');
});
90 |
--------------------------------------------------------------------------------