├── .github └── workflows │ └── npm-publish.yaml ├── .gitignore ├── LICENSE ├── README.md ├── babel.config.js ├── build └── hclust.min.js ├── package.json ├── scripts ├── build.sh └── test.sh ├── src └── hclust.js └── test ├── chrispolis.hcluster.min.js ├── dataset-1.json ├── dataset-2.json └── test.js /.github/workflows/npm-publish.yaml: -------------------------------------------------------------------------------- 1 | name: npm-publish 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | npm-publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v2 14 | - name: Set up Node 15 | uses: actions/setup-node@v1 16 | with: 17 | node-version: '12.x' 18 | - name: Publish 19 | if: github.ref == 'refs/heads/master' 20 | uses: mikeal/merge-release@master 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | NPM_AUTH_TOKEN: ${{ secrets.NPM_AUTH_TOKEN }} 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | yarn.lock 3 | yarn-error.log 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Greene Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### hclust 2 | [Agglomerative hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) in JavaScript 3 | 4 | Inspired by the MIT-licensed [hcluster.js](https://github.com/cmpolis/hcluster.js) by [@ChrisPolis](https://twitter.com/chrispolis). 5 | [See the comparison of the two below](#comparison-with-hclusterjs). 6 | 7 | --- 8 | 9 | ### Usage 10 | 11 | #### Browser 12 | 13 | ```html 14 | 15 | 20 | ``` 21 | 22 | #### Node 23 | 24 | `npm install @greenelab/hclust` 25 | 26 | or 27 | 28 | `yarn add @greenelab/hclust` 29 | 30 | then 31 | 32 | ```javascript 33 | import { clusterData } from '@greenelab/hclust'; 34 | import { euclideanDistance } from '@greenelab/hclust'; 35 | import { averageDistance } from '@greenelab/hclust'; 36 | ``` 37 | 38 | --- 39 | 40 | ### `clusterData({ data, key, distance, linkage, onProgress })` 41 | 42 | #### Parameters 43 | 44 | **`data`** 45 | 46 | The data you want to cluster, in the format: 47 | 48 | ```javascript 49 | [ 50 | ... 51 | [ ... 1, 2, 3 ...], 52 | [ ... 1, 2, 3 ...], 53 | [ ... 1, 2, 3 ...], 54 | ... 55 | ] 56 | ``` 57 | 58 | or if `key` parameter is specified: 59 | 60 | ```javascript 61 | [ 62 | ... 63 | { someKey: [ ... 1, 2, 3 ...] }, 64 | { someKey: [ ... 1, 2, 3 ...] }, 65 | { someKey: [ ... 1, 2, 3 ...] }, 66 | ... 
67 | ] 68 | ``` 69 | 70 | The entries in the outer array can be considered the `rows` and the entries within each `row` array can be considered the `cols`. 71 | Each `row` should have the same number of `cols`. 72 | 73 | *Default value:* `[]` 74 | 75 | **`key`** 76 | 77 | A `string` key to specify which values to extract from the `data` array. 78 | If omitted, `data` is assumed to be an array of arrays. 79 | If specified, `data` is assumed to be array of objects, each with a key that contains the values for that `row`. 80 | 81 | *Default value:* `''` 82 | 83 | **`distance`** 84 | 85 | A function to calculate the distance between two equal-dimension vectors, used in calculating the distance matrix, in the format: 86 | 87 | ```javascript 88 | function (arrayA, arrayB) { return someNumber; } 89 | ``` 90 | 91 | The function receives two equal-length arrays of numbers (ints or floats) and should return a number (int or float). 92 | 93 | *Default value:* `euclideanDistance` from this `hclust` package 94 | 95 | **`linkage`** 96 | 97 | A function to calculate the distance between pairs of clusters based on a distance matrix, used in determining linkage criterion, in the format: 98 | 99 | ```javascript 100 | function (arrayA, arrayB, distanceMatrix) { return someNumber; } 101 | ``` 102 | 103 | The function receives two sets of indexes and the distance matrix computed between each datum and every other datum. 104 | The function should return a number (int or float) 105 | 106 | *Default value:* `averageDistance` from this `hclust` package 107 | 108 | **`onProgress`** 109 | 110 | A function that is called several times throughout clustering, and is provided the current progress through the clustering, in the format: 111 | 112 | ```javascript 113 | function (progress) { } 114 | ``` 115 | 116 | The function receives the percent progress between `0` and `1`. 
117 | 118 | *Default value:* an internal function that `console.log`'s the progress 119 | 120 | **Note:** [`postMessage`](https://developer.mozilla.org/en-US/docs/Web/API/Worker/postMessage) is called in the same places as `onProgress`, if the script is running as a [web worker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers). 121 | 122 | #### Returns 123 | 124 | ```javascript 125 | const { clusters, distances, order, clustersGivenK } = clusterData(...); 126 | ``` 127 | 128 | **`clusters`** 129 | 130 | The resulting cluster tree, in the format: 131 | 132 | ```javascript 133 | { 134 | indexes: [ ... Number, Number, Number ... ], 135 | height: Number, 136 | children: [ ... {}, {}, {} ... ] 137 | } 138 | 139 | ``` 140 | 141 | **`distances`** 142 | 143 | The computed distance matrix, in the format: 144 | 145 | ```javascript 146 | [ 147 | ... 148 | [ ... Number, Number, Number ...], 149 | [ ... Number, Number, Number ...], 150 | [ ... Number, Number, Number ...] 151 | ... 152 | ] 153 | ``` 154 | 155 | **`order`** 156 | 157 | The new order of the data, in terms of original data array indexes, in the format: 158 | 159 | ```javascript 160 | [ ... Number, Number, Number ... ] 161 | ``` 162 | 163 | Equivalent to `clusters.indexes` and `clustersGivenK[1]`. 164 | 165 | **`clustersGivenK`** 166 | 167 | A list of tree slices in terms of original data array indexes, where index = K, in the format: 168 | 169 | ```javascript 170 | [ 171 | [], // K = 0 172 | [ [] ], // K = 1 173 | [ [], [] ], // K = 2 174 | [ [], [], [] ], // K = 3 175 | [ [], [], [], [] ], // K = 4 176 | [ [], [], [], [], [] ] // K = 5 177 | ... 178 | ] 179 | ``` 180 | 181 | --- 182 | 183 | ### `euclideanDistance(arrayA, arrayB)` 184 | 185 | Calculates the [euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) between two equal-dimension vectors. 
186 | 187 | --- 188 | 189 | ### `averageDistance(arrayA, arrayB, distanceMatrix)` 190 | 191 | Calculates the average distance between pairs of clusters based on a distance matrix. 192 | 193 | --- 194 | 195 | ### Comparison with [hcluster.js](https://github.com/cmpolis/hcluster.js) 196 | 197 | - This package does not duplicate items from the original dataset in the results. 198 | Results are given in terms of indexes, either with respect to the original dataset or the distance matrix. 199 | - This package uses more modern JavaScript syntaxes and practices to make the code cleaner and simpler. 200 | - This package provides an `onProgress` callback and calls `postMessage` for use in [web workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers). 201 | Because clustering can take a long time with large data sets, you may want to run it as a web worker so the browser doesn't freeze for a long time, and you may need a callback so you can give users visual feedback on its progress. 202 | - This package makes some performance optimizations, such as removing unnecessary loops through big sets. 203 | It has been tested on various OS's (Windows, Mac, Linux, iOS, Android), devices (desktop, laptop, mobile), browsers (Chrome, Firefox, Safari), contexts (main thread, web worker), and hosting locations (local, online). 204 | The results vary widely, and are likely sensitive to the specifics of hardware, cpu cores, browser implementation, etc. 205 | But in general, this package is more performant than `hcluster.js`, to varying degrees, and is always at least as performant on average. 206 | Chrome seems to see the most performance gains (up to 10x, when the row number is high), while Firefox seems to see no gains. 207 | - This package does not touch the input data object, whereas the `hcluster.js` package does. 208 | D3 often expects you to mutate data objects directly, which is now typically considered bad practice in JavaScript. 
209 | Instead, this package returns the useful data from the clustering algorithm (including the distance matrix), and allows you to mutate or not mutate the data object depending on your needs. 210 | - This package leaves out the `minDistance` or `maxDistance` functions that are built into `hcluster.js`, because -- per [this reference](https://onlinelibrary.wiley.com/doi/abs/10.1002/9780470316801.ch5) -- they are not as effective as `averageDistance`. 211 | 212 | --- 213 | 214 | ### Making changes to the library 215 | 216 | 1. [Install Node](https://nodejs.org/en/download/) 217 | 2. [Install Yarn](https://classic.yarnpkg.com/en/docs/install) 218 | 3. Clone this repo and navigate to it in your command terminal 219 | 4. Run `yarn install` to install this package's dependencies 220 | 5. Make desired changes to `./src/hclust.js` 221 | 6. Run `yarn test` to automatically rebuild the library and run test suite 222 | 7. Run `yarn build` to just rebuild the library, and output the compiled contents to `./build/hclust.min.js` 223 | 8. Commit changes to repo if necessary. *Make sure to run the build command before committing; it won't happen automatically.* 224 | 225 | --- 226 | 227 | ### Similar libraries 228 | 229 | [cmpolis/hcluster.js](https://github.com/cmpolis/hcluster.js) 230 | [harthur/clustering](https://github.com/harthur/clustering) 231 | [mljs/hclust](https://github.com/mljs/hclust) 232 | [math-utils/hierarchical-clustering](https://github.com/math-utils/hierarchical-clustering) 233 | 234 | --- 235 | 236 | ### Further reading 237 | 238 | The [AGNES](https://onlinelibrary.wiley.com/doi/abs/10.1002/9780470316801.ch5) (AGglomerative NESting) method; continuously merge nodes that have the least dissimilarity. 
// GENERATED FILE: this is the Babel output of ./src/hclust.js (see
// scripts/build.sh: `npx babel ./src/hclust.js --out-file ./build/hclust.min.js`).
// Change the source and rebuild rather than editing the code below by hand.
"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.clusterData = exports.averageDistance = exports.euclideanDistance = void 0;

// get euclidean distance between two equal-dimension vectors
// (a, b): arrays of numbers; only the first min(a.length, b.length) entries
// are compared, so a longer vector is silently truncated
const euclideanDistance = (a, b) => {
  const size = Math.min(a.length, b.length);
  let sum = 0;

  for (let index = 0; index < size; index++) sum += (a[index] - b[index]) * (a[index] - b[index]);

  return Math.sqrt(sum);
};


exports.euclideanDistance = euclideanDistance;

// get average distance between sets of indexes, given distance matrix
// (setA, setB): arrays of row/column indexes into `distances`
// returns the mean of distances[a][b] over every pair; NaN (0 / 0) if either
// set is empty
const averageDistance = (setA, setB, distances) => {
  let distance = 0;

  for (const a of setA) {
    for (const b of setB) distance += distances[a][b];
  }

  return distance / setA.length / setB.length;
};


exports.averageDistance = averageDistance;

// update progress by calling user onProgress and postMessage for web workers
// stepNumber: 0 (distance matrix) or 1 (clustering); stepProgress: 0..1 within
// that step; the value reported is the overall progress in 0..1
const updateProgress = (stepNumber, stepProgress, onProgress) => {
  // currently only two distinct steps: computing distance matrix and clustering
  const progress = stepNumber / 2 + stepProgress / 2;

  // if onProgress is defined and is a function, call onProgress
  // (passing null/undefined/non-function values disables the callback)
  if (typeof onProgress === 'function') onProgress(progress);

  // if this script is being run as a web worker, call postMessage
  // the typeof guard keeps this from throwing where WorkerGlobalScope/self
  // do not exist
  if (typeof WorkerGlobalScope !== 'undefined' && self instanceof WorkerGlobalScope) postMessage(progress);
};


// default onProgress function. console logs progress
const logProgress = progress => console.log('Clustering: ', (progress * 100).toFixed(1) + '%');


// the main clustering function
// takes a single options object and returns
// { clusters, distances, order, clustersGivenK }
// NOTE: the options object itself has no default, so calling clusterData()
// with no argument throws while destructuring
const clusterData = ({
  data = [],
  key = '',
  distance = euclideanDistance,
  linkage = averageDistance,
  onProgress = logProgress
}) => {
  // extract values from specified key
  if (key) data = data.map(datum => datum[key]);

  // compute distance between each data point and every other data point
  // N x N matrix where N = data.length
  const distances = data.map((datum, index) => {
    // NOTE: for a single-row dataset this is 0 / 0, so NaN progress is reported
    updateProgress(0, index / (data.length - 1), onProgress);

    // get distance between datum and other datum
    return data.map(otherDatum => distance(datum, otherDatum));
  });

  // initialize clusters to match data
  // every row starts as its own leaf cluster at height 0
  const clusters = data.map((datum, index) => ({
    height: 0,
    indexes: [Number(index)]
  }));

  // keep track of all tree slices
  let clustersGivenK = [];

  // iterate through data
  // each pass records the current slice, then merges the two nearest clusters
  for (let iteration = 0; iteration < data.length; iteration++) {
    updateProgress(1, (iteration + 1) / data.length, onProgress);

    // add current tree slice
    clustersGivenK.push(clusters.map(cluster => cluster.indexes));

    // dont find clusters to merge when only one cluster left
    if (iteration >= data.length - 1) break;

    // initialize smallest distance
    // NOTE: if no linkage value compares below Infinity (e.g. every value is
    // NaN), nearestRow and nearestCol both stay 0 and cluster 0 is merged
    // with itself below
    let nearestDistance = Infinity;
    let nearestRow = 0;
    let nearestCol = 0;

    // upper triangular matrix of clusters
    for (let row = 0; row < clusters.length; row++) {
      for (let col = row + 1; col < clusters.length; col++) {
        // calculate distance between clusters
        const distance = linkage(clusters[row].indexes, clusters[col].indexes, distances);

        // update smallest distance
        // strict < keeps the first pair found when several pairs tie
        if (distance < nearestDistance) {
          nearestDistance = distance;
          nearestRow = row;
          nearestCol = col;
        }
      }
    }

    // merge nearestRow and nearestCol clusters together
    const newCluster = {
      indexes: [...clusters[nearestRow].indexes, ...clusters[nearestCol].indexes],
      height: nearestDistance,
      children: [clusters[nearestRow], clusters[nearestCol]]
    };

    // remove nearestRow and nearestCol clusters
    // splice higher index first so it doesn't affect second splice
    clusters.splice(Math.max(nearestRow, nearestCol), 1);
    clusters.splice(Math.min(nearestRow, nearestCol), 1);

    // add new merged cluster
    clusters.push(newCluster);
  }

  // assemble full list of tree slices into array where index = k
  // slices were pushed from K = N down to K = 1, hence the reverse; the
  // leading [] is the K = 0 entry
  clustersGivenK = [[], ...clustersGivenK.reverse()];

  // return useful information
  // NOTE: when data is empty, clusters[0] is undefined and reading .indexes
  // below throws a TypeError
  return {
    clusters: clusters[0],
    distances: distances,
    order: clusters[0].indexes,
    clustersGivenK: clustersGivenK
  };
};

exports.clusterData = clusterData;
"^7.9.0", 27 | "babel-jest": "^25.1.0", 28 | "babel-preset-minify": "^0.5.1", 29 | "jest": "^25.1.0" 30 | }, 31 | "browserslist": "> 0.1%, not dead" 32 | } 33 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | rm -rf build 2 | mkdir build 3 | npx babel ./src/hclust.js --out-file ./build/hclust.min.js 4 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | jest ./test/test.js --notify 2 | -------------------------------------------------------------------------------- /src/hclust.js: -------------------------------------------------------------------------------- 1 | // get euclidean distance between two equal-dimension vectors 2 | export const euclideanDistance = (a, b) => { 3 | const size = Math.min(a.length, b.length); 4 | let sum = 0; 5 | for (let index = 0; index < size; index++) 6 | sum += (a[index] - b[index]) * (a[index] - b[index]); 7 | return Math.sqrt(sum); 8 | }; 9 | 10 | // get average distance between sets of indexes, given distance matrix 11 | export const averageDistance = (setA, setB, distances) => { 12 | let distance = 0; 13 | for (const a of setA) { 14 | for (const b of setB) 15 | distance += distances[a][b]; 16 | } 17 | 18 | return distance / setA.length / setB.length; 19 | }; 20 | 21 | // update progress by calling user onProgress and postMessage for web workers 22 | const updateProgress = (stepNumber, stepProgress, onProgress) => { 23 | // currently only two distinct steps: computing distance matrix and clustering 24 | const progress = stepNumber / 2 + stepProgress / 2; 25 | 26 | // if onProgress is defined and is a function, call onProgress 27 | if (typeof onProgress === 'function') 28 | onProgress(progress); 29 | 30 | // if this script is being run as a web worker, call 
postMessage 31 | if ( 32 | typeof WorkerGlobalScope !== 'undefined' && 33 | self instanceof WorkerGlobalScope 34 | ) 35 | postMessage(progress); 36 | }; 37 | 38 | // default onProgress function. console logs progress 39 | const logProgress = (progress) => 40 | console.log('Clustering: ', (progress * 100).toFixed(1) + '%'); 41 | 42 | // the main clustering function 43 | export const clusterData = ({ 44 | data = [], 45 | key = '', 46 | distance = euclideanDistance, 47 | linkage = averageDistance, 48 | onProgress = logProgress 49 | }) => { 50 | // extract values from specified key 51 | if (key) 52 | data = data.map((datum) => datum[key]); 53 | 54 | // compute distance between each data point and every other data point 55 | // N x N matrix where N = data.length 56 | const distances = data.map((datum, index) => { 57 | updateProgress(0, index / (data.length - 1), onProgress); 58 | 59 | // get distance between datum and other datum 60 | return data.map((otherDatum) => distance(datum, otherDatum)); 61 | }); 62 | 63 | // initialize clusters to match data 64 | const clusters = data.map((datum, index) => ({ 65 | height: 0, 66 | indexes: [Number(index)] 67 | })); 68 | 69 | // keep track of all tree slices 70 | let clustersGivenK = []; 71 | 72 | // iterate through data 73 | for (let iteration = 0; iteration < data.length; iteration++) { 74 | updateProgress(1, (iteration + 1) / data.length, onProgress); 75 | 76 | // add current tree slice 77 | clustersGivenK.push(clusters.map((cluster) => cluster.indexes)); 78 | 79 | // dont find clusters to merge when only one cluster left 80 | if (iteration >= data.length - 1) 81 | break; 82 | 83 | // initialize smallest distance 84 | let nearestDistance = Infinity; 85 | let nearestRow = 0; 86 | let nearestCol = 0; 87 | 88 | // upper triangular matrix of clusters 89 | for (let row = 0; row < clusters.length; row++) { 90 | for (let col = row + 1; col < clusters.length; col++) { 91 | // calculate distance between clusters 92 | const distance = 
linkage( 93 | clusters[row].indexes, 94 | clusters[col].indexes, 95 | distances 96 | ); 97 | // update smallest distance 98 | if (distance < nearestDistance) { 99 | nearestDistance = distance; 100 | nearestRow = row; 101 | nearestCol = col; 102 | } 103 | } 104 | } 105 | 106 | // merge nearestRow and nearestCol clusters together 107 | const newCluster = { 108 | indexes: [ 109 | ...clusters[nearestRow].indexes, 110 | ...clusters[nearestCol].indexes 111 | ], 112 | height: nearestDistance, 113 | children: [clusters[nearestRow], clusters[nearestCol]] 114 | }; 115 | 116 | // remove nearestRow and nearestCol clusters 117 | // splice higher index first so it doesn't affect second splice 118 | clusters.splice(Math.max(nearestRow, nearestCol), 1); 119 | clusters.splice(Math.min(nearestRow, nearestCol), 1); 120 | 121 | // add new merged cluster 122 | clusters.push(newCluster); 123 | } 124 | 125 | // assemble full list of tree slices into array where index = k 126 | clustersGivenK = [[], ...clustersGivenK.reverse()]; 127 | 128 | // return useful information 129 | return { 130 | clusters: clusters[0], 131 | distances: distances, 132 | order: clusters[0].indexes, 133 | clustersGivenK: clustersGivenK 134 | }; 135 | }; 136 | -------------------------------------------------------------------------------- /test/chrispolis.hcluster.min.js: -------------------------------------------------------------------------------- 1 | (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.hcluster=f()}})(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw 
f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;odata.length)throw new Error("n must be less than the size of the dataset");return clustersGivenK[data.length-n].map(function(indexes){return indexes.map(function(ndx){return data[ndx]})})};clust._squareMatrixPairs=function(n){var pairs=[];for(var row=0;row { 8 | expect(hcluster).toBeDefined(); 9 | }); 10 | 11 | test('can import hclust', () => { 12 | expect(clusterData).toBeDefined(); 13 | }); 14 | 15 | test('test dataset 1', () => { 16 | // get received results from this package 17 | const resultsB = clusterData({ 18 | data: dataset1, 19 | key: 'value', 20 | onProgress: null 21 | }); 22 | 23 | // transform order to be in terms of sample name/id 24 | const orderB = resultsB.order 25 | .map((index) => dataset1[index]) 26 | .map((node) => node.sample); 27 | // transform slice to be in terms of sample name/id 28 | const sliceB = resultsB.clustersGivenK[10].map((cluster) => 29 | cluster.map((index) => dataset1[index]).map((node) => node.sample) 30 | ); 31 | 32 | // get "expected" results from hcluster.js 33 | const resultsA = hcluster() 34 | .distance('euclidean') 35 | .linkage('avg') 36 | .posKey('value') 37 | .data(dataset1); 38 | 39 | // transform order to be in terms of sample name/id 40 | const orderA = resultsA.orderedNodes().map((node) => node.sample); 41 | // transform slice to be in terms of sample name/id 42 | const sliceA = resultsA 43 | .getClusters(10) 44 | .map((cluster) => cluster.map((node) => node.sample)); 45 | 46 | console.log('Expected order:', orderA, 'Received order:', orderB); 47 | console.log('Expected slice:', sliceA, 'Received slice:', sliceB); 48 | 49 | expect(orderB).toStrictEqual(orderA); 50 | expect(sliceB).toStrictEqual(sliceA); 51 | }); 52 | 53 | test('test dataset 2', () => { 54 | // get received results from this package 55 
| const resultsB = clusterData({ 56 | data: dataset2, 57 | key: 'value', 58 | onProgress: null 59 | }); 60 | 61 | // transform order to be in terms of signature name/id 62 | const orderB = resultsB.order 63 | .map((index) => dataset2[index]) 64 | .map((node) => node.signature); 65 | // transform slice to be in terms of signature name/id 66 | const sliceB = resultsB.clustersGivenK[10].map((cluster) => 67 | cluster.map((index) => dataset2[index]).map((node) => node.signature) 68 | ); 69 | 70 | // get "expected" results from hcluster.js 71 | const resultsA = hcluster() 72 | .distance('euclidean') 73 | .linkage('avg') 74 | .posKey('value') 75 | .data(dataset2); 76 | 77 | // transform order to be in terms of signature name/id 78 | const orderA = resultsA.orderedNodes().map((node) => node.signature); 79 | // transform slice to be in terms of signature name/id 80 | const sliceA = resultsA 81 | .getClusters(10) 82 | .map((cluster) => cluster.map((node) => node.signature)); 83 | 84 | console.log('Expected order:', orderA, 'Received order:', orderB); 85 | console.log('Expected slice:', sliceA, 'Received slice:', sliceB); 86 | 87 | expect(orderB).toStrictEqual(orderA); 88 | expect(sliceB).toStrictEqual(sliceA); 89 | }); 90 | --------------------------------------------------------------------------------