├── .github
│   └── workflows
│       ├── nodejs.yml
│       ├── release.yml
│       └── typedoc.yml
├── .gitignore
├── .npmrc
├── .prettierrc.json
├── CHANGELOG.md
├── CITATION.cff
├── LICENSE
├── README.md
├── eslint.config.mjs
├── package.json
├── src
│   ├── .npmignore
│   ├── __tests__
│   │   ├── iris.test.ts
│   │   ├── load.test.ts
│   │   └── test.test.ts
│   └── pca.ts
├── tsconfig.cjs.json
├── tsconfig.esm.json
└── tsconfig.json
/.github/workflows/nodejs.yml: -------------------------------------------------------------------------------- 1 | name: Node.js CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | nodejs: 11 | # Documentation: https://github.com/zakodium/workflows#nodejs-ci 12 | uses: zakodium/workflows/.github/workflows/nodejs.yml@nodejs-v1 13 | with: 14 | lint-check-types: true 15 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | release: 10 | # Documentation: https://github.com/zakodium/workflows#release 11 | uses: zakodium/workflows/.github/workflows/release.yml@release-v1 12 | with: 13 | npm: true 14 | secrets: 15 | github-token: ${{ secrets.BOT_TOKEN }} 16 | npm-token: ${{ secrets.NPM_BOT_TOKEN }} 17 | -------------------------------------------------------------------------------- /.github/workflows/typedoc.yml: -------------------------------------------------------------------------------- 1 | name: Deploy TypeDoc on GitHub pages 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [published] 7 | 8 | env: 9 | NODE_VERSION: 18.x 10 | ENTRY_FILE: 'src/pca.ts' 11 | 12 | jobs: 13 | deploy: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | - uses: actions/setup-node@v3 18 | with: 19 | node-version: ${{ env.NODE_VERSION }} 20 | - name: Install dependencies 21 | run: npm install 22 | - name: Build documentation 23 | uses: zakodium/typedoc-action@v2 24 | with: 25 | entry: ${{ env.ENTRY_FILE }} 26 | - name: Deploy to GitHub pages 27 | uses: JamesIves/github-pages-deploy-action@releases/v4 28 | with: 29 | token: ${{ secrets.BOT_TOKEN }} 30 | branch: gh-pages 31 | folder: docs 32 | clean: true 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | /pca.js 3 | coverage 4 | lib 5 | lib-esm -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "arrowParens": "always", 3 | "semi": true, 4 | "singleQuote": true, 5 | "tabWidth": 2, 6 | "trailingComma": "all" 7 | } 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [4.1.1](https://github.com/mljs/pca/compare/v4.1.0...v4.1.1) (2022-11-11) 4 | 5 | 6 | ### Bug Fixes 7 | 8 | * update DOI of Zenodo ([fffc814](https://github.com/mljs/pca/commit/fffc814a49a369ee4e0748e17b8b8c8a8ca5ea74)) 9 | 10 | ## [4.1.0](https://github.com/mljs/pca/compare/v4.0.2...v4.1.0)
(2022-11-11) 11 | 12 | 13 | ### Features 14 | 15 | * add citation ([677d49a](https://github.com/mljs/pca/commit/677d49aa4ab26b83afb17262cc99593332eb25ec)) 16 | 17 | ## [4.0.2](https://github.com/mljs/pca/compare/v4.0.1...v4.0.2) (2021-04-12) 18 | 19 | ## [4.0.1](https://github.com/mljs/pca/compare/v4.0.0...v4.0.1) (2019-10-19) 20 | 21 | 22 | ### Bug Fixes 23 | 24 | * save excludedFeatures in the model and default to empty array ([4a33387](https://github.com/mljs/pca/commit/4a333878383cfa2137fb20ea2868aa6506fdf507)) 25 | 26 | 27 | 28 | # [4.0.0](https://github.com/mljs/pca/compare/v3.0.0...v4.0.0) (2019-07-21) 29 | 30 | 31 | ### Features 32 | 33 | * add ignoreZeroVariance option ([aefd93c](https://github.com/mljs/pca/commit/aefd93c)) 34 | * add invert method ([617446c](https://github.com/mljs/pca/commit/617446c)) 35 | * add NIPALS option ([f8f9862](https://github.com/mljs/pca/commit/f8f9862)) 36 | 37 | 38 | ### BREAKING CHANGES 39 | 40 | * The `use*` options have been replaced by a single `method` option. 41 | 42 | 43 | 44 | # [3.0.0](https://github.com/mljs/pca/compare/v3.0.0-0...v3.0.0) (2019-04-25) 45 | 46 | 47 | 48 | 49 | # [2.1.0](https://github.com/mljs/pca/compare/v2.0.0...v2.1.0) (2017-07-24) 50 | 51 | 52 | ### Features 53 | 54 | * add nComponents option to predict method ([e18185c](https://github.com/mljs/pca/commit/e18185c)) 55 | 56 | 57 | 58 | 59 | # [2.0.0](https://github.com/mljs/pca/compare/v1.1.2...v2.0.0) (2017-07-21) 60 | 61 | 62 | ### Chores 63 | 64 | * remove support for Node 4 ([b749c19](https://github.com/mljs/pca/commit/b749c19)) 65 | 66 | 67 | ### BREAKING CHANGES 68 | 69 | * Removed support for Node 4 70 | 71 | 72 | 73 | 74 | ## [1.1.2](https://github.com/mljs/pca/compare/v1.1.1...v1.1.2) (2016-08-16) 75 | 76 | 77 | 78 | 79 | ## [1.1.1](https://github.com/mljs/pca/compare/v1.1.0...v1.1.1) (2016-06-02) 80 | 81 | 82 | ### Bug Fixes 83 | 84 | * throw error when standardization is not possible (constant column) ([3a65aea](https://github.com/mljs/pca/commit/3a65aea)) 85 | 86 | 87 | 88 | 89 | # [1.1.0](https://github.com/mljs/pca/compare/v1.0.0...v1.1.0) (2016-05-31) 90 | 91 | 92 | 93 | 94 | # [1.0.0](https://github.com/mljs/pca/compare/v0.1.1...v1.0.0) (2016-05-31) 95 | 96 | 97 | 98 | 99 | ## [0.1.1](https://github.com/mljs/pca/compare/v0.1.0...v0.1.1) (2015-11-19) 100 | 101 | 102 | 103 | 104 | # [0.1.0](https://github.com/mljs/pca/compare/2cdb5f5...v0.1.0) (2015-09-23) 105 | 106 | 107 | ### Bug Fixes 108 | 109 | * **standardize:** optional standardization added. ([e6b7a5f](https://github.com/mljs/pca/commit/e6b7a5f)) 110 | * **test:** 'new' added in the PCA test. ([2cdb5f5](https://github.com/mljs/pca/commit/2cdb5f5)) 111 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: 'If you use this software, please cite it as below.' 3 | title: 'JavaScript implementation of Principal Component Analysis (PCA)' 4 | abstract: 'This tool allows reducing the dimensionality of a dataset by projecting it into a new space where most of the variance is explained by the most important dimensions, using JavaScript.'
5 | repository-artifact: https://mljs.github.io/pca/ 6 | repository-code: https://github.com/mljs/pca 7 | authors: 8 | - family-names: 'Zasso' 9 | given-names: 'Michaël' 10 | affiliation: 'Zakodium Sàrl, Switzerland' 11 | orcid: 'https://orcid.org/0000-0001-5295-2159' 12 | - family-names: 'Patiny' 13 | given-names: 'Luc' 14 | orcid: 'https://orcid.org/0000-0002-4943-2643' 15 | - family-names: 'Wist' 16 | given-names: 'Julien' 17 | affiliation: 'Universidad del Valle, Cali, Colombia' 18 | orcid: 'https://orcid.org/0000-0002-3416-2572' 19 | - family-names: 'Kostro' 20 | given-names: 'Daniel' 21 | affiliation: 'Zakodium Sàrl, Switzerland' 22 | orcid: 'https://orcid.org/0000-0003-3885-6579' 23 | license: MIT 24 | keywords: 25 | - PCA 26 | - Machine learning 27 | - Unsupervised learning 28 | doi: 10.5281/zenodo.7314532 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 ml.js 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ml-pca 2 | 3 | Principal component analysis (PCA). 4 | 5 |

6 | 7 | 8 | [Zakodium logo] 9 | 10 | 11 |

12 | Maintained by Zakodium 13 |

14 | 15 | [![NPM version][npm-image]][npm-url] 16 | [![build status][ci-image]][ci-url] 17 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7314532.svg)](https://doi.org/10.5281/zenodo.7314532) 18 | [![npm download][download-image]][download-url] 19 | 20 |

21 | 22 | ## Installation 23 | 24 | `$ npm install ml-pca` 25 | 26 | ## Usage 27 | 28 | ```js 29 | const { PCA } = require('ml-pca'); 30 | const dataset = require('ml-dataset-iris').getNumbers(); 31 | // dataset is a two-dimensional array where rows represent the samples and columns the features 32 | const pca = new PCA(dataset); 33 | console.log(pca.getExplainedVariance()); 34 | /* 35 | [ 0.9246187232017269, 36 | 0.05306648311706785, 37 | 0.017102609807929704, 38 | 0.005212183873275558 ] 39 | */ 40 | const newPoints = [ 41 | [4.9, 3.2, 1.2, 0.4], 42 | [5.4, 3.3, 1.4, 0.9], 43 | ]; 44 | console.log(pca.predict(newPoints)); // project new points into the PCA space 45 | /* 46 | [ 47 | [ -2.830722471866897, 48 | 0.01139060953209596, 49 | 0.0030369648815961603, 50 | -0.2817812120420965 ], 51 | [ -2.308002707614927, 52 | -0.3175048770719249, 53 | 0.059976053412802766, 54 | -0.688413413360567 ]] 55 | */ 56 | ``` 57 | 58 | ## [API Documentation](https://mljs.github.io/pca/) 59 | 60 | ## License 61 | 62 | [MIT](./LICENSE) 63 | 64 | [npm-image]: https://img.shields.io/npm/v/ml-pca.svg 65 | [npm-url]: https://npmjs.org/package/ml-pca 66 | [ci-image]: https://github.com/mljs/pca/actions/workflows/nodejs.yml/badge.svg 67 | [ci-url]: https://github.com/mljs/pca/actions/workflows/nodejs.yml 68 | [download-image]: https://img.shields.io/npm/dm/ml-pca.svg 69 | [download-url]: https://npmjs.org/package/ml-pca 70 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import cheminfo from 'eslint-config-cheminfo-typescript'; 2 | import globals from 'globals'; 3 | 4 | export default [ 5 | ...cheminfo, 6 | { 7 | languageOptions: { 8 | globals: { 9 | ...globals.node, 10 | }, 11 | }, 12 | rules: {} 13 | } 14 | ] -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ml-pca", 3 | "version": "4.1.1", 4 | "description": "Principal component analysis", 5 | "main": "lib/pca.js", 6 | "module": "lib-esm/pca.js", 7 | "types": "lib/pca.d.ts", 8 | "files": [ 9 | "pca.js", 10 | "pca.d.ts", 11 | "src", 12 | "lib", 13 | "lib-esm" 14 | ], 15 | "scripts": { 16 | "check-types": "tsc --noEmit", 17 | "clean": "rimraf lib lib-esm", 18 | "eslint": "eslint src", 19 | "eslint-fix": "npm run eslint -- --fix", 20 | "prepack": "npm run tsc", 21 | "prettier": "prettier --check src", 22 | "prettier-write": "prettier --write src", 23 | "test": "npm run test-only && npm run eslint && npm run prettier && npm run check-types", 24 | "test-only": "vitest run --coverage", 25 | "tsc": "npm run clean && npm run tsc-cjs && npm run tsc-esm", 26 | "tsc-cjs": "tsc --project tsconfig.cjs.json", 27 | "tsc-esm": "tsc --project tsconfig.esm.json" 28 | }, 29 | "repository": { 30 | "type": "git", 31 | "url": "https://github.com/mljs/pca.git" 32 | }, 33 | "keywords": [ 34 | "pca", 35 | "principal", 36 | "component", 37 | "analysis", 38 | "dimensionality", 39 | "reduction", 40 | "data", 41 | "mining", 42 | "datamining", 43 | "machine", 44 | "learning" 45 | ], 46 | "author": "Jefferson Hernández", 47 | "license": "MIT", 48 | "bugs": { 49 | "url": "https://github.com/mljs/pca/issues" 50 | }, 51 | "homepage": "https://github.com/mljs/pca", 52 | "dependencies": { 53 | "ml-matrix": "^6.11.1" 54 | }, 55 | "devDependencies": { 56 | "@vitest/coverage-v8": "^2.1.3", 57 | "eslint": "^9.12.0", 58 | 
"eslint-config-cheminfo-typescript": "^16.0.0", 59 | "jest-matcher-deep-close-to": "^3.0.2", 60 | "ml-dataset-iris": "^1.2.1", 61 | "prettier": "^3.3.3", 62 | "rimraf": "^6.0.1", 63 | "typescript": "^5.6.3", 64 | "vitest": "^2.1.3" 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/.npmignore: -------------------------------------------------------------------------------- 1 | __tests__ 2 | .npmignore 3 | -------------------------------------------------------------------------------- /src/__tests__/iris.test.ts: -------------------------------------------------------------------------------- 1 | // Ref: http://www.r-bloggers.com/computing-and-visualizing-pca-in-r/ 2 | import { toBeDeepCloseTo } from 'jest-matcher-deep-close-to'; 3 | import { getNumbers } from 'ml-dataset-iris'; 4 | import { Matrix } from 'ml-matrix'; 5 | import { expect, describe, it } from 'vitest'; 6 | 7 | import { PCA } from '../pca'; 8 | 9 | expect.extend({ toBeDeepCloseTo }); 10 | const iris = getNumbers(); 11 | 12 | const expectedLoadings = [ 13 | [0.521, 0.269, 0.58, 0.565], 14 | [0.377, 0.923, 0.024, 0.067], 15 | [0.72, 0.244, 0.142, 0.634], 16 | [0.261, 0.124, 0.801, 0.524], 17 | ]; 18 | 19 | const expectedLoadingsNIPALS = [ 20 | [0.5211, -0.2693, 0.5804, 0.5649], 21 | [0.3774, 0.9233, 0.0245, 0.067], 22 | [0.7196, -0.2444, -0.1421, -0.6343], 23 | [-0.2613, 0.1235, 0.8014, -0.5236], 24 | ]; 25 | 26 | const expectedLoadingsSquare = [ 27 | [0.6637, 0.5054, 0.4075, 0.3712], 28 | [0.115, 0.4035, 0.7622, 0.4928], 29 | [0.4861, 0.7503, 0.2176, 0.3914], 30 | [0.5566, 0.136, 0.4533, 0.6827], 31 | ]; 32 | 33 | describe('iris dataset test method covarianceMatrix', () => { 34 | const pca = new PCA(iris, { scale: true, method: 'covarianceMatrix' }); 35 | it('loadings', () => { 36 | const loadings = pca 37 | .getLoadings() 38 | .to2DArray() 39 | .map((x) => x.map((y) => Math.abs(y))); 40 | expect(loadings).toBeDeepCloseTo(expectedLoadings, 3); 41 | }); 42 | }); 43 | 44 | describe('iris dataset test wrong method', () => { 45 | it('wrong method', () => { 46 | // @ts-expect-error We test an error 47 | expect(() => new PCA(iris, { scale: true, method: 'variance' })).toThrow( 48 | 'unknown method: variance', 49 | ); 50 | }); 51 | }); 52 | 53 | describe('iris dataset', () => { 54 | const pca = new PCA(iris, { scale: true, method: 'SVD' }); 55 | it('loadings', () => { 56 | const loadings = pca 57 | .getLoadings() 58 | .to2DArray() 59 | .map((x) => x.map((y) => Math.abs(y))); 60 | expect(loadings).toBeDeepCloseTo(expectedLoadings, 3); 61 | }); 62 | it('standard deviation', () => { 63 | expect(pca.getStandardDeviations()).toBeDeepCloseTo( 64 | [1.7084, 0.956, 0.3831, 0.1439], 65 | 4, 66 | ); 67 | }); 68 | it('explained variance', () => { 69 | expect(pca.getExplainedVariance()).toBeDeepCloseTo( 70 | [0.7296, 0.2285, 0.03669, 0.00518], 71 | 4, 72 | ); 73 | }); 74 | it('cumulative variance', () => { 75 | expect(pca.getCumulativeVariance()).toBeDeepCloseTo( 76 | [0.7296, 0.9581, 0.9948, 1], 77 | 4, 78 | ); 79 | }); 80 | it('prediction', () => { 81 | const pred = pca.predict(iris.slice(0, 2)); 82 | expect(pred.to2DArray()).toBeDeepCloseTo( 83 | [ 84 | [-2.257, -0.478, 0.127, -0.024], 85 | [-2.074, 0.672, 0.234, -0.103], 86 | ], 87 | 3, 88 | ); 89 | }); 90 | it('inverting scaled', () => { 91 | const input = iris.slice(0, 2); 92 | const pred = pca.predict(input); 93 | 94 | const inv = pca.invert(pred); 95 | 96 | expect(inv.to2DArray()).toBeDeepCloseTo(input); 97 | }); 98 | it('inverting 
not scaled', () => { 99 | const dataset = [ 100 | [1, 2, 3], 101 | [0, 3, 5], 102 | [2, 2, 2], 103 | ]; 104 | const newpca = new PCA(dataset); 105 | const pred = newpca.predict(dataset); 106 | 107 | const inv = newpca.invert(pred); 108 | 109 | expect(inv.to2DArray()).toBeDeepCloseTo(dataset); 110 | }); 111 | }); 112 | 113 | describe('iris dataset with provided covariance matrix', () => { 114 | const dataset = new Matrix(iris); 115 | const mean = dataset.mean('column'); 116 | const stdevs = dataset.standardDeviation('column', { mean }); 117 | dataset.subRowVector(mean).divRowVector(stdevs); 118 | const covarianceMatrix = dataset 119 | .transpose() 120 | .mmul(dataset) 121 | .div(dataset.rows - 1); 122 | const pca = new PCA(covarianceMatrix, { isCovarianceMatrix: true }); 123 | it('loadings', () => { 124 | const loadings = pca 125 | .getLoadings() 126 | .to2DArray() 127 | .map((x) => x.map((y) => Math.abs(y))); 128 | expect(loadings).toBeDeepCloseTo(expectedLoadings, 3); 129 | }); 130 | }); 131 | 132 | describe('iris dataset with computed covariance matrix', () => { 133 | const subData = iris.slice(0, 4); 134 | const pca = new PCA(subData, { 135 | scale: true, 136 | isCovarianceMatrix: true, 137 | }); 138 | it('loadings', () => { 139 | const loadings = pca 140 | .getLoadings() 141 | .to2DArray() 142 | .map((x) => x.map((y) => Math.abs(y))); 143 | expect(loadings).toBeDeepCloseTo(expectedLoadingsSquare, 3); 144 | }); 145 | }); 146 | 147 | describe('iris dataset and nipals', () => { 148 | const pca = new PCA(iris, { 149 | scale: true, 150 | method: 'NIPALS', 151 | nCompNIPALS: 4, 152 | isCovarianceMatrix: false, 153 | }); 154 | 155 | it('loadings', () => { 156 | const loadings = pca 157 | .getLoadings() 158 | .to2DArray() 159 | .map((x) => x.map((y) => Math.abs(y))); 160 | expect(loadings).toBeDeepCloseTo( 161 | expectedLoadingsNIPALS.map((x) => x.map((y) => Math.abs(y))), 162 | 3, 163 | ); 164 | }); 165 | 166 | it('loadings should be orthogonal', () => { 167 | const m = pca.getLoadings().transpose().mmul(pca.getLoadings()).round(); 168 | expect(m.sub(Matrix.eye(4, 4)).sum()).toBe(0); 169 | }); 170 | 171 | it('eigenvalues', () => { 172 | const eigenvalues = pca.getEigenvalues(); 173 | expect(eigenvalues.map((x) => Math.sqrt(x))).toBeDeepCloseTo( 174 | [20.853205, 11.67007, 4.676192, 1.756847], 175 | 6, 176 | ); 177 | }); 178 | 179 | it('scores', () => { 180 | const scores = pca.predict(iris); 181 | expect(scores.get(0, 0)).toBeCloseTo(-2.25714118, 6); 182 | expect(scores.get(0, 1)).toBeCloseTo(0.478423832, 6); 183 | }); 184 | 185 | it('scores may be scaled', () => { 186 | const scores = pca.predict(iris); 187 | const eigenvalues = pca.getStandardDeviations(); 188 | const scaledScores = scores.divRowVector(eigenvalues); 189 | expect(scaledScores.get(0, 0)).toBeCloseTo(-0.1082392451, 6); 190 | }); 191 | 192 | it('X may be recomputed', () => { 193 | const U = pca.predict(iris); 194 | const V = pca.getLoadings(); 195 | const S = pca.getEigenvalues(); 196 | 197 | // we scale the scores 198 | const SU = U.divRowVector(S); 199 | // we recompute X 200 | const RX = SU.mmul(Matrix.diag(S)).mmul(V); 201 | expect(RX.get(0, 0)).toBeCloseTo(-0.89767388, 6); 202 | }); 203 | 204 | it('explained variance', () => { 205 | const R2 = pca.getExplainedVariance(); 206 | expect(R2).toBeDeepCloseTo( 207 | [0.729624454, 0.228507618, 0.036689219, 0.005178709], 208 | 4, 209 | ); 210 | }); 211 | }); 212 | 213 | describe('iris dataset and nipals default nCompNIPALS', () => { 214 | const pca = new PCA(iris, { 215 | scale: 
true, 216 | method: 'NIPALS', 217 | isCovarianceMatrix: false, 218 | }); 219 | 220 | it('eigenvalues', () => { 221 | const sd = pca.getStandardDeviations(); 222 | expect(sd).toBeDeepCloseTo([20.853205, 11.67007], 6); 223 | }); 224 | 225 | it('prediction', () => { 226 | const pred = pca.predict(iris.slice(0, 2)); 227 | expect(pred.to2DArray()).toBeDeepCloseTo( 228 | [ 229 | [-2.257, 0.478], 230 | [-2.074, -0.672], 231 | ], 232 | 3, 233 | ); 234 | }); 235 | }); 236 | -------------------------------------------------------------------------------- /src/__tests__/load.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | 3 | import { PCA } from '../pca'; 4 | 5 | describe('PCA model', () => { 6 | it('Save / load model', () => { 7 | const dataset = [ 8 | [2, 1, 0, 3.7], 9 | [3, 1, 0, 3.2], 10 | [2.5, 1, 0, 3.1], 11 | [2.1, 1, 0, 3], 12 | ]; 13 | const pca = new PCA(dataset, { scale: true, ignoreZeroVariance: true }); 14 | expect(pca.predict(dataset).rows).toBe(4); 15 | expect(pca.predict(dataset).columns).toBe(2); 16 | 17 | const model = JSON.stringify(pca.toJSON()); 18 | 19 | const newpca = PCA.load(JSON.parse(model)); 20 | expect(newpca.predict(dataset).rows).toBe(4); 21 | expect(newpca.predict(dataset).columns).toBe(2); 22 | }); 23 | }); 24 | -------------------------------------------------------------------------------- /src/__tests__/test.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | 3 | import { PCA } from '../pca'; 4 | 5 | describe('PCA algorithm', () => { 6 | const testDataset = [ 7 | [3.38156266663556, 3.38911268489207], 8 | [4.52787538040321, 5.85417810116941], 9 | [2.65568186873946, 4.41199471748479], 10 | [2.76523467422508, 3.71541364974329], 11 | [2.84656010622109, 4.17550644951439], 12 | [3.89067195630921, 6.48838087188621], 13 | [3.47580524144079, 3.63284876204706], 14 | [5.91129844549583, 6.68076852676779], 15 | [3.92889396796927, 5.09844660814783], 16 | [4.56183536608942, 5.62329929038287], 17 | [4.57407170552516, 5.39765068914995], 18 | [4.37173355733069, 5.46116548918004], 19 | [4.191693876251, 4.95469359045186], 20 | [5.24408517686664, 4.66148766849075], 21 | [2.83584020280787, 3.76801716326883], 22 | [5.63526969258877, 6.3121143831056], 23 | [4.68632967964966, 5.66524110304899], 24 | [2.85051337486241, 4.62645627270763], 25 | [5.11015730037567, 7.36319662353662], 26 | [5.18256376844695, 4.64650908778182], 27 | [5.70732809135459, 6.68103994977504], 28 | [3.57968458251575, 4.80278073546266], 29 | [5.63937773123337, 6.12043594486419], 30 | [4.2634685116016, 4.68942896498378], 31 | [2.5365169312575, 3.88449077575653], 32 | [3.22382901750257, 4.94255585367287], 33 | [4.92948801055806, 5.95501971122402], 34 | [5.79295773976472, 5.10839305453511], 35 | [2.81684823843681, 4.81895768959782], 36 | [3.88882413905485, 5.10036563684974], 37 | [3.34323419214569, 5.89301345482551], 38 | [5.87973413931621, 5.52141663871971], 39 | [3.10391912309722, 3.85710242154672], 40 | [5.33150572016357, 4.68074234658945], 41 | [3.37542686902548, 4.56537851617577], 42 | [4.77667888193414, 6.25435038973932], 43 | [2.67574630193237, 3.73096987540176], 44 | [5.50027665196111, 5.67948113445839], 45 | [1.79709714108619, 3.24753885348582], 46 | [4.32251470267314, 5.11110472186451], 47 | [4.42100444798251, 6.02563977712186], 48 | [3.1792988626619, 4.43686031619158], 49 | [3.03354124664264, 3.97879278223097], 50 | 
[4.60934820070329, 5.87979200261535], 51 | [2.96378859260761, 3.30024834860712], 52 | [3.97176248181608, 5.40773735417849], 53 | [1.18023320575165, 2.87869409391385], 54 | [1.91895045046187, 5.07107847507096], 55 | [3.95524687147485, 4.50532709674253], 56 | [5.11795499426461, 6.08507386392396], 57 | ]; 58 | 59 | const pca = new PCA(testDataset, { 60 | scale: true, 61 | }); 62 | 63 | it('PCA Main test', () => { 64 | const U = [ 65 | [0.7071, 0.7071], 66 | [0.7071, -0.7071], 67 | ]; 68 | const S = [1.73553, 0.2644696]; 69 | 70 | const currentU = pca.getEigenvectors(); 71 | const currentS = pca.getEigenvalues(); 72 | 73 | for (let i = 0; i < 2; ++i) { 74 | for (let j = 0; j < 2; ++j) { 75 | expect(currentU.get(i, j)).toBeCloseTo(U[i][j], 3); 76 | } 77 | } 78 | 79 | for (let i = 0; i < 2; ++i) { 80 | expect(currentS[i]).toBeCloseTo(S[i], 3); 81 | } 82 | }); 83 | 84 | it('Projection method', () => { 85 | const result = pca.predict(testDataset, { nComponents: 1 }); 86 | expect(result.get(0, 0)).toBeCloseTo(-1.481274, 5); 87 | }); 88 | 89 | it('Variance explained method', () => { 90 | const varianceExplained = pca.getExplainedVariance(); 91 | expect(varianceExplained[0]).toBeCloseTo(0.8678, 4); 92 | expect(varianceExplained[1]).toBeCloseTo(0.1322, 4); 93 | }); 94 | 95 | it('Export and import', () => { 96 | const model = JSON.stringify(pca.toJSON()); 97 | const newpca = PCA.load(JSON.parse(model)); 98 | 99 | const U = [ 100 | [0.7071, 0.7071], 101 | [0.7071, -0.7071], 102 | ]; 103 | const S = [1.73553, 0.2644696]; 104 | 105 | const currentU = newpca.getEigenvectors(); 106 | const currentS = newpca.getEigenvalues(); 107 | 108 | for (let i = 0; i < 2; ++i) { 109 | for (let j = 0; j < 2; ++j) { 110 | expect(currentU.get(i, j)).toBeCloseTo(U[i][j], 3); 111 | } 112 | } 113 | 114 | for (let i = 0; i < 2; ++i) { 115 | expect(currentS[i]).toBeCloseTo(S[i], 3); 116 | } 117 | }); 118 | 119 | it('Standardization error with constant column', () => { 120 | const dataset = [ 121 | [1, 2, 3.7], 122 | [1, 3, 3.2], 123 | [1, 2.5, 3.1], 124 | [1, 2.1, 3], 125 | ]; 126 | expect(() => new PCA(dataset, { scale: true })).toThrow( 127 | /standard deviation is zero at index 0/, 128 | ); 129 | }); 130 | 131 | it('Standardization error with constant column - ignoreZeroVariance', () => { 132 | const dataset = [ 133 | [2, 1, 0, 3.7], 134 | [3, 1, 0, 3.2], 135 | [2.5, 1, 0, 3.1], 136 | [2.1, 1, 0, 3], 137 | ]; 138 | const newpca = new PCA(dataset, { scale: true, ignoreZeroVariance: true }); 139 | expect(newpca.getLoadings().rows).toBe(2); 140 | expect(newpca.predict(dataset).rows).toBe(4); 141 | expect(newpca.predict(dataset).columns).toBe(2); 142 | }); 143 | 144 | it('Test number components in function predict', () => { 145 | const dataset = [ 146 | [1, 2, 0], 147 | [3, 4, 0], 148 | [5, 6, 0], 149 | ]; 150 | const newpca = new PCA(dataset); 151 | expect(newpca.predict(dataset, { nComponents: 2 }).columns).toBe(2); 152 | }); 153 | 154 | it('should throw on load if wrong model', () => { 155 | // @ts-expect-error We test the error 156 | expect(() => PCA.load({})).toThrow(/model must have a name property/); 157 | // @ts-expect-error We test the error 158 | expect(() => PCA.load({ name: 'test' })).toThrow(/invalid model: test/); 159 | }); 160 | 161 | it('should throw on wrong method', () => { 162 | expect( 163 | () => 164 | new PCA( 165 | [ 166 | [0, 1], 167 | [1, 0], 168 | ], 169 | // @ts-expect-error We test the error 170 | { method: 'XXX ' }, 171 | ), 172 | ).toThrow(/unknown method: XXX/); 173 | }); 174 | }); 175 | 
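// Why the expected values above are what they are: for a standardized
// two-feature dataset the correlation matrix is [[1, r], [r, 1]], whose
// eigenvectors are always (1, 1)/√2 ≈ (0.7071, 0.7071) and
// (1, -1)/√2 ≈ (0.7071, -0.7071), with eigenvalues 1 + r and 1 - r.
// Here r ≈ 0.7355, matching S = [1.73553, 0.2644696].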
-------------------------------------------------------------------------------- /src/pca.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Matrix, 3 | MatrixTransposeView, 4 | EVD, 5 | SVD, 6 | NIPALS, 7 | AbstractMatrix, 8 | } from 'ml-matrix'; 9 | 10 | type MaybeMatrix = AbstractMatrix | number[][]; 11 | 12 | export interface PCAOptions { 13 | isCovarianceMatrix?: boolean; 14 | method?: 'SVD' | 'NIPALS' | 'covarianceMatrix'; 15 | center?: boolean; 16 | scale?: boolean; 17 | nCompNIPALS?: number; 18 | ignoreZeroVariance?: boolean; 19 | } 20 | 21 | export interface PCAModel { 22 | name: 'PCA'; 23 | center: boolean; 24 | scale: boolean; 25 | means: number[]; 26 | stdevs: number[]; 27 | U: Matrix; 28 | S: number[]; 29 | R?: any; 30 | excludedFeatures?: number[]; 31 | } 32 | 33 | export interface PredictOptions { 34 | nComponents?: number; 35 | } 36 | 37 | /** 38 | * Creates new PCA (Principal Component Analysis) from the dataset 39 | * @param {MaybeMatrix} dataset - dataset or covariance matrix. 40 | * @param {PCAOptions} [options] 41 | * @param {boolean} [options.isCovarianceMatrix=false] - true if the dataset is a covariance matrix. 42 | * @param {string} [options.method='SVD'] - select which method to use: SVD (default), covarianceMatrix or NIPALS. 43 | * @param {number} [options.nCompNIPALS=2] - number of components to be computed with NIPALS. 44 | * @param {boolean} [options.center=true] - should the data be centered (subtract the mean). 45 | * @param {boolean} [options.scale=false] - should the data be scaled (divide by the standard deviation). 46 | * @param {boolean} [options.ignoreZeroVariance=false] - ignore columns with zero variance if `scale` is `true`. 47 | */ 48 | export class PCA { 49 | private center: boolean; 50 | private scale: boolean; 51 | private excludedFeatures: number[]; 52 | /* eslint-disable @typescript-eslint/naming-convention */ 53 | private U: Matrix | null = null; 54 | private S: number[] | null = null; 55 | private R: any; 56 | private means: number[] | null; 57 | private stdevs: number[] | null; 58 | 59 | public constructor( 60 | dataset?: MaybeMatrix, 61 | options: PCAOptions = {}, 62 | model?: PCAModel, 63 | ) { 64 | if (model) { 65 | this.center = model.center; 66 | this.scale = model.scale; 67 | this.means = model.means; 68 | this.stdevs = model.stdevs; 69 | this.U = Matrix.checkMatrix(model.U); 70 | this.S = model.S; 71 | this.R = model.R; 72 | this.excludedFeatures = model.excludedFeatures || []; 73 | return; 74 | } 75 | let datasetMatrix: Matrix; 76 | if (Array.isArray(dataset)) { 77 | datasetMatrix = new Matrix(dataset); 78 | } else { 79 | datasetMatrix = new Matrix(dataset as Matrix); 80 | } 81 | 82 | const { 83 | isCovarianceMatrix = false, 84 | method = 'SVD', 85 | nCompNIPALS = 2, 86 | center = true, 87 | scale = false, 88 | ignoreZeroVariance = false, 89 | } = options; 90 | 91 | this.center = center; 92 | this.scale = scale; 93 | this.means = null; 94 | this.stdevs = null; 95 | this.excludedFeatures = []; 96 | 97 | if (isCovarianceMatrix) { 98 | // User provided a covariance matrix instead of dataset. 99 | this._computeFromCovarianceMatrix(datasetMatrix); 100 | return; 101 | } 102 | 103 | this._adjust(datasetMatrix, ignoreZeroVariance); 104 | switch (method) { 105 | case 'covarianceMatrix': { 106 | // User provided a dataset but wants us to compute and use the covariance matrix. 
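// With the default `center: true`, _adjust() has already centered (and,
// when `scale: true`, standardized) the data, so Xᵀ·X / (n − 1) below is
// the sample covariance matrix (the correlation matrix if scaled).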
107 | const covarianceMatrix = new MatrixTransposeView(datasetMatrix) 108 | .mmul(datasetMatrix) 109 | .div(datasetMatrix.rows - 1); 110 | this._computeFromCovarianceMatrix(covarianceMatrix); 111 | break; 112 | } 113 | case 'NIPALS': { 114 | this._computeWithNIPALS(datasetMatrix, nCompNIPALS); 115 | break; 116 | } 117 | case 'SVD': { 118 | const svd = new SVD(datasetMatrix, { 119 | computeLeftSingularVectors: false, 120 | computeRightSingularVectors: true, 121 | autoTranspose: true, 122 | }); 123 | 124 | this.U = svd.rightSingularVectors; 125 | 126 | const singularValues = svd.diagonal; 127 | const eigenvalues: number[] = []; 128 | for (const singularValue of singularValues) { 129 | eigenvalues.push( 130 | (singularValue * singularValue) / (datasetMatrix.rows - 1), 131 | ); 132 | } 133 | this.S = eigenvalues; 134 | break; 135 | } 136 | default: { 137 | throw new Error(`unknown method: ${method as string}`); 138 | } 139 | } 140 | } 141 | 142 | /** 143 | * Load a PCA model from JSON 144 | * @param model 145 | * @returns 146 | */ 147 | public static load(model: PCAModel): PCA { 148 | if (typeof model.name !== 'string') { 149 | throw new TypeError('model must have a name property'); 150 | } 151 | if (model.name !== 'PCA') { 152 | throw new RangeError(`invalid model: ${model.name as string}`); 153 | } 154 | return new PCA(undefined, undefined, model); 155 | } 156 | 157 | /** 158 | * Project the dataset into the PCA space 159 | * @param dataset 160 | * @param options 161 | * @returns dataset projected in the PCA space 162 | */ 163 | public predict(dataset: MaybeMatrix, options: PredictOptions = {}): Matrix { 164 | const { nComponents = (this.U as Matrix).columns } = options; 165 | let datasetmatrix; // both branches copy the input, so predict() never mutates the caller's data 166 | if (Array.isArray(dataset)) { 167 | datasetmatrix = new Matrix(dataset); 168 | } else { 169 | datasetmatrix = new Matrix(dataset); 170 | } 171 | if (this.center) { 172 | datasetmatrix.subRowVector(this.means as number[]); 173 | if (this.scale) { 174 | for (const i of this.excludedFeatures) { 175 | datasetmatrix.removeColumn(i); 176 | } 177 | datasetmatrix.divRowVector(this.stdevs as number[]); 178 | } 179 | } 180 | const predictions = datasetmatrix.mmul(this.U as Matrix); 181 | return predictions.subMatrix(0, predictions.rows - 1, 0, nComponents - 1); 182 | } 183 | 184 | /** 185 | * Calculates the inverse PCA transform 186 | * @param dataset 187 | * @returns dataset reconstructed in the original space 188 | */ 189 | public invert(dataset: Matrix): Matrix { 190 | dataset = Matrix.checkMatrix(dataset); 191 | 192 | const inverse = dataset.mmul((this.U as Matrix).transpose()); 193 | 194 | if (this.center) { 195 | if (this.scale) { 196 | inverse.mulRowVector(this.stdevs as number[]); 197 | } 198 | inverse.addRowVector(this.means as number[]); 199 | } 200 | 201 | return inverse; 202 | } 203 | 204 | /** 205 | * Returns the proportion of variance for each component 206 | * @returns 207 | */ 208 | public getExplainedVariance(): number[] { 209 | let sum = 0; 210 | if (this.S) { 211 | for (const s of this.S) { 212 | sum += s; 213 | } 214 | } 215 | if (this.S) { 216 | return this.S.map((value) => value / sum); 217 | } 218 | return []; 219 | } 220 | 221 | /** 222 | * Returns the cumulative proportion of variance 223 | * @returns 224 | */ 225 | public getCumulativeVariance(): number[] { 226 | const explained = this.getExplainedVariance(); 227 | for (let i = 1; i < explained.length; i++) { 228 | explained[i] += explained[i - 1]; 229 | } 230 | return explained; 231 | } 232 | 233 | /** 234 | * Returns the eigenvectors of the covariance matrix
235 | * @returns 236 | */ 237 | public getEigenvectors(): Matrix { 238 | return this.U as Matrix; 239 | } 240 | 241 | /** 242 | * Returns the eigenvalues of the covariance matrix 243 | * @returns 244 | */ 245 | public getEigenvalues(): number[] { 246 | return this.S as number[]; 247 | } 248 | 249 | /** 250 | * Returns the standard deviations of the principal components 251 | * @returns 252 | */ 253 | public getStandardDeviations(): number[] { 254 | return (this.S as number[]).map((x) => Math.sqrt(x)); 255 | } 256 | 257 | /** 258 | * Returns the loadings matrix 259 | * @returns 260 | */ 261 | public getLoadings(): Matrix { 262 | return (this.U as Matrix).transpose(); 263 | } 264 | 265 | /** 266 | * Export the current model to a JSON object 267 | * @returns model 268 | */ 269 | public toJSON(): PCAModel { 270 | return { 271 | name: 'PCA', 272 | center: this.center, 273 | scale: this.scale, 274 | means: this.means as number[], 275 | stdevs: this.stdevs as number[], 276 | U: this.U as Matrix, 277 | S: this.S as number[], 278 | excludedFeatures: this.excludedFeatures, 279 | }; 280 | } 281 | 282 | private _adjust(dataset: Matrix, ignoreZeroVariance: boolean) { 283 | if (this.center) { 284 | const mean = dataset.mean('column'); 285 | const stdevs = this.scale 286 | ? dataset.standardDeviation('column', { mean }) 287 | : null; 288 | this.means = mean; 289 | dataset.subRowVector(mean); 290 | if (this.scale) { 291 | for (let i = 0; i < (stdevs as number[]).length; i++) { 292 | if ((stdevs as number[])[i] === 0) { 293 | if (ignoreZeroVariance) { 294 | dataset.removeColumn(i); 295 | (stdevs as number[]).splice(i, 1); 296 | this.excludedFeatures.push(i); 297 | i--; 298 | } else { 299 | throw new RangeError( 300 | `Cannot scale the dataset (standard deviation is zero at index ${i})`, 301 | ); 302 | } 303 | } 304 | } 305 | this.stdevs = stdevs; 306 | dataset.divRowVector(stdevs as number[]); 307 | } 308 | } 309 | } 310 | 311 | private _computeFromCovarianceMatrix(dataset: MaybeMatrix) { 312 | const evd = new EVD(dataset as number[][], { assumeSymmetric: true }); 313 | this.U = evd.eigenvectorMatrix; 314 | this.U.flipRows(); 315 | this.S = evd.realEigenvalues; 316 | this.S.reverse(); 317 | } 318 | 319 | private _computeWithNIPALS(dataset: Matrix, nCompNIPALS: number) { 320 | this.U = new Matrix(nCompNIPALS, dataset.columns); 321 | this.S = []; 322 | 323 | let x = dataset; 324 | for (let i = 0; i < nCompNIPALS; i++) { 325 | const dc = new NIPALS(x); 326 | 327 | this.U.setRow(i, dc.w.transpose()); 328 | this.S.push(dc.s.get(0, 0) ** 2); 329 | 330 | x = dc.xResidual; 331 | } 332 | this.U = this.U.transpose(); // to be compatible with API 333 | } 334 | } 335 | -------------------------------------------------------------------------------- /tsconfig.cjs.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "module": "commonjs", 5 | "declaration": true, 6 | "declarationMap": true 7 | }, 8 | "exclude": ["./src/**/__tests__"] 9 | } 10 | -------------------------------------------------------------------------------- /tsconfig.esm.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.cjs.json", 3 | "compilerOptions": { 4 | "module": "es2020", 5 | "outDir": "lib-esm" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tsconfig.json:
-------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "allowJs": true, 4 | "esModuleInterop": true, 5 | "moduleResolution": "node", 6 | "skipLibCheck": true, 7 | "outDir": "lib", 8 | "sourceMap": true, 9 | "strict": true, 10 | "target": "es2020", 11 | "checkJs": true, 12 | "noImplicitAny": false 13 | }, 14 | "include": ["./src/**/*"] 15 | } 16 | --------------------------------------------------------------------------------