├── .gitattributes ├── .gitignore ├── .npmignore ├── .travis.yml ├── .vscode └── launch.json ├── LICENSE ├── README.md ├── assets ├── DT.png ├── knngraph.png ├── logistic.png ├── logistic2.png ├── test.csv ├── testSet.txt └── train.csv ├── docs ├── Matrix.md ├── README.md ├── Vector.md ├── algorithm.md ├── features.md ├── fileParser.md └── graph.md ├── gulpfile.js ├── package-lock.json ├── package.json ├── src ├── algorithm │ ├── AdaBoost │ │ ├── index.ts │ │ └── test.ts │ ├── DT │ │ ├── dt.txt │ │ ├── index.ts │ │ └── test.ts │ ├── index.ts │ ├── kMeans │ │ ├── index.ts │ │ ├── test.ts │ │ └── testSet.txt │ ├── kNN │ │ ├── index.ts │ │ └── test.ts │ └── logistic │ │ ├── index.ts │ │ └── test.ts ├── index.ts └── utils │ ├── .DS_Store │ ├── charts │ ├── .DS_Store │ ├── DT │ │ └── tpl.html │ ├── index.ts │ ├── kNN │ │ └── tpl.html │ ├── logistic │ │ └── tpl.html │ └── server.ts │ ├── features │ ├── index.ts │ ├── preprocessing.ts │ └── test.ts │ ├── fileParser │ ├── index.ts │ ├── result.csv │ └── test.ts │ ├── index.ts │ ├── matrix │ └── index.ts │ └── vector │ └── index.ts ├── test └── utils │ ├── Matrix.js │ ├── Vector.js │ └── fileParser.js ├── tsconfig.json └── tslint.json /.gitattributes: -------------------------------------------------------------------------------- 1 | *.html linguist-language=TypeScript -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | lib/ 3 | .DS_Store -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | assets/ 3 | tslint.json 4 | gulpfile.js 5 | tsconfig.json 6 | dist/maps/ 7 | src/ 8 | docs/ 9 | test/ 10 | .DS_Store -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "8.9.1" 4 | 5 | cache: 6 | directories: 7 | - node_modules 8 | script: npm run test -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "Launch Program", 11 | "program": "${file}", 12 | "outFiles": [ 13 | "${workspaceRoot}/dist/**/*.js" 14 | ], 15 | "cwd": "${workspaceRoot}", 16 | "sourceMaps": true, 17 | "console": "integratedTerminal" 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xia Luo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission 
notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mlhelper 2 | [![npm](https://img.shields.io/npm/v/mlhelper.svg?style=flat-square)](https://github.com/laoqiren/mlhelper) 3 | [![npm](https://img.shields.io/npm/l/mlhelper.svg?style=flat-square)](https://github.com/laoqiren/mlhelper) 4 | 5 | Algorithms and utils for Machine Learning in JavaScript, based on Node.js. Besides implementing commonly used machine learning algorithms, this library aims to provide a richer ecosystem, including matrix and vector operations, file parsing, feature engineering, data visualization, and so on. 6 | 7 | *`QQ Group`: 485305514* 8 | ## Installation 9 | ``` 10 | $ npm install mlhelper 11 | ``` 12 | 13 | ## Documentation 14 | 15 | * [algorithm](docs/algorithm.md) 16 | * [Matrix](docs/Matrix.md) 17 | * [Vector](docs/Vector.md) 18 | * [file Parser](docs/fileParser.md) 19 | * [graph tools](docs/graph.md) 20 | * [feature Engineering](docs/features.md) 21 | 22 | ## Example 23 | 24 | ### Algorithm 25 | 26 | ```js 27 | const AdaBoost = require('mlhelper/lib/algorithm').AdaBoost; 28 | //or const AdaBoost = require('mlhelper').algorithm.AdaBoost; 29 | 30 | const dataSet = [ 31 | [1.0,2.1], 32 | [2.0,1.1], 33 | [1.3,1.0], 34 | [1.0,1.0], 35 | [2.0,1.0] 36 | ] 37 | const labels = [1.0,1.0,-1.0,-1.0,1.0]; 38 | let ada = new AdaBoost(dataSet,labels,40); 39 | let result = ada.classify([[1.0,2.1], 40 | [2.0,1.1], 41 | [1.3,1.0], 42 | [1.0,1.0], 43 | [2.0,1.0]]); 44 | console.log(result); // [ 1, 1, -1, -1, -1 ] 45 | ``` 46 | 47 | ### Utils 48 | 49 | **Matrix:** 50 | ```js 51 | const Matrix = require('mlhelper/lib/utils').Matrix; 52 | 53 | let m1 = new Matrix([ 54 | [1,2,3], 55 | [3,4,5] 56 | ]); 57 | 58 | let m2 = new Matrix([ 59 | [2,2,6], 60 | [3,1,5] 61 | ]); 62 | 63 | console.log(m2.sub(m1)) // Matrix { arr: [ [ 1, 0, 3 ], [ 0, -3, 0 ] ] } 64 | console.log(m1.mult(m2)) // Matrix { arr: [ [ 2, 4, 18 ], [ 9, 4, 25 ] ] } 65 | ``` 66 | 67 | **Vector:** 68 | ```js 69 | const Vector = require('mlhelper/lib/utils').Vector; 70 | 71 | let v = new Vector([5,10,7,1]); 72 | console.log(v.argSort()) // [ 3, 0, 2, 1 ] 73 | ``` 74 | 75 | **fileParser:** 76 | ```js 77 | const parser = require('mlhelper/lib/utils').fileParser; 78 | 79 | let dt = parser.read_csv(path.join(__dirname,'./train.csv'),{ 80 | index_col: 0, 81 | delimiter: ',', 82 | header: 0, 83 | dataType: 'number' 84 | }); 85 | let labels = dt.getClasses(); 86 | let dataSet = dt.drop('quality').values; 87 | ``` 88 | 89 | **Feature Engineering:** 90 | ```js 91 | // preprocessing features 92 | const preprocessing = require('mlhelper/lib/utils').features.preprocessing; 93 | 94 | // make the features obey the standard normal distribution (standardization) 95 | let testStandardScaler = preprocessing.standardScaler(dataSet); 96 | 97 | let testNormalize = 
preprocessing.normalize(dataSet); 98 | 99 | let testBinarizer = preprocessing.binarizer(dataSet); 100 | 101 | // ... 102 | ``` 103 | 104 | **graph tools:** 105 | 106 | Decision Tree: 107 | ```js 108 | charts.drawDT(dt.getTree(),{ 109 | width:600, 110 | height:400 111 | }); 112 | ``` 113 | ![/assets/DT.png](/assets/DT.png) 114 | 115 | **logistic regression** 116 | ```js 117 | charts.drawLogistic(dataSet,labels,weights); 118 | ``` 119 | 120 | 121 | 122 | ## Contribute 123 | 124 | The original purpose of this project is to learn, and now I need more people to participate in this project, and any issue and good advice is welcome. 125 | ### git clone 126 | ``` 127 | git clone https://github.com/laoqiren/mlhelper.git 128 | ``` 129 | ### install dependencies&&devdependecies 130 | ``` 131 | npm install 132 | ``` 133 | 134 | ### development 135 | ``` 136 | npm run dev 137 | ``` 138 | 139 | ### test 140 | ``` 141 | npm run test 142 | ``` 143 | 144 | ### build 145 | ``` 146 | npm run build 147 | ``` 148 | ## LICENSE 149 | MIT. 150 | 151 | *You can use the project for any purpose, except for illegal activities.* 152 | -------------------------------------------------------------------------------- /assets/DT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laoqiren/mlhelper/381bb85f866a1da2cbeda0d59910584e9a68e6c5/assets/DT.png -------------------------------------------------------------------------------- /assets/knngraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laoqiren/mlhelper/381bb85f866a1da2cbeda0d59910584e9a68e6c5/assets/knngraph.png -------------------------------------------------------------------------------- /assets/logistic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laoqiren/mlhelper/381bb85f866a1da2cbeda0d59910584e9a68e6c5/assets/logistic.png -------------------------------------------------------------------------------- /assets/logistic2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laoqiren/mlhelper/381bb85f866a1da2cbeda0d59910584e9a68e6c5/assets/logistic2.png -------------------------------------------------------------------------------- /assets/test.csv: -------------------------------------------------------------------------------- 1 | ,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol 2 | 0,6.4,0.32,0.27,4.9,0.034,18.0,122.0,0.9916,3.36,0.71,12.5 3 | 1,7.1,0.18,0.39,14.5,0.051,48.0,156.0,0.99947,3.35,0.78,9.1 4 | 2,7.1,0.17,0.4,14.55,0.047,47.0,156.0,0.99945,3.34,0.78,9.1 5 | 3,7.1,0.18,0.39,15.25,0.047,45.0,158.0,0.99946,3.34,0.77,9.1 6 | 4,7.8,0.29,0.29,3.15,0.044,41.0,117.0,0.99153,3.24,0.35,11.5 7 | 5,6.2,0.255,0.27,1.3,0.037,30.0,86.0,0.98834,3.05,0.59,12.9 8 | 6,8.2,0.34,0.29,5.2,0.076,19.0,92.0,0.99138,2.95,0.39,12.5 9 | 7,6.5,0.24,0.28,1.1,0.034,26.0,83.0,0.98928,3.25,0.33,12.3 10 | 8,6.9,0.24,0.23,7.1,0.041,20.0,97.0,0.99246,3.1,0.85,11.4 11 | 9,6.7,0.4,0.22,8.8,0.052,24.0,113.0,0.99576,3.22,0.45,9.4 12 | 10,6.7,0.3,0.44,18.5,0.057,65.0,224.0,0.99956,3.11,0.53,9.1 13 | 11,6.7,0.4,0.22,8.8,0.052,24.0,113.0,0.99576,3.22,0.45,9.4 14 | 12,6.8,0.17,0.32,1.4,0.04,35.0,106.0,0.99026,3.16,0.66,12.0 15 | 13,7.1,0.25,0.28,1.2,0.04,31.0,111.0,0.99174,3.18,0.53,11.1 16 | 
14,5.9,0.27,0.27,5.0,0.035,14.0,97.0,0.99058,3.1,0.33,11.8 17 | 15,6.0,0.16,0.22,1.6,0.042,36.0,106.0,0.9905,3.24,0.32,11.4 18 | 16,6.7,0.3,0.44,18.75,0.057,65.0,224.0,0.99956,3.11,0.53,9.1 19 | 17,6.6,0.15,0.32,6.0,0.033,59.0,128.0,0.99192,3.19,0.71,12.1 20 | 18,7.3,0.34,0.3,9.4,0.057,34.0,178.0,0.99554,3.15,0.44,10.4 21 | 19,6.0,0.17,0.29,9.7,0.044,33.0,98.0,0.99536,3.12,0.36,9.2 22 | 20,6.7,0.47,0.29,4.75,0.034,29.0,134.0,0.99056,3.29,0.46,13.0 23 | 21,6.6,0.15,0.32,6.0,0.033,59.0,128.0,0.99192,3.19,0.71,12.1 -------------------------------------------------------------------------------- /assets/testSet.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 
5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor -------------------------------------------------------------------------------- /assets/train.csv: -------------------------------------------------------------------------------- 1 | ,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality 2 | 0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6 3 | 1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6 4 | 2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6 5 | 3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6 6 | 4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6 7 | 5,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6 8 | 6,6.2,0.32,0.16,7.0,0.045,30.0,136.0,0.9949,3.18,0.47,9.6,6 9 | 7,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6 10 | 8,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6 11 | 9,8.1,0.22,0.43,1.5,0.044,28.0,129.0,0.9938,3.22,0.45,11.0,6 12 | 10,8.1,0.27,0.41,1.45,0.033,11.0,63.0,0.9908,2.99,0.56,12.0,5 13 | 11,8.6,0.23,0.4,4.2,0.035,17.0,109.0,0.9947,3.14,0.53,9.7,5 14 | 12,7.9,0.18,0.37,1.2,0.04,16.0,75.0,0.992,3.18,0.63,10.8,5 15 | 13,6.6,0.16,0.4,1.5,0.044,48.0,143.0,0.9912,3.54,0.52,12.4,7 16 | 14,8.3,0.42,0.62,19.25,0.04,41.0,172.0,1.0002,2.98,0.67,9.7,5 17 | 15,6.6,0.17,0.38,1.5,0.032,28.0,112.0,0.9914,3.25,0.55,11.4,7 18 | 16,6.3,0.48,0.04,1.1,0.046,30.0,99.0,0.9928,3.24,0.36,9.6,6 19 | 17,6.2,0.66,0.48,1.2,0.029,29.0,75.0,0.9892,3.33,0.39,12.8,8 20 | 18,7.4,0.34,0.42,1.1,0.033,17.0,171.0,0.9917,3.12,0.53,11.3,6 21 | 19,6.5,0.31,0.14,7.5,0.044,34.0,133.0,0.9955,3.22,0.5,9.5,5 22 | 20,6.2,0.66,0.48,1.2,0.029,29.0,75.0,0.9892,3.33,0.39,12.8,8 23 | 21,6.4,0.31,0.38,2.9,0.038,19.0,102.0,0.9912,3.17,0.35,11.0,7 24 | 22,6.8,0.26,0.42,1.7,0.049,41.0,122.0,0.993,3.47,0.48,10.5,8 25 | 23,7.6,0.67,0.14,1.5,0.074,25.0,168.0,0.9937,3.05,0.51,9.3,5 26 | 24,6.6,0.27,0.41,1.3,0.052,16.0,142.0,0.9951,3.42,0.47,10.0,6 27 | 25,7.0,0.25,0.32,9.0,0.046,56.0,245.0,0.9955,3.25,0.5,10.4,6 28 | 26,6.9,0.24,0.35,1.0,0.052,35.0,146.0,0.993,3.45,0.44,10.0,6 29 | 27,7.0,0.28,0.39,8.7,0.051,32.0,141.0,0.9961,3.38,0.53,10.5,6 30 | 28,7.4,0.27,0.48,1.1,0.047,17.0,132.0,0.9914,3.19,0.49,11.6,6 31 | 29,7.2,0.32,0.36,2.0,0.033,37.0,114.0,0.9906,3.1,0.71,12.3,7 32 | 30,8.5,0.24,0.39,10.4,0.044,20.0,142.0,0.9974,3.2,0.53,10.0,6 33 | 31,8.3,0.14,0.34,1.1,0.042,7.0,47.0,0.9934,3.47,0.4,10.2,6 34 | 32,7.4,0.25,0.36,2.05,0.05,31.0,100.0,0.992,3.19,0.44,10.8,6 -------------------------------------------------------------------------------- /docs/Matrix.md: -------------------------------------------------------------------------------- 1 | # Matrix utils 2 | 3 | ## constructor(arr: Array>) 4 | *arr:* The Original 2D array data. 5 | 6 | ## toArray(): Array> 7 | Get the original array. 
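For example, a minimal sketch of building a matrix and reading the underlying array back (it assumes the `require('mlhelper').utils.Matrix` entry point used in the larger example at the bottom of this page):

```js
const Matrix = require('mlhelper').utils.Matrix;

let m = new Matrix([
    [2,4,6],
    [5,7,1]
]);

console.log(m.toArray()); // [ [ 2, 4, 6 ], [ 5, 7, 1 ] ]
```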
8 | 9 | ## size(): [number,number] 10 | Get the size of Matrix, including rows and columns. 11 | 12 | ## sum(axis=1 as number): Array 13 | The sum of data in the same row/column 14 | 15 | *axis:* If axis is set to 1, the sum of all the data of the same row is calculated, otherwise the column is computed. Default to 1. 16 | 17 | ## min(axis=0 as number): Array\ 18 | The minimum value of data in the same row/column. 19 | 20 | *axis:* If axis is set to 1, the minimum value of all data in the same row is calculated, otherwise the column is computed. default to 0. 21 | 22 | ## max(axis=0 as number): Array\ 23 | The maximum value of data in the same row/column. 24 | 25 | *axis:* If axis is set to 1, the maximum value of all data in the same row is calculated, otherwise the column is computed. default to 0. 26 | 27 | ## static mean(arr: Array>, axis=0 as number): Array\ 28 | The average value of the same row/column of data 29 | 30 | *arr:* dataset to calculate. 31 | *axis:* If axis is set to 1, the average value of all the data in the same row is calculated, otherwise the column is computed 32 | 33 | ## transpose(): Array> 34 | Transpose the Matrix. 35 | 36 | ## static zeros(r: number,c?: number): Array>|Array\ 37 | 38 | If there are two parameters, the zero matrix of the specified size is returned. If there is only one parameter, the one dimensional array is returned 39 | 40 | ## static ones(m: number,n?: number): Array>|Array\ 41 | 42 | Be similar with `zeros()`. 43 | ## sub(toSub: Matrix): Matrix 44 | Matrix subtraction. 45 | 46 | *toSub:* Matrix to sub with. 47 | 48 | 49 | ## add(toAdd: Matrix): Matrix 50 | Matrix addition. 51 | 52 | *toAdd:* Matrix to add with. 53 | 54 | ## mult(toMult: Matrix): Matrix 55 | Matrix multiplication. 56 | 57 | *toMult:* Matrix to multiply with. 58 | 59 | ## divide(toDivide: Matrix): Matrix 60 | Matrix Division. 61 | 62 | *toDivide:* Matrix to divide with. 63 | 64 | 65 | ```js 66 | const Matrix = require('mlhelper').utils.Matrix; 67 | 68 | const dataSet = [ 69 | [2,4,6], 70 | [5,7,1], 71 | [3,3,1] 72 | ]; 73 | const dataSet2 = [ 74 | [1,3,5], 75 | [2,4,7], 76 | [3,5,8] 77 | ]; 78 | let matA = new Matrix(dataSet), 79 | matB = new Matrix(dataSet2); 80 | 81 | 82 | let result = matA.add(matB); 83 | expect(result.toArray()).to.eql([ 84 | [3,7,11], 85 | [7,11,8], 86 | [6,8,9] 87 | ]); // true; 88 | 89 | expect(matA.max(1)).to.eql([6,7,3]); //true 90 | expect(matA.max(0)).to.eql([5,7,6]); //true 91 | 92 | expect(matA.transpose()).to.eql([[2,5,3],[4,7,3],[6,1,1]]); //true 93 | 94 | expect(Matrix.ones(2,2)).to.eql([[1,1],[1,1]]); //true 95 | expect(Matrix.ones(2,2)).to.eql([[1,1],[1,1]]); //true 96 | ``` -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # mlhelper documentation 2 | 3 | * [algorithm](algorithm.md) 4 | * [Matrix](Matrix.md) 5 | * [Vector](Vector.md) 6 | * [file Parser](fileParser.md) 7 | * [graph tools](graph.md) 8 | * [feature Engineering](features.md) 9 | 10 | *中文文档敬请期待!也欢迎参与文档翻译。* -------------------------------------------------------------------------------- /docs/Vector.md: -------------------------------------------------------------------------------- 1 | # Vector utils 2 | 3 | ## constructor(arr: Array\) 4 | *arr:* One-dimensional array. 5 | 6 | ## argSort(): Array\ 7 | The sorted index of array. 
8 | 9 | ## static sign(arr: number|Array\): number|Array\ 10 | 11 | For each element, when its value equals to 0 returns 0, else if it's larger than 0 returns 1 else returns -1. If arr is a number, just return a result number. 12 | 13 | ## static rand(m: number) 14 | Create specific number of random numbers between 0 and 1. 15 | 16 | ```js 17 | const Vector = require('mlhelper').utils.Vector; 18 | 19 | const arr = [4,7,1,8,2]; 20 | const vect = new Vector(arr); 21 | 22 | expect(vect.argSort()).to.eql([2,4,0,1,3]); //true 23 | expect(Vector.sign([-2,2,0,4])).to.eql([-1,1,0,1]); //true 24 | expect(Vector.sign(-6)).to.eql(-1); // true 25 | ``` -------------------------------------------------------------------------------- /docs/algorithm.md: -------------------------------------------------------------------------------- 1 | # Algorithms 2 | 3 | ## kNN (k-nearest neighbors algorithm) 4 | 5 | wiki: [https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) 6 | ### constructor(dataSet: Array>,labels: Array\) 7 | 8 | *dataSet:* The two dimensional array of data sets with known classifications. 9 | 10 | *labels:* Classification vector of dataset. 11 | 12 | ### classify(inx: Array\,k: number): any 13 | Classification based on feature vectors. 14 | 15 | *inx:* data for testing. 16 | 17 | *k:* Make decisions based on K nearest neighbors. 18 | 19 | ```js 20 | const kNN = require('mlhelper').algorithm.kNN; 21 | 22 | let knn = new kNN([ 23 | [1.,1.1], 24 | [1.,1.], 25 | [0.,0.], 26 | [0.,0.1] 27 | ],['A','A','B','C']); 28 | 29 | let result = knn.classify([1.1,0.8],4); 30 | 31 | console.log(result) // 'A' 32 | ``` 33 | ### autoNormalVector(inx: Array\): Array\ 34 | Normalize the test data vectors so that each feature is concentrated between 0 and 1. 35 | 36 | ### static autoNormal(dataSet: Array>): Array> 37 | Normalize the dataset matrix so that each feature is concentrated between 0 and 1. 38 | ## DT(ID3) Decision tree 39 | wiki: [https://en.wikipedia.org/wiki/Decision_tree](https://en.wikipedia.org/wiki/Decision_tree) 40 | ### constructor(dataSet: Array>,labels: Array\,alg: string="ID3") 41 | 42 | *dataSet:* The two dimensional array of data sets with known classifications(every row including the class). 43 | 44 | *labels:* Classification vector of dataset. 45 | 46 | *alg:* Algorithm to create decision tree. Default is ID3. By now, only ID3 is supported. 47 | 48 | ### classify(featLabels: Array\,testVec: Array\): any 49 | *featLabels:* vector of feature names. 50 | 51 | *testVec:* vector of test data. 52 | 53 | ### getTree(): object 54 | 55 | return the created decision tree. 56 | 57 | ```js 58 | const DT = require('mlhelper').algorithm.DT; 59 | 60 | let dataSet = parser.parseFile(path.join(__dirname,'./dt.txt')); 61 | 62 | let labels = ['age','prescript','astigmatic','tearRate'] 63 | let dt = new DT(dataSet,labels); 64 | 65 | let result = dt.classify(labels,["young","myope","no","reduced"]) //no lenses 66 | 67 | console.log(dt.getTree()); // { tearRate: { reduced: 'no lenses', normal: { astigmatic: [Object] } } } 68 | ``` 69 | 70 | ### storeTree(filePath: string): Promise 71 | 72 | store the decision tree to file and returns Promise object. 73 | 74 | ### static classifyFromTree(inputTree: object,featLabels: Array\,testVec: Array\): any 75 | 76 | Classify the data according to the existing decision tree. The meaning of the parameter refers to the above explanation. 
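A minimal sketch of how these two methods work together, reusing the `dt` instance and `labels` from the example above; it assumes Node's `fs` and `path` modules are imported, and `./tree.json` is only an illustrative path:

```js
// persist the trained tree, then classify later without rebuilding it
dt.storeTree(path.join(__dirname,'./tree.json'))
    .then(() => {
        let inputTree = JSON.parse(fs.readFileSync(path.join(__dirname,'./tree.json')).toString());
        let result = DT.classifyFromTree(inputTree,labels,["young","myope","no","reduced"]);
        console.log(result); // no lenses
    });
```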
77 | 78 | ## Logistic regression 79 | wiki: [https://en.wikipedia.org/wiki/Logistic_regression](https://en.wikipedia.org/wiki/Logistic_regression) 80 | 81 | ### constructor(dataMatIn: Array\<Array\<number\>\>,classLabels: Array\<number\>,numIter: number) 82 | *dataMatIn:* matrix-like dataset for training. 83 | 84 | *classLabels:* the classes of the training data. 85 | 86 | *numIter:* maximum number of iterations. 87 | 88 | ### classify(inX: Array\<number\>): number 89 | Classify the test data. 90 | 91 | ### getWeights(): Array\<number\> 92 | Uses the random gradient ascent method to compute the optimal regression coefficient for each feature. 93 | 94 | ```js 95 | let logi = new Logistic(dataSet,labels,150); 96 | let result = logi.classify(dataSet[i]); 97 | let weights = logi.getWeights(); 98 | console.log(weights); //[ 2.9301940437635965, -5.7803993740016555, 9.834929045066424 ] 99 | ``` 100 | 101 | ## AdaBoost 102 | wiki: [https://en.wikipedia.org/wiki/AdaBoost](https://en.wikipedia.org/wiki/AdaBoost) 103 | ### constructor(dataSet: Array\<Array\<number\>\>,labels: Array\<number\>,numInt=40 as number) 104 | 105 | *dataSet:* matrix-like data for training. 106 | 107 | *labels:* vector of the training data's classes. 108 | 109 | *numInt:* maximum number of iterations permitted; default is 40. 110 | 111 | ### classify(inx: Array\<Array\<number\>\>): Array\<number\> 112 | *inx:* matrix-like data for testing. 113 | 114 | ```js 115 | const AdaBoost = require('mlhelper').algorithm.AdaBoost; 116 | const dataSet = [ 117 | [1.0,2.1], 118 | [2.0,1.1], 119 | [1.3,1.0], 120 | [1.0,1.0], 121 | [2.0,1.0] 122 | ] 123 | const labels = [1.0,1.0,-1.0,-1.0,1.0]; 124 | 125 | let ada = new AdaBoost(dataSet,labels,40); 126 | 127 | let result = ada.classify([[1.0,2.1], 128 | [2.0,1.1], 129 | [1.3,1.0], 130 | [1.0,1.0], 131 | [2.0,1.0]]); 132 | 133 | console.log(result); //[ 1, 1, -1, -1, -1 ] 134 | ``` 135 | 136 | ## k-means clustering 137 | wiki: [https://en.wikipedia.org/wiki/K-means_clustering](https://en.wikipedia.org/wiki/K-means_clustering) 138 | ### constructor(dataSet: Array\<Array\<number\>\>,k: number) 139 | *dataSet:* matrix-like dataset to cluster. 140 | 141 | *k:* how many centroids. 142 | 143 | ### cluster(max=50 as number): [Array\<Array\<number\>\>,Array\<Array\<number\>\>] 144 | 145 | *max:* maximum number of iterations permitted; default is 50. 146 | 147 | *return:* returns an array `[centroids,clusterAssment]`: `centroids` is the coordinate matrix of all cluster centers, and `clusterAssment` is an array of `[centroidsIndex,minDist**2]` entries, where `centroidsIndex` is the index of the center to which the point belongs and `minDist` is the distance between the point and its center. 148 | 149 | ```js 150 | const kMeans = require('mlhelper').algorithm.kMeans; 151 | let kmeans = new kMeans(dataSet,5); 152 | 153 | let result = kmeans.cluster(40); 154 | console.log(util.inspect(result)) 155 | ``` 156 | 157 | ## TODO -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | # Feature engineering -------------------------------------------------------------------------------- /docs/fileParser.md: -------------------------------------------------------------------------------- 1 | # File Parser utils 2 | 3 | ## parseFile(filePath: string,options: object): Array\<Array\<any\>\> 4 | Parses simple delimited files. 5 | 6 | ### filePath: 7 | the absolute file path to read. 8 | ### options: \<object\> 9 | * options.toNumber \<boolean\> whether to convert the data to Number. Defaults to false. 10 | * options.delmiter \<string\> the field delimiter. Defaults to '\t'. 
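As a quick illustration, here is a minimal sketch of loading a tab-delimited text file; it mirrors the `parseFile` calls used in the library's own tests, and the file path is only an example:

```js
const parser = require('mlhelper/lib/utils').fileParser;
const path = require('path');

// every line becomes an array; with toNumber set, the fields are parsed as numbers
let dataSet = parser.parseFile(path.join(__dirname,'./testSet.txt'),{
    toNumber: true
});
```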
11 | 12 | ## read_csv (filePath: string,options): CSV 13 | 14 | Deals with more complex CSV files. 15 | 16 | ### filePath: 17 | the absolute file path to read. 18 | ### options: \<object\> 19 | * index_col \<boolean|number\> when set to true, the first column of the data is treated as the index column. Defaults to false. 20 | * delimiter \<string\> the delimiter for every line. Defaults to ','. 21 | * header \<Array\<string\>|number\> can be a vector of custom header names or the index of the header line. Defaults to 0. 22 | * dataType \<string\> the type of the data. Defaults to 'number'. 23 | * classType \<string\> the type of the last column of each line. Defaults to 'number'. 24 | 25 | ### return: 26 | an instance of CSV. 27 | 28 | ## write_csv (filePath: string,data: Array\<Array\<any\>\>,options): void 29 | 30 | ### filePath: 31 | absolute path of the file to write. 32 | 33 | ### data: 34 | matrix-like data to write. 35 | ### options \<object\> 36 | * options.index: \<boolean\> if set to true, an index column is added to each line. Defaults to false. 37 | * header: \<Array\<string\>\> custom header to add as the first line. Defaults to []. 38 | 39 | ## class CSV 40 | 41 | ### getHeader(): Array\<string\> 42 | Get the header line of the dataset. 43 | 44 | ### drop(label: string | number): CSV 45 | 46 | *label:* delete the column at the given index or with the given label. 47 | 48 | ### getClasses(): Array\<any\> 49 | Get the last column of every line. 50 | 51 | ```js 52 | const parser = require('mlhelper').utils.fileParser; 53 | 54 | let dt = parser.read_csv(path.join(__dirname,'../../../assets/train.csv'),{ 55 | index_col: 0, 56 | delimiter: ',', 57 | header: 0, 58 | dataType: 'number' 59 | }); 60 | 61 | let labels = dt.getClasses(); 62 | 63 | let dataSet = dt.drop('quality').values; 64 | 65 | // ... 66 | parser.write_csv(path.join(__dirname,'./result.csv'),resultSet,{ 67 | header: ['ID','quality'] 68 | }); 69 | ``` -------------------------------------------------------------------------------- /docs/graph.md: -------------------------------------------------------------------------------- 1 | # Graph utils 2 | Provides visualization capabilities for the various algorithms; it automatically opens your browser and draws the graphic. 3 | 4 | ## drawkNN(dataSet: Array\<Array\<number\>\>,labels: Array\<any\>,inx: Array\<number\>,options: object) 5 | 6 | Plots the scatter diagram for the kNN algorithm. 7 | *dataSet:* matrix of data for training. 8 | 9 | *labels:* vector of the training data's classes. 10 | 11 | *inx:* vector of data to test. 12 | 13 | ### options 14 | 15 | * options.width: \<string\> the width of the graph. Defaults to "600px". 16 | * options.height: \<string\> the height of the graph. Defaults to "400px". 17 | * options.size: \<number\> the size of every point. Defaults to 20. 18 | 19 | ```js 20 | const charts = require('mlhelper').utils.charts; 21 | //... 22 | let inx = [7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8], 23 | normalInx = knn.autoNormalVector(inx); 24 | 25 | console.log(knn.classify(inx,100)); // 6 26 | charts.drawkNN(kNN.autoNormal(dataSet),labels,normalInx,{ 27 | width: "500px", 28 | height: "400px", 29 | size: 15 30 | }); 31 | ``` 32 | ![/assets/knngraph.png](/assets/knngraph.png) 33 | 34 | ## drawDT(tree: object,options: object) 35 | Decision tree visualization. 36 | ### tree 37 | the decision tree object. 38 | 39 | ### options 40 | * options.width: \<number\> the width of the graph. 41 | * options.height: \<number\> the height of the graph. options.size: the size of every point. 42 | 43 | ```js 44 | const charts = require('mlhelper').utils.charts; 45 | // ... 
46 | charts.drawDT(dt.getTree(),{ 47 | width:600, 48 | height:400 49 | }); 50 | ``` 51 | ![/assets/DT.png](/assets/DT.png) 52 | 53 | ## drawLogistic(dataSet: Array>,labels: Array\,weights: Array\,options: object) 54 | Visualization of logistic regression algorithm 55 | 56 | ### dataSet 57 | The matrix like dataset for training. 58 | 59 | ### labels: 60 | the classes for training dataset. 61 | 62 | ### weights: 63 | Random gradient ascent method for optimal regression coefficients of each feature 64 | 65 | ### options: 66 | * options.width: \ the width of the graph. default to "600px". 67 | * options.height: \ the height of the graph. default to "400px". 68 | * options.size: \ the size of every point. deault to 20. 69 | 70 | ```js 71 | const charts = require('mlhelper').utils.charts; 72 | // ... 73 | let weights = logi.getWeights() 74 | console.log(weights); 75 | 76 | charts.drawLogistic(dataSet,labels,weights) 77 | ``` 78 | 79 | -------------------------------------------------------------------------------- /gulpfile.js: -------------------------------------------------------------------------------- 1 | const gulp = require('gulp'); 2 | const ts = require('gulp-typescript'); 3 | const tsProject = ts.createProject('tsconfig.json'); 4 | const sourcemaps = require('gulp-sourcemaps'); 5 | const babel = require('gulp-babel'); 6 | 7 | const PATHS = { 8 | scripts: ['./src/**/*.ts'], 9 | output: './lib' 10 | } 11 | 12 | gulp.task('copyFiles',()=>{ 13 | return gulp.src([ 14 | 'src/**/*', 15 | '!src/**/*.ts' 16 | ]).pipe(gulp.dest(PATHS.output)); 17 | }); 18 | 19 | gulp.task('watch-ts',['build-ts'],()=>{ 20 | gulp.watch(PATHS.scripts,['build-ts']); 21 | }); 22 | 23 | gulp.task('build-ts',()=>{ 24 | return gulp.src(PATHS.scripts) 25 | .pipe(sourcemaps.init()) 26 | .pipe(tsProject()) 27 | .js 28 | .pipe(babel({ 29 | presets: ['env'] 30 | })) 31 | .pipe(sourcemaps.write('./maps')) 32 | .pipe(gulp.dest(PATHS.output)); 33 | }); 34 | 35 | gulp.task('default',['watch-ts']); -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mlhelper", 3 | "version": "0.2.0", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "@types/node": { 8 | "version": "8.0.46", 9 | "resolved": "https://registry.npmjs.org/@types/node/-/node-8.0.46.tgz", 10 | "integrity": "sha512-rRkP4kb5JYIfAoRKaDbcdPZBcTNOgzSApyzhPN9e6rhViSJAWQGlSXIX5gc75iR02jikhpzy3usu31wMHllfFw==", 11 | "dev": true 12 | }, 13 | "mathjs": { 14 | "version": "http://registry.npm.taobao.org/mathjs/download/mathjs-3.16.4.tgz", 15 | "requires": { 16 | "complex.js": "http://registry.npm.taobao.org/complex.js/download/complex.js-2.0.4.tgz", 17 | "decimal.js": "http://registry.npm.taobao.org/decimal.js/download/decimal.js-7.2.3.tgz", 18 | "fraction.js": "http://registry.npm.taobao.org/fraction.js/download/fraction.js-4.0.2.tgz", 19 | "javascript-natural-sort": "http://registry.npm.taobao.org/javascript-natural-sort/download/javascript-natural-sort-0.7.1.tgz", 20 | "seed-random": "http://registry.npm.taobao.org/seed-random/download/seed-random-2.2.0.tgz", 21 | "tiny-emitter": "http://registry.npm.taobao.org/tiny-emitter/download/tiny-emitter-2.0.0.tgz", 22 | "typed-function": "http://registry.npm.taobao.org/typed-function/download/typed-function-0.10.5.tgz" 23 | }, 24 | "dependencies": { 25 | "complex.js": { 26 | "version": "http://registry.npm.taobao.org/complex.js/download/complex.js-2.0.4.tgz", 27 | 
"bundled": true 28 | }, 29 | "decimal.js": { 30 | "version": "http://registry.npm.taobao.org/decimal.js/download/decimal.js-7.2.3.tgz", 31 | "bundled": true 32 | }, 33 | "fraction.js": { 34 | "version": "http://registry.npm.taobao.org/fraction.js/download/fraction.js-4.0.2.tgz", 35 | "bundled": true 36 | }, 37 | "javascript-natural-sort": { 38 | "version": "http://registry.npm.taobao.org/javascript-natural-sort/download/javascript-natural-sort-0.7.1.tgz", 39 | "bundled": true 40 | }, 41 | "seed-random": { 42 | "version": "http://registry.npm.taobao.org/seed-random/download/seed-random-2.2.0.tgz", 43 | "bundled": true 44 | }, 45 | "tiny-emitter": { 46 | "version": "http://registry.npm.taobao.org/tiny-emitter/download/tiny-emitter-2.0.0.tgz", 47 | "bundled": true 48 | }, 49 | "typed-function": { 50 | "version": "http://registry.npm.taobao.org/typed-function/download/typed-function-0.10.5.tgz", 51 | "bundled": true 52 | } 53 | } 54 | }, 55 | "sylvester": { 56 | "version": "0.0.21", 57 | "resolved": "https://registry.npmjs.org/sylvester/-/sylvester-0.0.21.tgz", 58 | "integrity": "sha1-KYexzivS84sNzio0OIiEv6RADqc=" 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mlhelper", 3 | "version": "0.2.0", 4 | "description": "tools for ML in JavaScript", 5 | "main": "lib/index.js", 6 | "scripts": { 7 | "test": "mocha --recursive", 8 | "dev": "gulp watch-ts", 9 | "build": "gulp copyFiles && gulp build-ts", 10 | "postversion": "git push && git push --tags" 11 | }, 12 | "keywords": [ 13 | "ML", 14 | "AI" 15 | ], 16 | "author": "laoqiren", 17 | "license": "MIT", 18 | "dependencies": { 19 | "express": "^4.16.2", 20 | "immutable": "^3.8.2", 21 | "lodash": "^4.17.4", 22 | "mathjs": "^3.16.4", 23 | "swig": "^1.4.2", 24 | "sylvester": "0.0.21" 25 | }, 26 | "devDependencies": { 27 | "@types/lodash": "^4.14.78", 28 | "@types/node": "^8.0.46", 29 | "babel-core": "^6.26.0", 30 | "babel-preset-env": "^1.6.1", 31 | "chai": "^4.1.2", 32 | "gulp": "^3.9.1", 33 | "gulp-babel": "^7.0.0", 34 | "gulp-sourcemaps": "^2.6.1", 35 | "gulp-typescript": "^3.2.2", 36 | "mocha": "^4.0.1", 37 | "typescript": "^2.5.3" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/algorithm/AdaBoost/index.ts: -------------------------------------------------------------------------------- 1 | import Matrix from '../../utils/matrix/index'; 2 | import Vector from '../../utils/vector/index'; 3 | import * as _ from 'lodash'; 4 | 5 | class AdaBoost { 6 | private dataSet: Array>; 7 | private labels: Array; 8 | private numInt: number; 9 | 10 | /** 11 | * Creates an instance of AdaBoost. 12 | * @param {Array>} dataSet matirx like datas for training. 13 | * @param {Array} labels vector of the training datas' classes. 
14 | * @param {number} [numInt=40 as number] maximum permission iterative number of times, default is 40 15 | * @memberof AdaBoost 16 | */ 17 | constructor(dataSet: Array>,labels: Array,numInt=40 as number){ 18 | this.dataSet = dataSet; 19 | this.labels = labels; 20 | this.numInt = numInt; 21 | } 22 | /** 23 | * 基于单层决策树的弱分类器 weak classifier based on Single layer decision tree 24 | * 25 | * @param {Matrix} dataMatrix matrix of datas for classify 26 | * @param {number} dimen the index of features to classify 27 | * @param {number} threshVal the threshold value of the featuer to classify 28 | * @param {String} flag can be 'lt' or 'gt', when it's 'lt', when the value of the feature is lower than threshVal, the data's class will be -1, or will be 1.0. 29 | * @returns {Array} classification results. 30 | * @memberof AdaBoost 31 | */ 32 | stumpClassify(dataMatrix: Matrix,dimen: number,threshVal: number,flag: String): Array{ 33 | let m = dataMatrix.size()[0]; 34 | let retArray = Matrix.ones(m); 35 | 36 | if(flag === 'lt'){ 37 | retArray = retArray.map((v,i)=>dataMatrix.arr[i][dimen]<=threshVal?-1.0:1.0); 38 | } else { 39 | retArray = retArray.map((v,i)=>dataMatrix.arr[i][dimen]>threshVal?-1.0:1.0); 40 | } 41 | return retArray; 42 | } 43 | /** 44 | * 针对某个特征权值向量找到其对应的最佳单层决策树若分类器,及该弱分类器分类结果和错误率 According to a feature weight vector, the optimal single layer decision tree is found, the classifier, the classification result and the error rate of the weak classifier 45 | * 46 | * @param {Array} D the vector of every feature's weight. 47 | * @returns {[Object,number,Array]} the information of the best weak classifier, the error rate of the classifier and the classification results. 48 | * @memberof AdaBoost 49 | */ 50 | buildStump(D: Array): [Object,number,Array]{ 51 | let dataSetMat = new Matrix(this.dataSet); 52 | let labels = this.labels; 53 | let [m,n] = dataSetMat.size(); 54 | let numSteps = 10.0, 55 | bestStump = {}, //用于存放最佳单层决策树 56 | bestClassEst = Matrix.ones(m), // 该弱分类器最低错误率时的预测分类向量 57 | minError = Infinity; // 初始最新错误率为无穷大 58 | let that = this; 59 | 60 | for(let i=0; ipredictedVals[i]===labels[i]?0:1); 72 | 73 | let weightedError = _.sum(_.zipWith(errArr,D,(a,b)=>a*b)); // 各个特征的错误加权和 74 | 75 | if(weightedError} array of all the weak classifier. 95 | * @memberof AdaBoost 96 | */ 97 | adaBoostTrainDS(): Array{ 98 | let numInt = this.numInt, // 最大迭代次数 99 | dataMatrix = new Matrix(this.dataSet), 100 | labels = this.labels, 101 | weakClassArr = []; //弱分类器数组 102 | let m = dataMatrix.size()[0], 103 | D = Matrix.ones(m).map(v=>v/m); //初始的特征权值向量 104 | 105 | let aggClassEst = > Matrix.zeros(m); 106 | 107 | for(let i=0; ia*b).map(v=>-1*alpha*v); 116 | D = _.zipWith(D,expon,(a,b)=>a*Math.exp(b)); 117 | let Dsum = _.sum(D); 118 | D = D.map(v=>v/Dsum); 119 | 120 | // 计算已有弱分类器组合后的错误率 121 | let alphaClassEst = classEst.map(v=>v*alpha); 122 | aggClassEst = _.zipWith(alphaClassEst,aggClassEst,(a,b)=>a+b); 123 | let realAggClassEst = > Vector.sign(aggClassEst); 124 | let aggErrors = _.zipWith(_.zipWith(realAggClassEst,labels,(a,b)=>a===b?0:1),Matrix.ones(m),(a,b)=>a*b); 125 | let errorRate = _.sum(aggErrors)/m; 126 | 127 | if(errorRate === 0.0) { 128 | break; 129 | } 130 | } 131 | return weakClassArr; 132 | } 133 | /** 134 | * 输入测试数据矩阵,根据所有弱分类器组合学习,得到最终结果 compose all weak classifier to get a strong classifier. 135 | * 136 | * @param {Array>} inx matrix of datas for testing. 137 | * @returns {Array} vector of classification results. 
138 | * @memberof AdaBoost 139 | */ 140 | classify(inx: Array>): Array{ 141 | let dataMatrix = new Matrix(inx); 142 | let m = dataMatrix.size()[0]; 143 | let aggClassEst = > Matrix.zeros(m); 144 | 145 | // 组合所有弱分类器 146 | let classifierArr = this.adaBoostTrainDS(); 147 | classifierArr.forEach((v,i)=>{ 148 | let classEst = this.stumpClassify(dataMatrix,v['dim'],v['thresh'],v['ineq']); 149 | aggClassEst = _.zipWith(classEst.map(value=>value*v['alpha']),aggClassEst,(a,b)=>a+b); 150 | }); 151 | 152 | return > Vector.sign(aggClassEst); 153 | } 154 | } 155 | 156 | export default AdaBoost; -------------------------------------------------------------------------------- /src/algorithm/AdaBoost/test.ts: -------------------------------------------------------------------------------- 1 | import AdaBoost from './index'; 2 | 3 | const dataSet = [ 4 | [1.0,2.1], 5 | [2.0,1.1], 6 | [1.3,1.0], 7 | [1.0,1.0], 8 | [2.0,1.0] 9 | ] 10 | const labels = [1.0,1.0,-1.0,-1.0,1.0]; 11 | 12 | let ada = new AdaBoost(dataSet,labels,40); 13 | 14 | let result = ada.classify([[1.0,2.1], 15 | [2.0,1.1], 16 | [1.3,1.0], 17 | [1.0,1.0], 18 | [2.0,1.0]]); 19 | 20 | console.log(result); -------------------------------------------------------------------------------- /src/algorithm/DT/dt.txt: -------------------------------------------------------------------------------- 1 | young myope no reduced no lenses 2 | young myope no normal soft 3 | young myope yes reduced no lenses 4 | young myope yes normal hard 5 | young hyper no reduced no lenses 6 | young hyper no normal soft 7 | young hyper yes reduced no lenses 8 | young hyper yes normal hard 9 | pre myope no reduced no lenses 10 | pre myope no normal soft 11 | pre myope yes reduced no lenses 12 | pre myope yes normal hard 13 | pre hyper no reduced no lenses 14 | pre hyper no normal soft 15 | pre hyper yes reduced no lenses 16 | pre hyper yes normal no lenses 17 | presbyopic myope no reduced no lenses 18 | presbyopic myope no normal no lenses 19 | presbyopic myope yes reduced no lenses 20 | presbyopic myope yes normal hard 21 | presbyopic hyper no reduced no lenses 22 | presbyopic hyper no normal soft 23 | presbyopic hyper yes reduced no lenses 24 | presbyopic hyper yes normal no lenses -------------------------------------------------------------------------------- /src/algorithm/DT/index.ts: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | import * as fs from 'fs'; 3 | import {Repeat,List} from 'immutable'; 4 | 5 | interface ClassCount { 6 | [index: string]: number; 7 | } 8 | 9 | /** 10 | * 计算香农熵 Calculating Shannon entropy 11 | * 12 | * @param {Array>} dataSet 13 | * @returns {number} 14 | */ 15 | function calShannoEnt(dataSet: Array>): number{ 16 | let numEntries = dataSet.length; 17 | let labelCounts:ClassCount = {}; 18 | 19 | dataSet.forEach(v=>{ 20 | let label = v[v.length-1]; 21 | if(label in labelCounts){ 22 | return labelCounts[label] += 1; 23 | } 24 | 25 | labelCounts[label] = 1; 26 | }); 27 | let shannoEnt = 0.0; 28 | for(let label in labelCounts){ 29 | let prob = labelCounts[label]/numEntries; 30 | shannoEnt -= prob * Math.log2(prob); 31 | 32 | } 33 | return shannoEnt; 34 | } 35 | 36 | /** 37 | * 划分数据集 Partition dataset 38 | * 39 | * @param {array} dataSet 原始数据集 Raw data set 40 | * @param {number} axis 划分特征 which feature to partition 41 | * @param {any} value 特征值 the value of the feature to partition 42 | * @returns {array} 划分后的数据集 the partition result. 
43 | */ 44 | function splitDataSet(dataSet: Array>,axis: number,value: any): Array>{ 45 | let retDataSet = dataSet.reduce((pre,cur)=>{ 46 | let curList = List(cur); 47 | if(cur[axis] === value){ 48 | pre.push(curList.splice(axis,1).toArray()); 49 | } 50 | return pre; 51 | },[]); 52 | return retDataSet; 53 | } 54 | 55 | /** 56 | * 选择最好的划分特征 choose the best feature to partition. 57 | * 58 | * @param {Array>} dataSet 59 | * @returns {number} 60 | */ 61 | function chooseBestLabelToSplit(dataSet: Array>): number{ 62 | let numLables = dataSet[0].length - 1, 63 | baseEntropy = calShannoEnt(dataSet), 64 | bestInfoGain = 0.0, 65 | bestLabel = -1; 66 | 67 | for(let i=0; iv[i]), 69 | uniqueVals = [...new Set(featList)], 70 | newEntropy = 0.0; 71 | uniqueVals.forEach((v,index)=>{ 72 | let subDataSet = splitDataSet(dataSet,i,v), 73 | prob = subDataSet.length/dataSet.length; 74 | newEntropy += prob * calShannoEnt(subDataSet); 75 | }); 76 | let infoGain = baseEntropy - newEntropy; 77 | 78 | if(infoGain > bestInfoGain){ 79 | bestInfoGain = infoGain; 80 | bestLabel = i; 81 | } 82 | } 83 | 84 | return bestLabel; 85 | } 86 | 87 | /** 88 | * 多数决策,当子数据集只有一个特征,且各个实例所属分类仍旧不同时调用此方法 The majority decision, only one set of features when the data, and each instance belongs to classification is still not at the same time this method is called 89 | * 90 | * @param {Array} classList 91 | * @returns {string} 92 | */ 93 | function majorityCnt(classList: Array): string{ 94 | let classCount:ClassCount = {}; 95 | classList.forEach((v,i)=>{ 96 | if(v in classCount){ 97 | return classCount[v] += 1; 98 | } 99 | classCount[v] = 1; 100 | }) 101 | let sortedClassCount = Object.keys(classCount).sort((a,b)=>classCount[b]-classCount[a]); 102 | 103 | return sortedClassCount[0]; 104 | } 105 | 106 | /** 107 | * 构建决策树 create decision tree. 108 | * 109 | * @param {Array>} dataSet for training. 110 | * @param {Array} labels the classes of training data. 111 | * @returns {object} 112 | */ 113 | function createTree(dataSet: Array>,labels: Array): object{ 114 | let classList = dataSet.map(v=>v[v.length-1]), 115 | uniqueClasses = [...new Set(classList)].length; 116 | if(uniqueClasses === 1){ 117 | return classList[0]; 118 | } 119 | if(dataSet[0].length === 1){ 120 | return majorityCnt(classList); 121 | } 122 | let bestFeat = chooseBestLabelToSplit(dataSet), 123 | bestFeatLabel = labels[bestFeat]; 124 | let resultTree = { 125 | [bestFeatLabel]: {} 126 | } 127 | labels.splice(bestFeat,1); 128 | let featValues = dataSet.map(v=>v[bestFeat]), 129 | uniqueVals = [...new Set(featValues)]; 130 | uniqueVals.forEach(v=>{ 131 | let subLabels = [...labels], 132 | subDataSet = splitDataSet(dataSet,bestFeat,v); 133 | resultTree[bestFeatLabel][v] = createTree(subDataSet,subLabels); 134 | }) 135 | 136 | return resultTree; 137 | } 138 | 139 | /** 140 | * 判断测试数据分类 class the testing data. 141 | * 142 | * @param {object} inputTree 决策树对象 the decision tree. 143 | * @param {array} featLabels 特征名称向量 the vector of feature names. 144 | * @param {array} testVec 测试向量 the vector for testing. 
145 | * @returns 测试数据的分类 146 | */ 147 | function classify(inputTree: object,featLabels: Array,testVec: Array): any{ 148 | let firstStr = Object.keys(inputTree)[0], 149 | secondDict = inputTree[firstStr], 150 | featIndex = featLabels.indexOf(firstStr); 151 | 152 | let resultClass; 153 | for(let key of Object.keys(secondDict)){ 154 | 155 | if(testVec[featIndex] === key){ 156 | if(typeof secondDict[key] === 'object'){ 157 | resultClass = classify(secondDict[key],featLabels,testVec); 158 | } else{ 159 | resultClass = secondDict[key]; 160 | break; 161 | } 162 | } 163 | } 164 | return resultClass; 165 | } 166 | 167 | class DT { 168 | tree: object; 169 | constructor(public dataSet: Array>,public labels: Array,alg: string="ID3"){ 170 | this.tree = createTree(dataSet,[...labels]); 171 | } 172 | getTree(): object{ 173 | return this.tree; 174 | } 175 | // 根据实例构造的决策树进行测试 176 | classify(featLabels: Array,testVec: Array): any{ 177 | return classify(this.tree,featLabels,testVec); 178 | } 179 | // 将决策树存入文件 180 | storeTree(filePath: string){ 181 | let jsonTree = JSON.stringify(this.tree); 182 | return new Promise((resolve,reject)=>{ 183 | fs.writeFile(filePath,jsonTree,err=>{ 184 | if(err){ 185 | return reject(err); 186 | } 187 | resolve(); 188 | }); 189 | }) 190 | } 191 | // 根据提供的决策树进行测试,静态方法,无需实例化构造决策树 192 | static classifyFromTree(inputTree: object,featLabels: Array,testVec: Array): any{ 193 | return classify(inputTree,featLabels,testVec); 194 | } 195 | } 196 | 197 | export default DT; -------------------------------------------------------------------------------- /src/algorithm/DT/test.ts: -------------------------------------------------------------------------------- 1 | import DT from './index'; 2 | import * as parser from '../../utils/fileParser/index'; 3 | import * as path from 'path'; 4 | import * as util from 'util'; 5 | import * as charts from '../../utils/charts/index'; 6 | 7 | 8 | let dataSet = parser.parseFile(path.join(__dirname,'./dt.txt')); 9 | 10 | let labels = ['age','prescript','astigmatic','tearRate'] 11 | let dt = new DT(dataSet,labels); 12 | 13 | let result = dt.classify(labels,["young","myope","no","reduced"]) 14 | 15 | console.log(util.inspect(dt.getTree(),{depth: null})); 16 | 17 | charts.drawDT(dt.getTree(),{ 18 | width:600, 19 | height:400 20 | }); 21 | 22 | -------------------------------------------------------------------------------- /src/algorithm/index.ts: -------------------------------------------------------------------------------- 1 | import kNN from './kNN/index'; 2 | import DT from './DT/index'; 3 | import logistic from './logistic/index'; 4 | import AdaBoost from './AdaBoost/index'; 5 | import kMeans from './kMeans/index'; 6 | 7 | export { 8 | kNN, 9 | DT, 10 | logistic, 11 | AdaBoost, 12 | kMeans 13 | } -------------------------------------------------------------------------------- /src/algorithm/kMeans/index.ts: -------------------------------------------------------------------------------- 1 | import Matrix from '../../utils/matrix'; 2 | import Vector from '../../utils/vector'; 3 | import * as _ from 'lodash'; 4 | import * as util from 'util'; 5 | 6 | class kMeans { 7 | private dataSet: Matrix; 8 | private k: number; 9 | 10 | /** 11 | * Creates an instance of kMeans. 12 | * @param {Array>} dataSet 13 | * @param {number} k how many centroids. 
14 | * @memberof kMeans 15 | */ 16 | constructor(dataSet: Array>,k: number){ 17 | this.dataSet = new Matrix(dataSet); 18 | this.k = k; 19 | } 20 | /** 21 | * 随机创建K个初始质心 Random creation of K initial centroids 22 | * 23 | * @returns {Array>} 24 | * @memberof kMeans 25 | */ 26 | createCent(): Array>{ 27 | let n = this.dataSet.size()[1]; 28 | let centroids = >>Matrix.zeros(this.k,n); 29 | 30 | for(let j=0; jv*rangeJ+minJ); 36 | 37 | centroids.forEach((v,i)=>{ 38 | v[j] = randomVect[i]; 39 | }); 40 | } 41 | 42 | return centroids; 43 | } 44 | /** 45 | * 计算两点欧式距离 Calculating the Euclidean distance between two points 46 | * 47 | * @param {Array} vec1 vector 1. 48 | * @param {Array} vec2 vector 2. 49 | * @returns {number} 50 | * @memberof kMeans 51 | */ 52 | distEclud(vec1: Array,vec2: Array): number{ 53 | return Math.sqrt(_.sum(_.zipWith(vec1,vec2,(a,b)=>(a-b)**2))); 54 | } 55 | /** 56 | * 聚类函数cluster function 57 | * 58 | * @param {number} [max=50 as number] Maximum iterations 59 | * @returns {[Array>,Array>]} 60 | * @memberof kMeans 61 | */ 62 | cluster(max=50 as number): [Array>,Array>]{ 63 | let m = this.dataSet.size()[0], 64 | dataSet = this.dataSet.arr; 65 | let clusterAssment = >> Matrix.zeros(m,2), //各个实例的聚类结果,结果包含所属质心,和该实例到所属质心的距离 66 | centroids = this.createCent(), //存放各个质心向量 67 | clusterChanged = true, // 标识聚类情况发生变化,只要有一个实例的聚类发生变化,设为true 68 | k = this.k; // 质心个数 69 | let num = 0; // 收敛次数 70 | while(clusterChanged){ 71 | if(++num>max){ //超过最大收敛次数,退出循环 72 | break; 73 | } 74 | 75 | clusterChanged = false; 76 | for(let i=0; i{ //找到属于当前质心所在簇的所有实例 95 | if(v[0]===cent){ 96 | centPointsIndex.push(i); 97 | } 98 | }); 99 | if(centPointsIndex.length !== 0) { 100 | let pointsInCent = dataSet.filter((v,i)=>i in centPointsIndex); //根据位置找到实例向量 101 | centroids[cent] = Matrix.mean(pointsInCent); //更新质心向量,每个特征值为该簇所以实例该特征的平均值 102 | } 103 | } 104 | } 105 | return [centroids,clusterAssment]; 106 | } 107 | } 108 | 109 | export default kMeans; -------------------------------------------------------------------------------- /src/algorithm/kMeans/test.ts: -------------------------------------------------------------------------------- 1 | import kMeans from './index'; 2 | import * as util from 'util'; 3 | import * as path from 'path'; 4 | import * as fileParser from '../../utils/fileParser'; 5 | 6 | let dataSet = >> fileParser.parseFile(path.join(__dirname,'./testSet.txt'),{ 7 | toNumber: true 8 | }); 9 | 10 | let kmeans = new kMeans(dataSet,5); 11 | 12 | let result = kmeans.cluster(); 13 | console.log(util.inspect(result)) -------------------------------------------------------------------------------- /src/algorithm/kMeans/testSet.txt: -------------------------------------------------------------------------------- 1 | 1.658985 4.285136 2 | -3.453687 3.424321 3 | 4.838138 -1.151539 4 | -5.379713 -3.362104 5 | 0.972564 2.924086 6 | -3.567919 1.531611 7 | 0.450614 -3.302219 8 | -3.487105 -1.724432 9 | 2.668759 1.594842 10 | -3.156485 3.191137 11 | 3.165506 -3.999838 12 | -2.786837 -3.099354 13 | 4.208187 2.984927 14 | -2.123337 2.943366 15 | 0.704199 -0.479481 16 | -0.392370 -3.963704 17 | 2.831667 1.574018 18 | -0.790153 3.343144 19 | 2.943496 -3.357075 20 | -3.195883 -2.283926 21 | 2.336445 2.875106 22 | -1.786345 2.554248 23 | 2.190101 -1.906020 24 | -3.403367 -2.778288 25 | 1.778124 3.880832 26 | -1.688346 2.230267 27 | 2.592976 -2.054368 28 | -4.007257 -3.207066 29 | 2.257734 3.387564 30 | -2.679011 0.785119 31 | 0.939512 -4.023563 32 | -3.674424 -2.261084 33 | 2.046259 2.735279 34 | -3.189470 1.780269 35 | 
4.372646 -0.822248 36 | -2.579316 -3.497576 37 | 1.889034 5.190400 38 | -0.798747 2.185588 39 | 2.836520 -2.658556 40 | -3.837877 -3.253815 41 | 2.096701 3.886007 42 | -2.709034 2.923887 43 | 3.367037 -3.184789 44 | -2.121479 -4.232586 45 | 2.329546 3.179764 46 | -3.284816 3.273099 47 | 3.091414 -3.815232 48 | -3.762093 -2.432191 49 | 3.542056 2.778832 50 | -1.736822 4.241041 51 | 2.127073 -2.983680 52 | -4.323818 -3.938116 53 | 3.792121 5.135768 54 | -4.786473 3.358547 55 | 2.624081 -3.260715 56 | -4.009299 -2.978115 57 | 2.493525 1.963710 58 | -2.513661 2.642162 59 | 1.864375 -3.176309 60 | -3.171184 -3.572452 61 | 2.894220 2.489128 62 | -2.562539 2.884438 63 | 3.491078 -3.947487 64 | -2.565729 -2.012114 65 | 3.332948 3.983102 66 | -1.616805 3.573188 67 | 2.280615 -2.559444 68 | -2.651229 -3.103198 69 | 2.321395 3.154987 70 | -1.685703 2.939697 71 | 3.031012 -3.620252 72 | -4.599622 -2.185829 73 | 4.196223 1.126677 74 | -2.133863 3.093686 75 | 4.668892 -2.562705 76 | -2.793241 -2.149706 77 | 2.884105 3.043438 78 | -2.967647 2.848696 79 | 4.479332 -1.764772 80 | -4.905566 -2.911070 -------------------------------------------------------------------------------- /src/algorithm/kNN/index.ts: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | import Matrix from '../../utils/matrix/index'; 3 | import Vector from '../../utils/vector/index'; 4 | import {Repeat,List} from 'immutable'; 5 | 6 | interface ClassCount { 7 | [index: string]: number; 8 | } 9 | /** 10 | * 归一化数据 11 | * 12 | * @param {object} Matrix: dataSet 13 | * @returns {Array} [normalized data,the range of each feature,the minimum value of each feature] 14 | */ 15 | function autoNormal(dataSet: Matrix): [Array>,Array,Array]{ 16 | let minVals = dataSet.min(0); // 每个特征的最小值 17 | let maxVals = dataSet.max(0); // 每个特征的最大值 18 | let ranges = new Vector(maxVals).zipWith((a,b)=>a-b,new Vector(minVals)); // 每个特征的范围 19 | 20 | let normalDataSet = new Matrix(Matrix.zeros(...dataSet.size())); 21 | let setSize = dataSet.size()[0]; // 训练集实例数 22 | 23 | normalDataSet = dataSet.sub(new Matrix(Repeat(minVals,setSize).toArray())); //分子为每个特征原始值减去该特征最小值 24 | 25 | normalDataSet = normalDataSet.divide(new Matrix(Repeat(ranges,setSize).toArray())); // 上式得到的每个特征值除以该特征范围 26 | return [normalDataSet.arr,ranges,minVals]; 27 | } 28 | 29 | class kNN { 30 | private dataSet: Matrix; 31 | private labels: Vector; 32 | private ranges: Array; 33 | private minVals: Array; 34 | 35 | /** 36 | * Creates an instance of kNN. 37 | * @param {Array>} dataSet Matrix like datas for training. 38 | * @param {Array} labels vector like classes of each tarining data. 39 | * @memberof kNN 40 | */ 41 | constructor(dataSet: Array>,labels: Array){ 42 | let [normalDataSet,ranges,minVals] = autoNormal(new Matrix(dataSet)); 43 | this.dataSet = new Matrix(normalDataSet); 44 | this.labels = new Vector(labels); 45 | this.ranges = ranges; 46 | this.minVals = minVals; 47 | } 48 | /** 49 | * kNN算法主体 50 | * 51 | * @param {array} inx data for testing. 52 | * @param {number} K值 the K number. 
53 | * @returns {any} 54 | * @memberof kNN 55 | */ 56 | classify(inx_: Array,k: number): any{ 57 | const setSize = this.dataSet.size()[0]; 58 | if(k > setSize) { 59 | k = setSize; 60 | } 61 | 62 | //归一化测试数据 63 | let inx = this.autoNormalVector(inx_); 64 | // 求测试数据与每一个训练数据的距离 65 | let diffMat = new Matrix(Repeat(inx,setSize).toArray()).sub(this.dataSet); // 建立与训练数据同大小的矩阵,再一一对应相减 66 | 67 | let sqDiffMat = diffMat.mult(diffMat); 68 | let sqDistances = sqDiffMat.sum(1); 69 | let distances = sqDistances.map(Math.sqrt); 70 | let sortedDistanceIndicies = (new Vector(distances)).argSort(); // 与各个训练数据的距离排序的下标 71 | 72 | // 统计每个距离最近前K个值里各个分类的数量 73 | let classCount:ClassCount = {}; 74 | for(let i=0; iclassCount[b]-classCount[a]); 83 | 84 | // 返回实例最多的分类 85 | return sortedClassCount[0] 86 | } 87 | /** 88 | * normalize the vector of testing data. 89 | * 90 | * @param {Array} inx_ 91 | * @returns {Array} 92 | * @memberof kNN 93 | */ 94 | autoNormalVector(inx_: Array): Array{ 95 | let inx = [...inx_]; 96 | let minVals = this.minVals, 97 | ranges = this.ranges; 98 | 99 | inx = new Vector(inx).zipWith((a,b)=>a - b,new Vector(minVals)); 100 | inx = new Vector(inx).zipWith((a,b)=>a/b,new Vector(ranges)); 101 | return inx; 102 | } 103 | /** 104 | * normalize the given matrix like datas. 105 | * 106 | * @static 107 | * @param {Array>} dataSet 108 | * @returns {Array>} 109 | * @memberof kNN 110 | */ 111 | static autoNormal(dataSet: Array>): Array>{ 112 | return autoNormal(new Matrix(dataSet))[0]; 113 | } 114 | } 115 | 116 | export default kNN; -------------------------------------------------------------------------------- /src/algorithm/kNN/test.ts: -------------------------------------------------------------------------------- 1 | import kNN from '../kNN'; 2 | 3 | let knn = new kNN([ 4 | [1.,1.1], 5 | [1.,1.], 6 | [0.,0.], 7 | [0.,0.1] 8 | ],['A','A','B','C']); 9 | 10 | let result = knn.classify([1.1,0.8],4); 11 | 12 | console.log(result) -------------------------------------------------------------------------------- /src/algorithm/logistic/index.ts: -------------------------------------------------------------------------------- 1 | import Matrix from '../../utils/matrix'; 2 | import * as _ from 'lodash'; 3 | 4 | function sigmoid(inx){ 5 | return 1.0/(1+Math.exp(-inx)); 6 | } 7 | class Logistic { 8 | private dataMatrix: Matrix; 9 | private labels: Array; 10 | private numIter: number; 11 | 12 | /** 13 | * Creates an instance of Logistic. 14 | * @param {Array>} dataMatIn_ matrix like dataset for training. 15 | * @param {Array} classLabels the classes of training datas. 
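 *        e.g. [0, 1, 0] for a two-class problem (illustrative values). A minimal end-to-end sketch,
 *        assuming two-feature rows (the constructor only keeps the first two columns of each row):
 *          new Logistic([[0.5, 1.4], [1.8, 0.6], [0.9, 2.2]], [0, 1, 0], 150).classify([1.2, 0.7])
 *        returns 1 or 0 depending on the weights learned by the random gradient ascent step.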
16 | * @param {number} numIter Maximum iterations 17 | * @memberof Logistic 18 | */ 19 | constructor(dataMatIn_: Array>,classLabels: Array,numIter: number){ 20 | let dataMatIn = [...dataMatIn_]; 21 | dataMatIn = dataMatIn.map(v=>[1.0,v[0],v[1]]); 22 | 23 | this.dataMatrix = new Matrix(dataMatIn); 24 | this.labels = classLabels; 25 | this.numIter = numIter; 26 | } 27 | /** 28 | * 随机梯度上升法求各个特征的最佳回归系数 Random gradient ascent method for optimal regression coefficients of each feature 29 | * 30 | * @returns {Array} 31 | * @memberof Logistic 32 | */ 33 | getWeights(): Array{ 34 | let dataMatrix = this.dataMatrix.arr; 35 | let labels = this.labels; 36 | 37 | let [m,n] = this.dataMatrix.size(); 38 | let weights = Matrix.ones(n); // 初始化每个特征的系数 39 | let indexArr; 40 | let alpha; // 梯度上升步长 41 | 42 | for(let i=0; ia*b); 49 | 50 | let h = sigmoid(_.sum(vec)); 51 | let error = labels[randomIndex] - h; 52 | let rised = dataMatrix[randomIndex].map(v=>v*alpha*error); 53 | 54 | weights = _.zipWith(weights,rised,(a,b)=>a+b); 55 | indexArr.splice(randomIndex,1); 56 | } 57 | } 58 | 59 | return weights; 60 | } 61 | 62 | /** 63 | * 分类测试数据 class the given test data. 64 | * 65 | * @param {Array} inX_ test data 66 | * @returns {number} 67 | * @memberof Logistic 68 | */ 69 | classify(inX_: Array): number{ 70 | let inX = [...inX_]; 71 | inX = [1.0,inX[0],inX[1]]; 72 | 73 | let weights = this.getWeights(); 74 | let vec = _.zipWith(inX,weights,(a,b)=>a*b); 75 | let prob = sigmoid(_.sum(vec)); 76 | 77 | return prob>0.5?1.0:0.0; 78 | } 79 | } 80 | 81 | export default Logistic; -------------------------------------------------------------------------------- /src/algorithm/logistic/test.ts: -------------------------------------------------------------------------------- 1 | import Logistic from './index'; 2 | import * as Parser from '../../utils/fileParser/index'; 3 | import * as preprocessing from '../../utils/features/preprocessing'; 4 | import * as path from 'path'; 5 | import { log, print } from 'util'; 6 | import * as charts from '../../utils/charts/index'; 7 | 8 | let datas = Parser.read_csv(path.join(__dirname,'../../../assets/testSet.txt'),{ 9 | index_col:false, 10 | delimiter: ',', 11 | header: false, 12 | classType:'string' 13 | }); 14 | 15 | let dataSet = datas.drop(4).values; 16 | 17 | let labels = datas.getClasses(); 18 | 19 | labels = labels.map(v=>{ 20 | return v==='Iris-setosa'?1:0; 21 | }); 22 | 23 | let logi = new Logistic(dataSet,labels,150); 24 | 25 | let errors = 0; 26 | for(let i=0; i 2 | 3 | 4 | 5 | 6 | 7 | Decision Tree 8 | 9 | 19 | 20 | 21 | 99 | 100 | -------------------------------------------------------------------------------- /src/utils/charts/index.ts: -------------------------------------------------------------------------------- 1 | import server from './server'; 2 | import * as path from 'path'; 3 | import * as fs from 'fs'; 4 | import * as swig from 'swig'; 5 | 6 | /** 7 | * 根据模板和数据渲染可视化页面 8 | * 9 | * @export 10 | * @param {string} template 11 | * @param {object} data 12 | * @returns 13 | */ 14 | export function renderFile(template: string,data: object){ 15 | return swig.render(fs.readFileSync(template).toString(),{ 16 | filename: template, 17 | autoescape: false, 18 | locals: data 19 | }); 20 | } 21 | 22 | interface KnnConfig { 23 | width: string; 24 | height: string; 25 | size: number; 26 | } 27 | 28 | function treeLeaf(obj,leafRule){ 29 | let objKey = Object.keys(obj)[0]; 30 | let leaf = obj[objKey]; 31 | let children = []; 32 | let rules = Object.keys(leaf); 33 | for(let rule of 
rules){ 34 | if(typeof leaf[rule] === 'object'){ 35 | children.push(treeLeaf(leaf[rule],rule)); 36 | continue; 37 | } 38 | children.push({ 39 | name: leaf[rule], 40 | rule 41 | }); 42 | } 43 | 44 | return { 45 | name: objKey, 46 | rule: leafRule, 47 | children: [...children] 48 | } 49 | } 50 | 51 | /** 52 | * 可视化决策树 53 | * 54 | * @export 55 | * @param {object} tree the decision tree get from DT algorithm. 56 | * @param {any} options configuration object. { 57 | * width: {number} the width of the graph. 58 | * height: {number} the height of the graph. size: the size of every point. 59 | * } 60 | */ 61 | export function drawDT(tree: object,{ 62 | width=600, 63 | height=400 64 | }){ 65 | let firstStr = Object.keys(tree)[0]; 66 | let obj = treeLeaf(tree,null); 67 | let html = renderFile(path.resolve(__dirname,'DT','tpl.html'),{ 68 | width, 69 | height, 70 | data: JSON.stringify(obj) 71 | }) 72 | server(html); 73 | } 74 | 75 | /** 76 | * 可视化kNN算法,绘制散点图 77 | * 78 | * @export 79 | * @param {Array>} dataSet_ matrix of datas for training. 80 | * @param {Array} labels_ vector of training datas' classes. 81 | * @param {Array} inx vector of data to test. 82 | * @param {object} options configuration object. { 83 | * width: {string} the width of the graph. default to "600px". 84 | * height: {string} the height of the graph. default to "400px". 85 | * size: {number} the size of every point. default to 20. 86 | * } 87 | */ 88 | export function drawkNN(dataSet_: Array>,labels_: Array,inx: Array,{ 89 | width="600px", 90 | height="400px", 91 | size=20 92 | }={} as KnnConfig){ 93 | let dataSet = [...dataSet_]; 94 | let labels = [...labels_]; 95 | let data = []; 96 | let classes = [...new Set(labels)].filter(v=>v!==undefined); 97 | classes.forEach(c=>{ 98 | let classSet = dataSet.filter((value,i)=>labels[i] === c); 99 | data.push([...classSet]) 100 | }); 101 | 102 | classes.push('test'); 103 | data.push([inx]); 104 | let html = renderFile(path.resolve(__dirname,'kNN','tpl.html'),{ 105 | title: "Scatter plot for kNN", 106 | width, 107 | height, 108 | size, 109 | data: JSON.stringify(data), 110 | classes: JSON.stringify(classes.map(v=>v.toString())) 111 | }); 112 | server(html) 113 | } 114 | 115 | 116 | /** 117 | * 118 | * 绘制logistic回归算法的散点图和回归线 119 | * @export 120 | * @param {Array>} dataSet_ matrix of datas for training. 121 | * @param {Array} labels_ vector of training datas' classes. 122 | * @param {Array} weights vector of features' weights get from logistic algorithm. 123 | * @param {object} options configuration object. { 124 | * width: {string} the width of the graph. default to "600px". 125 | * height: {string} the height of the graph. default to "400px". 126 | * size: {number} the size of every point. deault to 20. 
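 *    A hypothetical call sketch (names are illustrative): drawLogistic(dataSet, labels,
 *    logi.getWeights(), { size: 15 }), where `logi` is a trained Logistic instance and the
 *    omitted width and height fall back to their defaults.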
127 | * } 128 | */ 129 | export function drawLogistic(dataSet_: Array>,labels_: Array,weights: Array,{ 130 | width="600px", 131 | height="400px", 132 | size=20 133 | }={} as KnnConfig){ 134 | let dataSet = [...dataSet_]; 135 | let labels = [...labels_]; 136 | let data = []; 137 | let classes = [...new Set(labels)].filter(v=>v!==undefined); 138 | classes.forEach(c=>{ 139 | let classSet = dataSet.filter((value,i)=>labels[i] === c); 140 | data.push([...classSet]) 141 | }); 142 | 143 | let xs = dataSet.map(v=>v[0]), 144 | minx = Math.min(...xs), 145 | maxx = Math.max(...xs); 146 | 147 | let k = -weights[1]/weights[2]; 148 | let b = -weights[0]/weights[2]; 149 | 150 | let linePoints = [ 151 | [minx,k*minx+b], 152 | [maxx,k*maxx+b] 153 | ] 154 | 155 | let html = renderFile(path.resolve(__dirname,'logistic','tpl.html'),{ 156 | title: "Logistic Regression", 157 | width, 158 | height, 159 | size, 160 | data: JSON.stringify(data), 161 | classes: JSON.stringify(classes.map(v=>v.toString())), 162 | linePoints: JSON.stringify(linePoints) 163 | }); 164 | server(html) 165 | } -------------------------------------------------------------------------------- /src/utils/charts/kNN/tpl.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{title}} 6 | 7 | 8 | 9 | 10 | 11 |
12 | 92 | 93 | -------------------------------------------------------------------------------- /src/utils/charts/logistic/tpl.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{title}} 6 | 7 | 8 | 9 | 10 | 11 |
12 | 120 | 121 | -------------------------------------------------------------------------------- /src/utils/charts/server.ts: -------------------------------------------------------------------------------- 1 | import * as express from 'express'; 2 | import * as c from 'child_process'; 3 | 4 | 5 | export default function(html){ 6 | const app = express(); 7 | app.use('/',(req,res,next)=>{ 8 | res.set('Content-Type', 'text/html'); 9 | res.status(200).send(html); 10 | }) 11 | app.listen(4000,()=>{ 12 | console.log('the server has been listened at port 4000') 13 | }) 14 | 15 | let cmd = ''; 16 | 17 | switch (process.platform) { 18 | case 'wind32': 19 | cmd = 'start'; 20 | break; 21 | 22 | case 'linux': 23 | cmd = 'xdg-open'; 24 | break; 25 | 26 | case 'darwin': 27 | cmd = 'open'; 28 | break; 29 | } 30 | c.exec(`${cmd} http://localhost:4000`); 31 | } -------------------------------------------------------------------------------- /src/utils/features/index.ts: -------------------------------------------------------------------------------- 1 | import * as preprocessing from './preprocessing'; 2 | 3 | 4 | export default { 5 | preprocessing 6 | } -------------------------------------------------------------------------------- /src/utils/features/preprocessing.ts: -------------------------------------------------------------------------------- 1 | import Matrix from '../matrix/index'; 2 | import Vector from '../vector/index'; 3 | import {Repeat,List} from 'immutable'; 4 | 5 | // 标准化数据集,针对每列(每个特征),将特征转化为服从正态分布 6 | export function standardScaler(dataSet: Array>): Array>{ 7 | let mdataset = new Matrix(dataSet); 8 | let transSet = mdataset.transpose(); 9 | 10 | let setAvgs = mdataset.calAvg(0); 11 | let result = transSet.map((v,i)=>{ 12 | let vlength = v.length; 13 | return v.map(col=>{ 14 | return Math.pow((col-setAvgs[i]),2)/vlength; 15 | }); 16 | }); 17 | 18 | return new Matrix(result).transpose(); 19 | } 20 | 21 | // 归一化数据集,采用区间缩放法,将特征值缩放到(0,1) 22 | export function normalize(dataSet_: Array>): Array>{ 23 | let dataSet = new Matrix(dataSet_); 24 | let minVals = dataSet.min(0); // 每个特征的最小值 25 | let maxVals = dataSet.max(0); // 每个特征的最大值 26 | let ranges = new Vector(maxVals).zipWith((a,b)=>a-b,new Vector(minVals)); // 每个特征的范围 27 | 28 | let normalDataSet = new Matrix(Matrix.zeros(...dataSet.size())); 29 | let setSize = dataSet.size()[0]; // 训练集实例数 30 | 31 | normalDataSet = dataSet.sub(new Matrix(Repeat(minVals,setSize).toArray())); //分子为每个特征原始值减去该特征最小值 32 | 33 | normalDataSet = normalDataSet.divide(new Matrix(Repeat(ranges,setSize).toArray())); // 上式得到的每个特征值除以该特征范围 34 | return normalDataSet.arr; 35 | } 36 | 37 | // 二值化特征,第二个参数指定每个特征的阀值,相应特征大于指定阀值取1,否则取0 38 | export function binarizer(dataSet: Array>,threshold: Array): Array>{ 39 | let mdataset = new Matrix(dataSet); 40 | let transSet = mdataset.transpose(); 41 | 42 | let result = transSet.map((v,i)=>{ 43 | return v.map(c=>c>threshold[i]?1:0); 44 | }); 45 | 46 | return result; 47 | } 48 | 49 | // 哑编码特征值,当特征值为非数值时,将各个值作为新的特征 50 | export function oneHotEncoder(dataSet: Array>): Array>{ 51 | 52 | } -------------------------------------------------------------------------------- /src/utils/features/test.ts: -------------------------------------------------------------------------------- 1 | import features from './index'; 2 | import * as parser from '../fileParser/index'; 3 | import * as path from 'path'; 4 | 5 | const preprocessing = features.preprocessing; 6 | 7 | let dt = parser.read_csv(path.join(__dirname,'../../../assets/train.csv'),{ 8 | index_col: 
0, 9 | delimiter: ',', 10 | header: 0, 11 | dataType: 'number' 12 | }); 13 | 14 | let dataSet =dt.drop('quality').values; 15 | 16 | let testStandardScaler = preprocessing.standardScaler(dataSet); 17 | console.log(testStandardScaler); 18 | 19 | let testNormalize = preprocessing.normalize(dataSet); 20 | console.log(testNormalize); -------------------------------------------------------------------------------- /src/utils/fileParser/index.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs'; 2 | import * as _ from 'lodash'; 3 | 4 | interface ReadCsvConfig { 5 | index_col?: boolean | number; 6 | delimiter?: string; 7 | header?: Array | number | boolean; 8 | dataType?: string; 9 | classType?: string; 10 | } 11 | 12 | interface WriteCsvConfig { 13 | index?: boolean; 14 | header?: any[]; 15 | delimiter?: string; 16 | } 17 | 18 | interface ReadFileConfig { 19 | toNumber?: boolean; 20 | delimiter?: string; 21 | } 22 | 23 | 24 | /** 25 | * 简单读取文件,配置包括是否转化数据为数值型和分隔符号 26 | * 27 | * @export 28 | * @param {string} filePath 29 | * @param {object} options { 30 | * toNumber: {boolean}. whether transform datas to number or not. deault to false. 31 | * delmiter: {string}. delmiter for every line. default to '\t'. 32 | * } 33 | * @returns {Array>} 34 | */ 35 | export function parseFile(filePath:string,{ 36 | toNumber=false, 37 | delimiter='\t' 38 | }={} as ReadFileConfig): Array>{ 39 | let content = fs.readFileSync(filePath,{encoding: 'utf-8'}); 40 | let lines = content.split('\n'); 41 | let result = lines.map(line=>line.split(delimiter)); 42 | 43 | if(toNumber){ 44 | return result.map(v=>v.map(c=>Number(c))) 45 | } 46 | return result; 47 | } 48 | 49 | class CSV { 50 | values: Array> 51 | constructor(public headerLine: Array,datasWithoutIndex: Array>){ 52 | this.values = datasWithoutIndex; 53 | } 54 | /** 55 | * 获取标题行 Get the header line. 56 | * 57 | * @returns {Array} 58 | * @memberof CSV 59 | */ 60 | getHeader(): Array{ 61 | return this.headerLine; 62 | } 63 | /** 64 | * 删除某一行或者指定标题的列 delete the specific column. 65 | * 66 | * @param {(string | number)} label delete the specific number of column or the column of specific label. 67 | * @returns {CSV} instance of class CSV. 68 | * @memberof CSV 69 | */ 70 | drop(label: string | number): CSV{ 71 | let headerLine = [...this.headerLine]; 72 | let values = this.values.map(v=>[...v]); 73 | let labelIndex = typeof label === 'string'?headerLine.indexOf(label):label; 74 | 75 | if(headerLine.length !== 0){ 76 | headerLine.splice(labelIndex,1); 77 | } 78 | values.forEach(v=>v.splice(labelIndex,1)); 79 | 80 | return new CSV(headerLine,values); 81 | } 82 | /** 83 | * 获取分类列,一般为最后一列。 Get the last column of every line. 84 | * 85 | * @returns {Array} 86 | * @memberof CSV 87 | */ 88 | getClasses(): Array{ 89 | return this.values.map(v=>v[v.length-1]) 90 | } 91 | 92 | } 93 | 94 | export {CSV}; 95 | /** 96 | * 读取csv文件 Read CSV file. 97 | * 98 | * @export 99 | * @param {string} filePath 100 | * @param {object} options { 101 | * index_col: {boolean|number}. when set to true, the first column of data will be regarded as the counter column. Default to be false. 102 | * delmiter: {string}. delmiter for every line. Default to be ','. 103 | * header: {Array|number}. Can be the vector of custom header line or the index of the header line. default to 0. 104 | * dataType: {string}. the type of datas, default to 'number'. 105 | * classType: {string}. the type of the last column of each line. default to 'number'. 
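 *    A small illustrative sketch (column names are hypothetical): read_csv(file, { header: ['x', 'y', 'label'],
 *    dataType: 'number' }) supplies a custom header array instead of reading the header from row 0.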
106 | * } 107 | * @returns {CSV} instance of class CSV. 108 | */ 109 | export function read_csv (filePath: string,{ 110 | index_col=false, 111 | delimiter=',', 112 | header=0, 113 | dataType='number', 114 | classType='number' 115 | }={} as ReadCsvConfig): CSV{ 116 | let rawContent = fs.readFileSync(filePath,{encoding: 'utf-8'}); 117 | 118 | let lines = rawContent.split('\n').map(v=>v.split(delimiter)); 119 | let headerLine; 120 | if(Array.isArray(header)){ 121 | headerLine = header; 122 | } else if(header === 0){ 123 | headerLine = lines[0]; 124 | lines = _.tail(lines); 125 | } else { 126 | headerLine = [] 127 | } 128 | 129 | if(headerLine[0] === ''){ 130 | headerLine = _.tail(headerLine) 131 | } 132 | 133 | let datasWithoutIndex = []; 134 | // 去除Index列 135 | if(index_col !== false){ 136 | datasWithoutIndex = lines.map(v=>_.tail(v)); 137 | } else { 138 | datasWithoutIndex = lines; 139 | } 140 | 141 | if(dataType === 'number'){ 142 | datasWithoutIndex = datasWithoutIndex.map(row=>row.map(col=>{ 143 | if(classType === 'number'){ 144 | return Number(col); 145 | } 146 | return col; 147 | })) 148 | } 149 | return new CSV(headerLine,datasWithoutIndex); 150 | } 151 | 152 | /** 153 | * 写入CSV数据 Write datas to file. 154 | * 155 | * @export 156 | * @param {string} filePath 157 | * @param {any[][]} data datas to write. 158 | * @param {object} options { 159 | * index: {boolean}. if set to be true, it will add a index column for each line. default to false. 160 | * header: {Array}. custom header to add to the first line. default to []. 161 | * } 162 | */ 163 | export function write_csv (filePath: string,data: any[][],{ 164 | index=false, 165 | header=[], 166 | delimiter=',' 167 | }={} as WriteCsvConfig): void{ 168 | let dataToWrite = [...data]; 169 | if(index !== false){ 170 | dataToWrite.forEach((v,i)=>{ 171 | v.unshift(i); 172 | }); 173 | } 174 | if(Array.isArray(header) && header.length >= 1){ 175 | dataToWrite.unshift(header); 176 | } 177 | dataToWrite = dataToWrite.map(row=>row.map(col=>col.toString())); 178 | let contentToWrite = ''; 179 | 180 | dataToWrite.forEach(v=>{ 181 | contentToWrite += v.join(delimiter); 182 | contentToWrite += '\n'; 183 | }); 184 | 185 | fs.writeFileSync(filePath,contentToWrite,{ 186 | encoding: 'utf-8' 187 | }); 188 | } -------------------------------------------------------------------------------- /src/utils/fileParser/result.csv: -------------------------------------------------------------------------------- 1 | ID,quality 2 | 0,6 3 | 1,6 4 | 2,6 5 | 3,6 6 | 4,6 7 | 5,6 8 | 6,6 9 | 7,6 10 | 8,6 11 | 9,6 12 | 10,6 13 | 11,6 14 | 12,6 15 | 13,6 16 | 14,6 17 | 15,6 18 | 16,6 19 | 17,6 20 | 18,6 21 | 19,6 22 | 20,6 23 | 21,6 24 | -------------------------------------------------------------------------------- /src/utils/fileParser/test.ts: -------------------------------------------------------------------------------- 1 | import * as parser from './index'; 2 | import * as path from 'path'; 3 | import * as charts from '../charts/index'; 4 | import kNN from '../../algorithm/kNN/index'; 5 | 6 | let dt = parser.read_csv(path.join(__dirname,'../../../assets/train.csv'),{ 7 | index_col: 0, 8 | delimiter: ',', 9 | header: 0, 10 | dataType: 'number' 11 | }); 12 | 13 | let labels = dt.getClasses(); 14 | 15 | let dataSet =dt.drop('quality').values; 16 | let knn = new kNN(dataSet,labels); 17 | 18 | let dataToTest = parser.read_csv(path.join(__dirname,'../../../assets/test.csv'),{ 19 | index_col: 0, 20 | dataType: 'number' 21 | }).drop('quality').values; 22 | 23 | let resultSet = 
dataToTest.map((v,i)=>[i,knn.classify(knn.autoNormalVector(v),48)]) 24 | 25 | // 将结果写入 csv 26 | parser.write_csv(path.join(__dirname,'./result.csv'),resultSet,{ 27 | header: ['ID','quality'] 28 | }); 29 | 30 | 31 | // 绘图 32 | let inx = [7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8], 33 | normalInx = knn.autoNormalVector(inx); 34 | 35 | console.log(knn.classify(inx,100)); // 6 36 | charts.drawkNN(kNN.autoNormal(dataSet),labels,normalInx,{ 37 | width: "500px", 38 | height: "400px", 39 | size: 15 40 | }); 41 | -------------------------------------------------------------------------------- /src/utils/index.ts: -------------------------------------------------------------------------------- 1 | import Matrix from './matrix/index'; 2 | import Vector from './vector/index'; 3 | import * as fileParser from './fileParser/index'; 4 | import * as charts from './charts/index'; 5 | import features from './features/index'; 6 | 7 | export { 8 | Matrix, 9 | Vector, 10 | fileParser, 11 | charts, 12 | features 13 | } -------------------------------------------------------------------------------- /src/utils/matrix/index.ts: -------------------------------------------------------------------------------- 1 | import {List,Repeat} from 'immutable'; 2 | import * as math from 'mathjs'; 3 | import * as _ from 'lodash'; 4 | 5 | class Matrix { 6 | constructor(public arr: Array>){} 7 | 8 | /** 9 | * 获取原始多维数组 Get raw data. 10 | * 11 | * @returns {Array>} 12 | * @memberof Matrix 13 | */ 14 | toArray(): Array>{ 15 | return this.arr; 16 | } 17 | /** 18 | * 针对两个矩阵同一行同一列的值对应进行计算 Matrix operation. like '*','/','+','-'. 19 | * 20 | * @param {Class Matrix} arrA Matrix A 21 | * @param {Class Matrix} arrB Matrix B 22 | * @param {String} operator operation,'+'|'-'|'*'|'\' 23 | * @returns {Class Matrix} result. 24 | * @memberof Matrix 25 | */ 26 | zipWith(arrA: Array>,arrB: Array>,operator: string): Matrix{ 27 | let result = []; 28 | 29 | switch(operator){ 30 | case '+': 31 | result = _.zipWith(arrA,arrB,(a,b)=>{ 32 | return _.zipWith(a,b,(m,n)=>m+n); 33 | }); 34 | break; 35 | case '-': 36 | result = _.zipWith(arrA,arrB,(a,b)=>{ 37 | return _.zipWith(a,b,(m,n)=>m-n); 38 | }); 39 | break; 40 | case '*': 41 | result = _.zipWith(arrA,arrB,(a,b)=>{ 42 | return _.zipWith(a,b,(m,n)=>m*n); 43 | }); 44 | break; 45 | case '/': 46 | result = _.zipWith(arrA,arrB,(a,b)=>{ 47 | return _.zipWith(a,b,(m,n)=>m/n); 48 | }); 49 | break; 50 | default: 51 | return; 52 | 53 | } 54 | return new Matrix(result); 55 | } 56 | /** 57 | * Matrix subtraction 58 | * 59 | * @param {Matrix} toSub Matrix to sub with. 60 | * @returns {Matrix} 61 | * @memberof Matrix 62 | */ 63 | sub(toSub: Matrix): Matrix{ 64 | return this.zipWith(this.arr,toSub.arr,'-'); 65 | } 66 | /** 67 | * matrix addition 68 | * 69 | * @param {Matrix} toAdd Matrix to add with. 70 | * @returns {Matrix} 71 | * @memberof Matrix 72 | */ 73 | add(toAdd: Matrix): Matrix{ 74 | return this.zipWith(this.arr,toAdd.arr,'+') 75 | } 76 | /** 77 | * matrix multiplication 78 | * 79 | * @param {Matrix} toMult matrix to multiply with. 80 | * @returns {Matrix} 81 | * @memberof Matrix 82 | */ 83 | mult(toMult: Matrix): Matrix{ 84 | return this.zipWith(this.arr,toMult.arr,'*') 85 | } 86 | /** 87 | * Matrix Division 88 | * 89 | * @param {Matrix} toDivide matrix to divide with. 90 | * @returns {Matrix} 91 | * @memberof Matrix 92 | */ 93 | divide(toDivide: Matrix): Matrix{ 94 | return this.zipWith(this.arr,toDivide.arr,'/'); 95 | } 96 | /** 97 | * Get the size of Matrix, including rows and columns. 
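 * e.g. new Matrix([[2, 4, 6], [5, 7, 1]]).size() returns [2, 3], i.e. two rows and three columns (an illustrative sketch).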
98 | * 99 | * @returns {[number,number]} 100 | * @memberof Matrix 101 | */ 102 | size(): [number,number]{ 103 | return [this.arr.length,this.arr[0].length]; 104 | } 105 | /** 106 | * 矩阵同一行/列进行相加 The sum of data in the same row/column 107 | * 108 | * @param {number} [axis=1] when to be 1, get the sum of the same row, when to be 0, get the sum of the same column. default to 1. 109 | * @returns {Array} 110 | * @memberof Matrix 111 | */ 112 | sum(axis=1 as number): Array{ 113 | if(axis === 0){ 114 | return this.transpose().map(v=>_.sum(v)); 115 | } 116 | return this.arr.map(v=>_.sum(v)); 117 | } 118 | /** 119 | * 120 | * 获取同一行/列的最小值 The minimum value of data in the same row/column. 121 | * @param {number} [axis=0 as number] when set to be 0, get the minimum value of data in the same column. 1 to the same row. default is 0. 122 | * @returns {Array} 123 | * @memberof Matrix 124 | */ 125 | min(axis=0 as number): Array{ 126 | let arr = axis === 0?this.transpose():[...this.arr]; 127 | 128 | arr = arr.map(v=>v.filter(c=>typeof c === 'number')); 129 | 130 | return arr.map(v=>_.min(v)); 131 | } 132 | /** 133 | * 获取同一行/列的最大值 The maximum value of data in the same row/column. 134 | * 135 | * @param {number} [axis=0 as number] when set to be 0, get the maximum value of data in the same column. 1 to the same row. default is 0. 136 | * @returns {Array} 137 | * @memberof Matrix 138 | */ 139 | max(axis=0 as number): Array{ 140 | let arr = axis === 0?this.transpose():[...this.arr]; 141 | 142 | arr = arr.map(v=>v.filter(c=>typeof c === 'number')); 143 | 144 | return arr.map(v=>_.max(v)); 145 | } 146 | /** 147 | * 转置矩阵 Transpose matrix 148 | * 149 | * @returns {Array>} 150 | * @memberof Matrix 151 | */ 152 | transpose(): Array>{ 153 | return math.transpose(this.arr); 154 | } 155 | calAvg(flag=0 as number): Array{ 156 | let arr = flag === 0 ? this.transpose():this.arr; 157 | return arr.map((v,i)=>{ 158 | let sum = v.reduce((pre,cur)=>pre+cur,0); 159 | return sum/v.length; 160 | }); 161 | } 162 | // 初始化零矩阵 163 | static zeros(r: number,c?: number): Array>|Array{ 164 | return c?math.zeros(r,c)._data:math.zeros(r)._data; 165 | } 166 | static ones(m: number,n?: number): Array>|Array{ 167 | return n?math.ones(m,n)._data:math.ones(m)._data; 168 | } 169 | /** 170 | * 获取同一行或同一列的平均值 The average value of the same row/column of data 171 | * 172 | * @static 173 | * @param {Array>} arr 174 | * @param {number} [axis=0 as number] when to be 0, the same column, 1 to the same row. default to 0. 175 | * @returns {Array} 176 | * @memberof Matrix 177 | */ 178 | static mean(arr: Array>, axis=0 as number): Array{ 179 | 180 | if(axis === 0){ //按列求平均值 181 | return math.transpose(arr).map(v=>_.sum(v)/v.length); 182 | } else { 183 | return arr.map(v=>_.sum(v)/v.length); 184 | } 185 | } 186 | } 187 | 188 | export default Matrix; -------------------------------------------------------------------------------- /src/utils/vector/index.ts: -------------------------------------------------------------------------------- 1 | import {List,Repeat} from 'immutable'; 2 | import * as _ from 'lodash'; 3 | 4 | class Vector { 5 | constructor(public arr: Array){ 6 | } 7 | /** 8 | * 数组元素从小到大排序对应的下标 The sorted index of array. 
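 * e.g. new Vector([4, 7, 1, 8, 2]).argSort() returns [2, 4, 0, 1, 3], the indices in ascending order of value
 * (an illustrative sketch; the unit test below asserts the same expectation).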
9 | * 10 | * @returns {number[]} 11 | * @memberof Vector 12 | */ 13 | argSort(): number[]{ 14 | let list = [...this.arr]; 15 | let result = list 16 | .map((v,i)=>[v,i]) 17 | .sort(([v1],[v2])=>v1>v2) 18 | .map(([,i])=>i); 19 | 20 | return result; 21 | } 22 | zipWith(func: Function,b): Array{ 23 | let result = this.arr.map((v,i)=>func(v,b.arr[i])) 24 | // console.log(result) 25 | return result; 26 | } 27 | /** 28 | * 针对每个元素,若值等于0返回0,若大于0返回1,若小于0返回-1 For each element, when its value equals to 0 returns 0, else if it's larger than 0 returns 1 else returns -1. 29 | * 30 | * @static 31 | * @param {(number|Array)} arr 32 | * @returns {(number|Array)} 33 | * @memberof Vector 34 | */ 35 | static sign(arr: number|Array): number|Array{ 36 | if(Array.isArray(arr)){ 37 | return arr.map(v=>v===0.0?0.0:(v>0.0?1.0:-1.0)) 38 | } else { 39 | return arr===0.0?0.0:(arr>0.0?1.0:-1.0); 40 | } 41 | } 42 | /** 43 | * 创建指定个数的0-1之间的随机数 Create specific number of random number between 0 and 1. 44 | * 45 | * @static 46 | * @param {number} m 47 | * @returns 48 | * @memberof Vector 49 | */ 50 | static rand(m: number){ 51 | let initArr = Repeat(0,m).toArray(); 52 | let result = initArr.map(v=>Math.random()); 53 | return result; 54 | } 55 | } 56 | 57 | export default Vector; -------------------------------------------------------------------------------- /test/utils/Matrix.js: -------------------------------------------------------------------------------- 1 | const Matrix = require('../../lib/index').utils.Matrix; 2 | const expect = require('chai').expect; 3 | 4 | const dataSet = [ 5 | [2,4,6], 6 | [5,7,1], 7 | [3,3,1] 8 | ] 9 | const dataSet2 = [ 10 | [1,3,5], 11 | [2,4,7], 12 | [3,5,8] 13 | ] 14 | 15 | let matA = new Matrix(dataSet), 16 | matB = new Matrix(dataSet2); 17 | 18 | describe('Matrix',()=>{ 19 | describe('#toArray()',()=>{ 20 | it('should return raw array value',()=>{ 21 | let arr = matA.toArray(); 22 | expect(arr).to.equal(dataSet); 23 | }) 24 | }); 25 | 26 | describe('#add()',()=>{ 27 | it('should return the result of addition',()=>{ 28 | let result = matA.add(matB); 29 | expect(result.toArray()).to.eql([ 30 | [3,7,11], 31 | [7,11,8], 32 | [6,8,9] 33 | ]); 34 | }); 35 | }); 36 | 37 | describe('#size()',()=>{ 38 | it('should return the size of the matrix',()=>{ 39 | expect(matA.size()).to.eql([3,3]); 40 | }); 41 | }); 42 | 43 | describe('#max()',()=>{ 44 | it('should return the maxiumn value of each row when axis=1',()=>{ 45 | expect(matA.max(1)).to.eql([6,7,3]); 46 | }); 47 | it('should return the maxiumn value of each column when axis=0',()=>{ 48 | expect(matA.max(0)).to.eql([5,7,6]); 49 | }); 50 | }); 51 | 52 | describe('#min()',()=>{ 53 | it('should return the miniumn value of each row when axis=1',()=>{ 54 | expect(matA.min(1)).to.eql([2,1,1]); 55 | }); 56 | it('should return the miniumn value of each column when axis=0',()=>{ 57 | expect(matA.min(0)).to.eql([2,3,1]); 58 | }); 59 | }); 60 | 61 | describe('#transpose()',()=>{ 62 | it('should transpose the matrix',()=>{ 63 | expect(matA.transpose()).to.eql([[2,5,3],[4,7,3],[6,1,1]]); 64 | }); 65 | }); 66 | 67 | describe('#ones()',()=>{ 68 | it('should return vector when call ones(m)',()=>{ 69 | expect(Matrix.ones(3)).to.eql([1,1,1]); 70 | }); 71 | it('should return matrix when call ones(m,n)',()=>{ 72 | expect(Matrix.ones(2,2)).to.eql([[1,1],[1,1]]); 73 | }); 74 | }); 75 | 76 | describe('#zeros()',()=>{ 77 | it('should return vector when call zeros(m)',()=>{ 78 | expect(Matrix.zeros(3)).to.eql([0,0,0]); 79 | }); 80 | it('should return matrix when call 
zeros(m,n)',()=>{ 81 | expect(Matrix.zeros(2,2)).to.eql([[0,0],[0,0]]); 82 | }); 83 | }); 84 | 85 | describe('#mean()',()=>{ 86 | it('should return the average value of each row when axis=1',()=>{ 87 | expect(Matrix.mean(dataSet),1).to.be.an('array'); 88 | }); 89 | it('should return the average value of each column when axis=0',()=>{ 90 | expect(Matrix.mean(dataSet),0).to.be.an('array'); 91 | }); 92 | }); 93 | }); -------------------------------------------------------------------------------- /test/utils/Vector.js: -------------------------------------------------------------------------------- 1 | const Vector = require('../../lib/index').utils.Vector; 2 | const expect = require('chai').expect; 3 | 4 | const arr = [4,7,1,8,2]; 5 | const vect = new Vector(arr); 6 | 7 | describe('Vector',()=>{ 8 | describe('#argSort()',()=>{ 9 | it('should return the sorted index of the array',()=>{ 10 | expect(vect.argSort()).to.eql([2,4,0,1,3]); 11 | }); 12 | }); 13 | 14 | describe('#sign()',()=>{ 15 | it('should return array when sign(Array)',()=>{ 16 | expect(Vector.sign([-2,2,0,4])).to.eql([-1,1,0,1]); 17 | }); 18 | 19 | it('should return -1 when number is lower than 0',()=>{ 20 | expect(Vector.sign(-6)).to.eql(-1); 21 | }); 22 | 23 | it('should return 1 when number is larger than 0',()=>{ 24 | expect(Vector.sign(6)).to.eql(1); 25 | }); 26 | 27 | it('should return 0 when number is equal to 0',()=>{ 28 | expect(Vector.sign(0)).to.eql(0); 29 | }); 30 | }); 31 | 32 | describe('#rand()',()=>{ 33 | it('should return n of random number',()=>{ 34 | expect(Vector.rand(3)).to.have.lengthOf(3); 35 | }); 36 | }); 37 | }); -------------------------------------------------------------------------------- /test/utils/fileParser.js: -------------------------------------------------------------------------------- 1 | const parser = require('../../lib/index').utils.fileParser; 2 | const CSV = parser.CSV; 3 | const path = require('path'); 4 | const expect = require('chai').expect; 5 | 6 | 7 | describe('fileParser',()=>{ 8 | describe('#parseFile()',()=>{ 9 | it('should return matrix like data',()=>{ 10 | let result = parser.parseFile(path.join(__dirname,'./../../assets/testSet.txt'),{ 11 | toNumber: false, 12 | delimiter: ',' 13 | }); 14 | expect(result).to.have.lengthOf(100); 15 | expect(result[0]).to.have.lengthOf(5); 16 | }); 17 | }); 18 | 19 | describe('#readCsv()',()=>{ 20 | it('should return instance of CSV',()=>{ 21 | let dt = parser.read_csv(path.join(__dirname,'../../assets/train.csv'),{ 22 | index_col: 0, 23 | delimiter: ',', 24 | header: 0, 25 | dataType: 'number' 26 | }); 27 | expect(dt).to.be.an.instanceof(CSV); 28 | }); 29 | }); 30 | }); 31 | 32 | describe('CSV',()=>{ 33 | let dt = parser.read_csv(path.join(__dirname,'../../assets/train.csv'),{ 34 | index_col: 0, 35 | delimiter: ',', 36 | header: 0, 37 | dataType: 'number' 38 | }); 39 | describe('#getHeader()',()=>{ 40 | it('should return the header line of the dataset',()=>{ 41 | let header = dt.getHeader(); 42 | expect(header).to.be.an('array'); 43 | }); 44 | }); 45 | describe('#drop()',()=>{ 46 | it('should return a new instance of CSV',()=>{ 47 | expect(dt.drop(0)).to.be.an.instanceof(CSV); 48 | }); 49 | }); 50 | describe('#getClasses()',()=>{ 51 | it('should return the class vector',()=>{ 52 | expect(dt.getClasses()).to.be.an('array'); 53 | }); 54 | }); 55 | }); -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"files": [ 3 | "src/**/*.ts" 4 | ], 5 | "compilerOptions": { 6 | "module": "commonjs", 7 | "removeComments": true, 8 | "allowJs": true, 9 | "target": "es2015", 10 | "sourceMap": true, 11 | "types": [ 12 | "node", 13 | "lodash" 14 | ] 15 | }, 16 | "exclude": [ 17 | "node_modules" 18 | ] 19 | } -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "tslint:recommended", 3 | "rules": { 4 | "max-line-length": { 5 | "options": [120] 6 | }, 7 | "new-parens": true, 8 | "no-arg": true, 9 | "no-bitwise": true, 10 | "no-conditional-assignment": true, 11 | "no-consecutive-blank-lines": false, 12 | "no-console": { 13 | "options": [ 14 | "debug", 15 | "info", 16 | "log", 17 | "time", 18 | "timeEnd", 19 | "trace" 20 | ] 21 | } 22 | }, 23 | "jsRules": { 24 | "max-line-length": { 25 | "options": [120] 26 | } 27 | } 28 | } --------------------------------------------------------------------------------