├── .gitattributes ├── .gitignore ├── .npmignore ├── .travis.yml ├── .vscode └── launch.json ├── LICENSE ├── README.md ├── assets ├── DT.png ├── knngraph.png ├── logistic.png ├── logistic2.png ├── test.csv ├── testSet.txt └── train.csv ├── docs ├── Matrix.md ├── README.md ├── Vector.md ├── algorithm.md ├── features.md ├── fileParser.md └── graph.md ├── gulpfile.js ├── package-lock.json ├── package.json ├── src ├── algorithm │ ├── AdaBoost │ │ ├── index.ts │ │ └── test.ts │ ├── DT │ │ ├── dt.txt │ │ ├── index.ts │ │ └── test.ts │ ├── index.ts │ ├── kMeans │ │ ├── index.ts │ │ ├── test.ts │ │ └── testSet.txt │ ├── kNN │ │ ├── index.ts │ │ └── test.ts │ └── logistic │ │ ├── index.ts │ │ └── test.ts ├── index.ts └── utils │ ├── .DS_Store │ ├── charts │ ├── .DS_Store │ ├── DT │ │ └── tpl.html │ ├── index.ts │ ├── kNN │ │ └── tpl.html │ ├── logistic │ │ └── tpl.html │ └── server.ts │ ├── features │ ├── index.ts │ ├── preprocessing.ts │ └── test.ts │ ├── fileParser │ ├── index.ts │ ├── result.csv │ └── test.ts │ ├── index.ts │ ├── matrix │ └── index.ts │ └── vector │ └── index.ts ├── test └── utils │ ├── Matrix.js │ ├── Vector.js │ └── fileParser.js ├── tsconfig.json └── tslint.json /.gitattributes: -------------------------------------------------------------------------------- 1 | *.html linguist-language=TypeScript -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | lib/ 3 | .DS_Store -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | assets/ 3 | tslint.json 4 | gulpfile.js 5 | tsconfig.json 6 | dist/maps/ 7 | src/ 8 | docs/ 9 | test/ 10 | .DS_Store -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "8.9.1" 4 | 5 | cache: 6 | directories: 7 | - node_modules 8 | script: npm run test -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "Launch Program", 11 | "program": "${file}", 12 | "outFiles": [ 13 | "${workspaceRoot}/dist/**/*.js" 14 | ], 15 | "cwd": "${workspaceRoot}", 16 | "sourceMaps": true, 17 | "console": "integratedTerminal" 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xia Luo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission 
notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mlhelper 2 | [![npm](https://img.shields.io/npm/v/mlhelper.svg?style=flat-square)](https://github.com/laoqiren/mlhelper) 3 | [![npm](https://img.shields.io/npm/l/mlhelper.svg?style=flat-square)](https://github.com/laoqiren/mlhelper) 4 | 5 | Algorithms and utils for Machine Learning in JavaScript, based on Node.js. Besides implementing commonly used machine learning algorithms, this library aims to provide a richer ecosystem, including matrix and vector operations, file parsing, feature engineering, data visualization, and so on. 6 | 7 | *`QQ Group`: 485305514* 8 | ## Installation 9 | ``` 10 | $ npm install mlhelper 11 | ``` 12 | 13 | ## Documentation 14 | 15 | * [algorithm](docs/algorithm.md) 16 | * [Matrix](docs/Matrix.md) 17 | * [Vector](docs/Vector.md) 18 | * [file Parser](docs/fileParser.md) 19 | * [graph tools](docs/graph.md) 20 | * [feature Engineering](docs/features.md) 21 | 22 | ## Example 23 | 24 | ### Algorithm 25 | 26 | ```js 27 | const AdaBoost = require('mlhelper/lib/algorithm').AdaBoost; 28 | //or const AdaBoost = require('mlhelper').algorithm.AdaBoost; 29 | 30 | const dataSet = [ 31 | [1.0,2.1], 32 | [2.0,1.1], 33 | [1.3,1.0], 34 | [1.0,1.0], 35 | [2.0,1.0] 36 | ] 37 | const labels = [1.0,1.0,-1.0,-1.0,1.0]; 38 | let ada = new AdaBoost(dataSet,labels,40); 39 | let result = ada.classify([[1.0,2.1], 40 | [2.0,1.1], 41 | [1.3,1.0], 42 | [1.0,1.0], 43 | [2.0,1.0]]); 44 | console.log(result); // [ 1, 1, -1, -1, -1 ] 45 | ``` 46 | 47 | ### Utils 48 | 49 | **Matrix:** 50 | ```js 51 | const Matrix = require('mlhelper/lib/utils').Matrix; 52 | 53 | let m1 = new Matrix([ 54 | [1,2,3], 55 | [3,4,5] 56 | ]); 57 | 58 | let m2 = new Matrix([ 59 | [2,2,6], 60 | [3,1,5] 61 | ]); 62 | 63 | console.log(m2.sub(m1)) // Matrix { arr: [ [ 1, 0, 3 ], [ 0, -3, 0 ] ] } 64 | console.log(m1.mult(m2)) // Matrix { arr: [ [ 2, 4, 18 ], [ 9, 4, 25 ] ] } 65 | ``` 66 | 67 | **Vector:** 68 | ```js 69 | const Vector = require('mlhelper/lib/utils').Vector; 70 | 71 | let v = new Vector([5,10,7,1]); 72 | console.log(v.argSort()) // [ 3, 0, 2, 1 ] 73 | ``` 74 | 75 | **fileParser:** 76 | ```js 77 | const parser = require('mlhelper/lib/utils').fileParser; 78 | 79 | let dt = parser.read_csv(path.join(__dirname,'./train.csv'),{ 80 | index_col: 0, 81 | delimiter: ',', 82 | header: 0, 83 | dataType: 'number' 84 | }); 85 | let labels = dt.getClasses(); 86 | let dataSet = dt.drop('quality').values; 87 | ``` 88 | 89 | **Feature Engineering:** 90 | ```js 91 | // preprocessing features 92 | const preprocessing = require('mlhelper/lib/utils').features.preprocessing; 93 | 94 | // make the features obey the standard normal distribution (standardization) 95 | let testStandardScaler = preprocessing.standardScaler(dataSet); 96 | 97 | let testNormalize = 
preprocessing.normalize(dataSet); 98 | 99 | let testBinarizer = preprocessing.binarizer(dataSet); 100 | 101 | // ... 102 | ``` 103 | 104 | **graph tools:** 105 | 106 | Decision Tree: 107 | ```js 108 | charts.drawDT(dt.getTree(),{ 109 | width:600, 110 | height:400 111 | }); 112 | ``` 113 | ![/assets/DT.png](/assets/DT.png) 114 | 115 | **logistic regression** 116 | ```js 117 | charts.drawLogistic(dataSet,labels,weights); 118 | ``` 119 | 120 | 121 | 122 | ## Contribute 123 | 124 | The original purpose of this project is to learn, and now I need more people to participate in this project, and any issue and good advice is welcome. 125 | ### git clone 126 | ``` 127 | git clone https://github.com/laoqiren/mlhelper.git 128 | ``` 129 | ### install dependencies&&devdependecies 130 | ``` 131 | npm install 132 | ``` 133 | 134 | ### development 135 | ``` 136 | npm run dev 137 | ``` 138 | 139 | ### test 140 | ``` 141 | npm run test 142 | ``` 143 | 144 | ### build 145 | ``` 146 | npm run build 147 | ``` 148 | ## LICENSE 149 | MIT. 150 | 151 | *You can use the project for any purpose, except for illegal activities.* 152 | -------------------------------------------------------------------------------- /assets/DT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laoqiren/mlhelper/381bb85f866a1da2cbeda0d59910584e9a68e6c5/assets/DT.png -------------------------------------------------------------------------------- /assets/knngraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laoqiren/mlhelper/381bb85f866a1da2cbeda0d59910584e9a68e6c5/assets/knngraph.png -------------------------------------------------------------------------------- /assets/logistic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laoqiren/mlhelper/381bb85f866a1da2cbeda0d59910584e9a68e6c5/assets/logistic.png -------------------------------------------------------------------------------- /assets/logistic2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laoqiren/mlhelper/381bb85f866a1da2cbeda0d59910584e9a68e6c5/assets/logistic2.png -------------------------------------------------------------------------------- /assets/test.csv: -------------------------------------------------------------------------------- 1 | ,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol 2 | 0,6.4,0.32,0.27,4.9,0.034,18.0,122.0,0.9916,3.36,0.71,12.5 3 | 1,7.1,0.18,0.39,14.5,0.051,48.0,156.0,0.99947,3.35,0.78,9.1 4 | 2,7.1,0.17,0.4,14.55,0.047,47.0,156.0,0.99945,3.34,0.78,9.1 5 | 3,7.1,0.18,0.39,15.25,0.047,45.0,158.0,0.99946,3.34,0.77,9.1 6 | 4,7.8,0.29,0.29,3.15,0.044,41.0,117.0,0.99153,3.24,0.35,11.5 7 | 5,6.2,0.255,0.27,1.3,0.037,30.0,86.0,0.98834,3.05,0.59,12.9 8 | 6,8.2,0.34,0.29,5.2,0.076,19.0,92.0,0.99138,2.95,0.39,12.5 9 | 7,6.5,0.24,0.28,1.1,0.034,26.0,83.0,0.98928,3.25,0.33,12.3 10 | 8,6.9,0.24,0.23,7.1,0.041,20.0,97.0,0.99246,3.1,0.85,11.4 11 | 9,6.7,0.4,0.22,8.8,0.052,24.0,113.0,0.99576,3.22,0.45,9.4 12 | 10,6.7,0.3,0.44,18.5,0.057,65.0,224.0,0.99956,3.11,0.53,9.1 13 | 11,6.7,0.4,0.22,8.8,0.052,24.0,113.0,0.99576,3.22,0.45,9.4 14 | 12,6.8,0.17,0.32,1.4,0.04,35.0,106.0,0.99026,3.16,0.66,12.0 15 | 13,7.1,0.25,0.28,1.2,0.04,31.0,111.0,0.99174,3.18,0.53,11.1 16 | 
14,5.9,0.27,0.27,5.0,0.035,14.0,97.0,0.99058,3.1,0.33,11.8 17 | 15,6.0,0.16,0.22,1.6,0.042,36.0,106.0,0.9905,3.24,0.32,11.4 18 | 16,6.7,0.3,0.44,18.75,0.057,65.0,224.0,0.99956,3.11,0.53,9.1 19 | 17,6.6,0.15,0.32,6.0,0.033,59.0,128.0,0.99192,3.19,0.71,12.1 20 | 18,7.3,0.34,0.3,9.4,0.057,34.0,178.0,0.99554,3.15,0.44,10.4 21 | 19,6.0,0.17,0.29,9.7,0.044,33.0,98.0,0.99536,3.12,0.36,9.2 22 | 20,6.7,0.47,0.29,4.75,0.034,29.0,134.0,0.99056,3.29,0.46,13.0 23 | 21,6.6,0.15,0.32,6.0,0.033,59.0,128.0,0.99192,3.19,0.71,12.1 -------------------------------------------------------------------------------- /assets/testSet.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 
5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor -------------------------------------------------------------------------------- /assets/train.csv: -------------------------------------------------------------------------------- 1 | ,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality 2 | 0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6 3 | 1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6 4 | 2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6 5 | 3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6 6 | 4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6 7 | 5,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6 8 | 6,6.2,0.32,0.16,7.0,0.045,30.0,136.0,0.9949,3.18,0.47,9.6,6 9 | 7,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6 10 | 8,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6 11 | 9,8.1,0.22,0.43,1.5,0.044,28.0,129.0,0.9938,3.22,0.45,11.0,6 12 | 10,8.1,0.27,0.41,1.45,0.033,11.0,63.0,0.9908,2.99,0.56,12.0,5 13 | 11,8.6,0.23,0.4,4.2,0.035,17.0,109.0,0.9947,3.14,0.53,9.7,5 14 | 12,7.9,0.18,0.37,1.2,0.04,16.0,75.0,0.992,3.18,0.63,10.8,5 15 | 13,6.6,0.16,0.4,1.5,0.044,48.0,143.0,0.9912,3.54,0.52,12.4,7 16 | 14,8.3,0.42,0.62,19.25,0.04,41.0,172.0,1.0002,2.98,0.67,9.7,5 17 | 15,6.6,0.17,0.38,1.5,0.032,28.0,112.0,0.9914,3.25,0.55,11.4,7 18 | 16,6.3,0.48,0.04,1.1,0.046,30.0,99.0,0.9928,3.24,0.36,9.6,6 19 | 17,6.2,0.66,0.48,1.2,0.029,29.0,75.0,0.9892,3.33,0.39,12.8,8 20 | 18,7.4,0.34,0.42,1.1,0.033,17.0,171.0,0.9917,3.12,0.53,11.3,6 21 | 19,6.5,0.31,0.14,7.5,0.044,34.0,133.0,0.9955,3.22,0.5,9.5,5 22 | 20,6.2,0.66,0.48,1.2,0.029,29.0,75.0,0.9892,3.33,0.39,12.8,8 23 | 21,6.4,0.31,0.38,2.9,0.038,19.0,102.0,0.9912,3.17,0.35,11.0,7 24 | 22,6.8,0.26,0.42,1.7,0.049,41.0,122.0,0.993,3.47,0.48,10.5,8 25 | 23,7.6,0.67,0.14,1.5,0.074,25.0,168.0,0.9937,3.05,0.51,9.3,5 26 | 24,6.6,0.27,0.41,1.3,0.052,16.0,142.0,0.9951,3.42,0.47,10.0,6 27 | 25,7.0,0.25,0.32,9.0,0.046,56.0,245.0,0.9955,3.25,0.5,10.4,6 28 | 26,6.9,0.24,0.35,1.0,0.052,35.0,146.0,0.993,3.45,0.44,10.0,6 29 | 27,7.0,0.28,0.39,8.7,0.051,32.0,141.0,0.9961,3.38,0.53,10.5,6 30 | 28,7.4,0.27,0.48,1.1,0.047,17.0,132.0,0.9914,3.19,0.49,11.6,6 31 | 29,7.2,0.32,0.36,2.0,0.033,37.0,114.0,0.9906,3.1,0.71,12.3,7 32 | 30,8.5,0.24,0.39,10.4,0.044,20.0,142.0,0.9974,3.2,0.53,10.0,6 33 | 31,8.3,0.14,0.34,1.1,0.042,7.0,47.0,0.9934,3.47,0.4,10.2,6 34 | 32,7.4,0.25,0.36,2.05,0.05,31.0,100.0,0.992,3.19,0.44,10.8,6 -------------------------------------------------------------------------------- /docs/Matrix.md: -------------------------------------------------------------------------------- 1 | # Matrix utils 2 | 3 | ## constructor(arr: Array>) 4 | *arr:* The Original 2D array data. 5 | 6 | ## toArray(): Array> 7 | Get the original array. 
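For example, a minimal sketch of building a matrix and reading the underlying array back (it assumes the `require('mlhelper').utils.Matrix` entry point used in the larger example at the bottom of this page):

```js
const Matrix = require('mlhelper').utils.Matrix;

let m = new Matrix([
    [2,4,6],
    [5,7,1]
]);

console.log(m.toArray()); // [ [ 2, 4, 6 ], [ 5, 7, 1 ] ]
```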
8 | 9 | ## size(): [number,number] 10 | Get the size of Matrix, including rows and columns. 11 | 12 | ## sum(axis=1 as number): Array 13 | The sum of data in the same row/column 14 | 15 | *axis:* If axis is set to 1, the sum of all the data of the same row is calculated, otherwise the column is computed. Default to 1. 16 | 17 | ## min(axis=0 as number): Array\ 18 | The minimum value of data in the same row/column. 19 | 20 | *axis:* If axis is set to 1, the minimum value of all data in the same row is calculated, otherwise the column is computed. default to 0. 21 | 22 | ## max(axis=0 as number): Array\ 23 | The maximum value of data in the same row/column. 24 | 25 | *axis:* If axis is set to 1, the maximum value of all data in the same row is calculated, otherwise the column is computed. default to 0. 26 | 27 | ## static mean(arr: Array>, axis=0 as number): Array\ 28 | The average value of the same row/column of data 29 | 30 | *arr:* dataset to calculate. 31 | *axis:* If axis is set to 1, the average value of all the data in the same row is calculated, otherwise the column is computed 32 | 33 | ## transpose(): Array> 34 | Transpose the Matrix. 35 | 36 | ## static zeros(r: number,c?: number): Array>|Array\ 37 | 38 | If there are two parameters, the zero matrix of the specified size is returned. If there is only one parameter, the one dimensional array is returned 39 | 40 | ## static ones(m: number,n?: number): Array>|Array\ 41 | 42 | Be similar with `zeros()`. 43 | ## sub(toSub: Matrix): Matrix 44 | Matrix subtraction. 45 | 46 | *toSub:* Matrix to sub with. 47 | 48 | 49 | ## add(toAdd: Matrix): Matrix 50 | Matrix addition. 51 | 52 | *toAdd:* Matrix to add with. 53 | 54 | ## mult(toMult: Matrix): Matrix 55 | Matrix multiplication. 56 | 57 | *toMult:* Matrix to multiply with. 58 | 59 | ## divide(toDivide: Matrix): Matrix 60 | Matrix Division. 61 | 62 | *toDivide:* Matrix to divide with. 63 | 64 | 65 | ```js 66 | const Matrix = require('mlhelper').utils.Matrix; 67 | 68 | const dataSet = [ 69 | [2,4,6], 70 | [5,7,1], 71 | [3,3,1] 72 | ]; 73 | const dataSet2 = [ 74 | [1,3,5], 75 | [2,4,7], 76 | [3,5,8] 77 | ]; 78 | let matA = new Matrix(dataSet), 79 | matB = new Matrix(dataSet2); 80 | 81 | 82 | let result = matA.add(matB); 83 | expect(result.toArray()).to.eql([ 84 | [3,7,11], 85 | [7,11,8], 86 | [6,8,9] 87 | ]); // true; 88 | 89 | expect(matA.max(1)).to.eql([6,7,3]); //true 90 | expect(matA.max(0)).to.eql([5,7,6]); //true 91 | 92 | expect(matA.transpose()).to.eql([[2,5,3],[4,7,3],[6,1,1]]); //true 93 | 94 | expect(Matrix.ones(2,2)).to.eql([[1,1],[1,1]]); //true 95 | expect(Matrix.ones(2,2)).to.eql([[1,1],[1,1]]); //true 96 | ``` -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # mlhelper documentation 2 | 3 | * [algorithm](algorithm.md) 4 | * [Matrix](Matrix.md) 5 | * [Vector](Vector.md) 6 | * [file Parser](fileParser.md) 7 | * [graph tools](graph.md) 8 | * [feature Engineering](features.md) 9 | 10 | *中文文档敬请期待!也欢迎参与文档翻译。* -------------------------------------------------------------------------------- /docs/Vector.md: -------------------------------------------------------------------------------- 1 | # Vector utils 2 | 3 | ## constructor(arr: Array\) 4 | *arr:* One-dimensional array. 5 | 6 | ## argSort(): Array\ 7 | The sorted index of array. 
8 | 9 | ## static sign(arr: number|Array\): number|Array\ 10 | 11 | For each element, when its value equals to 0 returns 0, else if it's larger than 0 returns 1 else returns -1. If arr is a number, just return a result number. 12 | 13 | ## static rand(m: number) 14 | Create specific number of random numbers between 0 and 1. 15 | 16 | ```js 17 | const Vector = require('mlhelper').utils.Vector; 18 | 19 | const arr = [4,7,1,8,2]; 20 | const vect = new Vector(arr); 21 | 22 | expect(vect.argSort()).to.eql([2,4,0,1,3]); //true 23 | expect(Vector.sign([-2,2,0,4])).to.eql([-1,1,0,1]); //true 24 | expect(Vector.sign(-6)).to.eql(-1); // true 25 | ``` -------------------------------------------------------------------------------- /docs/algorithm.md: -------------------------------------------------------------------------------- 1 | # Algorithms 2 | 3 | ## kNN (k-nearest neighbors algorithm) 4 | 5 | wiki: [https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) 6 | ### constructor(dataSet: Array>,labels: Array\) 7 | 8 | *dataSet:* The two dimensional array of data sets with known classifications. 9 | 10 | *labels:* Classification vector of dataset. 11 | 12 | ### classify(inx: Array\,k: number): any 13 | Classification based on feature vectors. 14 | 15 | *inx:* data for testing. 16 | 17 | *k:* Make decisions based on K nearest neighbors. 18 | 19 | ```js 20 | const kNN = require('mlhelper').algorithm.kNN; 21 | 22 | let knn = new kNN([ 23 | [1.,1.1], 24 | [1.,1.], 25 | [0.,0.], 26 | [0.,0.1] 27 | ],['A','A','B','C']); 28 | 29 | let result = knn.classify([1.1,0.8],4); 30 | 31 | console.log(result) // 'A' 32 | ``` 33 | ### autoNormalVector(inx: Array\): Array\ 34 | Normalize the test data vectors so that each feature is concentrated between 0 and 1. 35 | 36 | ### static autoNormal(dataSet: Array>): Array> 37 | Normalize the dataset matrix so that each feature is concentrated between 0 and 1. 38 | ## DT(ID3) Decision tree 39 | wiki: [https://en.wikipedia.org/wiki/Decision_tree](https://en.wikipedia.org/wiki/Decision_tree) 40 | ### constructor(dataSet: Array>,labels: Array\,alg: string="ID3") 41 | 42 | *dataSet:* The two dimensional array of data sets with known classifications(every row including the class). 43 | 44 | *labels:* Classification vector of dataset. 45 | 46 | *alg:* Algorithm to create decision tree. Default is ID3. By now, only ID3 is supported. 47 | 48 | ### classify(featLabels: Array\,testVec: Array\): any 49 | *featLabels:* vector of feature names. 50 | 51 | *testVec:* vector of test data. 52 | 53 | ### getTree(): object 54 | 55 | return the created decision tree. 56 | 57 | ```js 58 | const DT = require('mlhelper').algorithm.DT; 59 | 60 | let dataSet = parser.parseFile(path.join(__dirname,'./dt.txt')); 61 | 62 | let labels = ['age','prescript','astigmatic','tearRate'] 63 | let dt = new DT(dataSet,labels); 64 | 65 | let result = dt.classify(labels,["young","myope","no","reduced"]) //no lenses 66 | 67 | console.log(dt.getTree()); // { tearRate: { reduced: 'no lenses', normal: { astigmatic: [Object] } } } 68 | ``` 69 | 70 | ### storeTree(filePath: string): Promise 71 | 72 | store the decision tree to file and returns Promise object. 73 | 74 | ### static classifyFromTree(inputTree: object,featLabels: Array\,testVec: Array\): any 75 | 76 | Classify the data according to the existing decision tree. The meaning of the parameter refers to the above explanation. 
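A minimal sketch of how these two methods work together, reusing the `dt` instance and `labels` from the example above; it assumes Node's `fs` and `path` modules are imported, and `./tree.json` is only an illustrative path:

```js
// persist the trained tree, then classify later without rebuilding it
dt.storeTree(path.join(__dirname,'./tree.json'))
    .then(() => {
        let inputTree = JSON.parse(fs.readFileSync(path.join(__dirname,'./tree.json')).toString());
        let result = DT.classifyFromTree(inputTree,labels,["young","myope","no","reduced"]);
        console.log(result); // no lenses
    });
```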
77 | 78 | ## Logistic regression 79 | wiki: [https://en.wikipedia.org/wiki/Logistic_regression](https://en.wikipedia.org/wiki/Logistic_regression) 80 | 81 | ### constructor(dataMatIn: Array\<Array\<number\>\>,classLabels: Array\<number\>,numIter: number) 82 | *dataMatIn:* matrix-like dataset for training. 83 | 84 | *classLabels:* the classes of the training data. 85 | 86 | *numIter:* maximum number of iterations. 87 | 88 | ### classify(inX: Array\<number\>): number 89 | Classify the test data. 90 | 91 | ### getWeights(): Array\<number\> 92 | Uses the random gradient ascent method to compute the optimal regression coefficient for each feature. 93 | 94 | ```js 95 | let logi = new Logistic(dataSet,labels,150); 96 | let result = logi.classify(dataSet[i]); 97 | let weights = logi.getWeights(); 98 | console.log(weights); //[ 2.9301940437635965, -5.7803993740016555, 9.834929045066424 ] 99 | ``` 100 | 101 | ## AdaBoost 102 | wiki: [https://en.wikipedia.org/wiki/AdaBoost](https://en.wikipedia.org/wiki/AdaBoost) 103 | ### constructor(dataSet: Array\<Array\<number\>\>,labels: Array\<number\>,numInt=40 as number) 104 | 105 | *dataSet:* matrix-like data for training. 106 | 107 | *labels:* vector of the training data's classes. 108 | 109 | *numInt:* maximum number of iterations permitted; default is 40. 110 | 111 | ### classify(inx: Array\<Array\<number\>\>): Array\<number\> 112 | *inx:* matrix-like data for testing. 113 | 114 | ```js 115 | const AdaBoost = require('mlhelper').algorithm.AdaBoost; 116 | const dataSet = [ 117 | [1.0,2.1], 118 | [2.0,1.1], 119 | [1.3,1.0], 120 | [1.0,1.0], 121 | [2.0,1.0] 122 | ] 123 | const labels = [1.0,1.0,-1.0,-1.0,1.0]; 124 | 125 | let ada = new AdaBoost(dataSet,labels,40); 126 | 127 | let result = ada.classify([[1.0,2.1], 128 | [2.0,1.1], 129 | [1.3,1.0], 130 | [1.0,1.0], 131 | [2.0,1.0]]); 132 | 133 | console.log(result); //[ 1, 1, -1, -1, -1 ] 134 | ``` 135 | 136 | ## k-means clustering 137 | wiki: [https://en.wikipedia.org/wiki/K-means_clustering](https://en.wikipedia.org/wiki/K-means_clustering) 138 | ### constructor(dataSet: Array\<Array\<number\>\>,k: number) 139 | *dataSet:* matrix-like dataset to cluster. 140 | 141 | *k:* how many centroids. 142 | 143 | ### cluster(max=50 as number): [Array\<Array\<number\>\>,Array\<Array\<number\>\>] 144 | 145 | *max:* maximum number of iterations permitted; default is 50. 146 | 147 | *return:* returns an array `[centroids,clusterAssment]`: `centroids` is the coordinate matrix of all cluster centers, and `clusterAssment` is an array of `[centroidsIndex,minDist**2]` entries, where `centroidsIndex` is the index of the center to which the point belongs and `minDist` is the distance between the point and its center. 148 | 149 | ```js 150 | const kMeans = require('mlhelper').algorithm.kMeans; 151 | let kmeans = new kMeans(dataSet,5); 152 | 153 | let result = kmeans.cluster(40); 154 | console.log(util.inspect(result)) 155 | ``` 156 | 157 | ## TODO -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | # Feature engineering -------------------------------------------------------------------------------- /docs/fileParser.md: -------------------------------------------------------------------------------- 1 | # File Parser utils 2 | 3 | ## parseFile(filePath: string,options: object): Array\<Array\<any\>\> 4 | Parses simple delimited files. 5 | 6 | ### filePath: 7 | the absolute file path to read. 8 | ### options: \<object\> 9 | * options.toNumber \<boolean\> whether to convert the data to Number. Defaults to false. 10 | * options.delmiter \<string\> the field delimiter. Defaults to '\t'. 
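As a quick illustration, here is a minimal sketch of loading a tab-delimited text file; it mirrors the `parseFile` calls used in the library's own tests, and the file path is only an example:

```js
const parser = require('mlhelper/lib/utils').fileParser;
const path = require('path');

// every line becomes an array; with toNumber set, the fields are parsed as numbers
let dataSet = parser.parseFile(path.join(__dirname,'./testSet.txt'),{
    toNumber: true
});
```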
11 | 12 | ## read_csv (filePath: string,options): CSV 13 | 14 | Deals with more complex CSV files. 15 | 16 | ### filePath: 17 | the absolute file path to read. 18 | ### options: \<object\> 19 | * index_col \<boolean|number\> when set to true, the first column of the data is treated as the index column. Defaults to false. 20 | * delimiter \<string\> the delimiter for every line. Defaults to ','. 21 | * header \<Array\<string\>|number\> can be a vector of custom header names or the index of the header line. Defaults to 0. 22 | * dataType \<string\> the type of the data. Defaults to 'number'. 23 | * classType \<string\> the type of the last column of each line. Defaults to 'number'. 24 | 25 | ### return: 26 | an instance of CSV. 27 | 28 | ## write_csv (filePath: string,data: Array\<Array\<any\>\>,options): void 29 | 30 | ### filePath: 31 | absolute path of the file to write. 32 | 33 | ### data: 34 | matrix-like data to write. 35 | ### options \<object\> 36 | * options.index: \<boolean\> if set to true, an index column is added to each line. Defaults to false. 37 | * header: \<Array\<string\>\> custom header to add as the first line. Defaults to []. 38 | 39 | ## class CSV 40 | 41 | ### getHeader(): Array\<string\> 42 | Get the header line of the dataset. 43 | 44 | ### drop(label: string | number): CSV 45 | 46 | *label:* delete the column at the given index or with the given label. 47 | 48 | ### getClasses(): Array\<any\> 49 | Get the last column of every line. 50 | 51 | ```js 52 | const parser = require('mlhelper').utils.fileParser; 53 | 54 | let dt = parser.read_csv(path.join(__dirname,'../../../assets/train.csv'),{ 55 | index_col: 0, 56 | delimiter: ',', 57 | header: 0, 58 | dataType: 'number' 59 | }); 60 | 61 | let labels = dt.getClasses(); 62 | 63 | let dataSet = dt.drop('quality').values; 64 | 65 | // ... 66 | parser.write_csv(path.join(__dirname,'./result.csv'),resultSet,{ 67 | header: ['ID','quality'] 68 | }); 69 | ``` -------------------------------------------------------------------------------- /docs/graph.md: -------------------------------------------------------------------------------- 1 | # Graph utils 2 | Provides visualization capabilities for the various algorithms; it automatically opens your browser and draws the graphic. 3 | 4 | ## drawkNN(dataSet: Array\<Array\<number\>\>,labels: Array\<any\>,inx: Array\<number\>,options: object) 5 | 6 | Plots the scatter diagram for the kNN algorithm. 7 | *dataSet:* matrix of data for training. 8 | 9 | *labels:* vector of the training data's classes. 10 | 11 | *inx:* vector of data to test. 12 | 13 | ### options 14 | 15 | * options.width: \<string\> the width of the graph. Defaults to "600px". 16 | * options.height: \<string\> the height of the graph. Defaults to "400px". 17 | * options.size: \<number\> the size of every point. Defaults to 20. 18 | 19 | ```js 20 | const charts = require('mlhelper').utils.charts; 21 | //... 22 | let inx = [7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8], 23 | normalInx = knn.autoNormalVector(inx); 24 | 25 | console.log(knn.classify(inx,100)); // 6 26 | charts.drawkNN(kNN.autoNormal(dataSet),labels,normalInx,{ 27 | width: "500px", 28 | height: "400px", 29 | size: 15 30 | }); 31 | ``` 32 | ![/assets/knngraph.png](/assets/knngraph.png) 33 | 34 | ## drawDT(tree: object,options: object) 35 | Decision tree visualization. 36 | ### tree 37 | the decision tree object. 38 | 39 | ### options 40 | * options.width: \<number\> the width of the graph. 41 | * options.height: \<number\> the height of the graph. options.size: the size of every point. 42 | 43 | ```js 44 | const charts = require('mlhelper').utils.charts; 45 | // ... 
46 | charts.drawDT(dt.getTree(),{ 47 | width:600, 48 | height:400 49 | }); 50 | ``` 51 | ![/assets/DT.png](/assets/DT.png) 52 | 53 | ## drawLogistic(dataSet: Array>,labels: Array\,weights: Array\,options: object) 54 | Visualization of logistic regression algorithm 55 | 56 | ### dataSet 57 | The matrix like dataset for training. 58 | 59 | ### labels: 60 | the classes for training dataset. 61 | 62 | ### weights: 63 | Random gradient ascent method for optimal regression coefficients of each feature 64 | 65 | ### options: 66 | * options.width: \ the width of the graph. default to "600px". 67 | * options.height: \ the height of the graph. default to "400px". 68 | * options.size: \ the size of every point. deault to 20. 69 | 70 | ```js 71 | const charts = require('mlhelper').utils.charts; 72 | // ... 73 | let weights = logi.getWeights() 74 | console.log(weights); 75 | 76 | charts.drawLogistic(dataSet,labels,weights) 77 | ``` 78 | 79 | -------------------------------------------------------------------------------- /gulpfile.js: -------------------------------------------------------------------------------- 1 | const gulp = require('gulp'); 2 | const ts = require('gulp-typescript'); 3 | const tsProject = ts.createProject('tsconfig.json'); 4 | const sourcemaps = require('gulp-sourcemaps'); 5 | const babel = require('gulp-babel'); 6 | 7 | const PATHS = { 8 | scripts: ['./src/**/*.ts'], 9 | output: './lib' 10 | } 11 | 12 | gulp.task('copyFiles',()=>{ 13 | return gulp.src([ 14 | 'src/**/*', 15 | '!src/**/*.ts' 16 | ]).pipe(gulp.dest(PATHS.output)); 17 | }); 18 | 19 | gulp.task('watch-ts',['build-ts'],()=>{ 20 | gulp.watch(PATHS.scripts,['build-ts']); 21 | }); 22 | 23 | gulp.task('build-ts',()=>{ 24 | return gulp.src(PATHS.scripts) 25 | .pipe(sourcemaps.init()) 26 | .pipe(tsProject()) 27 | .js 28 | .pipe(babel({ 29 | presets: ['env'] 30 | })) 31 | .pipe(sourcemaps.write('./maps')) 32 | .pipe(gulp.dest(PATHS.output)); 33 | }); 34 | 35 | gulp.task('default',['watch-ts']); -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mlhelper", 3 | "version": "0.2.0", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "@types/node": { 8 | "version": "8.0.46", 9 | "resolved": "https://registry.npmjs.org/@types/node/-/node-8.0.46.tgz", 10 | "integrity": "sha512-rRkP4kb5JYIfAoRKaDbcdPZBcTNOgzSApyzhPN9e6rhViSJAWQGlSXIX5gc75iR02jikhpzy3usu31wMHllfFw==", 11 | "dev": true 12 | }, 13 | "mathjs": { 14 | "version": "http://registry.npm.taobao.org/mathjs/download/mathjs-3.16.4.tgz", 15 | "requires": { 16 | "complex.js": "http://registry.npm.taobao.org/complex.js/download/complex.js-2.0.4.tgz", 17 | "decimal.js": "http://registry.npm.taobao.org/decimal.js/download/decimal.js-7.2.3.tgz", 18 | "fraction.js": "http://registry.npm.taobao.org/fraction.js/download/fraction.js-4.0.2.tgz", 19 | "javascript-natural-sort": "http://registry.npm.taobao.org/javascript-natural-sort/download/javascript-natural-sort-0.7.1.tgz", 20 | "seed-random": "http://registry.npm.taobao.org/seed-random/download/seed-random-2.2.0.tgz", 21 | "tiny-emitter": "http://registry.npm.taobao.org/tiny-emitter/download/tiny-emitter-2.0.0.tgz", 22 | "typed-function": "http://registry.npm.taobao.org/typed-function/download/typed-function-0.10.5.tgz" 23 | }, 24 | "dependencies": { 25 | "complex.js": { 26 | "version": "http://registry.npm.taobao.org/complex.js/download/complex.js-2.0.4.tgz", 27 | 
"bundled": true 28 | }, 29 | "decimal.js": { 30 | "version": "http://registry.npm.taobao.org/decimal.js/download/decimal.js-7.2.3.tgz", 31 | "bundled": true 32 | }, 33 | "fraction.js": { 34 | "version": "http://registry.npm.taobao.org/fraction.js/download/fraction.js-4.0.2.tgz", 35 | "bundled": true 36 | }, 37 | "javascript-natural-sort": { 38 | "version": "http://registry.npm.taobao.org/javascript-natural-sort/download/javascript-natural-sort-0.7.1.tgz", 39 | "bundled": true 40 | }, 41 | "seed-random": { 42 | "version": "http://registry.npm.taobao.org/seed-random/download/seed-random-2.2.0.tgz", 43 | "bundled": true 44 | }, 45 | "tiny-emitter": { 46 | "version": "http://registry.npm.taobao.org/tiny-emitter/download/tiny-emitter-2.0.0.tgz", 47 | "bundled": true 48 | }, 49 | "typed-function": { 50 | "version": "http://registry.npm.taobao.org/typed-function/download/typed-function-0.10.5.tgz", 51 | "bundled": true 52 | } 53 | } 54 | }, 55 | "sylvester": { 56 | "version": "0.0.21", 57 | "resolved": "https://registry.npmjs.org/sylvester/-/sylvester-0.0.21.tgz", 58 | "integrity": "sha1-KYexzivS84sNzio0OIiEv6RADqc=" 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mlhelper", 3 | "version": "0.2.0", 4 | "description": "tools for ML in JavaScript", 5 | "main": "lib/index.js", 6 | "scripts": { 7 | "test": "mocha --recursive", 8 | "dev": "gulp watch-ts", 9 | "build": "gulp copyFiles && gulp build-ts", 10 | "postversion": "git push && git push --tags" 11 | }, 12 | "keywords": [ 13 | "ML", 14 | "AI" 15 | ], 16 | "author": "laoqiren", 17 | "license": "MIT", 18 | "dependencies": { 19 | "express": "^4.16.2", 20 | "immutable": "^3.8.2", 21 | "lodash": "^4.17.4", 22 | "mathjs": "^3.16.4", 23 | "swig": "^1.4.2", 24 | "sylvester": "0.0.21" 25 | }, 26 | "devDependencies": { 27 | "@types/lodash": "^4.14.78", 28 | "@types/node": "^8.0.46", 29 | "babel-core": "^6.26.0", 30 | "babel-preset-env": "^1.6.1", 31 | "chai": "^4.1.2", 32 | "gulp": "^3.9.1", 33 | "gulp-babel": "^7.0.0", 34 | "gulp-sourcemaps": "^2.6.1", 35 | "gulp-typescript": "^3.2.2", 36 | "mocha": "^4.0.1", 37 | "typescript": "^2.5.3" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/algorithm/AdaBoost/index.ts: -------------------------------------------------------------------------------- 1 | import Matrix from '../../utils/matrix/index'; 2 | import Vector from '../../utils/vector/index'; 3 | import * as _ from 'lodash'; 4 | 5 | class AdaBoost { 6 | private dataSet: Array>; 7 | private labels: Array; 8 | private numInt: number; 9 | 10 | /** 11 | * Creates an instance of AdaBoost. 12 | * @param {Array>} dataSet matirx like datas for training. 13 | * @param {Array} labels vector of the training datas' classes. 
14 | * @param {number} [numInt=40 as number] maximum permission iterative number of times, default is 40 15 | * @memberof AdaBoost 16 | */ 17 | constructor(dataSet: Array>,labels: Array,numInt=40 as number){ 18 | this.dataSet = dataSet; 19 | this.labels = labels; 20 | this.numInt = numInt; 21 | } 22 | /** 23 | * 基于单层决策树的弱分类器 weak classifier based on Single layer decision tree 24 | * 25 | * @param {Matrix} dataMatrix matrix of datas for classify 26 | * @param {number} dimen the index of features to classify 27 | * @param {number} threshVal the threshold value of the featuer to classify 28 | * @param {String} flag can be 'lt' or 'gt', when it's 'lt', when the value of the feature is lower than threshVal, the data's class will be -1, or will be 1.0. 29 | * @returns {Array} classification results. 30 | * @memberof AdaBoost 31 | */ 32 | stumpClassify(dataMatrix: Matrix,dimen: number,threshVal: number,flag: String): Array{ 33 | let m = dataMatrix.size()[0]; 34 | let retArray = Matrix.ones(m); 35 | 36 | if(flag === 'lt'){ 37 | retArray = retArray.map((v,i)=>dataMatrix.arr[i][dimen]<=threshVal?-1.0:1.0); 38 | } else { 39 | retArray = retArray.map((v,i)=>dataMatrix.arr[i][dimen]>threshVal?-1.0:1.0); 40 | } 41 | return retArray; 42 | } 43 | /** 44 | * 针对某个特征权值向量找到其对应的最佳单层决策树若分类器,及该弱分类器分类结果和错误率 According to a feature weight vector, the optimal single layer decision tree is found, the classifier, the classification result and the error rate of the weak classifier 45 | * 46 | * @param {Array} D the vector of every feature's weight. 47 | * @returns {[Object,number,Array]} the information of the best weak classifier, the error rate of the classifier and the classification results. 48 | * @memberof AdaBoost 49 | */ 50 | buildStump(D: Array): [Object,number,Array]{ 51 | let dataSetMat = new Matrix(this.dataSet); 52 | let labels = this.labels; 53 | let [m,n] = dataSetMat.size(); 54 | let numSteps = 10.0, 55 | bestStump = {}, //用于存放最佳单层决策树 56 | bestClassEst = Matrix.ones(m), // 该弱分类器最低错误率时的预测分类向量 57 | minError = Infinity; // 初始最新错误率为无穷大 58 | let that = this; 59 | 60 | for(let i=0; ipredictedVals[i]===labels[i]?0:1); 72 | 73 | let weightedError = _.sum(_.zipWith(errArr,D,(a,b)=>a*b)); // 各个特征的错误加权和 74 | 75 | if(weightedError} array of all the weak classifier. 95 | * @memberof AdaBoost 96 | */ 97 | adaBoostTrainDS(): Array{ 98 | let numInt = this.numInt, // 最大迭代次数 99 | dataMatrix = new Matrix(this.dataSet), 100 | labels = this.labels, 101 | weakClassArr = []; //弱分类器数组 102 | let m = dataMatrix.size()[0], 103 | D = Matrix.ones(m).map(v=>v/m); //初始的特征权值向量 104 | 105 | let aggClassEst = > Matrix.zeros(m); 106 | 107 | for(let i=0; ia*b).map(v=>-1*alpha*v); 116 | D = _.zipWith(D,expon,(a,b)=>a*Math.exp(b)); 117 | let Dsum = _.sum(D); 118 | D = D.map(v=>v/Dsum); 119 | 120 | // 计算已有弱分类器组合后的错误率 121 | let alphaClassEst = classEst.map(v=>v*alpha); 122 | aggClassEst = _.zipWith(alphaClassEst,aggClassEst,(a,b)=>a+b); 123 | let realAggClassEst = > Vector.sign(aggClassEst); 124 | let aggErrors = _.zipWith(_.zipWith(realAggClassEst,labels,(a,b)=>a===b?0:1),Matrix.ones(m),(a,b)=>a*b); 125 | let errorRate = _.sum(aggErrors)/m; 126 | 127 | if(errorRate === 0.0) { 128 | break; 129 | } 130 | } 131 | return weakClassArr; 132 | } 133 | /** 134 | * 输入测试数据矩阵,根据所有弱分类器组合学习,得到最终结果 compose all weak classifier to get a strong classifier. 135 | * 136 | * @param {Array>} inx matrix of datas for testing. 137 | * @returns {Array} vector of classification results. 
138 | * @memberof AdaBoost 139 | */ 140 | classify(inx: Array>): Array{ 141 | let dataMatrix = new Matrix(inx); 142 | let m = dataMatrix.size()[0]; 143 | let aggClassEst = > Matrix.zeros(m); 144 | 145 | // 组合所有弱分类器 146 | let classifierArr = this.adaBoostTrainDS(); 147 | classifierArr.forEach((v,i)=>{ 148 | let classEst = this.stumpClassify(dataMatrix,v['dim'],v['thresh'],v['ineq']); 149 | aggClassEst = _.zipWith(classEst.map(value=>value*v['alpha']),aggClassEst,(a,b)=>a+b); 150 | }); 151 | 152 | return > Vector.sign(aggClassEst); 153 | } 154 | } 155 | 156 | export default AdaBoost; -------------------------------------------------------------------------------- /src/algorithm/AdaBoost/test.ts: -------------------------------------------------------------------------------- 1 | import AdaBoost from './index'; 2 | 3 | const dataSet = [ 4 | [1.0,2.1], 5 | [2.0,1.1], 6 | [1.3,1.0], 7 | [1.0,1.0], 8 | [2.0,1.0] 9 | ] 10 | const labels = [1.0,1.0,-1.0,-1.0,1.0]; 11 | 12 | let ada = new AdaBoost(dataSet,labels,40); 13 | 14 | let result = ada.classify([[1.0,2.1], 15 | [2.0,1.1], 16 | [1.3,1.0], 17 | [1.0,1.0], 18 | [2.0,1.0]]); 19 | 20 | console.log(result); -------------------------------------------------------------------------------- /src/algorithm/DT/dt.txt: -------------------------------------------------------------------------------- 1 | young myope no reduced no lenses 2 | young myope no normal soft 3 | young myope yes reduced no lenses 4 | young myope yes normal hard 5 | young hyper no reduced no lenses 6 | young hyper no normal soft 7 | young hyper yes reduced no lenses 8 | young hyper yes normal hard 9 | pre myope no reduced no lenses 10 | pre myope no normal soft 11 | pre myope yes reduced no lenses 12 | pre myope yes normal hard 13 | pre hyper no reduced no lenses 14 | pre hyper no normal soft 15 | pre hyper yes reduced no lenses 16 | pre hyper yes normal no lenses 17 | presbyopic myope no reduced no lenses 18 | presbyopic myope no normal no lenses 19 | presbyopic myope yes reduced no lenses 20 | presbyopic myope yes normal hard 21 | presbyopic hyper no reduced no lenses 22 | presbyopic hyper no normal soft 23 | presbyopic hyper yes reduced no lenses 24 | presbyopic hyper yes normal no lenses -------------------------------------------------------------------------------- /src/algorithm/DT/index.ts: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | import * as fs from 'fs'; 3 | import {Repeat,List} from 'immutable'; 4 | 5 | interface ClassCount { 6 | [index: string]: number; 7 | } 8 | 9 | /** 10 | * 计算香农熵 Calculating Shannon entropy 11 | * 12 | * @param {Array>} dataSet 13 | * @returns {number} 14 | */ 15 | function calShannoEnt(dataSet: Array>): number{ 16 | let numEntries = dataSet.length; 17 | let labelCounts:ClassCount = {}; 18 | 19 | dataSet.forEach(v=>{ 20 | let label = v[v.length-1]; 21 | if(label in labelCounts){ 22 | return labelCounts[label] += 1; 23 | } 24 | 25 | labelCounts[label] = 1; 26 | }); 27 | let shannoEnt = 0.0; 28 | for(let label in labelCounts){ 29 | let prob = labelCounts[label]/numEntries; 30 | shannoEnt -= prob * Math.log2(prob); 31 | 32 | } 33 | return shannoEnt; 34 | } 35 | 36 | /** 37 | * 划分数据集 Partition dataset 38 | * 39 | * @param {array} dataSet 原始数据集 Raw data set 40 | * @param {number} axis 划分特征 which feature to partition 41 | * @param {any} value 特征值 the value of the feature to partition 42 | * @returns {array} 划分后的数据集 the partition result. 
43 | */ 44 | function splitDataSet(dataSet: Array>,axis: number,value: any): Array>{ 45 | let retDataSet = dataSet.reduce((pre,cur)=>{ 46 | let curList = List(cur); 47 | if(cur[axis] === value){ 48 | pre.push(curList.splice(axis,1).toArray()); 49 | } 50 | return pre; 51 | },[]); 52 | return retDataSet; 53 | } 54 | 55 | /** 56 | * 选择最好的划分特征 choose the best feature to partition. 57 | * 58 | * @param {Array>} dataSet 59 | * @returns {number} 60 | */ 61 | function chooseBestLabelToSplit(dataSet: Array>): number{ 62 | let numLables = dataSet[0].length - 1, 63 | baseEntropy = calShannoEnt(dataSet), 64 | bestInfoGain = 0.0, 65 | bestLabel = -1; 66 | 67 | for(let i=0; iv[i]), 69 | uniqueVals = [...new Set(featList)], 70 | newEntropy = 0.0; 71 | uniqueVals.forEach((v,index)=>{ 72 | let subDataSet = splitDataSet(dataSet,i,v), 73 | prob = subDataSet.length/dataSet.length; 74 | newEntropy += prob * calShannoEnt(subDataSet); 75 | }); 76 | let infoGain = baseEntropy - newEntropy; 77 | 78 | if(infoGain > bestInfoGain){ 79 | bestInfoGain = infoGain; 80 | bestLabel = i; 81 | } 82 | } 83 | 84 | return bestLabel; 85 | } 86 | 87 | /** 88 | * 多数决策,当子数据集只有一个特征,且各个实例所属分类仍旧不同时调用此方法 The majority decision, only one set of features when the data, and each instance belongs to classification is still not at the same time this method is called 89 | * 90 | * @param {Array} classList 91 | * @returns {string} 92 | */ 93 | function majorityCnt(classList: Array): string{ 94 | let classCount:ClassCount = {}; 95 | classList.forEach((v,i)=>{ 96 | if(v in classCount){ 97 | return classCount[v] += 1; 98 | } 99 | classCount[v] = 1; 100 | }) 101 | let sortedClassCount = Object.keys(classCount).sort((a,b)=>classCount[b]-classCount[a]); 102 | 103 | return sortedClassCount[0]; 104 | } 105 | 106 | /** 107 | * 构建决策树 create decision tree. 108 | * 109 | * @param {Array>} dataSet for training. 110 | * @param {Array} labels the classes of training data. 111 | * @returns {object} 112 | */ 113 | function createTree(dataSet: Array>,labels: Array): object{ 114 | let classList = dataSet.map(v=>v[v.length-1]), 115 | uniqueClasses = [...new Set(classList)].length; 116 | if(uniqueClasses === 1){ 117 | return classList[0]; 118 | } 119 | if(dataSet[0].length === 1){ 120 | return majorityCnt(classList); 121 | } 122 | let bestFeat = chooseBestLabelToSplit(dataSet), 123 | bestFeatLabel = labels[bestFeat]; 124 | let resultTree = { 125 | [bestFeatLabel]: {} 126 | } 127 | labels.splice(bestFeat,1); 128 | let featValues = dataSet.map(v=>v[bestFeat]), 129 | uniqueVals = [...new Set(featValues)]; 130 | uniqueVals.forEach(v=>{ 131 | let subLabels = [...labels], 132 | subDataSet = splitDataSet(dataSet,bestFeat,v); 133 | resultTree[bestFeatLabel][v] = createTree(subDataSet,subLabels); 134 | }) 135 | 136 | return resultTree; 137 | } 138 | 139 | /** 140 | * 判断测试数据分类 class the testing data. 141 | * 142 | * @param {object} inputTree 决策树对象 the decision tree. 143 | * @param {array} featLabels 特征名称向量 the vector of feature names. 144 | * @param {array} testVec 测试向量 the vector for testing. 
145 | * @returns 测试数据的分类 146 | */ 147 | function classify(inputTree: object,featLabels: Array,testVec: Array): any{ 148 | let firstStr = Object.keys(inputTree)[0], 149 | secondDict = inputTree[firstStr], 150 | featIndex = featLabels.indexOf(firstStr); 151 | 152 | let resultClass; 153 | for(let key of Object.keys(secondDict)){ 154 | 155 | if(testVec[featIndex] === key){ 156 | if(typeof secondDict[key] === 'object'){ 157 | resultClass = classify(secondDict[key],featLabels,testVec); 158 | } else{ 159 | resultClass = secondDict[key]; 160 | break; 161 | } 162 | } 163 | } 164 | return resultClass; 165 | } 166 | 167 | class DT { 168 | tree: object; 169 | constructor(public dataSet: Array>,public labels: Array,alg: string="ID3"){ 170 | this.tree = createTree(dataSet,[...labels]); 171 | } 172 | getTree(): object{ 173 | return this.tree; 174 | } 175 | // 根据实例构造的决策树进行测试 176 | classify(featLabels: Array,testVec: Array): any{ 177 | return classify(this.tree,featLabels,testVec); 178 | } 179 | // 将决策树存入文件 180 | storeTree(filePath: string){ 181 | let jsonTree = JSON.stringify(this.tree); 182 | return new Promise((resolve,reject)=>{ 183 | fs.writeFile(filePath,jsonTree,err=>{ 184 | if(err){ 185 | return reject(err); 186 | } 187 | resolve(); 188 | }); 189 | }) 190 | } 191 | // 根据提供的决策树进行测试,静态方法,无需实例化构造决策树 192 | static classifyFromTree(inputTree: object,featLabels: Array,testVec: Array): any{ 193 | return classify(inputTree,featLabels,testVec); 194 | } 195 | } 196 | 197 | export default DT; -------------------------------------------------------------------------------- /src/algorithm/DT/test.ts: -------------------------------------------------------------------------------- 1 | import DT from './index'; 2 | import * as parser from '../../utils/fileParser/index'; 3 | import * as path from 'path'; 4 | import * as util from 'util'; 5 | import * as charts from '../../utils/charts/index'; 6 | 7 | 8 | let dataSet = parser.parseFile(path.join(__dirname,'./dt.txt')); 9 | 10 | let labels = ['age','prescript','astigmatic','tearRate'] 11 | let dt = new DT(dataSet,labels); 12 | 13 | let result = dt.classify(labels,["young","myope","no","reduced"]) 14 | 15 | console.log(util.inspect(dt.getTree(),{depth: null})); 16 | 17 | charts.drawDT(dt.getTree(),{ 18 | width:600, 19 | height:400 20 | }); 21 | 22 | -------------------------------------------------------------------------------- /src/algorithm/index.ts: -------------------------------------------------------------------------------- 1 | import kNN from './kNN/index'; 2 | import DT from './DT/index'; 3 | import logistic from './logistic/index'; 4 | import AdaBoost from './AdaBoost/index'; 5 | import kMeans from './kMeans/index'; 6 | 7 | export { 8 | kNN, 9 | DT, 10 | logistic, 11 | AdaBoost, 12 | kMeans 13 | } -------------------------------------------------------------------------------- /src/algorithm/kMeans/index.ts: -------------------------------------------------------------------------------- 1 | import Matrix from '../../utils/matrix'; 2 | import Vector from '../../utils/vector'; 3 | import * as _ from 'lodash'; 4 | import * as util from 'util'; 5 | 6 | class kMeans { 7 | private dataSet: Matrix; 8 | private k: number; 9 | 10 | /** 11 | * Creates an instance of kMeans. 12 | * @param {Array>} dataSet 13 | * @param {number} k how many centroids. 
14 | * @memberof kMeans 15 | */ 16 | constructor(dataSet: Array>,k: number){ 17 | this.dataSet = new Matrix(dataSet); 18 | this.k = k; 19 | } 20 | /** 21 | * 随机创建K个初始质心 Random creation of K initial centroids 22 | * 23 | * @returns {Array>} 24 | * @memberof kMeans 25 | */ 26 | createCent(): Array>{ 27 | let n = this.dataSet.size()[1]; 28 | let centroids = >>Matrix.zeros(this.k,n); 29 | 30 | for(let j=0; jv*rangeJ+minJ); 36 | 37 | centroids.forEach((v,i)=>{ 38 | v[j] = randomVect[i]; 39 | }); 40 | } 41 | 42 | return centroids; 43 | } 44 | /** 45 | * 计算两点欧式距离 Calculating the Euclidean distance between two points 46 | * 47 | * @param {Array} vec1 vector 1. 48 | * @param {Array} vec2 vector 2. 49 | * @returns {number} 50 | * @memberof kMeans 51 | */ 52 | distEclud(vec1: Array,vec2: Array): number{ 53 | return Math.sqrt(_.sum(_.zipWith(vec1,vec2,(a,b)=>(a-b)**2))); 54 | } 55 | /** 56 | * 聚类函数cluster function 57 | * 58 | * @param {number} [max=50 as number] Maximum iterations 59 | * @returns {[Array>,Array>]} 60 | * @memberof kMeans 61 | */ 62 | cluster(max=50 as number): [Array>,Array>]{ 63 | let m = this.dataSet.size()[0], 64 | dataSet = this.dataSet.arr; 65 | let clusterAssment = >> Matrix.zeros(m,2), //各个实例的聚类结果,结果包含所属质心,和该实例到所属质心的距离 66 | centroids = this.createCent(), //存放各个质心向量 67 | clusterChanged = true, // 标识聚类情况发生变化,只要有一个实例的聚类发生变化,设为true 68 | k = this.k; // 质心个数 69 | let num = 0; // 收敛次数 70 | while(clusterChanged){ 71 | if(++num>max){ //超过最大收敛次数,退出循环 72 | break; 73 | } 74 | 75 | clusterChanged = false; 76 | for(let i=0; i{ //找到属于当前质心所在簇的所有实例 95 | if(v[0]===cent){ 96 | centPointsIndex.push(i); 97 | } 98 | }); 99 | if(centPointsIndex.length !== 0) { 100 | let pointsInCent = dataSet.filter((v,i)=>i in centPointsIndex); //根据位置找到实例向量 101 | centroids[cent] = Matrix.mean(pointsInCent); //更新质心向量,每个特征值为该簇所以实例该特征的平均值 102 | } 103 | } 104 | } 105 | return [centroids,clusterAssment]; 106 | } 107 | } 108 | 109 | export default kMeans; -------------------------------------------------------------------------------- /src/algorithm/kMeans/test.ts: -------------------------------------------------------------------------------- 1 | import kMeans from './index'; 2 | import * as util from 'util'; 3 | import * as path from 'path'; 4 | import * as fileParser from '../../utils/fileParser'; 5 | 6 | let dataSet = >> fileParser.parseFile(path.join(__dirname,'./testSet.txt'),{ 7 | toNumber: true 8 | }); 9 | 10 | let kmeans = new kMeans(dataSet,5); 11 | 12 | let result = kmeans.cluster(); 13 | console.log(util.inspect(result)) -------------------------------------------------------------------------------- /src/algorithm/kMeans/testSet.txt: -------------------------------------------------------------------------------- 1 | 1.658985 4.285136 2 | -3.453687 3.424321 3 | 4.838138 -1.151539 4 | -5.379713 -3.362104 5 | 0.972564 2.924086 6 | -3.567919 1.531611 7 | 0.450614 -3.302219 8 | -3.487105 -1.724432 9 | 2.668759 1.594842 10 | -3.156485 3.191137 11 | 3.165506 -3.999838 12 | -2.786837 -3.099354 13 | 4.208187 2.984927 14 | -2.123337 2.943366 15 | 0.704199 -0.479481 16 | -0.392370 -3.963704 17 | 2.831667 1.574018 18 | -0.790153 3.343144 19 | 2.943496 -3.357075 20 | -3.195883 -2.283926 21 | 2.336445 2.875106 22 | -1.786345 2.554248 23 | 2.190101 -1.906020 24 | -3.403367 -2.778288 25 | 1.778124 3.880832 26 | -1.688346 2.230267 27 | 2.592976 -2.054368 28 | -4.007257 -3.207066 29 | 2.257734 3.387564 30 | -2.679011 0.785119 31 | 0.939512 -4.023563 32 | -3.674424 -2.261084 33 | 2.046259 2.735279 34 | -3.189470 1.780269 35 | 
4.372646 -0.822248 36 | -2.579316 -3.497576 37 | 1.889034 5.190400 38 | -0.798747 2.185588 39 | 2.836520 -2.658556 40 | -3.837877 -3.253815 41 | 2.096701 3.886007 42 | -2.709034 2.923887 43 | 3.367037 -3.184789 44 | -2.121479 -4.232586 45 | 2.329546 3.179764 46 | -3.284816 3.273099 47 | 3.091414 -3.815232 48 | -3.762093 -2.432191 49 | 3.542056 2.778832 50 | -1.736822 4.241041 51 | 2.127073 -2.983680 52 | -4.323818 -3.938116 53 | 3.792121 5.135768 54 | -4.786473 3.358547 55 | 2.624081 -3.260715 56 | -4.009299 -2.978115 57 | 2.493525 1.963710 58 | -2.513661 2.642162 59 | 1.864375 -3.176309 60 | -3.171184 -3.572452 61 | 2.894220 2.489128 62 | -2.562539 2.884438 63 | 3.491078 -3.947487 64 | -2.565729 -2.012114 65 | 3.332948 3.983102 66 | -1.616805 3.573188 67 | 2.280615 -2.559444 68 | -2.651229 -3.103198 69 | 2.321395 3.154987 70 | -1.685703 2.939697 71 | 3.031012 -3.620252 72 | -4.599622 -2.185829 73 | 4.196223 1.126677 74 | -2.133863 3.093686 75 | 4.668892 -2.562705 76 | -2.793241 -2.149706 77 | 2.884105 3.043438 78 | -2.967647 2.848696 79 | 4.479332 -1.764772 80 | -4.905566 -2.911070 -------------------------------------------------------------------------------- /src/algorithm/kNN/index.ts: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | import Matrix from '../../utils/matrix/index'; 3 | import Vector from '../../utils/vector/index'; 4 | import {Repeat,List} from 'immutable'; 5 | 6 | interface ClassCount { 7 | [index: string]: number; 8 | } 9 | /** 10 | * 归一化数据 11 | * 12 | * @param {object} Matrix: dataSet 13 | * @returns {Array} [normalized data,the range of each feature,the minimum value of each feature] 14 | */ 15 | function autoNormal(dataSet: Matrix): [Array>,Array,Array]{ 16 | let minVals = dataSet.min(0); // 每个特征的最小值 17 | let maxVals = dataSet.max(0); // 每个特征的最大值 18 | let ranges = new Vector(maxVals).zipWith((a,b)=>a-b,new Vector(minVals)); // 每个特征的范围 19 | 20 | let normalDataSet = new Matrix(Matrix.zeros(...dataSet.size())); 21 | let setSize = dataSet.size()[0]; // 训练集实例数 22 | 23 | normalDataSet = dataSet.sub(new Matrix(Repeat(minVals,setSize).toArray())); //分子为每个特征原始值减去该特征最小值 24 | 25 | normalDataSet = normalDataSet.divide(new Matrix(Repeat(ranges,setSize).toArray())); // 上式得到的每个特征值除以该特征范围 26 | return [normalDataSet.arr,ranges,minVals]; 27 | } 28 | 29 | class kNN { 30 | private dataSet: Matrix; 31 | private labels: Vector; 32 | private ranges: Array; 33 | private minVals: Array; 34 | 35 | /** 36 | * Creates an instance of kNN. 37 | * @param {Array>} dataSet Matrix like datas for training. 38 | * @param {Array} labels vector like classes of each tarining data. 39 | * @memberof kNN 40 | */ 41 | constructor(dataSet: Array>,labels: Array){ 42 | let [normalDataSet,ranges,minVals] = autoNormal(new Matrix(dataSet)); 43 | this.dataSet = new Matrix(normalDataSet); 44 | this.labels = new Vector(labels); 45 | this.ranges = ranges; 46 | this.minVals = minVals; 47 | } 48 | /** 49 | * kNN算法主体 50 | * 51 | * @param {array} inx data for testing. 52 | * @param {number} K值 the K number. 
53 | * @returns {any} 54 | * @memberof kNN 55 | */ 56 | classify(inx_: Array,k: number): any{ 57 | const setSize = this.dataSet.size()[0]; 58 | if(k > setSize) { 59 | k = setSize; 60 | } 61 | 62 | //归一化测试数据 63 | let inx = this.autoNormalVector(inx_); 64 | // 求测试数据与每一个训练数据的距离 65 | let diffMat = new Matrix(Repeat(inx,setSize).toArray()).sub(this.dataSet); // 建立与训练数据同大小的矩阵,再一一对应相减 66 | 67 | let sqDiffMat = diffMat.mult(diffMat); 68 | let sqDistances = sqDiffMat.sum(1); 69 | let distances = sqDistances.map(Math.sqrt); 70 | let sortedDistanceIndicies = (new Vector(distances)).argSort(); // 与各个训练数据的距离排序的下标 71 | 72 | // 统计每个距离最近前K个值里各个分类的数量 73 | let classCount:ClassCount = {}; 74 | for(let i=0; iclassCount[b]-classCount[a]); 83 | 84 | // 返回实例最多的分类 85 | return sortedClassCount[0] 86 | } 87 | /** 88 | * normalize the vector of testing data. 89 | * 90 | * @param {Array} inx_ 91 | * @returns {Array} 92 | * @memberof kNN 93 | */ 94 | autoNormalVector(inx_: Array): Array{ 95 | let inx = [...inx_]; 96 | let minVals = this.minVals, 97 | ranges = this.ranges; 98 | 99 | inx = new Vector(inx).zipWith((a,b)=>a - b,new Vector(minVals)); 100 | inx = new Vector(inx).zipWith((a,b)=>a/b,new Vector(ranges)); 101 | return inx; 102 | } 103 | /** 104 | * normalize the given matrix like datas. 105 | * 106 | * @static 107 | * @param {Array>} dataSet 108 | * @returns {Array>} 109 | * @memberof kNN 110 | */ 111 | static autoNormal(dataSet: Array>): Array>{ 112 | return autoNormal(new Matrix(dataSet))[0]; 113 | } 114 | } 115 | 116 | export default kNN; -------------------------------------------------------------------------------- /src/algorithm/kNN/test.ts: -------------------------------------------------------------------------------- 1 | import kNN from '../kNN'; 2 | 3 | let knn = new kNN([ 4 | [1.,1.1], 5 | [1.,1.], 6 | [0.,0.], 7 | [0.,0.1] 8 | ],['A','A','B','C']); 9 | 10 | let result = knn.classify([1.1,0.8],4); 11 | 12 | console.log(result) -------------------------------------------------------------------------------- /src/algorithm/logistic/index.ts: -------------------------------------------------------------------------------- 1 | import Matrix from '../../utils/matrix'; 2 | import * as _ from 'lodash'; 3 | 4 | function sigmoid(inx){ 5 | return 1.0/(1+Math.exp(-inx)); 6 | } 7 | class Logistic { 8 | private dataMatrix: Matrix; 9 | private labels: Array; 10 | private numIter: number; 11 | 12 | /** 13 | * Creates an instance of Logistic. 14 | * @param {Array>} dataMatIn_ matrix like dataset for training. 15 | * @param {Array} classLabels the classes of training datas. 
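 *        e.g. [0, 1, 0] for a two-class problem (illustrative values). A minimal end-to-end sketch,
 *        assuming two-feature rows (the constructor only keeps the first two columns of each row):
 *          new Logistic([[0.5, 1.4], [1.8, 0.6], [0.9, 2.2]], [0, 1, 0], 150).classify([1.2, 0.7])
 *        returns 1 or 0 depending on the weights learned by the random gradient ascent step.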
16 | * @param {number} numIter Maximum iterations 17 | * @memberof Logistic 18 | */ 19 | constructor(dataMatIn_: Array>,classLabels: Array,numIter: number){ 20 | let dataMatIn = [...dataMatIn_]; 21 | dataMatIn = dataMatIn.map(v=>[1.0,v[0],v[1]]); 22 | 23 | this.dataMatrix = new Matrix(dataMatIn); 24 | this.labels = classLabels; 25 | this.numIter = numIter; 26 | } 27 | /** 28 | * 随机梯度上升法求各个特征的最佳回归系数 Random gradient ascent method for optimal regression coefficients of each feature 29 | * 30 | * @returns {Array} 31 | * @memberof Logistic 32 | */ 33 | getWeights(): Array{ 34 | let dataMatrix = this.dataMatrix.arr; 35 | let labels = this.labels; 36 | 37 | let [m,n] = this.dataMatrix.size(); 38 | let weights = Matrix.ones(n); // 初始化每个特征的系数 39 | let indexArr; 40 | let alpha; // 梯度上升步长 41 | 42 | for(let i=0; ia*b); 49 | 50 | let h = sigmoid(_.sum(vec)); 51 | let error = labels[randomIndex] - h; 52 | let rised = dataMatrix[randomIndex].map(v=>v*alpha*error); 53 | 54 | weights = _.zipWith(weights,rised,(a,b)=>a+b); 55 | indexArr.splice(randomIndex,1); 56 | } 57 | } 58 | 59 | return weights; 60 | } 61 | 62 | /** 63 | * 分类测试数据 class the given test data. 64 | * 65 | * @param {Array} inX_ test data 66 | * @returns {number} 67 | * @memberof Logistic 68 | */ 69 | classify(inX_: Array): number{ 70 | let inX = [...inX_]; 71 | inX = [1.0,inX[0],inX[1]]; 72 | 73 | let weights = this.getWeights(); 74 | let vec = _.zipWith(inX,weights,(a,b)=>a*b); 75 | let prob = sigmoid(_.sum(vec)); 76 | 77 | return prob>0.5?1.0:0.0; 78 | } 79 | } 80 | 81 | export default Logistic; -------------------------------------------------------------------------------- /src/algorithm/logistic/test.ts: -------------------------------------------------------------------------------- 1 | import Logistic from './index'; 2 | import * as Parser from '../../utils/fileParser/index'; 3 | import * as preprocessing from '../../utils/features/preprocessing'; 4 | import * as path from 'path'; 5 | import { log, print } from 'util'; 6 | import * as charts from '../../utils/charts/index'; 7 | 8 | let datas = Parser.read_csv(path.join(__dirname,'../../../assets/testSet.txt'),{ 9 | index_col:false, 10 | delimiter: ',', 11 | header: false, 12 | classType:'string' 13 | }); 14 | 15 | let dataSet = datas.drop(4).values; 16 | 17 | let labels = datas.getClasses(); 18 | 19 | labels = labels.map(v=>{ 20 | return v==='Iris-setosa'?1:0; 21 | }); 22 | 23 | let logi = new Logistic(dataSet,labels,150); 24 | 25 | let errors = 0; 26 | for(let i=0; i 2 | 3 | 4 | 5 | 6 | 7 | Decision Tree 8 | 9 | 19 | 20 | 21 | 99 | 100 | -------------------------------------------------------------------------------- /src/utils/charts/index.ts: -------------------------------------------------------------------------------- 1 | import server from './server'; 2 | import * as path from 'path'; 3 | import * as fs from 'fs'; 4 | import * as swig from 'swig'; 5 | 6 | /** 7 | * 根据模板和数据渲染可视化页面 8 | * 9 | * @export 10 | * @param {string} template 11 | * @param {object} data 12 | * @returns 13 | */ 14 | export function renderFile(template: string,data: object){ 15 | return swig.render(fs.readFileSync(template).toString(),{ 16 | filename: template, 17 | autoescape: false, 18 | locals: data 19 | }); 20 | } 21 | 22 | interface KnnConfig { 23 | width: string; 24 | height: string; 25 | size: number; 26 | } 27 | 28 | function treeLeaf(obj,leafRule){ 29 | let objKey = Object.keys(obj)[0]; 30 | let leaf = obj[objKey]; 31 | let children = []; 32 | let rules = Object.keys(leaf); 33 | for(let rule of 
rules){ 34 | if(typeof leaf[rule] === 'object'){ 35 | children.push(treeLeaf(leaf[rule],rule)); 36 | continue; 37 | } 38 | children.push({ 39 | name: leaf[rule], 40 | rule 41 | }); 42 | } 43 | 44 | return { 45 | name: objKey, 46 | rule: leafRule, 47 | children: [...children] 48 | } 49 | } 50 | 51 | /** 52 | * 可视化决策树 53 | * 54 | * @export 55 | * @param {object} tree the decision tree get from DT algorithm. 56 | * @param {any} options configuration object. { 57 | * width: {number} the width of the graph. 58 | * height: {number} the height of the graph. size: the size of every point. 59 | * } 60 | */ 61 | export function drawDT(tree: object,{ 62 | width=600, 63 | height=400 64 | }){ 65 | let firstStr = Object.keys(tree)[0]; 66 | let obj = treeLeaf(tree,null); 67 | let html = renderFile(path.resolve(__dirname,'DT','tpl.html'),{ 68 | width, 69 | height, 70 | data: JSON.stringify(obj) 71 | }) 72 | server(html); 73 | } 74 | 75 | /** 76 | * 可视化kNN算法,绘制散点图 77 | * 78 | * @export 79 | * @param {Array>} dataSet_ matrix of datas for training. 80 | * @param {Array} labels_ vector of training datas' classes. 81 | * @param {Array} inx vector of data to test. 82 | * @param {object} options configuration object. { 83 | * width: {string} the width of the graph. default to "600px". 84 | * height: {string} the height of the graph. default to "400px". 85 | * size: {number} the size of every point. default to 20. 86 | * } 87 | */ 88 | export function drawkNN(dataSet_: Array>,labels_: Array,inx: Array,{ 89 | width="600px", 90 | height="400px", 91 | size=20 92 | }={} as KnnConfig){ 93 | let dataSet = [...dataSet_]; 94 | let labels = [...labels_]; 95 | let data = []; 96 | let classes = [...new Set(labels)].filter(v=>v!==undefined); 97 | classes.forEach(c=>{ 98 | let classSet = dataSet.filter((value,i)=>labels[i] === c); 99 | data.push([...classSet]) 100 | }); 101 | 102 | classes.push('test'); 103 | data.push([inx]); 104 | let html = renderFile(path.resolve(__dirname,'kNN','tpl.html'),{ 105 | title: "Scatter plot for kNN", 106 | width, 107 | height, 108 | size, 109 | data: JSON.stringify(data), 110 | classes: JSON.stringify(classes.map(v=>v.toString())) 111 | }); 112 | server(html) 113 | } 114 | 115 | 116 | /** 117 | * 118 | * 绘制logistic回归算法的散点图和回归线 119 | * @export 120 | * @param {Array>} dataSet_ matrix of datas for training. 121 | * @param {Array} labels_ vector of training datas' classes. 122 | * @param {Array} weights vector of features' weights get from logistic algorithm. 123 | * @param {object} options configuration object. { 124 | * width: {string} the width of the graph. default to "600px". 125 | * height: {string} the height of the graph. default to "400px". 126 | * size: {number} the size of every point. deault to 20. 
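 *    A hypothetical call sketch (names are illustrative): drawLogistic(dataSet, labels,
 *    logi.getWeights(), { size: 15 }), where `logi` is a trained Logistic instance and the
 *    omitted width and height fall back to their defaults.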
127 | * } 128 | */ 129 | export function drawLogistic(dataSet_: Array>,labels_: Array,weights: Array,{ 130 | width="600px", 131 | height="400px", 132 | size=20 133 | }={} as KnnConfig){ 134 | let dataSet = [...dataSet_]; 135 | let labels = [...labels_]; 136 | let data = []; 137 | let classes = [...new Set(labels)].filter(v=>v!==undefined); 138 | classes.forEach(c=>{ 139 | let classSet = dataSet.filter((value,i)=>labels[i] === c); 140 | data.push([...classSet]) 141 | }); 142 | 143 | let xs = dataSet.map(v=>v[0]), 144 | minx = Math.min(...xs), 145 | maxx = Math.max(...xs); 146 | 147 | let k = -weights[1]/weights[2]; 148 | let b = -weights[0]/weights[2]; 149 | 150 | let linePoints = [ 151 | [minx,k*minx+b], 152 | [maxx,k*maxx+b] 153 | ] 154 | 155 | let html = renderFile(path.resolve(__dirname,'logistic','tpl.html'),{ 156 | title: "Logistic Regression", 157 | width, 158 | height, 159 | size, 160 | data: JSON.stringify(data), 161 | classes: JSON.stringify(classes.map(v=>v.toString())), 162 | linePoints: JSON.stringify(linePoints) 163 | }); 164 | server(html) 165 | } -------------------------------------------------------------------------------- /src/utils/charts/kNN/tpl.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{title}} 6 | 7 | 8 | 9 | 10 | 11 |
12 | 92 | 93 | -------------------------------------------------------------------------------- /src/utils/charts/logistic/tpl.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{title}} 6 | 7 | 8 | 9 | 10 | 11 |
12 | 120 | 121 | -------------------------------------------------------------------------------- /src/utils/charts/server.ts: -------------------------------------------------------------------------------- 1 | import * as express from 'express'; 2 | import * as c from 'child_process'; 3 | 4 | 5 | export default function(html){ 6 | const app = express(); 7 | app.use('/',(req,res,next)=>{ 8 | res.set('Content-Type', 'text/html'); 9 | res.status(200).send(html); 10 | }) 11 | app.listen(4000,()=>{ 12 | console.log('the server has been listened at port 4000') 13 | }) 14 | 15 | let cmd = ''; 16 | 17 | switch (process.platform) { 18 | case 'wind32': 19 | cmd = 'start'; 20 | break; 21 | 22 | case 'linux': 23 | cmd = 'xdg-open'; 24 | break; 25 | 26 | case 'darwin': 27 | cmd = 'open'; 28 | break; 29 | } 30 | c.exec(`${cmd} http://localhost:4000`); 31 | } -------------------------------------------------------------------------------- /src/utils/features/index.ts: -------------------------------------------------------------------------------- 1 | import * as preprocessing from './preprocessing'; 2 | 3 | 4 | export default { 5 | preprocessing 6 | } -------------------------------------------------------------------------------- /src/utils/features/preprocessing.ts: -------------------------------------------------------------------------------- 1 | import Matrix from '../matrix/index'; 2 | import Vector from '../vector/index'; 3 | import {Repeat,List} from 'immutable'; 4 | 5 | // 标准化数据集,针对每列(每个特征),将特征转化为服从正态分布 6 | export function standardScaler(dataSet: Array>): Array>{ 7 | let mdataset = new Matrix(dataSet); 8 | let transSet = mdataset.transpose(); 9 | 10 | let setAvgs = mdataset.calAvg(0); 11 | let result = transSet.map((v,i)=>{ 12 | let vlength = v.length; 13 | return v.map(col=>{ 14 | return Math.pow((col-setAvgs[i]),2)/vlength; 15 | }); 16 | }); 17 | 18 | return new Matrix(result).transpose(); 19 | } 20 | 21 | // 归一化数据集,采用区间缩放法,将特征值缩放到(0,1) 22 | export function normalize(dataSet_: Array>): Array>{ 23 | let dataSet = new Matrix(dataSet_); 24 | let minVals = dataSet.min(0); // 每个特征的最小值 25 | let maxVals = dataSet.max(0); // 每个特征的最大值 26 | let ranges = new Vector(maxVals).zipWith((a,b)=>a-b,new Vector(minVals)); // 每个特征的范围 27 | 28 | let normalDataSet = new Matrix(Matrix.zeros(...dataSet.size())); 29 | let setSize = dataSet.size()[0]; // 训练集实例数 30 | 31 | normalDataSet = dataSet.sub(new Matrix(Repeat(minVals,setSize).toArray())); //分子为每个特征原始值减去该特征最小值 32 | 33 | normalDataSet = normalDataSet.divide(new Matrix(Repeat(ranges,setSize).toArray())); // 上式得到的每个特征值除以该特征范围 34 | return normalDataSet.arr; 35 | } 36 | 37 | // 二值化特征,第二个参数指定每个特征的阀值,相应特征大于指定阀值取1,否则取0 38 | export function binarizer(dataSet: Array>,threshold: Array): Array>{ 39 | let mdataset = new Matrix(dataSet); 40 | let transSet = mdataset.transpose(); 41 | 42 | let result = transSet.map((v,i)=>{ 43 | return v.map(c=>c>threshold[i]?1:0); 44 | }); 45 | 46 | return result; 47 | } 48 | 49 | // 哑编码特征值,当特征值为非数值时,将各个值作为新的特征 50 | export function oneHotEncoder(dataSet: Array>): Array>{ 51 | 52 | } -------------------------------------------------------------------------------- /src/utils/features/test.ts: -------------------------------------------------------------------------------- 1 | import features from './index'; 2 | import * as parser from '../fileParser/index'; 3 | import * as path from 'path'; 4 | 5 | const preprocessing = features.preprocessing; 6 | 7 | let dt = parser.read_csv(path.join(__dirname,'../../../assets/train.csv'),{ 8 | index_col: 
0, 9 | delimiter: ',', 10 | header: 0, 11 | dataType: 'number' 12 | }); 13 | 14 | let dataSet =dt.drop('quality').values; 15 | 16 | let testStandardScaler = preprocessing.standardScaler(dataSet); 17 | console.log(testStandardScaler); 18 | 19 | let testNormalize = preprocessing.normalize(dataSet); 20 | console.log(testNormalize); -------------------------------------------------------------------------------- /src/utils/fileParser/index.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs'; 2 | import * as _ from 'lodash'; 3 | 4 | interface ReadCsvConfig { 5 | index_col?: boolean | number; 6 | delimiter?: string; 7 | header?: Array | number | boolean; 8 | dataType?: string; 9 | classType?: string; 10 | } 11 | 12 | interface WriteCsvConfig { 13 | index?: boolean; 14 | header?: any[]; 15 | delimiter?: string; 16 | } 17 | 18 | interface ReadFileConfig { 19 | toNumber?: boolean; 20 | delimiter?: string; 21 | } 22 | 23 | 24 | /** 25 | * 简单读取文件,配置包括是否转化数据为数值型和分隔符号 26 | * 27 | * @export 28 | * @param {string} filePath 29 | * @param {object} options { 30 | * toNumber: {boolean}. whether transform datas to number or not. deault to false. 31 | * delmiter: {string}. delmiter for every line. default to '\t'. 32 | * } 33 | * @returns {Array>} 34 | */ 35 | export function parseFile(filePath:string,{ 36 | toNumber=false, 37 | delimiter='\t' 38 | }={} as ReadFileConfig): Array>{ 39 | let content = fs.readFileSync(filePath,{encoding: 'utf-8'}); 40 | let lines = content.split('\n'); 41 | let result = lines.map(line=>line.split(delimiter)); 42 | 43 | if(toNumber){ 44 | return result.map(v=>v.map(c=>Number(c))) 45 | } 46 | return result; 47 | } 48 | 49 | class CSV { 50 | values: Array> 51 | constructor(public headerLine: Array,datasWithoutIndex: Array>){ 52 | this.values = datasWithoutIndex; 53 | } 54 | /** 55 | * 获取标题行 Get the header line. 56 | * 57 | * @returns {Array} 58 | * @memberof CSV 59 | */ 60 | getHeader(): Array{ 61 | return this.headerLine; 62 | } 63 | /** 64 | * 删除某一行或者指定标题的列 delete the specific column. 65 | * 66 | * @param {(string | number)} label delete the specific number of column or the column of specific label. 67 | * @returns {CSV} instance of class CSV. 68 | * @memberof CSV 69 | */ 70 | drop(label: string | number): CSV{ 71 | let headerLine = [...this.headerLine]; 72 | let values = this.values.map(v=>[...v]); 73 | let labelIndex = typeof label === 'string'?headerLine.indexOf(label):label; 74 | 75 | if(headerLine.length !== 0){ 76 | headerLine.splice(labelIndex,1); 77 | } 78 | values.forEach(v=>v.splice(labelIndex,1)); 79 | 80 | return new CSV(headerLine,values); 81 | } 82 | /** 83 | * 获取分类列,一般为最后一列。 Get the last column of every line. 84 | * 85 | * @returns {Array} 86 | * @memberof CSV 87 | */ 88 | getClasses(): Array{ 89 | return this.values.map(v=>v[v.length-1]) 90 | } 91 | 92 | } 93 | 94 | export {CSV}; 95 | /** 96 | * 读取csv文件 Read CSV file. 97 | * 98 | * @export 99 | * @param {string} filePath 100 | * @param {object} options { 101 | * index_col: {boolean|number}. when set to true, the first column of data will be regarded as the counter column. Default to be false. 102 | * delmiter: {string}. delmiter for every line. Default to be ','. 103 | * header: {Array|number}. Can be the vector of custom header line or the index of the header line. default to 0. 104 | * dataType: {string}. the type of datas, default to 'number'. 105 | * classType: {string}. the type of the last column of each line. default to 'number'. 
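 *    A small illustrative sketch (column names are hypothetical): read_csv(file, { header: ['x', 'y', 'label'],
 *    dataType: 'number' }) supplies a custom header array instead of reading the header from row 0.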
106 | * } 107 | * @returns {CSV} instance of class CSV. 108 | */ 109 | export function read_csv (filePath: string,{ 110 | index_col=false, 111 | delimiter=',', 112 | header=0, 113 | dataType='number', 114 | classType='number' 115 | }={} as ReadCsvConfig): CSV{ 116 | let rawContent = fs.readFileSync(filePath,{encoding: 'utf-8'}); 117 | 118 | let lines = rawContent.split('\n').map(v=>v.split(delimiter)); 119 | let headerLine; 120 | if(Array.isArray(header)){ 121 | headerLine = header; 122 | } else if(header === 0){ 123 | headerLine = lines[0]; 124 | lines = _.tail(lines); 125 | } else { 126 | headerLine = [] 127 | } 128 | 129 | if(headerLine[0] === ''){ 130 | headerLine = _.tail(headerLine) 131 | } 132 | 133 | let datasWithoutIndex = []; 134 | // 去除Index列 135 | if(index_col !== false){ 136 | datasWithoutIndex = lines.map(v=>_.tail(v)); 137 | } else { 138 | datasWithoutIndex = lines; 139 | } 140 | 141 | if(dataType === 'number'){ 142 | datasWithoutIndex = datasWithoutIndex.map(row=>row.map(col=>{ 143 | if(classType === 'number'){ 144 | return Number(col); 145 | } 146 | return col; 147 | })) 148 | } 149 | return new CSV(headerLine,datasWithoutIndex); 150 | } 151 | 152 | /** 153 | * 写入CSV数据 Write datas to file. 154 | * 155 | * @export 156 | * @param {string} filePath 157 | * @param {any[][]} data datas to write. 158 | * @param {object} options { 159 | * index: {boolean}. if set to be true, it will add a index column for each line. default to false. 160 | * header: {Array}. custom header to add to the first line. default to []. 161 | * } 162 | */ 163 | export function write_csv (filePath: string,data: any[][],{ 164 | index=false, 165 | header=[], 166 | delimiter=',' 167 | }={} as WriteCsvConfig): void{ 168 | let dataToWrite = [...data]; 169 | if(index !== false){ 170 | dataToWrite.forEach((v,i)=>{ 171 | v.unshift(i); 172 | }); 173 | } 174 | if(Array.isArray(header) && header.length >= 1){ 175 | dataToWrite.unshift(header); 176 | } 177 | dataToWrite = dataToWrite.map(row=>row.map(col=>col.toString())); 178 | let contentToWrite = ''; 179 | 180 | dataToWrite.forEach(v=>{ 181 | contentToWrite += v.join(delimiter); 182 | contentToWrite += '\n'; 183 | }); 184 | 185 | fs.writeFileSync(filePath,contentToWrite,{ 186 | encoding: 'utf-8' 187 | }); 188 | } -------------------------------------------------------------------------------- /src/utils/fileParser/result.csv: -------------------------------------------------------------------------------- 1 | ID,quality 2 | 0,6 3 | 1,6 4 | 2,6 5 | 3,6 6 | 4,6 7 | 5,6 8 | 6,6 9 | 7,6 10 | 8,6 11 | 9,6 12 | 10,6 13 | 11,6 14 | 12,6 15 | 13,6 16 | 14,6 17 | 15,6 18 | 16,6 19 | 17,6 20 | 18,6 21 | 19,6 22 | 20,6 23 | 21,6 24 | -------------------------------------------------------------------------------- /src/utils/fileParser/test.ts: -------------------------------------------------------------------------------- 1 | import * as parser from './index'; 2 | import * as path from 'path'; 3 | import * as charts from '../charts/index'; 4 | import kNN from '../../algorithm/kNN/index'; 5 | 6 | let dt = parser.read_csv(path.join(__dirname,'../../../assets/train.csv'),{ 7 | index_col: 0, 8 | delimiter: ',', 9 | header: 0, 10 | dataType: 'number' 11 | }); 12 | 13 | let labels = dt.getClasses(); 14 | 15 | let dataSet =dt.drop('quality').values; 16 | let knn = new kNN(dataSet,labels); 17 | 18 | let dataToTest = parser.read_csv(path.join(__dirname,'../../../assets/test.csv'),{ 19 | index_col: 0, 20 | dataType: 'number' 21 | }).drop('quality').values; 22 | 23 | let resultSet = 
dataToTest.map((v,i)=>[i,knn.classify(knn.autoNormalVector(v),48)]) 24 | 25 | // 将结果写入 csv 26 | parser.write_csv(path.join(__dirname,'./result.csv'),resultSet,{ 27 | header: ['ID','quality'] 28 | }); 29 | 30 | 31 | // 绘图 32 | let inx = [7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8], 33 | normalInx = knn.autoNormalVector(inx); 34 | 35 | console.log(knn.classify(inx,100)); // 6 36 | charts.drawkNN(kNN.autoNormal(dataSet),labels,normalInx,{ 37 | width: "500px", 38 | height: "400px", 39 | size: 15 40 | }); 41 | -------------------------------------------------------------------------------- /src/utils/index.ts: -------------------------------------------------------------------------------- 1 | import Matrix from './matrix/index'; 2 | import Vector from './vector/index'; 3 | import * as fileParser from './fileParser/index'; 4 | import * as charts from './charts/index'; 5 | import features from './features/index'; 6 | 7 | export { 8 | Matrix, 9 | Vector, 10 | fileParser, 11 | charts, 12 | features 13 | } -------------------------------------------------------------------------------- /src/utils/matrix/index.ts: -------------------------------------------------------------------------------- 1 | import {List,Repeat} from 'immutable'; 2 | import * as math from 'mathjs'; 3 | import * as _ from 'lodash'; 4 | 5 | class Matrix { 6 | constructor(public arr: Array>){} 7 | 8 | /** 9 | * 获取原始多维数组 Get raw data. 10 | * 11 | * @returns {Array>} 12 | * @memberof Matrix 13 | */ 14 | toArray(): Array>{ 15 | return this.arr; 16 | } 17 | /** 18 | * 针对两个矩阵同一行同一列的值对应进行计算 Matrix operation. like '*','/','+','-'. 19 | * 20 | * @param {Class Matrix} arrA Matrix A 21 | * @param {Class Matrix} arrB Matrix B 22 | * @param {String} operator operation,'+'|'-'|'*'|'\' 23 | * @returns {Class Matrix} result. 24 | * @memberof Matrix 25 | */ 26 | zipWith(arrA: Array>,arrB: Array>,operator: string): Matrix{ 27 | let result = []; 28 | 29 | switch(operator){ 30 | case '+': 31 | result = _.zipWith(arrA,arrB,(a,b)=>{ 32 | return _.zipWith(a,b,(m,n)=>m+n); 33 | }); 34 | break; 35 | case '-': 36 | result = _.zipWith(arrA,arrB,(a,b)=>{ 37 | return _.zipWith(a,b,(m,n)=>m-n); 38 | }); 39 | break; 40 | case '*': 41 | result = _.zipWith(arrA,arrB,(a,b)=>{ 42 | return _.zipWith(a,b,(m,n)=>m*n); 43 | }); 44 | break; 45 | case '/': 46 | result = _.zipWith(arrA,arrB,(a,b)=>{ 47 | return _.zipWith(a,b,(m,n)=>m/n); 48 | }); 49 | break; 50 | default: 51 | return; 52 | 53 | } 54 | return new Matrix(result); 55 | } 56 | /** 57 | * Matrix subtraction 58 | * 59 | * @param {Matrix} toSub Matrix to sub with. 60 | * @returns {Matrix} 61 | * @memberof Matrix 62 | */ 63 | sub(toSub: Matrix): Matrix{ 64 | return this.zipWith(this.arr,toSub.arr,'-'); 65 | } 66 | /** 67 | * matrix addition 68 | * 69 | * @param {Matrix} toAdd Matrix to add with. 70 | * @returns {Matrix} 71 | * @memberof Matrix 72 | */ 73 | add(toAdd: Matrix): Matrix{ 74 | return this.zipWith(this.arr,toAdd.arr,'+') 75 | } 76 | /** 77 | * matrix multiplication 78 | * 79 | * @param {Matrix} toMult matrix to multiply with. 80 | * @returns {Matrix} 81 | * @memberof Matrix 82 | */ 83 | mult(toMult: Matrix): Matrix{ 84 | return this.zipWith(this.arr,toMult.arr,'*') 85 | } 86 | /** 87 | * Matrix Division 88 | * 89 | * @param {Matrix} toDivide matrix to divide with. 90 | * @returns {Matrix} 91 | * @memberof Matrix 92 | */ 93 | divide(toDivide: Matrix): Matrix{ 94 | return this.zipWith(this.arr,toDivide.arr,'/'); 95 | } 96 | /** 97 | * Get the size of Matrix, including rows and columns. 
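 * e.g. new Matrix([[2, 4, 6], [5, 7, 1]]).size() returns [2, 3], i.e. two rows and three columns (an illustrative sketch).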
98 | * 99 | * @returns {[number,number]} 100 | * @memberof Matrix 101 | */ 102 | size(): [number,number]{ 103 | return [this.arr.length,this.arr[0].length]; 104 | } 105 | /** 106 | * 矩阵同一行/列进行相加 The sum of data in the same row/column 107 | * 108 | * @param {number} [axis=1] when to be 1, get the sum of the same row, when to be 0, get the sum of the same column. default to 1. 109 | * @returns {Array} 110 | * @memberof Matrix 111 | */ 112 | sum(axis=1 as number): Array{ 113 | if(axis === 0){ 114 | return this.transpose().map(v=>_.sum(v)); 115 | } 116 | return this.arr.map(v=>_.sum(v)); 117 | } 118 | /** 119 | * 120 | * 获取同一行/列的最小值 The minimum value of data in the same row/column. 121 | * @param {number} [axis=0 as number] when set to be 0, get the minimum value of data in the same column. 1 to the same row. default is 0. 122 | * @returns {Array} 123 | * @memberof Matrix 124 | */ 125 | min(axis=0 as number): Array{ 126 | let arr = axis === 0?this.transpose():[...this.arr]; 127 | 128 | arr = arr.map(v=>v.filter(c=>typeof c === 'number')); 129 | 130 | return arr.map(v=>_.min(v)); 131 | } 132 | /** 133 | * 获取同一行/列的最大值 The maximum value of data in the same row/column. 134 | * 135 | * @param {number} [axis=0 as number] when set to be 0, get the maximum value of data in the same column. 1 to the same row. default is 0. 136 | * @returns {Array} 137 | * @memberof Matrix 138 | */ 139 | max(axis=0 as number): Array{ 140 | let arr = axis === 0?this.transpose():[...this.arr]; 141 | 142 | arr = arr.map(v=>v.filter(c=>typeof c === 'number')); 143 | 144 | return arr.map(v=>_.max(v)); 145 | } 146 | /** 147 | * 转置矩阵 Transpose matrix 148 | * 149 | * @returns {Array>} 150 | * @memberof Matrix 151 | */ 152 | transpose(): Array>{ 153 | return math.transpose(this.arr); 154 | } 155 | calAvg(flag=0 as number): Array{ 156 | let arr = flag === 0 ? this.transpose():this.arr; 157 | return arr.map((v,i)=>{ 158 | let sum = v.reduce((pre,cur)=>pre+cur,0); 159 | return sum/v.length; 160 | }); 161 | } 162 | // 初始化零矩阵 163 | static zeros(r: number,c?: number): Array>|Array{ 164 | return c?math.zeros(r,c)._data:math.zeros(r)._data; 165 | } 166 | static ones(m: number,n?: number): Array>|Array{ 167 | return n?math.ones(m,n)._data:math.ones(m)._data; 168 | } 169 | /** 170 | * 获取同一行或同一列的平均值 The average value of the same row/column of data 171 | * 172 | * @static 173 | * @param {Array>} arr 174 | * @param {number} [axis=0 as number] when to be 0, the same column, 1 to the same row. default to 0. 175 | * @returns {Array} 176 | * @memberof Matrix 177 | */ 178 | static mean(arr: Array>, axis=0 as number): Array{ 179 | 180 | if(axis === 0){ //按列求平均值 181 | return math.transpose(arr).map(v=>_.sum(v)/v.length); 182 | } else { 183 | return arr.map(v=>_.sum(v)/v.length); 184 | } 185 | } 186 | } 187 | 188 | export default Matrix; -------------------------------------------------------------------------------- /src/utils/vector/index.ts: -------------------------------------------------------------------------------- 1 | import {List,Repeat} from 'immutable'; 2 | import * as _ from 'lodash'; 3 | 4 | class Vector { 5 | constructor(public arr: Array){ 6 | } 7 | /** 8 | * 数组元素从小到大排序对应的下标 The sorted index of array. 
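 * e.g. new Vector([4, 7, 1, 8, 2]).argSort() returns [2, 4, 0, 1, 3], the indices in ascending order of value
 * (an illustrative sketch; the unit test below asserts the same expectation).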
9 | * 10 | * @returns {number[]} 11 | * @memberof Vector 12 | */ 13 | argSort(): number[]{ 14 | let list = [...this.arr]; 15 | let result = list 16 | .map((v,i)=>[v,i]) 17 | .sort(([v1],[v2])=>v1>v2) 18 | .map(([,i])=>i); 19 | 20 | return result; 21 | } 22 | zipWith(func: Function,b): Array{ 23 | let result = this.arr.map((v,i)=>func(v,b.arr[i])) 24 | // console.log(result) 25 | return result; 26 | } 27 | /** 28 | * 针对每个元素,若值等于0返回0,若大于0返回1,若小于0返回-1 For each element, when its value equals to 0 returns 0, else if it's larger than 0 returns 1 else returns -1. 29 | * 30 | * @static 31 | * @param {(number|Array)} arr 32 | * @returns {(number|Array)} 33 | * @memberof Vector 34 | */ 35 | static sign(arr: number|Array): number|Array{ 36 | if(Array.isArray(arr)){ 37 | return arr.map(v=>v===0.0?0.0:(v>0.0?1.0:-1.0)) 38 | } else { 39 | return arr===0.0?0.0:(arr>0.0?1.0:-1.0); 40 | } 41 | } 42 | /** 43 | * 创建指定个数的0-1之间的随机数 Create specific number of random number between 0 and 1. 44 | * 45 | * @static 46 | * @param {number} m 47 | * @returns 48 | * @memberof Vector 49 | */ 50 | static rand(m: number){ 51 | let initArr = Repeat(0,m).toArray(); 52 | let result = initArr.map(v=>Math.random()); 53 | return result; 54 | } 55 | } 56 | 57 | export default Vector; -------------------------------------------------------------------------------- /test/utils/Matrix.js: -------------------------------------------------------------------------------- 1 | const Matrix = require('../../lib/index').utils.Matrix; 2 | const expect = require('chai').expect; 3 | 4 | const dataSet = [ 5 | [2,4,6], 6 | [5,7,1], 7 | [3,3,1] 8 | ] 9 | const dataSet2 = [ 10 | [1,3,5], 11 | [2,4,7], 12 | [3,5,8] 13 | ] 14 | 15 | let matA = new Matrix(dataSet), 16 | matB = new Matrix(dataSet2); 17 | 18 | describe('Matrix',()=>{ 19 | describe('#toArray()',()=>{ 20 | it('should return raw array value',()=>{ 21 | let arr = matA.toArray(); 22 | expect(arr).to.equal(dataSet); 23 | }) 24 | }); 25 | 26 | describe('#add()',()=>{ 27 | it('should return the result of addition',()=>{ 28 | let result = matA.add(matB); 29 | expect(result.toArray()).to.eql([ 30 | [3,7,11], 31 | [7,11,8], 32 | [6,8,9] 33 | ]); 34 | }); 35 | }); 36 | 37 | describe('#size()',()=>{ 38 | it('should return the size of the matrix',()=>{ 39 | expect(matA.size()).to.eql([3,3]); 40 | }); 41 | }); 42 | 43 | describe('#max()',()=>{ 44 | it('should return the maxiumn value of each row when axis=1',()=>{ 45 | expect(matA.max(1)).to.eql([6,7,3]); 46 | }); 47 | it('should return the maxiumn value of each column when axis=0',()=>{ 48 | expect(matA.max(0)).to.eql([5,7,6]); 49 | }); 50 | }); 51 | 52 | describe('#min()',()=>{ 53 | it('should return the miniumn value of each row when axis=1',()=>{ 54 | expect(matA.min(1)).to.eql([2,1,1]); 55 | }); 56 | it('should return the miniumn value of each column when axis=0',()=>{ 57 | expect(matA.min(0)).to.eql([2,3,1]); 58 | }); 59 | }); 60 | 61 | describe('#transpose()',()=>{ 62 | it('should transpose the matrix',()=>{ 63 | expect(matA.transpose()).to.eql([[2,5,3],[4,7,3],[6,1,1]]); 64 | }); 65 | }); 66 | 67 | describe('#ones()',()=>{ 68 | it('should return vector when call ones(m)',()=>{ 69 | expect(Matrix.ones(3)).to.eql([1,1,1]); 70 | }); 71 | it('should return matrix when call ones(m,n)',()=>{ 72 | expect(Matrix.ones(2,2)).to.eql([[1,1],[1,1]]); 73 | }); 74 | }); 75 | 76 | describe('#zeros()',()=>{ 77 | it('should return vector when call zeros(m)',()=>{ 78 | expect(Matrix.zeros(3)).to.eql([0,0,0]); 79 | }); 80 | it('should return matrix when call 
zeros(m,n)',()=>{ 81 | expect(Matrix.zeros(2,2)).to.eql([[0,0],[0,0]]); 82 | }); 83 | }); 84 | 85 | describe('#mean()',()=>{ 86 | it('should return the average value of each row when axis=1',()=>{ 87 | expect(Matrix.mean(dataSet),1).to.be.an('array'); 88 | }); 89 | it('should return the average value of each column when axis=0',()=>{ 90 | expect(Matrix.mean(dataSet),0).to.be.an('array'); 91 | }); 92 | }); 93 | }); -------------------------------------------------------------------------------- /test/utils/Vector.js: -------------------------------------------------------------------------------- 1 | const Vector = require('../../lib/index').utils.Vector; 2 | const expect = require('chai').expect; 3 | 4 | const arr = [4,7,1,8,2]; 5 | const vect = new Vector(arr); 6 | 7 | describe('Vector',()=>{ 8 | describe('#argSort()',()=>{ 9 | it('should return the sorted index of the array',()=>{ 10 | expect(vect.argSort()).to.eql([2,4,0,1,3]); 11 | }); 12 | }); 13 | 14 | describe('#sign()',()=>{ 15 | it('should return array when sign(Array)',()=>{ 16 | expect(Vector.sign([-2,2,0,4])).to.eql([-1,1,0,1]); 17 | }); 18 | 19 | it('should return -1 when number is lower than 0',()=>{ 20 | expect(Vector.sign(-6)).to.eql(-1); 21 | }); 22 | 23 | it('should return 1 when number is larger than 0',()=>{ 24 | expect(Vector.sign(6)).to.eql(1); 25 | }); 26 | 27 | it('should return 0 when number is equal to 0',()=>{ 28 | expect(Vector.sign(0)).to.eql(0); 29 | }); 30 | }); 31 | 32 | describe('#rand()',()=>{ 33 | it('should return n of random number',()=>{ 34 | expect(Vector.rand(3)).to.have.lengthOf(3); 35 | }); 36 | }); 37 | }); -------------------------------------------------------------------------------- /test/utils/fileParser.js: -------------------------------------------------------------------------------- 1 | const parser = require('../../lib/index').utils.fileParser; 2 | const CSV = parser.CSV; 3 | const path = require('path'); 4 | const expect = require('chai').expect; 5 | 6 | 7 | describe('fileParser',()=>{ 8 | describe('#parseFile()',()=>{ 9 | it('should return matrix like data',()=>{ 10 | let result = parser.parseFile(path.join(__dirname,'./../../assets/testSet.txt'),{ 11 | toNumber: false, 12 | delimiter: ',' 13 | }); 14 | expect(result).to.have.lengthOf(100); 15 | expect(result[0]).to.have.lengthOf(5); 16 | }); 17 | }); 18 | 19 | describe('#readCsv()',()=>{ 20 | it('should return instance of CSV',()=>{ 21 | let dt = parser.read_csv(path.join(__dirname,'../../assets/train.csv'),{ 22 | index_col: 0, 23 | delimiter: ',', 24 | header: 0, 25 | dataType: 'number' 26 | }); 27 | expect(dt).to.be.an.instanceof(CSV); 28 | }); 29 | }); 30 | }); 31 | 32 | describe('CSV',()=>{ 33 | let dt = parser.read_csv(path.join(__dirname,'../../assets/train.csv'),{ 34 | index_col: 0, 35 | delimiter: ',', 36 | header: 0, 37 | dataType: 'number' 38 | }); 39 | describe('#getHeader()',()=>{ 40 | it('should return the header line of the dataset',()=>{ 41 | let header = dt.getHeader(); 42 | expect(header).to.be.an('array'); 43 | }); 44 | }); 45 | describe('#drop()',()=>{ 46 | it('should return a new instance of CSV',()=>{ 47 | expect(dt.drop(0)).to.be.an.instanceof(CSV); 48 | }); 49 | }); 50 | describe('#getClasses()',()=>{ 51 | it('should return the class vector',()=>{ 52 | expect(dt.getClasses()).to.be.an('array'); 53 | }); 54 | }); 55 | }); -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"files": [ 3 | "src/**/*.ts" 4 | ], 5 | "compilerOptions": { 6 | "module": "commonjs", 7 | "removeComments": true, 8 | "allowJs": true, 9 | "target": "es2015", 10 | "sourceMap": true, 11 | "types": [ 12 | "node", 13 | "lodash" 14 | ] 15 | }, 16 | "exclude": [ 17 | "node_modules" 18 | ] 19 | } -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "tslint:recommended", 3 | "rules": { 4 | "max-line-length": { 5 | "options": [120] 6 | }, 7 | "new-parens": true, 8 | "no-arg": true, 9 | "no-bitwise": true, 10 | "no-conditional-assignment": true, 11 | "no-consecutive-blank-lines": false, 12 | "no-console": { 13 | "options": [ 14 | "debug", 15 | "info", 16 | "log", 17 | "time", 18 | "timeEnd", 19 | "trace" 20 | ] 21 | } 22 | }, 23 | "jsRules": { 24 | "max-line-length": { 25 | "options": [120] 26 | } 27 | } 28 | } --------------------------------------------------------------------------------