├── .classpath
├── .gitignore
├── .project
├── .settings
└── org.eclipse.jdt.core.prefs
├── README.md
└── src
├── Jama
└── examples
│ ├── MagicSquareExample.java
│ └── SVD.java
└── com
└── pku
└── yangliu
├── ClusterMain.java
├── ComputeWordsVector.java
├── DataPreProcess.java
├── DimensionReduction.java
├── KmeansCluster.java
├── KmeansSVDCluster.java
└── PorterAlgorithm.java
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 |
3 | # Package Files #
4 | *.jar
5 | *.war
6 | *.ear
7 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | DataMiningCluster
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | #Tue Mar 20 12:44:23 CST 2012
2 | eclipse.preferences.version=1
3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.6
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.source=1.6
13 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | DataMiningCluster
2 | =================
3 |
4 | Implementation of text clustering algorithms, including K-means, MBSAS, and DBSCAN.
5 |
6 | Author: Liu Yang (yangliuyx@gmail.com)
7 |
8 | Author's blog: http://blog.csdn.net/yangliuy
9 |
10 | A Chinese technical blog related to this code package: http://blog.csdn.net/yangliuy/article/details/7471659
11 |
--------------------------------------------------------------------------------
/src/Jama/examples/MagicSquareExample.java:
--------------------------------------------------------------------------------
1 | package Jama.examples;
2 | import Jama.*;
3 | import java.util.Date;
4 |
5 | /** Example of use of Matrix Class, featuring magic squares. **/
6 |
7 | public class MagicSquareExample {
8 |
9 | /** Generate magic square test matrix. **/
10 |
11 | public static Matrix magic (int n) {
12 |
13 | double[][] M = new double[n][n];
14 |
15 | // Odd order
16 |
17 | if ((n % 2) == 1) {
18 | int a = (n+1)/2;
19 | int b = (n+1);
20 | for (int j = 0; j < n; j++) {
21 | for (int i = 0; i < n; i++) {
22 | M[i][j] = n*((i+j+a) % n) + ((i+2*j+b) % n) + 1;
23 | }
24 | }
25 |
26 | // Doubly Even Order
27 |
28 | } else if ((n % 4) == 0) {
29 | for (int j = 0; j < n; j++) {
30 | for (int i = 0; i < n; i++) {
31 | if (((i+1)/2)%2 == ((j+1)/2)%2) {
32 | M[i][j] = n*n-n*i-j;
33 | } else {
34 | M[i][j] = n*i+j+1;
35 | }
36 | }
37 | }
38 |
39 | // Singly Even Order
40 |
41 | } else {
42 | int p = n/2;
43 | int k = (n-2)/4;
44 | Matrix A = magic(p);
45 | for (int j = 0; j < p; j++) {
46 | for (int i = 0; i < p; i++) {
47 | double aij = A.get(i,j);
48 | M[i][j] = aij;
49 | M[i][j+p] = aij + 2*p*p;
50 | M[i+p][j] = aij + 3*p*p;
51 | M[i+p][j+p] = aij + p*p;
52 | }
53 | }
54 | for (int i = 0; i < p; i++) {
55 | for (int j = 0; j < k; j++) {
56 | double t = M[i][j]; M[i][j] = M[i+p][j]; M[i+p][j] = t;
57 | }
58 | for (int j = n-k+1; j < n; j++) {
59 | double t = M[i][j]; M[i][j] = M[i+p][j]; M[i+p][j] = t;
60 | }
61 | }
62 | double t = M[k][0]; M[k][0] = M[k+p][0]; M[k+p][0] = t;
63 | t = M[k][k]; M[k][k] = M[k+p][k]; M[k+p][k] = t;
64 | }
65 | return new Matrix(M);
66 | }
67 |
68 | /** Shorten spelling of print. **/
69 |
70 | private static void print (String s) {
71 | System.out.print(s);
72 | }
73 |
74 | /** Format double with Fw.d. **/
75 |
76 | public static String fixedWidthDoubletoString (double x, int w, int d) {
77 | java.text.DecimalFormat fmt = new java.text.DecimalFormat();
78 | fmt.setMaximumFractionDigits(d);
79 | fmt.setMinimumFractionDigits(d);
80 | fmt.setGroupingUsed(false);
81 | String s = fmt.format(x);
82 | while (s.length() < w) {
83 | s = " " + s;
84 | }
85 | return s;
86 | }
87 |
88 | /** Format integer with Iw. **/
89 |
90 | public static String fixedWidthIntegertoString (int n, int w) {
91 | String s = Integer.toString(n);
92 | while (s.length() < w) {
93 | s = " " + s;
94 | }
95 | return s;
96 | }
97 |
98 |
99 | public static void main (String argv[]) {
100 |
101 | /*
102 | | Tests LU, QR, SVD and symmetric Eig decompositions.
103 | |
104 | | n = order of magic square.
105 | | trace = diagonal sum, should be the magic sum, (n^3 + n)/2.
106 | | max_eig = maximum eigenvalue of (A + A')/2, should equal trace.
107 | | rank = linear algebraic rank,
108 | | should equal n if n is odd, be less than n if n is even.
109 | | cond = L_2 condition number, ratio of singular values.
110 | | lu_res = test of LU factorization, norm1(L*U-A(p,:))/(n*eps).
111 | | qr_res = test of QR factorization, norm1(Q*R-A)/(n*eps).
112 | */
113 |
114 | print("\n Test of Matrix Class, using magic squares.\n");
115 | print(" See MagicSquareExample.main() for an explanation.\n");
116 | print("\n n trace max_eig rank cond lu_res qr_res\n\n");
117 |
118 | Date start_time = new Date();
119 | double eps = Math.pow(2.0,-52.0);
120 | for (int n = 3; n <= 32; n++) {
121 | print(fixedWidthIntegertoString(n,7));
122 |
123 | Matrix M = magic(n);
124 |
125 | int t = (int) M.trace();
126 | print(fixedWidthIntegertoString(t,10));
127 |
128 | EigenvalueDecomposition E =
129 | new EigenvalueDecomposition(M.plus(M.transpose()).times(0.5));
130 | double[] d = E.getRealEigenvalues();
131 | print(fixedWidthDoubletoString(d[n-1],14,3));
132 |
133 | int r = M.rank();
134 | print(fixedWidthIntegertoString(r,7));
135 |
136 | double c = M.cond();
137 | print(c < 1/eps ? fixedWidthDoubletoString(c,12,3) :
138 | " Inf");
139 |
140 | LUDecomposition LU = new LUDecomposition(M);
141 | Matrix L = LU.getL();
142 | Matrix U = LU.getU();
143 | int[] p = LU.getPivot();
144 | Matrix R = L.times(U).minus(M.getMatrix(p,0,n-1));
145 | double res = R.norm1()/(n*eps);
146 | print(fixedWidthDoubletoString(res,12,3));
147 |
148 | QRDecomposition QR = new QRDecomposition(M);
149 | Matrix Q = QR.getQ();
150 | R = QR.getR();
151 | R = Q.times(R).minus(M);
152 | res = R.norm1()/(n*eps);
153 | print(fixedWidthDoubletoString(res,12,3));
154 |
155 | print("\n");
156 | }
157 | Date stop_time = new Date();
158 | double etime = (stop_time.getTime() - start_time.getTime())/1000.;
159 | print("\nElapsed Time = " +
160 | fixedWidthDoubletoString(etime,12,3) + " seconds\n");
161 | print("Adios\n");
162 | }
163 | }
164 |
165 |
--------------------------------------------------------------------------------
/src/Jama/examples/SVD.java:
--------------------------------------------------------------------------------
1 | package Jama.examples;
2 | /*************************************************************************
3 | * Compilation: javac -classpath .:jama.jar SVD.java
4 | * Execution: java -classpath .:jama.jar SVD
5 | * Dependencies: jama.jar
6 | *
7 | * Test client for computing singular values of a matrix.
8 | *
9 | * http://math.nist.gov/javanumerics/jama/
10 | * http://math.nist.gov/javanumerics/jama/Jama-1.0.1.jar
11 | *
12 | *************************************************************************/
13 |
14 | import Jama.Matrix;
15 | import Jama.SingularValueDecomposition;
16 |
17 | public class SVD {
18 | public static void main(String[] args) {
19 |
20 | // create M-by-N matrix that doesn't have full rank
21 | int M = 8, N = 5;
22 | //Matrix B = Matrix.random(5, 3);
23 | //Matrix A = Matrix.random(M, N).times(B).times(B.transpose());
24 | double[][] vals = {{1, 1, 1, 0, 0},{2, 2, 2, 0, 0},{1, 1, 1, 0, 0},{5, 5, 5, 0, 0},{0, 0 ,0 , 2, 2},{0, 0 ,0 , 3, 3},{0, 0 ,0 , 1, 1}};
25 | Matrix A = new Matrix(vals);
26 | System.out.print("A = ");
27 | A.print(9, 6);
28 |
29 | // compute the singular vallue decomposition
30 | System.out.println("A = U S V^T");
31 | System.out.println();
32 | SingularValueDecomposition s = A.svd();
33 | System.out.print("U = ");
34 | Matrix U = s.getU();
35 | U.print(9, 6);
36 | System.out.print("Sigma = ");
37 | Matrix S = s.getS();
38 | S.print(9, 6);
39 | System.out.print("V = ");
40 | Matrix V = s.getV();
41 | V.print(9, 6);
42 | System.out.println("rank = " + s.rank());
43 | System.out.println("condition number = " + s.cond());
44 | System.out.println("2-norm = " + s.norm2());
45 |
46 | // print out singular values
47 | System.out.print("singular values = ");
48 | Matrix svalues = new Matrix(s.getSingularValues(), 1);
49 | svalues.print(9, 6);
50 |
51 | // S.set(1, 1, 0);
52 | //S.set(3, 3, 0);
53 | // S.set(4, 4, 0);
54 | System.out.print("Sigma = ");
55 | S.print(9, 6);
56 | Matrix B = U.times(S.times(V.transpose()));
57 | System.out.print("B = ");
58 | B.print(9, 6);
59 | }
60 |
61 | }
62 |
63 |
--------------------------------------------------------------------------------
/src/com/pku/yangliu/ClusterMain.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/ClusterMain.java
--------------------------------------------------------------------------------
/src/com/pku/yangliu/ComputeWordsVector.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/ComputeWordsVector.java
--------------------------------------------------------------------------------
/src/com/pku/yangliu/DataPreProcess.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/DataPreProcess.java
--------------------------------------------------------------------------------
/src/com/pku/yangliu/DimensionReduction.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/DimensionReduction.java
--------------------------------------------------------------------------------
/src/com/pku/yangliu/KmeansCluster.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/KmeansCluster.java
--------------------------------------------------------------------------------
/src/com/pku/yangliu/KmeansSVDCluster.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/KmeansSVDCluster.java
--------------------------------------------------------------------------------
/src/com/pku/yangliu/PorterAlgorithm.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/PorterAlgorithm.java
--------------------------------------------------------------------------------