├── .classpath ├── .gitignore ├── .project ├── .settings └── org.eclipse.jdt.core.prefs ├── README.md └── src ├── Jama └── examples │ ├── MagicSquareExample.java │ └── SVD.java └── com └── pku └── yangliu ├── ClusterMain.java ├── ComputeWordsVector.java ├── DataPreProcess.java ├── DimensionReduction.java ├── KmeansCluster.java ├── KmeansSVDCluster.java └── PorterAlgorithm.java /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Package Files # 4 | *.jar 5 | *.war 6 | *.ear 7 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | DataMiningCluster 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | #Tue Mar 20 12:44:23 CST 2012 2 | eclipse.preferences.version=1 3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | org.eclipse.jdt.core.compiler.compliance=1.6 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 12 | org.eclipse.jdt.core.compiler.source=1.6 13 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DataMiningCluster 2 | ================= 3 | 4 | Implementation of text clustering algorithms including K-means, MBSAS, DBSCAN. 5 | 6 | Author: Liu Yang(yangliuyx@gmail.com) 7 | 8 | Author's blog: http://blog.csdn.net/yangliuy 9 | 10 | A Chinese technical blog related to this code package: http://blog.csdn.net/yangliuy/article/details/7471659 11 | -------------------------------------------------------------------------------- /src/Jama/examples/MagicSquareExample.java: -------------------------------------------------------------------------------- 1 | package Jama.examples; 2 | import Jama.*; 3 | import java.util.Date; 4 | 5 | /** Example of use of Matrix Class, featuring magic squares. **/ 6 | 7 | public class MagicSquareExample { 8 | 9 | /** Generate magic square test matrix. **/ 10 | 11 | public static Matrix magic (int n) { 12 | 13 | double[][] M = new double[n][n]; 14 | 15 | // Odd order 16 | 17 | if ((n % 2) == 1) { 18 | int a = (n+1)/2; 19 | int b = (n+1); 20 | for (int j = 0; j < n; j++) { 21 | for (int i = 0; i < n; i++) { 22 | M[i][j] = n*((i+j+a) % n) + ((i+2*j+b) % n) + 1; 23 | } 24 | } 25 | 26 | // Doubly Even Order 27 | 28 | } else if ((n % 4) == 0) { 29 | for (int j = 0; j < n; j++) { 30 | for (int i = 0; i < n; i++) { 31 | if (((i+1)/2)%2 == ((j+1)/2)%2) { 32 | M[i][j] = n*n-n*i-j; 33 | } else { 34 | M[i][j] = n*i+j+1; 35 | } 36 | } 37 | } 38 | 39 | // Singly Even Order 40 | 41 | } else { 42 | int p = n/2; 43 | int k = (n-2)/4; 44 | Matrix A = magic(p); 45 | for (int j = 0; j < p; j++) { 46 | for (int i = 0; i < p; i++) { 47 | double aij = A.get(i,j); 48 | M[i][j] = aij; 49 | M[i][j+p] = aij + 2*p*p; 50 | M[i+p][j] = aij + 3*p*p; 51 | M[i+p][j+p] = aij + p*p; 52 | } 53 | } 54 | for (int i = 0; i < p; i++) { 55 | for (int j = 0; j < k; j++) { 56 | double t = M[i][j]; M[i][j] 
= M[i+p][j]; M[i+p][j] = t; 57 | } 58 | for (int j = n-k+1; j < n; j++) { 59 | double t = M[i][j]; M[i][j] = M[i+p][j]; M[i+p][j] = t; 60 | } 61 | } 62 | double t = M[k][0]; M[k][0] = M[k+p][0]; M[k+p][0] = t; 63 | t = M[k][k]; M[k][k] = M[k+p][k]; M[k+p][k] = t; 64 | } 65 | return new Matrix(M); 66 | } 67 | 68 | /** Shorten spelling of print. **/ 69 | 70 | private static void print (String s) { 71 | System.out.print(s); 72 | } 73 | 74 | /** Format double with Fw.d. **/ 75 | 76 | public static String fixedWidthDoubletoString (double x, int w, int d) { 77 | java.text.DecimalFormat fmt = new java.text.DecimalFormat(); 78 | fmt.setMaximumFractionDigits(d); 79 | fmt.setMinimumFractionDigits(d); 80 | fmt.setGroupingUsed(false); 81 | String s = fmt.format(x); 82 | while (s.length() < w) { 83 | s = " " + s; 84 | } 85 | return s; 86 | } 87 | 88 | /** Format integer with Iw. **/ 89 | 90 | public static String fixedWidthIntegertoString (int n, int w) { 91 | String s = Integer.toString(n); 92 | while (s.length() < w) { 93 | s = " " + s; 94 | } 95 | return s; 96 | } 97 | 98 | 99 | public static void main (String argv[]) { 100 | 101 | /* 102 | | Tests LU, QR, SVD and symmetric Eig decompositions. 103 | | 104 | | n = order of magic square. 105 | | trace = diagonal sum, should be the magic sum, (n^3 + n)/2. 106 | | max_eig = maximum eigenvalue of (A + A')/2, should equal trace. 107 | | rank = linear algebraic rank, 108 | | should equal n if n is odd, be less than n if n is even. 109 | | cond = L_2 condition number, ratio of singular values. 110 | | lu_res = test of LU factorization, norm1(L*U-A(p,:))/(n*eps). 111 | | qr_res = test of QR factorization, norm1(Q*R-A)/(n*eps). 
112 | */ 113 | 114 | print("\n Test of Matrix Class, using magic squares.\n"); 115 | print(" See MagicSquareExample.main() for an explanation.\n"); 116 | print("\n n trace max_eig rank cond lu_res qr_res\n\n"); 117 | 118 | Date start_time = new Date(); 119 | double eps = Math.pow(2.0,-52.0); 120 | for (int n = 3; n <= 32; n++) { 121 | print(fixedWidthIntegertoString(n,7)); 122 | 123 | Matrix M = magic(n); 124 | 125 | int t = (int) M.trace(); 126 | print(fixedWidthIntegertoString(t,10)); 127 | 128 | EigenvalueDecomposition E = 129 | new EigenvalueDecomposition(M.plus(M.transpose()).times(0.5)); 130 | double[] d = E.getRealEigenvalues(); 131 | print(fixedWidthDoubletoString(d[n-1],14,3)); 132 | 133 | int r = M.rank(); 134 | print(fixedWidthIntegertoString(r,7)); 135 | 136 | double c = M.cond(); 137 | print(c < 1/eps ? fixedWidthDoubletoString(c,12,3) : 138 | " Inf"); 139 | 140 | LUDecomposition LU = new LUDecomposition(M); 141 | Matrix L = LU.getL(); 142 | Matrix U = LU.getU(); 143 | int[] p = LU.getPivot(); 144 | Matrix R = L.times(U).minus(M.getMatrix(p,0,n-1)); 145 | double res = R.norm1()/(n*eps); 146 | print(fixedWidthDoubletoString(res,12,3)); 147 | 148 | QRDecomposition QR = new QRDecomposition(M); 149 | Matrix Q = QR.getQ(); 150 | R = QR.getR(); 151 | R = Q.times(R).minus(M); 152 | res = R.norm1()/(n*eps); 153 | print(fixedWidthDoubletoString(res,12,3)); 154 | 155 | print("\n"); 156 | } 157 | Date stop_time = new Date(); 158 | double etime = (stop_time.getTime() - start_time.getTime())/1000.; 159 | print("\nElapsed Time = " + 160 | fixedWidthDoubletoString(etime,12,3) + " seconds\n"); 161 | print("Adios\n"); 162 | } 163 | } 164 | 165 | -------------------------------------------------------------------------------- /src/Jama/examples/SVD.java: -------------------------------------------------------------------------------- 1 | package Jama.examples; 2 | /************************************************************************* 3 | * Compilation: javac 
-classpath .:jama.jar SVD.java 4 | * Execution: java -classpath .:jama.jar SVD 5 | * Dependencies: jama.jar 6 | * 7 | * Test client for computing singular values of a matrix. 8 | * 9 | * http://math.nist.gov/javanumerics/jama/ 10 | * http://math.nist.gov/javanumerics/jama/Jama-1.0.1.jar 11 | * 12 | *************************************************************************/ 13 | 14 | import Jama.Matrix; 15 | import Jama.SingularValueDecomposition; 16 | 17 | public class SVD { 18 | public static void main(String[] args) { 19 | 20 | // create M-by-N matrix that doesn't have full rank 21 | int M = 8, N = 5; 22 | //Matrix B = Matrix.random(5, 3); 23 | //Matrix A = Matrix.random(M, N).times(B).times(B.transpose()); 24 | double[][] vals = {{1, 1, 1, 0, 0},{2, 2, 2, 0, 0},{1, 1, 1, 0, 0},{5, 5, 5, 0, 0},{0, 0 ,0 , 2, 2},{0, 0 ,0 , 3, 3},{0, 0 ,0 , 1, 1}}; 25 | Matrix A = new Matrix(vals); 26 | System.out.print("A = "); 27 | A.print(9, 6); 28 | 29 | // compute the singular value decomposition 30 | System.out.println("A = U S V^T"); 31 | System.out.println(); 32 | SingularValueDecomposition s = A.svd(); 33 | System.out.print("U = "); 34 | Matrix U = s.getU(); 35 | U.print(9, 6); 36 | System.out.print("Sigma = "); 37 | Matrix S = s.getS(); 38 | S.print(9, 6); 39 | System.out.print("V = "); 40 | Matrix V = s.getV(); 41 | V.print(9, 6); 42 | System.out.println("rank = " + s.rank()); 43 | System.out.println("condition number = " + s.cond()); 44 | System.out.println("2-norm = " + s.norm2()); 45 | 46 | // print out singular values 47 | System.out.print("singular values = "); 48 | Matrix svalues = new Matrix(s.getSingularValues(), 1); 49 | svalues.print(9, 6); 50 | 51 | // S.set(1, 1, 0); 52 | //S.set(3, 3, 0); 53 | // S.set(4, 4, 0); 54 | System.out.print("Sigma = "); 55 | S.print(9, 6); 56 | Matrix B = U.times(S.times(V.transpose())); 57 | System.out.print("B = "); 58 | B.print(9, 6); 59 | } 60 | 61 | } 62 | 63 | 
-------------------------------------------------------------------------------- /src/com/pku/yangliu/ClusterMain.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/ClusterMain.java -------------------------------------------------------------------------------- /src/com/pku/yangliu/ComputeWordsVector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/ComputeWordsVector.java -------------------------------------------------------------------------------- /src/com/pku/yangliu/DataPreProcess.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/DataPreProcess.java -------------------------------------------------------------------------------- /src/com/pku/yangliu/DimensionReduction.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/DimensionReduction.java -------------------------------------------------------------------------------- /src/com/pku/yangliu/KmeansCluster.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/KmeansCluster.java -------------------------------------------------------------------------------- /src/com/pku/yangliu/KmeansSVDCluster.java: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/KmeansSVDCluster.java -------------------------------------------------------------------------------- /src/com/pku/yangliu/PorterAlgorithm.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangliuy/DataMiningCluster/9676117108e44d348ce09356c42adfe4d3129deb/src/com/pku/yangliu/PorterAlgorithm.java --------------------------------------------------------------------------------