├── paper.pdf
├── timer.h
├── Benchmark
    └── Matlab
    │   ├── write_clustering_result.m
    │   ├── spectral_clustering.m
    │   └── read_unweighted_graph.m
├── timer.cu
├── Makefile
├── labels.cu
├── kmeans.h
├── Makefile_example.inc
├── README.md
├── centroids.h
├── labels.h
├── spectral_clustering.cu
└── LICENSE


/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuj-umd/fastsc/HEAD/paper.pdf


--------------------------------------------------------------------------------
/timer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | namespace kmeans {
 3 | 
 4 | struct timer {  // Stopwatch built on a pair of CUDA events.
 5 |     timer();  // Creates both events.
 6 |     ~timer();  // Destroys both events.
 7 |     void start();  // Records the start event.
 8 |     float stop();  // Records the stop event, waits for it, and returns the elapsed time from cudaEventElapsedTime.
 9 | private:
10 |     cudaEvent_t m_start, m_stop;  // Events bracketing the timed region.
11 | };
12 | 
13 | 
14 | }
15 | 


--------------------------------------------------------------------------------
/Benchmark/Matlab/write_clustering_result.m:
--------------------------------------------------------------------------------
 1 | function write_clustering_result(labels, output_filename)
 2 | % WRITE_CLUSTERING_RESULT Write a header line, then one "index  label" row per entry of labels.
 3 | fileID = fopen(output_filename, 'w');
 4 | if fileID == -1, error('write_clustering_result:openFailed', 'Cannot open %s for writing.', output_filename); end
 5 | fprintf(fileID, 'Node ID  Label\n');
 6 | for i = 1:numel(labels)  % numel also covers row vectors; size(labels, 1) would stop after one row
 7 |     fprintf(fileID, '%d  %d \n', i, labels(i));
 8 | end
 9 | fclose(fileID);
10 | 
11 | end
12 | 
13 | 


--------------------------------------------------------------------------------
/timer.cu:
--------------------------------------------------------------------------------
 1 | #include "timer.h"
 2 | 
 3 | namespace kmeans {
 4 | // Create the two events that bracket a timed region.
 5 | timer::timer() {
 6 |     cudaEventCreate(&m_start);
 7 |     cudaEventCreate(&m_stop);
 8 | }
 9 | // Release both events.
10 | timer::~timer() {
11 |     cudaEventDestroy(m_start);
12 |     cudaEventDestroy(m_stop);
13 | }
14 | // Mark the beginning of the timed region on stream 0.
15 | void timer::start() {
16 |     cudaEventRecord(m_start, 0);
17 | }
18 | // Mark the end of the timed region on stream 0, wait until it has been reached, and return the elapsed time.
19 | float timer::stop() {
20 |     float time = 0.0f;  // defined result even if cudaEventElapsedTime fails and leaves its output untouched
21 |     cudaEventRecord(m_stop, 0);
22 |     cudaEventSynchronize(m_stop);  // the stop event must have completed before the interval can be read
23 |     cudaEventElapsedTime(&time, m_start, m_stop);
24 |     return time;
25 | }
26 | 
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/Benchmark/Matlab/spectral_clustering.m:
--------------------------------------------------------------------------------
 1 | function spectral_clustering(input_filename, n, k, output_filename)  % Cluster the graph in input_filename into k groups and write the labels to output_filename; n is not referenced in the body.
 2 |     S = read_unweighted_graph(input_filename);  % one edge per row: two node indices
 3 |     % 0-based index to 1-based index
 4 |     S = S + 1;
 5 |     val = ones(size(S, 1), 1);  % unit weight for every edge
 6 |     S = [S val];
 7 |     G = spconvert(S);  % sparse matrix built from the [row col value] triples
 8 |     % Scale the adjacency matrix on both sides by D^(-1/2), D being the diagonal matrix of row sums.
 9 |     % Compute I - L_sym, where L_sym is the normalized Laplacian; the result is stored in the variable L.
10 |     network_sum = sum(G, 2);  % row sums of G
11 |     D_inv = diag(1./(sqrt(network_sum)));
12 |     L = D_inv*G*D_inv;
13 |     L = (L + L') / 2;  % average with the transpose so that L is symmetric
14 |     [V,D] = eigs(L, k, 'LM');  % k eigenpairs of largest magnitude
15 |     labels = kmeans(V, k);  % cluster the rows of V into k groups
16 |     write_clustering_result(labels, output_filename);
17 | end


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CUDA_CPP = nvcc
 2 | CUDA_ARCH ?= sm_35
 3 | include ../arpackpp/Makefile.inc
 4 | CUDA_FLAGS = -arch=$(CUDA_ARCH) -Xptxas -v
 5 | CUDA_LIBS = -lcublas -lcusparse
 6 | 
 7 | spectral_clustering: spectral_clustering.cu timer.o labels.o kmeans.h centroids.h
 8 | 	$(CUDA_CPP) $(CPP_FLAGS) $(CUDA_FLAGS) -o spectral_clustering spectral_clustering.cu timer.o labels.o $(ALL_LIBS) $(CUDA_LIBS) 
 9 | 
10 | labels.o: labels.cu labels.h
11 | 	$(CUDA_CPP) $(CPP_FLAGS) $(CUDA_FLAGS)  -c -o labels.o labels.cu 
12 | 	   
13 | timer.o: timer.cu timer.h
14 | 	$(CUDA_CPP) $(CPP_FLAGS) $(CUDA_FLAGS) -c -o timer.o timer.cu
15 | 


--------------------------------------------------------------------------------
/labels.cu:
--------------------------------------------------------------------------------
 1 | #include "labels.h"
 2 | 
 3 | namespace kmeans {
 4 | namespace detail {
 5 | 
 6 | // Owns the cuBLAS handle used by the gemm wrappers in this file.
 7 | struct cublas_state {
 8 |     cublasHandle_t cublas_handle;
 9 |     // Create the handle; the program exits if cuBLAS cannot be initialised.
10 |     cublas_state() {
11 |         cublasStatus_t stat = cublasCreate(&cublas_handle);
12 |         if (stat != CUBLAS_STATUS_SUCCESS) {
13 |             std::cout << "CUBLAS initialization failed" << std::endl;
14 |             exit(1);
15 |         }
16 |     }
17 |     // Destroy the handle. The only instance is a namespace-scope object, so
18 |     // this runs while the program is already exiting; calling exit() again
19 |     // from here would be undefined behaviour, so a failure is only reported.
20 |     ~cublas_state() {
21 |         if (cublasDestroy(cublas_handle) != CUBLAS_STATUS_SUCCESS) {
22 |             std::cout << "CUBLAS destruction failed" << std::endl;
23 |         }
24 |     }
25 | };
26 | // Single handle shared by every gemm call in this translation unit.
27 | cublas_state state;
28 | 
29 | // Single-precision overload: forwards to cublasSgemm on the shared handle
30 | // and terminates the program if cuBLAS reports anything but success.
31 | void gemm(cublasOperation_t transa, cublasOperation_t transb,
32 |           int m, int n, int k, const float *alpha,
33 |           const float *A, int lda, const float *B, int ldb,
34 |           const float *beta, float *C, int ldc) {
35 |     const cublasStatus_t rc =
36 |         cublasSgemm(state.cublas_handle, transa, transb, m, n, k,
37 |                     alpha, A, lda, B, ldb, beta, C, ldc);
38 |     if (rc == CUBLAS_STATUS_SUCCESS) {
39 |         return;
40 |     }
41 |     std::cout << "Invalid Sgemm" << std::endl;
42 |     exit(1);
43 | }
44 | 
45 | // Double-precision overload: forwards to cublasDgemm on the shared handle
46 | // and terminates the program if cuBLAS reports anything but success.
47 | void gemm(cublasOperation_t transa, cublasOperation_t transb,
48 |           int m, int n, int k, const double *alpha,
49 |           const double *A, int lda, const double *B, int ldb,
50 |           const double *beta, double *C, int ldc) {
51 |     const cublasStatus_t rc =
52 |         cublasDgemm(state.cublas_handle, transa, transb, m, n, k,
53 |                     alpha, A, lda, B, ldb, beta, C, ldc);
54 |     if (rc == CUBLAS_STATUS_SUCCESS) {
55 |         return;
56 |     }
57 |     std::cout << "Invalid Dgemm" << std::endl;
58 |     exit(1);
59 | }
60 | 
61 | }
62 | }
63 | 


--------------------------------------------------------------------------------
/Benchmark/Matlab/read_unweighted_graph.m:
--------------------------------------------------------------------------------
 1 | function S = read_unweighted_graph(filename, startRow, endRow)
 2 | %READ_UNWEIGHTED_GRAPH Import an edge list from a text file as a matrix.
 3 | %   S = READ_UNWEIGHTED_GRAPH(FILENAME) Reads data from text file FILENAME
 4 | %   for the default selection (STARTROW = 1, ENDROW = inf).
 5 | %
 6 | %   S = READ_UNWEIGHTED_GRAPH(FILENAME, STARTROW, ENDROW) Reads data from
 7 | %   rows STARTROW through ENDROW of text file FILENAME.
 8 | %
 9 | % Example:
10 | %   S = read_unweighted_graph('facebookG.txt', 1, 176468);
11 | %
12 | %    See also TEXTSCAN.
13 | 
14 | % Auto-generated by MATLAB on 2017/01/07 22:45:51
15 | 
16 | %% Initialize variables.
17 | delimiter = ' ';
18 | if nargin<=2  % defaults apply unless both STARTROW and ENDROW are supplied
19 |     startRow = 1;
20 |     endRow = inf;
21 | end
22 | 
23 | %% Format string for each line of text:
24 | %   column1: double (%f)
25 | %   column2: double (%f)
26 | % For more information, see the TEXTSCAN documentation.
27 | formatSpec = '%f%f%[^\n\r]';
28 | 
29 | %% Open the text file.
30 | fileID = fopen(filename,'r');
31 | 
32 | %% Read columns of data according to format string.
33 | % This call is based on the structure of the file used to generate this
34 | % code. If an error occurs for a different file, try regenerating the code
35 | % from the Import Tool.
36 | dataArray = textscan(fileID, formatSpec, endRow(1)-startRow(1)+1, 'Delimiter', delimiter, 'MultipleDelimsAsOne', true, 'EmptyValue' ,NaN,'HeaderLines', startRow(1)-1, 'ReturnOnError', false);
37 | for block=2:length(startRow)  % additional row ranges when STARTROW/ENDROW are vectors
38 |     frewind(fileID);
39 |     dataArrayBlock = textscan(fileID, formatSpec, endRow(block)-startRow(block)+1, 'Delimiter', delimiter, 'MultipleDelimsAsOne', true, 'EmptyValue' ,NaN,'HeaderLines', startRow(block)-1, 'ReturnOnError', false);
40 |     for col=1:length(dataArray)
41 |         dataArray{col} = [dataArray{col};dataArrayBlock{col}];  % append this block's rows to each column
42 |     end
43 | end
44 | 
45 | %% Close the text file.
46 | fclose(fileID);
47 | 
48 | %% Post processing for unimportable data.
49 | % No unimportable data rules were applied during the import, so no post
50 | % processing code is included. To generate code which works for
51 | % unimportable data, select unimportable cells in a file and regenerate the
52 | % script.
53 | 
54 | %% Create output variable
55 | S = [dataArray{1:end-1}];  % concatenate every cell except the last one (the trailing rest-of-line field of formatSpec)
56 | 
57 | 


--------------------------------------------------------------------------------
/kmeans.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <thrust/device_vector.h>
 3 | #include "centroids.h"
 4 | #include "labels.h"
 5 | #include <thrust/reduce.h>
 6 | 
 7 | namespace kmeans {
 8 | 
 9 | 
10 | //! kmeans clusters data into k groups
11 | /*! 
12 |   
13 |   \param iterations How many iterations to run
14 |   \param n Number of data points
15 |   \param d Number of dimensions
16 |   \param k Number of clusters
17 |   \param data Data points, in row-major order. This vector must have
18 |   size n * d, and since it's in row-major order, data point x occupies
19 |   positions [x * d, (x + 1) * d) in the vector. The vector is passed
20 |   by reference since it is shared with the caller and not copied.
21 |   \param labels Cluster labels. This vector has size n.
22 |   The vector is passed by reference since it is shared with the caller
23 |   and not copied.
24 |   \param centroids Centroid locations, in row-major order. This
25 |   vector must have size k * d, and since it's in row-major order,
26 |   centroid x occupies positions [x * d, (x + 1) * d) in the
27 |   vector. The vector is passed by reference since it is shared
28 |   with the caller and not copied.
29 |   \param distances Distances from points to centroids. This vector has
30 |   size n. It is passed by reference since it is shared with the caller
31 |   and not copied.
32 |   \param init_from_labels If true, the labels need to be initialized
33 |   before calling kmeans. If false, the centroids need to be
34 |   initialized before calling kmeans. Defaults to true, which means
35 |   the labels must be initialized.
36 |   \param threshold This controls early termination of the kmeans
37 |   iterations. If the ratio of the sum of distances from points to
38 |   centroids from this iteration to the previous iteration changes by
39 |   less than the threshold, then the iterations are
40 |   terminated. Defaults to 0.000001
41 |   \return The number of iterations actually performed.
42 | */
43 | 
44 | template<typename T>
45 | int kmeans(int iterations,
46 |            int n, int d, int k,
47 |            thrust::device_vector<T>& data,
48 |            thrust::device_vector<int>& labels,
49 |            thrust::device_vector<T>& centroids,
50 |            thrust::device_vector<T>& distances,
51 |            bool init_from_labels=true,
52 |            double threshold=0.000001) {
53 |     thrust::device_vector<T> data_dots(n);
54 |     // One squared norm per centroid: only k entries are ever written or read.
55 |     thrust::device_vector<T> centroid_dots(k);
56 |     thrust::device_vector<T> pairwise_distances(n * k);
57 |     detail::make_self_dots(n, d, data, data_dots);
58 | 
59 |     if (init_from_labels) {
60 |         detail::find_centroids(n, d, k, data, labels, centroids);
61 |     }
62 |     T prior_distance_sum = 0;
63 |     int i = 0;
64 |     for(; i < iterations; i++) {
65 |         detail::calculate_distances(n, d, k,
66 |                                     data, centroids, data_dots,
67 |                                     centroid_dots, pairwise_distances);
68 | 
69 |         int changes = detail::relabel(n, k, pairwise_distances, labels, distances);
70 |         // Recompute the centroids from the new assignment before the
71 |         // convergence test and the next pass.
72 |         detail::find_centroids(n, d, k, data, labels, centroids);
73 |         T distance_sum = thrust::reduce(distances.begin(), distances.end());
74 |         std::cout << "Iteration " << i << " produced " << changes
75 |                   << " changes, and total distance is " << distance_sum << std::endl;
76 | 
77 |         if (i > 0) {
78 |             // A zero prior sum cannot shrink further; dividing by it would give NaN and never trigger.
79 |             if (prior_distance_sum == 0 || distance_sum / prior_distance_sum > 1 - threshold) {
80 |                 std::cout << "Threshold triggered, terminating iterations early" << std::endl;
81 |                 return i + 1;
82 |             }
83 |         }
84 |         prior_distance_sum = distance_sum;
85 |     }
86 |     return i;
87 | }
88 | 
89 | }
90 | 


--------------------------------------------------------------------------------
/Makefile_example.inc:
--------------------------------------------------------------------------------
  1 | # ARPACK++ v1.2 2/18/2000
  2 | # c++ interface to ARPACK code.
  3 | # This file contains some definitions used to compile arpack++ examples
  4 | # with the g++ compiler under linux.
  5 | 
  6 | 
  7 | # Defining the machine.
  8 | 
  9 | PLAT         = linux
 10 | 
 11 | # Defining the compiler.
 12 | 
 13 | CPP          = g++
 14 | 
 15 | # Defining ARPACK++ directories.
 16 | # ARPACKPP_INC is the directory that contains all arpack++ header files.
 17 | # SUPERLU_DIR and UMFPACK_DIR must be set to ARPACKPP_INC.
 18 | 
 19 | #############################################################################
 20 | # Change directory path here
 21 | #ARPACKPP_DIR = $(HOME)/arpack++
 22 | #ARPACKPP_DIR = ../../..
 23 | ARPACKPP_DIR = $(HOME)/arpackpp
 24 | ARPACKPP_INC = $(ARPACKPP_DIR)/include
 25 | #SUPERLU_DIR  = $(ARPACKPP_INC)
 26 | SUPERLU_DIR  = $(ARPACKPP_DIR)/external/SuperLU
 27 | UMFPACK_DIR  = $(ARPACKPP_INC)
 28 | 
 29 | 
 30 | # Defining ARPACK, LAPACK, UMFPACK, SUPERLU, BLAS and FORTRAN libraries.
 31 | # See the arpack++ manual or the README file for directions on how to 
 32 | # obtain arpack, umfpack and SuperLU packages. 
 33 | # UMFPACK_LIB and SUPERLU_LIB must be declared only if umfpack and superlu 
 34 | # are going to be used. Some BLAS and LAPACK fortran routines are 
 35 | # distributed along with arpack fortran code, but the user should verify 
 36 | # if optimized versions of these libraries are available before installing 
 37 | # arpack. The fortran libraries described below are those required to link
 38 | # fortran and c++ code using gnu g++ and f77 compiler under linux.
 39 | # Other libraries should be defined if the user intends to compile
 40 | # arpack++ on another environment.
 41 |  
 42 | #############################################################################
 43 | # Change library path here
 44 | ARPACK_LIB   = $(ARPACKPP_DIR)/external/libarpack.a
 45 | LAPACK_LIB   = 
 46 | SUPERLU_LIB  = $(ARPACKPP_DIR)/external/libsuperlu.a
 47 | BLAS_LIB     = $(ARPACKPP_DIR)/external/libopenblas.a
 48 | FORTRAN_LIBS = -lgfortran
 49 | 
 50 | # SuiteSparse contains the UMFPACK and CHOLMOD code. To link examples against 
 51 | # these, set the SUITESPARSE_DIR to point to your installation:
 52 | #SUITESPARSE_DIR = $(ARPACKPP_DIR)/../SuiteSparse
 53 | 
 54 | SUITESPARSE_DIR = $(ARPACKPP_DIR)/external/SuiteSparse
 55 | #############################################################################
 56 | 
 57 | UMFPACK_LIB  = $(SUITESPARSE_DIR)/UMFPACK/Lib/libumfpack.a  \
 58 |  $(SUITESPARSE_DIR)/CHOLMOD/Lib/libcholmod.a \
 59 |  $(SUITESPARSE_DIR)/COLAMD/Lib/libcolamd.a \
 60 |  $(SUITESPARSE_DIR)/CCOLAMD/Lib/libccolamd.a \
 61 |  $(SUITESPARSE_DIR)/metis-4.0/libmetis.a  \
 62 |  $(SUITESPARSE_DIR)/CAMD/Lib/libcamd.a \
 63 |  $(SUITESPARSE_DIR)/AMD/Lib/libamd.a \
 64 |  $(SUITESPARSE_DIR)/SuiteSparse_config/libsuitesparseconfig.a 
 65 |  
 66 | CHOLMOD_LIB  = $(SUITESPARSE_DIR)/CHOLMOD/Lib/libcholmod.a \
 67 |  $(SUITESPARSE_DIR)/COLAMD/Lib/libcolamd.a \
 68 |  $(SUITESPARSE_DIR)/CCOLAMD/Lib/libccolamd.a \
 69 |  $(SUITESPARSE_DIR)/metis-4.0/libmetis.a  \
 70 |  $(SUITESPARSE_DIR)/CAMD/Lib/libcamd.a \
 71 |  $(SUITESPARSE_DIR)/AMD/Lib/libamd.a \
 72 | 
 73 | # For cholmod need additional headers:
 74 | CHOLMOD_INC  = -I$(SUITESPARSE_DIR)/CHOLMOD/Include -I$(SUITESPARSE_DIR)/SuiteSparse_config
 75 | 
 76 | #############################################################################
 77 | # Change CPP configuration. 
 78 | # Defining g++ flags and directories.
 79 | 
 80 | # CPP_WARNINGS = -fpermissive 
 81 | CPP_WARNINGS = -Wall
 82 | CPP_DEBUG    = -g
 83 | CPP_OPTIM    = -O
 84 | 
 85 | # If nvcc is used, no pthread
 86 | #CPP_LIBS     = -pthread
 87 | CPP_LIBS     =
 88 | CPP_INC      = 
 89 | #############################################################################
 90 | 
 91 | CPP_FLAGS    = $(CPP_DEBUG) -D$(PLAT) -I$(ARPACKPP_INC) -I$(CPP_INC) \
 92 |                $(CPP_WARNINGS)
 93 | 
 94 | # Putting all libraries together.
 95 | 
 96 | ALL_LIBS     = $(CPP_LIBS) $(ARPACK_LIB) \
 97 |                $(BLAS_LIB) $(LAPACK_LIB) $(FORTRAN_LIBS) 
 98 | 
 99 | # defining paths.
100 | 
101 | vpath %.h  $(ARPACK_INC)
102 | 
103 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # A Fast Implementation of Spectral Clustering on GPU-CPU Platforms
 2 | 
 3 | ## Introduction
 4 | 
 5 | 
 6 | This software package provides a fast implementation of spectral clustering on GPU and CPU platforms. 
 7 | 
 8 | This work was published at an IPDPS 2016 workshop under the title "A high performance implementation of spectral clustering on CPU-GPU platforms", authored by Yu Jin and Joseph F. JaJa. 
 9 | 
10 | If you use the software in your applications, please cite the paper as
11 | 
12 | 
13 | @inproceedings{jin2016,  
14 |   title={A high performance implementation of spectral clustering on cpu-gpu platforms},  
15 |   author={Jin, Yu and JaJa, Joseph F},  
16 |   booktitle={Parallel and Distributed Processing Symposium Workshops, 2016 IEEE International},  
17 |   pages={825--834},  
18 |   year={2016},  
19 |   organization={IEEE}  
20 | }
21 | 
22 | 
23 | 
24 | Spectral clustering is one of the most popular clustering algorithms for finding structural communities in graphs. However, the running time of the algorithm is usually quite long as it involves very expensive numerical operations, i.e. finding the smallest few eigenvectors of a real symmetric matrix. 
25 | 
26 | In this package, we provide a fast implementation of the spectral clustering algorithm which is significantly faster than other CPU-based software packages such as Matlab and Python. As far as we know, our implementation is also the fastest implementation available in the open source community. 
27 | 
28 | The implementation contains three parts:
29 | 
30 | - Normalize the edge weights by the inverse of the corresponding node degrees.
31 | - Compute the first k eigenvectors of the normalized Laplacian matrix based on the arpackpp package and CUDA libraries.
32 | - Apply the k-means algorithm to the rows of the k eigenvectors. The implementation of the k-means algorithm was originally developed by Bryan Catanzaro at https://github.com/bryancatanzaro/kmeans
33 | 
34 | Each part can be easily divided for individual functional usage. 
35 | 
36 | If you have trouble working with the software package, please contact Yu Jin (yuj AT umd.edu).
37 | 
38 | 
39 | ## Installation
40 | ### CUDA Environment Setup
41 | CUDA libraries such as cuSPARSE, cuBLAS and Thrust are assumed to be pre-installed. 
42 | 
43 | ### arpackpp installation
44 | Check out ARPACK++ package from https://github.com/yuj-umd/arpackpp
45 | 
46 | ```
47 | $ git clone https://github.com/yuj-umd/arpackpp.git
48 | $ cd arpackpp
49 | ```
50 | 
51 | Install the libraries
52 | 
53 | ```
54 | $ ./install-openblas.sh
55 | $ ./install-arpack-ng.sh
56 | $ ./install-superlu.sh
57 | $ ./install-suitesparse.sh
58 | 
59 | ```
60 | Change the directory and library path in Makefile.inc, as instructed in Makefile_example.inc.
61 | 
62 | ### fastsc installation
63 | Check out the code from https://github.com/yuj-umd/fastsc
64 | 
65 | ```
66 | $ git clone https://github.com/yuj-umd/fastsc.git
67 | $ cd fastsc
68 | ```
69 | Modify the arpack library path and cuda architecture in Makefile.
70 | 
71 | Compile and run the program
72 | ```
73 | $ make
74 | $ ./spectral_clustering input_file_name n k output_file_name
75 | ```
76 | 
77 | ## Usage
78 | The program format is 
79 | ```
80 | $ ./spectral_clustering input_file_name n k output_file_name
81 | ```
82 | 
83 | The input file contains the graph information represented as edge list. By default, the program supports unweighted graphs where each row contains two node indices. It is easy to adapt the code for weighted graphs and other graph representations. 
84 | 
85 | n is the total number of nodes and k is the desired number of clusters. The graph nodes are indexed from 0 to n-1 and there are NO isolated nodes.
86 | 
87 | The output file will contain each node ID and its corresponding label.
88 | 
89 | Two input examples are contained in the Dataset folder.
90 | 
91 | ## Benchmarks
92 | The Benchmark folder contains Matlab code with the same functionality. Our implementation is faster than the naive Matlab implementation, especially for large-scale problems. 
93 | 
94 | 
95 | ## Reference
96 | Jin, Yu, and Joseph F. JaJa. "A high performance implementation of spectral clustering on cpu-gpu platforms." Parallel and Distributed Processing Symposium Workshops, 2016 IEEE International. IEEE, 2016.
97 | 


--------------------------------------------------------------------------------
/centroids.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <thrust/device_vector.h>
  3 | #include <thrust/sort.h>
  4 | #include <thrust/fill.h>
  5 | #include <thrust/iterator/counting_iterator.h>
  6 | 
  7 | __device__ double atomicAdd2(double* address, double val)  // Atomically adds val to *address with a 64-bit compare-and-swap retry loop; returns the value held before the add.
  8 | {
  9 |     unsigned long long int* address_as_ull =
 10 |                              (unsigned long long int*)address;  // atomicCAS operates on the 64-bit integer view of the double
 11 |     unsigned long long int old = *address_as_ull, assumed;
 12 |     do {
 13 |         assumed = old;  // value this attempt expects to find in memory
 14 |         old = atomicCAS(address_as_ull, assumed,
 15 |                         __double_as_longlong(val +
 16 |                                              __longlong_as_double(assumed)));
 17 |     } while (assumed != old);  // the word changed in the meantime: retry with the value just read
 18 |     return __longlong_as_double(old);
 19 | }
 20 | 
 21 | namespace kmeans {
 22 | namespace detail {
 23 | 
 24 | template<typename T>
 25 | __device__ __forceinline__
 26 | void update_centroid(int label, int dimension,  // Atomically adds one partial sum for (label, dimension) into centroids and, for dimension 0 only, the partial count into counts.
 27 |                      int d,
 28 |                      T accumulator, T* centroids,
 29 |                      int count, int* counts) {
 30 |     int index = label * d + dimension;  // centroids is addressed as label * d + dimension
 31 |     T* target = centroids + index;
 32 |     atomicAdd(target, accumulator);  // NOTE(review): for T = double the built-in atomicAdd needs SM60+, while the Makefile defaults to sm_35 - confirm which T is instantiated
 33 |     if (dimension == 0) {  // count each point once rather than once per dimension
 34 |         atomicAdd(counts + label, count);
 35 |     }             
 36 | }
 37 | 
 38 | template<typename T>
 39 | __global__ void calculate_centroids(int n, int d, int k,  // Accumulates per-label coordinate sums into centroids and member counts into counts; k is not referenced in the body.
 40 |                                     T* data,
 41 |                                     int* ordered_labels,
 42 |                                     int* ordered_indices,
 43 |                                     T* centroids,
 44 |                                     int* counts) {
 45 |     int in_flight = blockDim.y * gridDim.y;  // number of thread rows (y) across the whole grid
 46 |     int labels_per_row = (n - 1) / in_flight + 1;  // length of the contiguous slice handled by each row
 47 |     for(int dimension = threadIdx.x; dimension < d; dimension += blockDim.x) {  // threadIdx.x strides over the dimensions
 48 |         T accumulator = 0;
 49 |         int count = 0;
 50 |         int global_id = threadIdx.y + blockIdx.y * blockDim.y;
 51 |         int start = global_id * labels_per_row;
 52 |         int end = (global_id + 1) * labels_per_row;
 53 |         end = (end > n) ? n : end;  // clamp the last slice to n
 54 |         int prior_label;
 55 |         if (start < n) {
 56 |             prior_label = ordered_labels[start];
 57 |         
 58 |             for(int label_number = start; label_number < end; label_number++) {
 59 |                 int label = ordered_labels[label_number];
 60 |                 if (label != prior_label) {  // a run of equal labels ended: flush its partial sum
 61 |                     update_centroid(prior_label, dimension,
 62 |                                     d,
 63 |                                     accumulator, centroids,
 64 |                                     count, counts);
 65 |                     accumulator = 0;
 66 |                     count = 0;
 67 |                 }
 68 |                 // ordered_indices gives the row of data that belongs to this position
 69 |                 T value = data[dimension + ordered_indices[label_number] * d];
 70 |                 accumulator += value;
 71 |                 prior_label = label;
 72 |                 count++;
 73 |             }
 74 |             update_centroid(prior_label, dimension,  // flush the final run of this slice
 75 |                             d,
 76 |                             accumulator, centroids,
 77 |                             count, counts);
 78 |         }
 79 |     }
 80 | }
 81 | 
 82 | template<typename T>
 83 | __global__ void scale_centroids(int d, int k, int* counts, T* centroids) {
 84 |     // One thread per (dimension, centroid) pair: x indexes the dimension,
 85 |     // y indexes the centroid.
 86 |     const int dim = threadIdx.x + blockIdx.x * blockDim.x;
 87 |     const int centroid = threadIdx.y + blockIdx.y * blockDim.y;
 88 |     if ((dim >= d) || (centroid >= k)) {
 89 |         return;  // thread falls outside the k x d centroid table
 90 |     }
 91 |     // A centroid without members is divided by 1 instead of 0, which
 92 |     // avoids a division by zero and keeps its coordinates defined.
 93 |     const int members = counts[centroid];
 94 |     const int divisor = (members < 1) ? 1 : members;
 95 |     const double scale = 1.0/double(divisor);
 96 |     centroids[dim + d * centroid] *= scale;
 97 | }
 98 | 
 99 | template<typename T>
100 | void find_centroids(int n, int d, int k,  // Recomputes every centroid from the points that currently carry its label: sums via calculate_centroids, then division by the member count via scale_centroids.
101 |                     thrust::device_vector<T>& data,
102 |                     //Labels are taken by value because
103 |                     //they get destroyed in sort_by_key
104 |                     //So we need to make a copy of them
105 |                     thrust::device_vector<int> labels,
106 |                     thrust::device_vector<T>& centroids) {
107 |     thrust::device_vector<int> indices(n);  // row of each point in data; permuted together with labels below
108 |     thrust::device_vector<int> counts(k);  // members per label, accumulated by calculate_centroids
109 |     thrust::copy(thrust::counting_iterator<int>(0),
110 |                  thrust::counting_iterator<int>(n),
111 |                  indices.begin());
112 |     //Bring all labels with the same value together
113 |     thrust::sort_by_key(labels.begin(),
114 |                         labels.end(),
115 |                         indices.begin());
116 | 
117 |     //Initialize centroids to all zeros
118 |     thrust::fill(centroids.begin(),
119 |                  centroids.end(),
120 |                  0);
121 |     
122 |     //Calculate centroids
123 |     int n_threads_x = 64;
124 |     int n_threads_y = 16;
125 |     //XXX Number of blocks here is hard coded at 30
126 |     //This should be taken care of more thoughtfully.
127 |     detail::calculate_centroids<<<dim3(1, 30), dim3(n_threads_x, n_threads_y)>>>
128 |         (n, d, k,
129 |          thrust::raw_pointer_cast(data.data()),
130 |          thrust::raw_pointer_cast(labels.data()),
131 |          thrust::raw_pointer_cast(indices.data()),
132 |          thrust::raw_pointer_cast(centroids.data()),
133 |          thrust::raw_pointer_cast(counts.data()));
134 |     
135 |     //Scale centroids
136 |     detail::scale_centroids<<<dim3((d-1)/32+1, (k-1)/32+1), dim3(32, 32)>>>
137 |         (d, k,
138 |          thrust::raw_pointer_cast(counts.data()),
139 |          thrust::raw_pointer_cast(centroids.data()));
140 | }
141 | 
142 | }
143 | }
144 | 


--------------------------------------------------------------------------------
/labels.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <thrust/device_vector.h>
  3 | #include <cfloat>
  4 | #include <cublas_v2.h>
  5 | 
  6 | namespace kmeans {
  7 | namespace detail {
  8 | 
  9 | //n: number of points
 10 | //d: dimensionality of points
 11 | //data: points, laid out in row-major order (n rows, d cols)
 12 | //dots: result vector (n rows)
 13 | // NOTE:
 14 | //Memory accesses in this function are uncoalesced!!
 15 | //This is because data is in row major order
 16 | //However, in k-means, it's called outside the optimization loop
 17 | //on the large data array, and inside the optimization loop it's
 18 | //called only on a small array, so it doesn't really matter.
 19 | //If this becomes a performance limiter, transpose the data somewhere
 20 | template<typename T>
 21 | __global__ void self_dots(int n, int d, T* data, T* dots) {
 22 |     const int row = blockDim.x * blockIdx.x + threadIdx.x;
 23 |     if (row >= n) {
 24 |         return;  // surplus thread past the last point
 25 |     }
 26 |     T sum_of_squares = 0;
 27 |     for (int col = 0; col < d; col++) {
 28 |         const T element = data[col + row * d];
 29 |         sum_of_squares += element * element;
 30 |     }
 31 |     dots[row] = sum_of_squares;  // dot product of point `row` with itself
 32 | }
 33 | 
 34 | 
 35 | template<typename T>
 36 | void make_self_dots(int n, int d, thrust::device_vector<T>& data,
 37 |                     thrust::device_vector<T>& dots) {
 38 |     // One thread per point, 256 threads per block.
 39 |     const int threads = 256, blocks = (n-1)/threads+1;
 40 |     self_dots<<<blocks, threads>>>(n, d, thrust::raw_pointer_cast(data.data()), thrust::raw_pointer_cast(dots.data()));
 41 | }
 42 | 
 43 | template<typename T>
 44 | __global__ void all_dots(int n, int k, T* data_dots, T* centroid_dots, T* dots) {  // Writes dots[point + centroid * n] = data_dots[point] + centroid_dots[centroid] for every in-range pair.
 45 | 	__shared__ T local_data_dots[32];  // NOTE(review): fixed size 32 - matches the 32 x 32 block used by make_all_dots; other block shapes look unsupported
 46 | 	__shared__ T local_centroid_dots[32];
 47 | 
 48 |     int data_index = threadIdx.x + blockIdx.x * blockDim.x;
 49 |     if ((data_index < n) && (threadIdx.y == 0)) {  // row y == 0 stages this block's data values
 50 |         local_data_dots[threadIdx.x] = data_dots[data_index];
 51 |     }
 52 |     
 53 |     int centroid_index = threadIdx.x + blockIdx.y * blockDim.y;
 54 |     if ((centroid_index < k) && (threadIdx.y == 1)) {  // row y == 1 stages this block's centroid values, indexed by threadIdx.x
 55 |         local_centroid_dots[threadIdx.x] = centroid_dots[centroid_index];
 56 |     }
 57 |        
 58 |    	__syncthreads();  // staged values must be visible to the whole block before they are read
 59 | 
 60 | 	centroid_index = threadIdx.y + blockIdx.y * blockDim.y;  // from here on, threadIdx.y selects the centroid
 61 |     if ((data_index < n) && (centroid_index < k)) {
 62 |         dots[data_index + centroid_index * n] = local_data_dots[threadIdx.x] +
 63 |             local_centroid_dots[threadIdx.y];
 64 |     }
 65 | }
 66 | 
 67 | 
 68 | template<typename T>
 69 | void make_all_dots(int n, int k, thrust::device_vector<T>& data_dots,  // Launches all_dots with 32 x 32 blocks covering every (point, centroid) pair.
 70 |                    thrust::device_vector<T>& centroid_dots,
 71 |                    thrust::device_vector<T>& dots) {
 72 |     all_dots<<<
 73 |         dim3((n-1)/32+1,  // blocks along x, the point axis
 74 |              (k-1)/32+1),  // blocks along y, the centroid axis
 75 |         dim3(32, 32)>>>(n, k, thrust::raw_pointer_cast(data_dots.data()),
 76 |                         thrust::raw_pointer_cast(centroid_dots.data()),
 77 |                         thrust::raw_pointer_cast(dots.data()));
 78 | }
 79 | 
 80 | void gemm(cublasOperation_t transa,  // Single-precision overload; defined in labels.cu, where it calls cublasSgemm and exits on failure.
 81 |           cublasOperation_t transb,
 82 |           int m, int n, int k,
 83 |           const float *alpha,
 84 |           const float *A, int lda,
 85 |           const float *B, int ldb,
 86 |           const float *beta,
 87 |           float *C, int ldc);
 88 | // Double-precision overload; defined in labels.cu, where it calls cublasDgemm and exits on failure.
 89 | void gemm(cublasOperation_t transa,
 90 |           cublasOperation_t transb,
 91 |           int m, int n, int k,
 92 |           const double *alpha,
 93 |           const double *A, int lda,
 94 |           const double *B, int ldb,
 95 |           const double *beta,
 96 |           double *C, int ldc);
 97 | 
 98 | template<typename T>
 99 | void calculate_distances(int n, int d, int k,  // Fills pairwise_distances[point + centroid * n] with the squared distance between each point and each centroid.
100 |                          thrust::device_vector<T>& data,
101 |                          thrust::device_vector<T>& centroids,
102 |                          thrust::device_vector<T>& data_dots,
103 |                          thrust::device_vector<T>& centroid_dots,
104 |                          thrust::device_vector<T>& pairwise_distances) {
105 |     detail::make_self_dots(k, d, centroids, centroid_dots);  // ||y||^2 for each of the k centroids
106 |     detail::make_all_dots(n, k, data_dots, centroid_dots, pairwise_distances);  // ||x||^2 + ||y||^2 for every pair
107 |     //||x-y||^2 = ||x||^2 + ||y||^2 - 2 x . y
108 |     //pairwise_distances has ||x||^2 + ||y||^2, so beta = 1
109 |     //The gemm calculates x.y for all x and y, so alpha = -2.0
110 |     T alpha = -2.0;
111 |     T beta = 1.0;
112 |     //If the data were in standard column major order, we'd do a
113 |     //centroids * data ^ T
114 |     //But the data is in row major order, so we have to permute
115 |     //the arguments a little
116 |     gemm(CUBLAS_OP_T, CUBLAS_OP_N,
117 |          n, k, d, &alpha,
118 |          thrust::raw_pointer_cast(data.data()),
119 |          d,//Has to be n or d
120 |          thrust::raw_pointer_cast(centroids.data()),
121 |          d,//Has to be k or d
122 |          &beta,
123 |          thrust::raw_pointer_cast(pairwise_distances.data()),
124 |          n); //Has to be n or k
125 | }
126 | 
127 | template<typename T>
128 | __global__ void make_new_labels(int n, int k, T* pairwise_distances,
129 |                                 int* labels, int* changes,
130 |                                 T* distances) {
131 |     T min_distance = DBL_MAX;
132 |     T min_idx = -1;
133 |     int global_id = threadIdx.x + blockIdx.x * blockDim.x;
134 |     if (global_id < n) {
135 |         int old_label = labels[global_id];
136 |         for(int c = 0; c < k; c++) {
137 |             T distance = pairwise_distances[c * n + global_id];
138 |             if (distance < min_distance) {
139 |                 min_distance = distance;
140 |                 min_idx = c;
141 |             }
142 |         }
143 |         labels[global_id] = min_idx;
144 |         distances[global_id] = sqrt(min_distance);
145 |         if (old_label != min_idx) {
146 |             atomicAdd(changes, 1);
147 |         }
148 |     }
149 | }
150 | 
151 | 
152 | template<typename T>
153 | int relabel(int n, int k,
154 |             thrust::device_vector<T>& pairwise_distances,
155 |             thrust::device_vector<int>& labels,
156 |             thrust::device_vector<T>& distances) {
157 |     thrust::device_vector<int> changes(1);
158 |     changes[0] = 0;
159 |     make_new_labels<<<(n-1)/256+1,256>>>(
160 |         n, k,
161 |         thrust::raw_pointer_cast(pairwise_distances.data()),
162 |         thrust::raw_pointer_cast(labels.data()),
163 |         thrust::raw_pointer_cast(changes.data()),
164 |         thrust::raw_pointer_cast(distances.data()));
165 |     return changes[0];
166 | }
167 | 
168 | }
169 | }
170 | 


--------------------------------------------------------------------------------
/spectral_clustering.cu:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include "rsymsol.h"
  3 | #include "arrssym.h"
  4 | #include <fstream>
  5 | #include <cstdlib>
  6 | #include <cmath>
  7 | #include <thrust/device_vector.h>
  8 | #include <thrust/host_vector.h>
  9 | #include <thrust/functional.h>
 10 | #include "cusparse.h"
 11 | #include "cuda_runtime.h"
 12 | #include <vector>
 13 | #include "timer.h"
 14 | #include "kmeans.h"
 15 | 
 16 | using namespace std;
 17 | 
 18 | int CUDA_MULT(float *x, float *y, cusparseHandle_t& handle, cusparseStatus_t& status, cusparseMatDescr_t& descr, int n, int nnz, thrust::device_vector<int>& csrRowPtr, thrust::device_vector<int>& cooColIndex, thrust::device_vector<float>& cooVal, thrust::device_vector<float>& tmpx, thrust::device_vector<float>& tmpy){
 19 | 	float fone = 1.0;
 20 | 	float fzero = 0.0;
 21 | 	cudaMemcpy(thrust::raw_pointer_cast(tmpx.data()), x, n*sizeof(float), cudaMemcpyHostToDevice);
 22 | 	status = cusparseScsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, 
 23 | 			n, n, nnz, &fone, 
 24 | 			descr, 
 25 | 			thrust::raw_pointer_cast(cooVal.data()), 
 26 | 			thrust::raw_pointer_cast(csrRowPtr.data()) , thrust::raw_pointer_cast(cooColIndex.data()),
 27 | 			thrust::raw_pointer_cast(tmpx.data()), &fzero, 
 28 | 			thrust::raw_pointer_cast(tmpy.data()));
 29 | 	if (status != CUSPARSE_STATUS_SUCCESS) {
 30 | 		printf("cusparseScsrmv Failed");
 31 | 		return 1;
 32 | 	}
 33 | 	cudaMemcpy(y, thrust::raw_pointer_cast(tmpy.data()), n*sizeof(float), cudaMemcpyDeviceToHost);
 34 | 	return 0;
 35 | 
 36 | }
 37 | 
 38 | 
 39 | void random_labels(thrust::device_vector<int>& labels, int n, int k) {
 40 | 	thrust::host_vector<int> host_labels(n);
 41 | 	for(int i = 0; i < n; i++) {
 42 | 		host_labels[i] = rand() % k;
 43 | 	}
 44 | 	labels = host_labels;
 45 | }
 46 | 
 47 | void regular_labels(thrust::device_vector<int>& labels, int n, int k) {
 48 | 	// Initialize by assigning nodes that are close in indexing order with the same label.
 49 | 	thrust::host_vector<int> host_labels(n);
 50 | 	int l = n/k;
 51 | 	int count = 0;
 52 | 	int cur = 0;
 53 | 	for(int i = 0; i < n; i++) {
 54 | 		host_labels[i] = cur;
 55 | 		count++;
 56 | 		if(count > l) {
 57 | 			cur++;
 58 | 			count = 0;
 59 | 		}
 60 | 	}
 61 | 	labels = host_labels;
 62 | }
 63 | 
 64 | int main(int argc, char* argv[]) {
 65 | 	if(argc < 5) {
 66 | 		cout<<"Not enough input arguments!"<<endl;
 67 | 		cout<<"The input format is: " <<endl;
 68 | 		cout<<"1. Filename"<<endl;
 69 | 		cout<<"2. Number of nodes n"<<endl;
 70 | 		cout<<"3. Number of clusters k"<<endl;
 71 | 		cout<<"4. Output labeling file"<<endl;
 72 | 		exit(1);
 73 | 	}
 74 | 	// The graph is represented in edgelist format.
 75 | 	// Each row represent the edge between <i, j>. 
 76 | 	// For undirected graphs, both <i, j> and <j, i> need to be included in the file.
 77 | 	// Nodes are indexed from 0 to n-1 with no isolated nodes.
 78 | 	ifstream infile(argv[1]);
 79 | 	if(!infile) {
 80 | 		cout<<"wrong input file"<<endl;
 81 | 		return;
 82 | 	}   
 83 | 	int n = atoi(argv[2]);
 84 | 	int k = atoi(argv[3]);
 85 | 	string line;
 86 | 	int nnz = 0;
 87 | 
 88 | 	// Get the number of edges
 89 | 	while(getline(infile, line)) nnz++;
 90 | 	thrust::host_vector<int> row(nnz), col(nnz);
 91 | 
 92 | 	// Initialize the degree
 93 | 	thrust::host_vector<float> degree(n, 0.0);
 94 | 
 95 | 	// For unweighted graphs, edge weights are initilized to 1.0. Otherwise, revise the code to the specific graph representation.
 96 | 	thrust::host_vector<float> val(nnz, 1.0);
 97 | 	infile.close();
 98 | 	infile.open(argv[1]);
 99 | 	cout<<"Start loading data..."<<endl;
100 | 	for(int i = 0; i < nnz; ++i) {
101 | 		infile>>row[i]>>col[i];
102 | 		if (row[i] >= n || col[i] >= n) {
103 | 			cout<<"Index exceed the dimension. Please check the right number of nodes"<<endl;
104 | 			exit(1);
105 | 		}
106 | 		// If the input graph is weighted, change it to
107 | 		//infile>>row[i]>>col[i]>>val[i];
108 | 		degree[row[i]] = degree[row[i]] + val[i];
109 | 	}
110 | 	infile.close();
111 | 	cout<<"Loading data completed!"<<endl;
112 | 
113 | 	cout<<"Start computing normalized Graph Laplacian..."<<endl;
114 | 	for(int i = 0; i < n; ++i) {
115 | 		if (degree[i] < 1e-8) {
116 | 			cout<<"Node " <<i<<" is an isolated node"<<endl;
117 | 			cout<<"Please eliminate isolated nodes and try again!"<<endl;
118 | 			exit(1);
119 | 		}
120 | 	}
121 | 	thrust::host_vector<float> degree_sqrt(n);
122 | 
123 | 	// Normlize the edge weight of <i, j> by 1.0/sqrt(degree[i] * degree[j])
124 | 	for(int i = 0; i < n; ++i) {
125 | 		degree_sqrt[i] = sqrt(degree[i]);
126 | 	}
127 | 
128 | 	for(int i = 0; i < nnz; ++i) {
129 | 		val[i] = val[i] / (degree_sqrt[col[i]] * degree_sqrt[row[i]]);
130 | 	}
131 | 
132 | 	cout<<"Computing normalized Graph Laplacian completed"<<endl;
133 | 	cout<<"Start computing the first smallest k eigenvectors..."<<endl;
134 | 	thrust::device_vector<int> cooRowIndex = row;
135 | 	thrust::device_vector<int> cooColIndex = col;
136 | 	thrust::device_vector<float> cooVal = val;
137 | 	cusparseStatus_t status;
138 | 	cusparseHandle_t handle=0;
139 | 	cusparseMatDescr_t descr=0;
140 | 	status= cusparseCreate(&handle);
141 | 	status= cusparseCreateMatDescr(&descr);
142 | 	if (status != CUSPARSE_STATUS_SUCCESS) {
143 | 		printf("Matrix descriptor initialization failed");
144 | 		return 1;
145 | 	}
146 | 	cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
147 | 	cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
148 | 	thrust::device_vector<int> csrRowPtr(n+1);
149 | 
150 | 	status= cusparseXcoo2csr(handle,thrust::raw_pointer_cast(cooRowIndex.data()),nnz,n,
151 | 			thrust::raw_pointer_cast(csrRowPtr.data()),CUSPARSE_INDEX_BASE_ZERO);
152 | 	if (status != CUSPARSE_STATUS_SUCCESS) {
153 | 		printf("Conversion from COO to CSR format failed");
154 | 		return 1;
155 | 	}
156 | 	thrust::device_vector<float> tmpx(n);
157 | 	thrust::device_vector<float> tmpy(n);
158 | 	ARrcSymStdEig<float> prob(n, k, "LM");
159 | 	while (!prob.ArnoldiBasisFound()) {
160 | 		prob.TakeStep();
161 | 		if ((prob.GetIdo() == 1)||(prob.GetIdo() == -1)) {
162 | 			CUDA_MULT(prob.GetVector(), prob.PutVector(), handle, status, descr, n, nnz, csrRowPtr, cooColIndex, cooVal, tmpx, tmpy);
163 | 		}
164 | 	}
165 | 
166 | 	// Finding eigenvalues and eigenvectors.
167 | 	prob.FindEigenvectors();
168 | 	// Printing eigenvalue solution.
169 | 	// Solution(prob);
170 | 
171 | 	cout<<"Completed computing the first smallest k eigenvectors!"<<endl;
172 | 
173 | 	// Extract eigenvectors. 
174 | 	// Rearrange the order such that values between i * k and (i+1)*k-1 are eigenmap for node indexed by i
175 | 	cout<<"Start kmeans clustering algorithm on the k eigenvectors..."<<endl;
176 | 	thrust::host_vector<float> eigenvectors_h(n*k);
177 | 	for (int i = 0; i < n; ++i) {
178 | 		for (int j = 0; j < k; ++j) {
179 | 			eigenvectors_h[i*k + j] = prob.Eigenvector(j, i);
180 | 		}
181 | 	}
182 | 
183 | 	//Apply K-means algorithm on the eigenvectors
184 | 	int iterations = 100;
185 | 	// The dimension of each point is equal to the number of desired clusters.
186 | 	int d = k; 
187 | 	thrust::device_vector<float> eigenvectors_d = eigenvectors_h; 
188 | 	thrust::device_vector<int> labels(n);
189 | 	thrust::device_vector<float> centroids(k * d); 
190 | 	thrust::device_vector<float> distances(n);
191 | 	// Randomly initialize the labels. (You can also try the regular_labels)
192 | 	random_labels(labels, n, k);
193 | 	kmeans::kmeans(iterations, n, d, k, eigenvectors_d, labels, centroids, distances);
194 | 	cout<<"Completed kmeans clustering algorithm on the k eigenvectors!"<<endl;
195 | 	cout<<"Start output clustering results..."<<endl;
196 | 	ofstream outfile(argv[4]);
197 | 	outfile<<"Node ID" <<' ' <<"Label"<<endl;
198 | 	for(int i = 0; i < n; ++i){ 
199 | 		outfile<<i<<' '<<labels[i]<<endl;
200 | 	} 
201 | 	outfile.close();
202 | 	cout<<"Completed output clustering results!"<<endl;
203 | 	return 0;
204 | }
205 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2017 Yu Jin
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------