├── .gitignore ├── test ├── py_kernel_pca_test.pyc ├── py_kernel_pca_test.py └── kernel_pca_test.cpp ├── include ├── kernel_pca_pywrap.hpp ├── kernel_pca.h └── progressbar.h ├── .travis.yml ├── demo.py ├── License.txt ├── README.md ├── src ├── main.cpp ├── kernel_pca_pywrap.cpp ├── progressbar.c └── kernel_pca.cu └── CMakeLists.txt /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .*.swp 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /test/py_kernel_pca_test.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nmerrill67/GPU_GSPCA/HEAD/test/py_kernel_pca_test.pyc -------------------------------------------------------------------------------- /include/kernel_pca_pywrap.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "kernel_pca.h" 6 | 7 | 8 | 9 | 10 | // Python wrapper class for KernelPCA 11 | class PyKernelPCA : public KernelPCA 12 | { 13 | 14 | private: 15 | 16 | bool CheckNpArray(PyObject* arr); 17 | 18 | // Copy a c-contiguous strided numpy array to a fortran-contiguous float or double array 19 | float* c_cont_npy_to_f_cont_float_ptr(int M, int N, PyObject* R_); 20 | double* c_cont_npy_to_f_cont_double_ptr(int M, int N, PyObject* R_); 21 | 22 | public: 23 | 24 | 25 | PyKernelPCA(int n_components); 26 | 27 | 28 | 29 | // overload KernelPCA::fit_transform 30 | PyObject* fit_transform(PyObject* R, bool verbose); 31 | 32 | }; 33 | 34 | 35 | boost::shared_ptr initWrapper(int n_components); 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 2 | language: cpp 3 | sudo: required 4 | 
5 | dist: trusty 6 | 7 | 8 | compiler: 9 | - gcc 10 | 11 | os: 12 | - linux 13 | 14 | 15 | env: 16 | global: 17 | - GH_REPO_NAME: GPU_GSPCA 18 | - GH_REPO_REF: https://github.com/nmerrill67/GPU_GSPCA.git 19 | 20 | notifications: 21 | email: 22 | on_success: never 23 | on_failure: never 24 | 25 | 26 | addons: 27 | apt: 28 | sources: 29 | - ubuntu-toolchain-r-test 30 | packages: 31 | - wget 32 | - build-essential 33 | - cmake 34 | - nvidia-367 35 | - nvidia-modprobe 36 | - libpython2.7 37 | - python-numpy 38 | - libboost-dev 39 | - libboost-python-dev 40 | - libgsl0-dev # c++ orthogonality testing only 41 | 42 | before_script: 43 | - sudo reboot 44 | # install repo packages 45 | - CUDA_REPO_PKG=cuda-repo-ubuntu1404_7.5-18_amd64.deb 46 | - wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/$CUDA_REPO_PKG 47 | - sudo dpkg -i $CUDA_REPO_PKG 48 | - rm $CUDA_REPO_PKG 49 | - sudo apt-get update 50 | - sudo apt-get install cuda 51 | - sudo reboot 52 | 53 | 54 | script: 55 | - mkdir build && cd build 56 | - cmake .. && make 57 | 58 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import py_kernel_pca 2 | import sklearn.decomposition 3 | from time import time 4 | import numpy as np 5 | 6 | 7 | gpu_pca = py_kernel_pca.KernelPCA(4) 8 | 9 | cpu_pca = sklearn.decomposition.KernelPCA(n_components=4) 10 | 11 | 12 | print "PCA for 10000x500 matrix. Computing 4 principal components\n\n" 13 | 14 | X = np.random.rand(10000, 500).astype(np.float32) 15 | 16 | X_f = np.copy(X) # make copy of X, otherwise T1 and T2 share the same reference. Additionally, the gpu pca currently only takes float32 type.i 17 | 18 | t0 = time() 19 | T1 = gpu_pca.fit_transform(X_f, verbose=True) 20 | print "GPU PCA compute time = ", (time() - t0), " sec" 21 | 22 | print "\nStarting CPU PCA computation ..." 
23 | 24 | t1 = time() 25 | T2 = cpu_pca.fit_transform(X) 26 | print "CPU PCA compute time = " , (time() - t1), "sec" 27 | 28 | 29 | print "\n\nOrthogonality Test. All dot products of the resulting principal components should be ~ 0." 30 | print "This is tested by dotting the first and second largest eigenvectors (principal components) of the output for the sklearn's pca and this library's pca." 31 | 32 | print "\n\nThis library's GPU PCA: T0 . T1 = ", np.dot(T1[:,0], T1[:,1]) 33 | print "sklearns's CPU PCA: T0 . T1 = ", np.dot(T2[:,0], T2[:,1]) 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | Copyright 2017 Nathaniel Merrill 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /include/kernel_pca.h: -------------------------------------------------------------------------------- 1 | // C/C++ example for the CUBLAS (NVIDIA) 2 | // implementation of PCA-GS algorithm 3 | // 4 | // M. Andrecut (c) 2008 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // includes, cuda 11 | #include 12 | 13 | 14 | // This indexing macro is not used internally, but is useful for users contructing data arrays in c or c++ 15 | 16 | // matrix indexing convention for fortran-contiguous arrays 17 | #define ind_f(m, n, num_rows) (((n) * (num_rows)) + (m)) 18 | 19 | 20 | // useful macro 21 | #define __min__(a,b) \ 22 | ({ __typeof__ (a) _a = (a); \ 23 | __typeof__ (b) _b = (b); \ 24 | _a < _b ? _a : _b; }) 25 | 26 | 27 | class KernelPCA 28 | { 29 | 30 | private: 31 | int K; 32 | cublasStatus status; 33 | 34 | public: 35 | 36 | KernelPCA(); 37 | KernelPCA(int K); 38 | ~KernelPCA(); 39 | 40 | 41 | /* 42 | Fit a PCA model to the data matrix X, and return the principal components T. The memory for X is not freed in the function, so the user must do that after the call if X is no longer needed. 43 | 44 | 45 | 46 | input 47 | X: double* - host pointer to data array. The array represents an MxN matrix, where each M elements of X is the ith column of the matrix. 
48 | M: int - number of rows (samples) in X 49 | N: int - number of columns (features) in X 50 | verbose: bool - whether or not to display a progress bar in the terminal. This is very useful for large Xs 51 | 52 | return 53 | T: double* - host pointer to transformed matrix, with the same indexing as X 54 | */ 55 | 56 | 57 | double* fit_transform(int M, int N, double *X, bool verbose); 58 | 59 | 60 | /* 61 | Single-precision overload of the double-precision version above. 62 | 63 | Fit a PCA model to the data matrix X, and return the principal components T. The memory for X is not freed in the function, so the user must do that after the call if X is no longer needed. 64 | 65 | 66 | 67 | input 68 | X: float* - host pointer to data array. The array represents an MxN matrix stored column by column: each consecutive run of M elements of X is one column of the matrix. 69 | M: int - number of rows (samples) in X 70 | N: int - number of columns (features) in X 71 | verbose: bool - whether or not to display a progress bar in the terminal. This is very useful for large Xs 72 | 73 | return 74 | T: float* - host pointer to transformed matrix, with the same indexing as X 75 | */ 76 | 77 | float* fit_transform(int M, int N, float *X, bool verbose); 78 | 79 | /* 80 | Change the number of components after initialization 81 | 82 | input: 83 | K_: int - new number of components 84 | 85 | return: 86 | void 87 | */ 88 | void set_n_components(int K_); 89 | 90 | int get_n_components(); 91 | 92 | }; 93 | 94 | 95 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPU PCA 2 | 3 | Check out the python version of this library in skcuda! This library implements the same algorithm in C++ with cublas, so it is slightly faster. 4 | 5 | This library implements PCA using the Gram-Schmidt method, using the code written in [this paper](http://arxiv.org/pdf/0811.1081.pdf) as the backend for a c/c++ library and python wrappers.
6 | 7 | This code includes the c/c++ interface as well as the python interface to run PCA on a cuda-capable gpu. It models the API of sklearn.decomposition.KernelPCA. 8 | 9 | ## Requirements: 10 | C/C++ Library: 11 | - UNIX machine 12 | - cmake 13 | - gcc, g++ 14 | - ncurses (for waitbar) 15 | - gnu scientific library (for c++ demo) 16 | - cuda-capable gpu 17 | - nvidia drivers and cuda installed 18 | 19 | Python Wrappers: 20 | - C library requirements 21 | - boost and boost python 22 | - python 2.7 23 | - numpy 24 | - sklearn (for demo comparison to cpu pca implementation only) 25 | 26 | ## Installation 27 | 28 | In a shell: 29 | ``` 30 | cd /path/to/GPU_GSPCA 31 | mkdir build && cd build 32 | cmake .. && make 33 | ``` 34 | If you have everything installed, this will build the backend c code, c demo, python wrappers, and the tests. Messages will be displayed if any of the above are not built due to libraries missing, so be on the lookout. 35 | 36 | If you do not want to build the tests for whatever reason change the cmake call to: 37 | 38 | `cmake -DBUILD_TESTS=0 ..` 39 | 40 | Make sure the install directory is in your python path, this can be done in your .bashrc as 41 | 42 | `export PYTHONPATH=/path/to/GPU_GSPCA/build:$PYTHONPATH` 43 | 44 | ## Testing 45 | 46 | After building the library, simply run: 47 | 48 | `make tests` 49 | 50 | If you have the python wrappers built, it will run the C tests and the python tests in the test directory, otherwise it will just run the C tests. 51 | 52 | ## Demos 53 | 54 | For c++ demo, run `./build/main` 55 | 56 | For python demo to compare to sklearn, run `python demo.py` 57 | 58 | This compares this library to sklearn's KernelPCA in speed and accuracy. In general, this library blows sklearn out of the water in both. This is what I got running the python demo: 59 | 60 | ``` 61 | PCA for 10000x500 matrix. 
Computing 4 principal components 62 | 63 | 64 | PCA |=================================================================================| ETA: 0h00m01s 65 | GPU PCA compute time = 0.786515951157 66 | CPU PCA compute time = 3.5805721283 67 | 68 | 69 | Orthogonality Test. All dot products of the resulting principal components should be ~ 0. 70 | This is tested by dotting the first and second largest eigenvectors (principal components) of the output for the sklearn's pca and this library's pca. 71 | 72 | 73 | This library's GPU PCA: T0 . T1 = 1.623e-06 74 | sklearns's CPU PCA: T0 . T1 = -8.26332e-05 75 | ``` 76 | 77 | ## Library Usage 78 | 79 | `from py_kernel_pca import KernelPCA` 80 | 81 | in any script that you want super fast pca. 82 | 83 | Then, for a numpy array X, in either single or double precision: 84 | 85 | `gpu_pca = KernelPCA() # do KernelPCA(-1) to return all principal components 86 | X_reduced = gpu_pca.fit_transform(X, verbose=True) # verbose shows the waitbar, default is no waitbar` 87 | 88 | 89 | Note that X and X_reduced are numpy arrays, and lie in the host's memory. The arrays are internally copied to gpu memory and back after the computation. 
90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "kernel_pca.h" 2 | // includes, GSL & CBLAS 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | // main 9 | int main(int argc, char** argv) 10 | { 11 | // PCA model: X = TP’ + R 12 | // 13 | // 14 | // 15 | // 16 | // input: 17 | // input: 18 | // input: 19 | // input: 20 | // X, MxN matrix 21 | // M = number of 22 | // N = number of 23 | // K = number of 24 | // (data) 25 | // rows in X 26 | // columns in X 27 | // components (K<=N) 28 | // output: T, MxK scores matrix 29 | // output: P, NxK loads matrix 30 | // output: R, MxN residual matrix 31 | 32 | int M = 2000, m; 33 | int N = 100, n; 34 | int K = 12; 35 | printf("\nProblem dimensions: MxN=%dx%d, K=%d", M, N, K); 36 | 37 | // initialize srand and clock 38 | 39 | srand (time(NULL)); 40 | 41 | // initiallize some random test data X 42 | double *X; 43 | 44 | X = (double*)malloc(M*N * sizeof(X[0])); 45 | 46 | if(X == 0) 47 | { 48 | fprintf (stderr, "! host memory allocation error: X\n"); 49 | return EXIT_FAILURE; 50 | } 51 | 52 | for(m = 0; m < M; m++) 53 | { 54 | for(n = 0; n < N; n++) 55 | { 56 | X[ind_f(m, n, M)] = rand() / (double)RAND_MAX; 57 | } 58 | } 59 | 60 | double dtime; 61 | clock_t start; 62 | 63 | start = clock(); 64 | 65 | KernelPCA* pca; 66 | 67 | pca = new KernelPCA(K); 68 | 69 | std::cout << "\n\nn_comp in main " << pca->get_n_components() << std::endl; 70 | 71 | 72 | double *T; // results matrix 73 | 74 | dtime = ((double)clock()-start)/CLOCKS_PER_SEC; 75 | 76 | printf("\nTime for cublas initialization: %f\n", dtime); 77 | 78 | start=clock(); 79 | 80 | T = pca->fit_transform(M, N, X, 1); 81 | 82 | free(X); // need to free X ourselves. 
People may want to use it after running fit_transform 83 | 84 | dtime = ((double)clock()-start)/CLOCKS_PER_SEC; 85 | 86 | printf("\nTime for device GS-PCA computation: %f\n", dtime); 87 | 88 | 89 | 90 | delete pca; 91 | 92 | // check that the bases are orthagonal 93 | gsl_matrix* T_mat = gsl_matrix_alloc(M, K); 94 | 95 | std::string T_string; 96 | 97 | for (m=0; m 17 | #include 18 | #include 19 | #include 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | /** 26 | * Progressbar data structure (do not modify or create directly) 27 | */ 28 | typedef struct _progressbar_t 29 | { 30 | /// maximum value 31 | unsigned long max; 32 | /// current value 33 | unsigned long value; 34 | 35 | /// time progressbar was started 36 | time_t start; 37 | 38 | /// label 39 | const char *label; 40 | 41 | /// characters for the beginning, filling and end of the 42 | /// progressbar. E.g. |### | has |#| 43 | struct { 44 | char begin; 45 | char fill; 46 | char end; 47 | } format; 48 | } progressbar; 49 | 50 | /// Create a new progressbar with the specified label and number of steps. 51 | /// 52 | /// @param label The label that will prefix the progressbar. 53 | /// @param max The number of times the progressbar must be incremented before it is considered complete, 54 | /// or, in other words, the number of tasks that this progressbar is tracking. 55 | /// 56 | /// @return A progressbar configured with the provided arguments. Note that the user is responsible for disposing 57 | /// of the progressbar via progressbar_finish when finished with the object. 58 | progressbar *progressbar_new(const char *label, unsigned long max); 59 | 60 | /// Create a new progressbar with the specified label, number of steps, and format string. 61 | /// 62 | /// @param label The label that will prefix the progressbar. 
63 | /// @param max The number of times the progressbar must be incremented before it is considered complete, 64 | /// or, in other words, the number of tasks that this progressbar is tracking. 65 | /// @param format The format of the progressbar. The string provided must be three characters, and it will 66 | /// be interpreted with the first character as the left border of the bar, the second 67 | /// character as the fill of the bar and the third character as the right border of the bar. For example, 68 | /// "<->" would result in a bar formatted like "<------ >". 69 | /// 70 | /// @return A progressbar configured with the provided arguments. Note that the user is responsible for disposing 71 | /// of the progressbar via progressbar_finish when finished with the object. 72 | progressbar *progressbar_new_with_format(const char *label, unsigned long max, const char *format); 73 | 74 | /// Free an existing progress bar. Don't call this directly; call *progressbar_finish* instead. 75 | void progressbar_free(progressbar *bar); 76 | 77 | /// Increment the given progressbar. Don't increment past the initialized # of steps, though. 78 | void progressbar_inc(progressbar *bar); 79 | 80 | /// Set the current status on the given progressbar. 81 | void progressbar_update(progressbar *bar, unsigned long value); 82 | 83 | /// Set the label of the progressbar. Note that no rendering is done. The label is simply set so that the next 84 | /// rendering will use the new label. To immediately see the new label, call progressbar_draw. 85 | /// Does not update display or copy the label 86 | void progressbar_update_label(progressbar *bar, const char *label); 87 | 88 | /// Finalize (and free!) a progressbar. Call this when you're done, or if you break out 89 | /// partway through.
90 | void progressbar_finish(progressbar *bar); 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | 96 | #endif 97 | -------------------------------------------------------------------------------- /test/py_kernel_pca_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from py_kernel_pca import KernelPCA 4 | 5 | class TestKernelPCA(unittest.TestCase): 6 | 7 | def setUp(self): 8 | self.M = 1000 9 | self.N = 100 10 | self.test_pca = KernelPCA(-1) 11 | self.max_sdot = np.float32(0.0001) 12 | self.max_ddot = np.float64(0.0000001) 13 | self.K = 2 14 | self.test_pca2 = KernelPCA(self.K) 15 | self.Xd = np.random.rand(self.M, self.N) 16 | self.Xf = np.random.rand(self.M, self.N).astype(np.float32) 17 | self.Td_all = np.zeros((self.M, self.N), dtype=np.float64) 18 | self.Tf_2 = np.zeros((self.M, self.K), dtype=np.float32) 19 | self.Tf_all = np.zeros((self.M, self.N), dtype=np.float32) 20 | self.Td_2 = np.zeros((self.M, self.K), dtype=np.float64) 21 | 22 | def test_type_and_shape_double_all(self): 23 | # test that the shape is what we think it should be 24 | 25 | Td_all = self.test_pca.fit_transform(self.Xd, verbose=True) 26 | 27 | self.assertIsNotNone(Td_all) 28 | 29 | self.assertEqual(type(Td_all[0,0]), np.float64) 30 | 31 | self.assertEqual(Td_all.shape, (self.M, self.N)) 32 | 33 | self.Td_all = Td_all 34 | 35 | def test_ortho_double_all(self): 36 | # test the orthogonality of the eigenvectors 37 | 38 | for i in range(self.N-1): 39 | self.assertTrue(np.dot(self.Td_all[:,i], self.Td_all[:,i+1]) < self.max_ddot) 40 | 41 | def test_type_and_shape_all(self): 42 | # test that the shape is what we think it should be 43 | 44 | Tf_all = self.test_pca.fit_transform(self.Xf, verbose=True) 45 | 46 | self.assertIsNotNone(Tf_all) 47 | 48 | self.assertEqual(type(Tf_all[0,0]), np.float32) 49 | 50 | self.assertEqual(Tf_all.shape, (self.M, self.N)) 51 | 52 | self.Tf_all = Tf_all 53 | 54 | def 
test_ortho_all(self): 55 | # test the orthogonality of the eigenvectors 56 | 57 | for i in range(self.N-1): 58 | self.assertTrue(np.dot(self.Tf_all[:,i], self.Tf_all[:,i+1]) < self.max_sdot) 59 | 60 | def test_type_and_shape_double(self): 61 | # test that the shape is what we think it should be 62 | 63 | Td_2 = self.test_pca2.fit_transform(self.Xd, verbose=True) 64 | 65 | self.assertIsNotNone(Td_2) 66 | 67 | self.assertEqual(type(Td_2[0,0]), np.float64) 68 | 69 | self.assertEqual(Td_2.shape, (self.M, self.K)) 70 | 71 | self.Td_2 = Td_2 72 | 73 | def test_ortho_double(self): 74 | # test the orthogonality of the eigenvectors 75 | 76 | self.assertTrue(np.dot(self.Td_all[:,0], self.Td_all[:,1]) < self.max_ddot) 77 | 78 | def test_type_and_shape(self): 79 | # test that the shape is what we think it should be 80 | 81 | Tf_2 = self.test_pca2.fit_transform(self.Xf, verbose=True) 82 | 83 | self.assertIsNotNone(Tf_2) 84 | 85 | self.assertEqual(type(Tf_2[0,0]), np.float32) 86 | 87 | self.assertEqual(Tf_2.shape, (self.M, self.K)) 88 | 89 | self.Tf_2 = Tf_2 90 | 91 | def test_ortho(self): 92 | # test the orthogonality of the eigenvectors 93 | 94 | self.assertTrue(np.dot(self.Tf_2[:,0], self.Tf_2[:,1]) < self.max_sdot) 95 | 96 | 97 | 98 | 99 | def test_c_contiguous_check(self): 100 | 101 | try: 102 | X_trash = np.random.rand(self.M, self.M) 103 | T_trash = self.test_pca2(X_trash.T) 104 | fail(msg="C-contiguous array check failed") # should not reach this line. The prev line should fail and go to the except block 105 | except: 106 | 107 | print '' # need some sort of code her, or else there is an error 108 | 109 | 110 | def arr_2d_check(self): 111 | 112 | try: 113 | X_trash = np.random.rand(self.M, self.Mi, 3) 114 | T_trash = self.test_pca2(X_trash.T) 115 | fail(msg="Array dimensions check failed") # should not reach this line. 
The prev line should fail and go to the except block 116 | except: 117 | 118 | print '' # need some sort of code her, or else there is an error 119 | 120 | 121 | def test_k_bigger_than_array_dims_and_getset(self): 122 | 123 | self.test_pca.set_n_components(self.N+1) 124 | 125 | self.assertEqual(self.test_pca.get_n_components(), self.N+1) 126 | 127 | X = np.random.rand(self.M, self.N).astype(np.float32) 128 | T = self.test_pca.fit_transform(X, verbose=True) 129 | 130 | self.assertEqual(self.test_pca.get_n_components(), self.N) # should have been reset internally once the algorithm saw K was bigger than N 131 | 132 | X2 = np.random.rand(self.N-2, self.N-1).astype(np.float32) 133 | T = self.test_pca.fit_transform(X2, verbose=True) 134 | 135 | self.assertEqual(self.test_pca.get_n_components(), self.N-2) # should have been reset internally once the algorithm saw K was bigger than N 136 | 137 | 138 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | 3 | project(kernel_pca) 4 | 5 | if (NOT CMAKE_BUILD_TYPE) 6 | set(CMAKE_BUILD_TYPE RELEASE) 7 | 8 | endif() 9 | 10 | message("BUID TYPE = ${CMAKE_BUILD_TYPE}") 11 | 12 | set(CMAKE_CXX_FLAGS "-std=c++11 -Wall -fPIC") 13 | 14 | # colors, because why not 15 | string(ASCII 27 Esc) 16 | set(RED "${Esc}[31m") 17 | set(GREEN "${Esc}[32m") 18 | set(ENDL "${Esc}[m") 19 | 20 | # CUDA PACKAGE 21 | find_package(CUDA REQUIRED) 22 | # nvcc flags 23 | #set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=sm_50) 24 | set(CUDA_SEPARABLE_COMPILATION ON) 25 | set(CUDA_PROPAGATE_HOST_FLAGS ON) 26 | 27 | SET(CUDA_COMPILER "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc") 28 | 29 | 30 | # Progress bar 31 | 32 | find_package(Curses REQUIRED) # progressbar is included in kernel_pca.cu, so it it needs to compile 33 | 34 | include_directories( 35 | 
${CMAKE_CURRENT_SOURCE_DIR}/include 36 | ) 37 | 38 | add_library(progressbar SHARED 39 | ${CMAKE_CURRENT_SOURCE_DIR}/src/progressbar.c 40 | ) 41 | 42 | target_link_libraries(progressbar ${CURSES_LIBRARIES}) 43 | 44 | cuda_include_directories( 45 | ${CMAKE_CURRENT_SOURCE_DIR}/include 46 | ${CUDA_INCLUDE_DIRECTORIES} 47 | ) 48 | 49 | cuda_add_library(kernel_pca SHARED 50 | ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel_pca.cu 51 | ) 52 | 53 | 54 | CUDA_ADD_CUBLAS_TO_TARGET(kernel_pca ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel_pca.cu) 55 | 56 | CUDA_COMPILE(kernel_pca ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel_pca.cu) 57 | 58 | target_link_libraries(kernel_pca 59 | cuda 60 | progressbar 61 | ) 62 | 63 | 64 | 65 | ### Example compilation ################ 66 | 67 | find_package(GSL QUIET) 68 | 69 | if (GSL_FOUND) 70 | 71 | message("\n\n\n ${GREEN} GSL is installed. Building c++ demo ${ENDL} \n\n\n") 72 | 73 | # COMPILE AND LINK 74 | add_executable(main ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp) 75 | 76 | include_directories( 77 | ${GSL_INCLUDE_DIRS} 78 | ) 79 | 80 | target_link_libraries(main 81 | kernel_pca 82 | ${GSL_LIBRARIES} 83 | ) 84 | else () 85 | 86 | message( "\n\n\n ${RED} GSL not installed. C++ demo not being built ${ENDL}\n\n\n ") 87 | 88 | endif () 89 | 90 | 91 | 92 | 93 | #### Python wrapper ############################ 94 | 95 | set(PYTHON_BUILT 0) 96 | 97 | find_package( PythonLibs 2.7 QUIET) 98 | 99 | if (PYTHONLIBS_FOUND) 100 | 101 | set(PYTHON_BUILT 1) 102 | 103 | message("\n\n\n ${GREEN} Python 2.7 is installed. 
Building python wrappers ${ENDL} \n\n\n") 104 | 105 | 106 | find_package( Boost COMPONENTS python REQUIRED ) 107 | 108 | 109 | execute_process( COMMAND export CPLUS_INCLUDE_PATH="${PYTHON_INCLUDE_DIRS}" ) 110 | 111 | include_directories( 112 | ${PYTHON_INCLUDE_DIRS} 113 | ${Boost_INCLUDE_DIR} 114 | ) 115 | 116 | add_library(py_kernel_pca SHARED 117 | ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel_pca_pywrap.cpp 118 | ) 119 | 120 | 121 | # find_package(PythonLibs) sets PYTHON_LIBRARIES (all caps); Python_LIBRARIES is never set by it and expands to nothing 122 | target_link_libraries(py_kernel_pca 123 | kernel_pca 124 | ${PYTHON_LIBRARIES} 125 | ${Boost_LIBRARIES} 126 | ) 127 | 128 | # don't prepend wrapper library name with lib 129 | set_target_properties( py_kernel_pca PROPERTIES PREFIX "" ) 130 | else () 131 | 132 | message( "\n\n\n ${RED} Python 2.7 is not installed. Python wrappers will not be built! ${ENDL}\n\n\n ") 133 | 134 | 135 | endif () 136 | 137 | 138 | #### Tests ####################################### 139 | 140 | option(BUILD_TESTS "Build gtest module and python tests (if python wrappers are built)" ON) 141 | 142 | if (BUILD_TESTS) 143 | 144 | message( "\n\n\n ${GREEN} Building Tests ${ENDL}\n\n\n") 145 | 146 | include(ExternalProject) 147 | 148 | set(GTEST_FORCE_SHARED_CRT ON) 149 | set(GTEST_DISABLE_PTHREADS OFF) 150 | 151 | ExternalProject_Add(googletest 152 | GIT_REPOSITORY https://github.com/google/googletest.git 153 | CMAKE_ARGS -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG:PATH=DebugLibs 154 | -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE:PATH=ReleaseLibs 155 | -Dgtest_force_shared_crt=${GTEST_FORCE_SHARED_CRT} 156 | -Dgtest_disable_pthreads=${GTEST_DISABLE_PTHREADS} 157 | -DBUILD_GTEST=ON 158 | PREFIX "${CMAKE_CURRENT_BINARY_DIR}" 159 | # Disable install step 160 | INSTALL_COMMAND "" 161 | ) 162 | 163 | # Specify include dir 164 | ExternalProject_Get_Property(googletest source_dir) 165 | set(GTEST_INCLUDE_DIRS ${source_dir}/googletest/include) 166 | 167 | # Specify MainTest's link libraries 168 | ExternalProject_Get_Property(googletest binary_dir) 169 | set(GTEST_LIBS_DIR
${binary_dir}/googlemock/gtest) 170 | 171 | 172 | 173 | enable_testing() 174 | 175 | include_directories(${GTEST_INCLUDE_DIRS}) 176 | 177 | set(PROJECT_TEST_NAME ${PROJECT_NAME}_test) 178 | 179 | file(GLOB TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/test/*.cpp) 180 | add_executable(${PROJECT_TEST_NAME} ${TEST_SRC_FILES}) 181 | add_dependencies(${PROJECT_TEST_NAME} googletest) 182 | 183 | target_link_libraries(${PROJECT_TEST_NAME} 184 | kernel_pca 185 | ${GSL_LIBRARIES} 186 | ${GTEST_LIBS_DIR}/libgtest.a 187 | ${GTEST_LIBS_DIR}/libgtest_main.a 188 | ) 189 | 190 | target_link_libraries(${PROJECT_TEST_NAME} ${CMAKE_THREAD_LIBS_INIT}) 191 | 192 | 193 | add_test(kernel_pca_test ${PROJECT_TEST_NAME}) 194 | 195 | if(PYTHON_BUILT) 196 | 197 | message("${GREEN} Python tests built ${ENDL}") 198 | 199 | 200 | add_custom_target(tests COMMAND make test && cd ../test && python -m unittest -v py_kernel_pca_test.TestKernelPCA WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/build) 201 | 202 | else() 203 | 204 | 205 | add_custom_target(tests COMMAND make test WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/build) 206 | 207 | endif() 208 | 209 | endif () 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | -------------------------------------------------------------------------------- /src/kernel_pca_pywrap.cpp: -------------------------------------------------------------------------------- 1 | #include "kernel_pca_pywrap.hpp" 2 | #include 3 | 4 | 5 | 6 | // indexing for c contiguous arrays. This is only used if a numpy array is c contiguous, then it needs to be converted to fortran contiguous for KernelPCA. 
7 | #define ind_c(m, n, num_cols) (((m) * (num_cols)) + (n)) 8 | 9 | bool PyKernelPCA::CheckNpArray(PyObject* arr) 10 | { 11 | 12 | if (!PyArray_IS_C_CONTIGUOUS(reinterpret_cast<PyArrayObject*>(arr))) 13 | { 14 | throw std::runtime_error("array must be C contiguous (did you use numpy.array.T?)"); 15 | } 16 | 17 | if (PyArray_NDIM(arr) != 2) 18 | { 19 | throw std::runtime_error("numpy array must be 2 dimensional for PCA"); 20 | } 21 | 22 | switch (PyArray_TYPE(arr)) 23 | { 24 | case NPY_FLOAT32 : 25 | return 1; 26 | case NPY_FLOAT64 : 27 | return 0; 28 | default : 29 | throw std::runtime_error("numpy array must be of type float32 or float64"); 30 | } 31 | 32 | } 33 | 34 | 35 | 36 | PyKernelPCA::PyKernelPCA(int n_components) : KernelPCA::KernelPCA(n_components){} 37 | 38 | PyObject* PyKernelPCA::fit_transform(PyObject* R_, bool verbose=0) 39 | { 40 | 41 | bool isFloat; 42 | 43 | 44 | isFloat = CheckNpArray(R_); // check: is the input array c-contiguous, is it float32 type and also is it 2D 45 | 46 | 47 | int M, N; 48 | M = PyArray_DIMS(R_)[0]; // first dimension of array 49 | N = PyArray_DIMS(R_)[1]; 50 | 51 | PyObject* T_PyArr; 52 | 53 | if (isFloat) 54 | { 55 | 56 | float* R; 57 | R = c_cont_npy_to_f_cont_float_ptr(M, N, R_); 58 | 59 | 60 | 61 | float* T; 62 | 63 | T = KernelPCA::fit_transform(M, N, R, verbose); // run fit_transform on the raw float data, and put it in a float array 64 | int K, m, k; 65 | free(R); // R is our own fortran-contiguous copy of the input; KernelPCA::fit_transform does not free it 66 | K = KernelPCA::get_n_components(); 67 | 68 | // SimpleNewFromData can only handle a c-contiguous array, so convert T to c contiguous 69 | 70 | float* T_ret; 71 | T_ret = (float*)malloc(M*K * sizeof(T_ret[0])); 72 | if (T_ret == 0) { free(T); throw std::runtime_error("Cannot allocate memory for C array T_ret"); } 73 | // switch back to C contiguous for numpy 74 | for (m = 0; m < M; m++) 75 | { 76 | for (k = 0; k < K; k++) 77 | { 78 | 79 | T_ret[ind_c(m,k,K)] = T[ind_f(m, k, M)]; 80 | 81 | } 82 | } 83 | 84 | free(T); 85 | 86 | npy_intp dims[2] = {M,K}; 87 | 88 | 89 | T_PyArr = PyArray_SimpleNewFromData(2 /* = number of array dims */, dims, NPY_FLOAT32,
reinterpret_cast<void*>(T_ret)); 90 | if (T_PyArr != NULL) PyArray_ENABLEFLAGS(reinterpret_cast<PyArrayObject*>(T_PyArr), NPY_ARRAY_OWNDATA); else free(T_ret); // SimpleNewFromData does not take ownership; hand T_ret to the array so it is released with it 91 | 92 | } 93 | else 94 | { 95 | 96 | double* R; 97 | R = c_cont_npy_to_f_cont_double_ptr(M, N, R_); 98 | 99 | double* T; 100 | 101 | T = KernelPCA::fit_transform(M, N, R, verbose); // run fit_transform on the raw double data, and put it in a double array 102 | free(R); // R is our own fortran-contiguous copy of the input; KernelPCA::fit_transform does not free it 103 | int K, m, k; 104 | 105 | K = KernelPCA::get_n_components(); 106 | 107 | // SimpleNewFromData can only handle a c-contiguous array, so convert T to c contiguous 108 | 109 | double* T_ret; 110 | T_ret = (double*)malloc(M*K * sizeof(T_ret[0])); 111 | if (T_ret == 0) { free(T); throw std::runtime_error("Cannot allocate memory for C array T_ret"); } 112 | // switch back to C contiguous for numpy 113 | for (m = 0; m < M; m++) 114 | { 115 | for (k = 0; k < K; k++) 116 | { 117 | 118 | T_ret[ind_c(m,k,K)] = T[ind_f(m, k, M)]; 119 | 120 | } 121 | } 122 | 123 | 124 | free(T); 125 | npy_intp dims[2] = {M,K}; 126 | 127 | 128 | T_PyArr = PyArray_SimpleNewFromData(2 /* = number of array dims */, dims, NPY_FLOAT64, reinterpret_cast<void*>(T_ret)); 129 | if (T_PyArr != NULL) PyArray_ENABLEFLAGS(reinterpret_cast<PyArrayObject*>(T_PyArr), NPY_ARRAY_OWNDATA); else free(T_ret); // SimpleNewFromData does not take ownership; hand T_ret to the array so it is released with it 130 | 131 | 132 | 133 | } 134 | 135 | return T_PyArr; 136 | } 137 | 138 | float* PyKernelPCA::c_cont_npy_to_f_cont_float_ptr(int M, int N, PyObject* R_) 139 | { 140 | 141 | 142 | float* R; // C array from numpy array 143 | 144 | R = (float*)malloc(M*N * sizeof(R[0])); 145 | if (R == 0) 146 | { 147 | throw std::runtime_error("Cannot allocate memory for C array R"); 148 | } 149 | 150 | 151 | npy_intp* strides = PyArray_STRIDES(R_); // strides for data gaps 152 | int s0, s1; 153 | s0 = strides[0]; s1 = strides[1]; 154 | 155 | 156 | char* R_data = (char*)PyArray_DATA(R_); 157 | 158 | 159 | 160 | // switch to fortran contiguous for KernelPCA, and at the same time switch to a c array from the PyObject 161 | for (int m = 0; m < M; m++) 162 | { 163 | for (int n = 0; n < N; n++) 164 | { 165 | R[ind_f(m,n,M)] = *(float*)&R_data[ m*s0 + n*s1 ]; 166 | } 167 | } 168 | 169 | return R; 170 | 171 | } 172 | 173 | 174 | double* PyKernelPCA::c_cont_npy_to_f_cont_double_ptr(int M, int N, PyObject* R_) 175 | { 176 | double* R; // C array from numpy array 177 | 178
| R = (double*)malloc(M*N * sizeof(R[0])); 179 | if (R == 0) 180 | { 181 | throw std::runtime_error("Cannot allocate memory for C array R"); 182 | } 183 | 184 | 185 | npy_intp* strides = PyArray_STRIDES(R_); // strides for data gaps 186 | int s0, s1; 187 | s0 = strides[0]; s1 = strides[1]; 188 | 189 | 190 | char* R_data = (char*)PyArray_DATA(R_); 191 | 192 | 193 | 194 | // switch to fortran contiguous for KernelPCA, and at the same time switch to a c array from the PyObject 195 | for (int m = 0; m < M; m++) 196 | { 197 | for (int n = 0; n < N; n++) 198 | { 199 | R[ind_f(m,n,M)] = *(double*)&R_data[ m*s0 + n*s1 ]; 200 | } 201 | } 202 | 203 | return R; 204 | 205 | 206 | } 207 | 208 | boost::shared_ptr initWrapper(int n_components) 209 | { 210 | 211 | if (n_components < -1 || n_components == 0) throw std::runtime_error("Invalid n_components. Must be 0 < n_components < min matrix dimension, or n_components=-1 to return all components"); 212 | 213 | 214 | boost::shared_ptr ptr( new PyKernelPCA(n_components) ); 215 | 216 | return ptr; 217 | 218 | } 219 | 220 | 221 | #if PY_VERSION_HEX >= 0x03000000 222 | void * 223 | #else 224 | void 225 | #endif 226 | initialize() 227 | { 228 | import_array(); 229 | } 230 | 231 | // Use boosts' macro to make the python module "kernel_pca_pywrap" 232 | // This wraps the PyKernelPCA class, which extends the KernelPCA class to be able to accept numpy arrays as input, and return numpy arrays from fit_transform 233 | BOOST_PYTHON_MODULE(py_kernel_pca) 234 | { 235 | 236 | initialize(); 237 | 238 | boost::python::numeric::array::set_module_and_type("numpy", "ndarray"); 239 | 240 | boost::python::class_< PyKernelPCA, boost::shared_ptr< PyKernelPCA >, boost::noncopyable>("KernelPCA", 241 | boost::python::no_init) 242 | .def("__init__", boost::python::make_constructor(&initWrapper)) 243 | .def("fit_transform", &PyKernelPCA::fit_transform, (boost::python::arg("verbose")=0) ) 244 | .def("get_n_components", &KernelPCA::get_n_components) 245 | 
.def("set_n_components", &KernelPCA::set_n_components) 246 | ; 247 | 248 | 249 | } 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /src/progressbar.c: -------------------------------------------------------------------------------- 1 | /** 2 | * \file 3 | * \author Trevor Fountain 4 | * \author Johannes Buchner 5 | * \author Erik Garrison 6 | * \date 2010-2014 7 | * \copyright BSD 3-Clause 8 | * 9 | * progressbar -- a C class (by convention) for displaying progress 10 | * on the command line (to stderr). 11 | */ 12 | 13 | #include /* tgetent, tgetnum */ 14 | #include 15 | #include 16 | #include "progressbar.h" 17 | 18 | /// How wide we assume the screen is if termcap fails. 19 | enum { DEFAULT_SCREEN_WIDTH = 80 }; 20 | /// The smallest that the bar can ever be (not including borders) 21 | enum { MINIMUM_BAR_WIDTH = 10 }; 22 | /// The format in which the estimated remaining time will be reported 23 | static const char *const ETA_FORMAT = "ETA:%2dh%02dm%02ds"; 24 | /// The maximum number of characters that the ETA_FORMAT can ever yield 25 | enum { ETA_FORMAT_LENGTH = 13 }; 26 | /// Amount of screen width taken up by whitespace (i.e. whitespace between label/bar/ETA components) 27 | enum { WHITESPACE_LENGTH = 2 }; 28 | /// The amount of width taken up by the border of the bar component. 29 | enum { BAR_BORDER_WIDTH = 2 }; 30 | 31 | /// Models a duration of time broken into hour/minute/second components. The number of seconds should be less than the 32 | /// number of seconds in one minute, and the number of minutes should be less than the number of minutes in one hour. 
33 | typedef struct { 34 | int hours; 35 | int minutes; 36 | int seconds; 37 | } progressbar_time_components; 38 | 39 | static void progressbar_draw(const progressbar *bar); 40 | 41 | /** 42 | * Create a new progress bar with the specified label, max number of steps, and format string. 43 | * Note that `format` must be exactly three characters long, e.g. "<->" to render a progress 44 | * bar like "<---------->". Returns NULL if there isn't enough memory to allocate a progressbar 45 | */ 46 | progressbar *progressbar_new_with_format(const char *label, unsigned long max, const char *format) 47 | { 48 | progressbar *new = malloc(sizeof(progressbar)); 49 | if(new == NULL) { 50 | return NULL; 51 | } 52 | 53 | new->max = max; 54 | new->value = 0; 55 | new->start = time(NULL); 56 | assert(3 == strlen(format) && "format must be 3 characters in length"); 57 | new->format.begin = format[0]; 58 | new->format.fill = format[1]; 59 | new->format.end = format[2]; 60 | 61 | progressbar_update_label(new, label); 62 | progressbar_draw(new); 63 | 64 | return new; 65 | } 66 | 67 | /** 68 | * Create a new progress bar with the specified label and max number of steps. 69 | */ 70 | progressbar *progressbar_new(const char *label, unsigned long max) 71 | { 72 | return progressbar_new_with_format(label, max, "|=|"); 73 | } 74 | 75 | void progressbar_update_label(progressbar *bar, const char *label) 76 | { 77 | bar->label = label; 78 | } 79 | 80 | /** 81 | * Delete an existing progress bar. 82 | */ 83 | void progressbar_free(progressbar *bar) 84 | { 85 | free(bar); 86 | } 87 | 88 | /** 89 | * Increment an existing progressbar by `value` steps. 90 | */ 91 | void progressbar_update(progressbar *bar, unsigned long value) 92 | { 93 | bar->value = value; 94 | progressbar_draw(bar); 95 | } 96 | 97 | /** 98 | * Increment an existing progressbar by a single step. 
99 | */ 100 | void progressbar_inc(progressbar *bar) 101 | { 102 | progressbar_update(bar, bar->value+1); 103 | } 104 | 105 | static void progressbar_write_char(FILE *file, const int ch, const size_t times) { 106 | size_t i; 107 | for (i = 0; i < times; ++i) { 108 | fputc(ch, file); 109 | } 110 | } 111 | 112 | static int progressbar_max(int x, int y) { 113 | return x > y ? x : y; 114 | } 115 | 116 | static unsigned int get_screen_width(void) { 117 | char termbuf[2048]; 118 | if (tgetent(termbuf, getenv("TERM")) >= 0) { 119 | return tgetnum("co") /* -2 */; 120 | } else { 121 | return DEFAULT_SCREEN_WIDTH; 122 | } 123 | } 124 | 125 | static int progressbar_bar_width(int screen_width, int label_length) { 126 | return progressbar_max(MINIMUM_BAR_WIDTH, screen_width - label_length - ETA_FORMAT_LENGTH - WHITESPACE_LENGTH); 127 | } 128 | 129 | static int progressbar_label_width(int screen_width, int label_length, int bar_width) { 130 | int eta_width = ETA_FORMAT_LENGTH; 131 | 132 | // If the progressbar is too wide to fit on the screen, we must sacrifice the label. 
133 | if (label_length + 1 + bar_width + 1 + ETA_FORMAT_LENGTH > screen_width) { 134 | return progressbar_max(0, screen_width - bar_width - eta_width - WHITESPACE_LENGTH); 135 | } else { 136 | return label_length; 137 | } 138 | } 139 | 140 | static int progressbar_remaining_seconds(const progressbar* bar) { 141 | double offset = difftime(time(NULL), bar->start); 142 | if (bar->value > 0 && offset > 0) { 143 | return (offset / (double) bar->value) * (bar->max - bar->value); 144 | } else { 145 | return 0; 146 | } 147 | } 148 | 149 | static progressbar_time_components progressbar_calc_time_components(int seconds) { 150 | int hours = seconds / 3600; 151 | seconds -= hours * 3600; 152 | int minutes = seconds / 60; 153 | seconds -= minutes * 60; 154 | 155 | progressbar_time_components components = {hours, minutes, seconds}; 156 | return components; 157 | } 158 | 159 | static void progressbar_draw(const progressbar *bar) 160 | { 161 | int screen_width = get_screen_width(); 162 | int label_length = strlen(bar->label); 163 | int bar_width = progressbar_bar_width(screen_width, label_length); 164 | int label_width = progressbar_label_width(screen_width, label_length, bar_width); 165 | 166 | int progressbar_completed = (bar->value >= bar->max); 167 | int bar_piece_count = bar_width - BAR_BORDER_WIDTH; 168 | int bar_piece_current = (progressbar_completed) 169 | ? bar_piece_count 170 | : bar_piece_count * ((double) bar->value / bar->max); 171 | 172 | progressbar_time_components eta = (progressbar_completed) 173 | ? progressbar_calc_time_components(difftime(time(NULL), bar->start)) 174 | : progressbar_calc_time_components(progressbar_remaining_seconds(bar)); 175 | 176 | if (label_width == 0) { 177 | // The label would usually have a trailing space, but in the case that we don't print 178 | // a label, the bar can use that space instead. 
179 | bar_width += 1; 180 | } else { 181 | // Draw the label 182 | fwrite(bar->label, 1, label_width, stderr); 183 | fputc(' ', stderr); 184 | } 185 | 186 | // Draw the progressbar 187 | fputc(bar->format.begin, stderr); 188 | progressbar_write_char(stderr, bar->format.fill, bar_piece_current); 189 | progressbar_write_char(stderr, ' ', bar_piece_count - bar_piece_current); 190 | fputc(bar->format.end, stderr); 191 | 192 | // Draw the ETA 193 | fputc(' ', stderr); 194 | fprintf(stderr, ETA_FORMAT, eta.hours, eta.minutes, eta.seconds); 195 | fputc('\r', stderr); 196 | } 197 | 198 | /** 199 | * Finish a progressbar, indicating 100% completion, and free it. 200 | */ 201 | void progressbar_finish(progressbar *bar) 202 | { 203 | // Make sure we fill the progressbar so things look complete. 204 | progressbar_draw(bar); 205 | 206 | // Print a newline, so that future outputs to stderr look prettier 207 | fprintf(stderr, "\n"); 208 | 209 | // We've finished with this progressbar, so go ahead and free it. 
210 | progressbar_free(bar); 211 | } 212 | -------------------------------------------------------------------------------- /test/kernel_pca_test.cpp: -------------------------------------------------------------------------------- 1 | #include "kernel_pca.h" 2 | #include "gtest/gtest.h" 3 | #include 4 | #include 5 | #include 6 | 7 | KernelPCA* test_pca; 8 | 9 | 10 | int M = 1000, m; 11 | int N = 100, n; 12 | 13 | 14 | TEST(kernel_pca, default_consructor_test) 15 | { 16 | test_pca = new KernelPCA; 17 | EXPECT_TRUE(test_pca) << "Default constructor failed"; // make sure its not a null pointer 18 | } 19 | 20 | 21 | TEST(kernel_pca, get_k_negative_one_test) 22 | { 23 | EXPECT_EQ(test_pca->get_n_components(), -1) << "Default constructor does not set K to -1"; 24 | } 25 | 26 | double *Td; // results matrix 27 | 28 | TEST(kernel_pca, fit_transform_all_double_test) 29 | { 30 | // initialize srand and clock 31 | 32 | srand(time(NULL)); 33 | 34 | 35 | // initiallize some random test data X 36 | double *Xd; 37 | 38 | 39 | Xd = (double*)malloc(M*N * sizeof(Xd[0])); 40 | 41 | for(m = 0; m < M; m++) 42 | { 43 | for(n = 0; n < N; n++) 44 | { 45 | Xd[ind_f(m, n, M)] = rand() / (double)RAND_MAX; 46 | } 47 | } 48 | 49 | 50 | 51 | Td = test_pca->fit_transform(M, N, Xd, 1); 52 | 53 | EXPECT_TRUE(Td) << "double-precision fit_transform for all components returned a null pointer"; 54 | 55 | free(Xd); 56 | 57 | } 58 | 59 | int gsl_status; 60 | 61 | TEST(kernel_pca, double_all_orth_test) 62 | { 63 | // check that the bases are orthagonal 64 | gsl_matrix* T_mat = gsl_matrix_alloc(M, N); 65 | 66 | 67 | for (m=0; mfit_transform(M, N, Xf, 1); 128 | 129 | EXPECT_TRUE(Tf) << "single precision fit_transform for all components returned a null pointer"; 130 | 131 | free(Xf); 132 | 133 | } 134 | 135 | TEST(kernel_pca, float_all_orth_test) 136 | { 137 | // check that the bases are orthagonal 138 | gsl_matrix_float* T_mat = gsl_matrix_float_alloc(M, N); 139 | 140 | 141 | for (m=0; 
mget_n_components(), 4); 193 | } 194 | 195 | TEST(kernel_pca, set_get_n_comp_test) 196 | { 197 | 198 | test_pca2->set_n_components(K); 199 | 200 | EXPECT_EQ(test_pca2->get_n_components(), K); 201 | 202 | } 203 | 204 | 205 | TEST(kernel_pca, fit_transform_double_test) 206 | { 207 | 208 | // initiallize some random test data X 209 | double *Xd; 210 | 211 | 212 | Xd = (double*)malloc(M*N * sizeof(Xd[0])); 213 | 214 | for(m = 0; m < M; m++) 215 | { 216 | for(n = 0; n < N; n++) 217 | { 218 | Xd[ind_f(m, n, M)] = rand() / (double)RAND_MAX; 219 | } 220 | } 221 | 222 | 223 | 224 | Td = test_pca2->fit_transform(M, N, Xd, 1); 225 | 226 | EXPECT_TRUE(Td) << "double-precision fit_transform for 2 components returned a null pointer"; 227 | 228 | free(Xd); 229 | 230 | } 231 | 232 | 233 | TEST(kernel_pca, double_orth_test) 234 | { 235 | // check that the bases are orthagonal 236 | gsl_matrix* T_mat = gsl_matrix_alloc(M,K); 237 | 238 | 239 | for (m=0; mfit_transform(M, N, Xf, 1); 297 | 298 | EXPECT_TRUE(Tf) << "single precision fit_transform for all components returned a null pointer"; 299 | 300 | free(Xf); 301 | 302 | } 303 | 304 | TEST(kernel_pca, float_orth_test) 305 | { 306 | // check that the bases are orthagonal 307 | gsl_matrix_float* T_mat = gsl_matrix_float_alloc(M, K); 308 | 309 | 310 | for (m=0; m 3 | #include "progressbar.h" 4 | 5 | 6 | 7 | KernelPCA::KernelPCA() : K(-1) 8 | { 9 | // initialize cublas 10 | status = cublasInit(); 11 | 12 | if(status != CUBLAS_STATUS_SUCCESS) 13 | { 14 | std::runtime_error( "! CUBLAS initialization error\n"); 15 | } 16 | } 17 | 18 | 19 | 20 | KernelPCA::KernelPCA(int num_pcs) : K(num_pcs) 21 | { 22 | // initialize cublas 23 | status = cublasInit(); 24 | 25 | if(status != CUBLAS_STATUS_SUCCESS) 26 | { 27 | std::runtime_error( "! 
CUBLAS initialization error\n"); 28 | } 29 | } 30 | 31 | 32 | 33 | 34 | KernelPCA::~KernelPCA() 35 | { 36 | 37 | // shutdown 38 | status = cublasShutdown(); 39 | if(status != CUBLAS_STATUS_SUCCESS) 40 | { 41 | std::runtime_error( "! cublas shutdown error\n"); 42 | } 43 | 44 | 45 | } 46 | 47 | 48 | 49 | double* KernelPCA::fit_transform(int M, int N, double *R, bool verbose=false) 50 | { 51 | 52 | 53 | 54 | // maximum number of iterations 55 | int J = 10000; 56 | 57 | // max error 58 | double er = 1.0e-7; 59 | 60 | // if no K specified, or K > min(M, N) 61 | int K_; 62 | K_ = min(M, N); 63 | if (K == -1 || K > K_) K = K_; 64 | 65 | progressbar* progressBar; 66 | if (verbose) // show a progress bar if verbose is specified 67 | progressBar = progressbar_new("PCA", K); 68 | 69 | 70 | int n, j, k; 71 | 72 | // transfer the host matrix R to device matrix dR 73 | double *dR = 0; 74 | status = cublasAlloc(M*N, sizeof(dR[0]), (void**)&dR); 75 | 76 | if(status != CUBLAS_STATUS_SUCCESS) 77 | { 78 | std::runtime_error( "! cuda memory allocation error (dR)\n"); 79 | } 80 | 81 | status = cublasSetMatrix(M, N, sizeof(R[0]), R, M, dR, M); 82 | if(status != CUBLAS_STATUS_SUCCESS) 83 | { 84 | std::runtime_error( "! cuda access error (write dR)\n"); 85 | } 86 | 87 | // allocate device memory for T, P 88 | double *dT = 0; 89 | status = cublasAlloc(M*K, sizeof(dT[0]), (void**)&dT); 90 | if(status != CUBLAS_STATUS_SUCCESS) 91 | { 92 | std::runtime_error( "! cuda memory allocation error (dT)\n"); 93 | } 94 | 95 | double *dP = 0; 96 | status = cublasAlloc(N*K, sizeof(dP[0]), (void**)&dP); 97 | if(status != CUBLAS_STATUS_SUCCESS) 98 | { 99 | std::runtime_error( "! cuda memory allocation error (dP)\n"); 100 | } 101 | 102 | // allocate memory for eigenvalues 103 | double *L; 104 | L = (double*)malloc(K * sizeof(L[0]));; 105 | if(L == 0) 106 | { 107 | std::runtime_error( "! 
memory allocation error: T\n"); 108 | } 109 | 110 | // mean center the data 111 | double *dU = 0; 112 | status = cublasAlloc(M, sizeof(dU[0]), (void**)&dU); 113 | if(status != CUBLAS_STATUS_SUCCESS) 114 | { 115 | std::runtime_error( "! cuda memory allocation error (dU)\n"); 116 | } 117 | 118 | cublasDcopy(M, &dR[0], 1, dU, 1); 119 | for(n=1; n0) 139 | { 140 | cublasDgemv ('t', N, k, 1.0, dP, N, &dP[k*N], 1, 0.0, dU, 1); 141 | cublasDgemv ('n', N, k, -1.0, dP, N, dU, 1, 1.0, &dP[k*N], 1); 142 | } 143 | cublasDscal (N, 1.0/cublasDnrm2(N, &dP[k*N], 1), &dP[k*N], 1); 144 | cublasDgemv ('n', M, N, 1.0, dR, M, &dP[k*N], 1, 0.0, &dT[k*M], 1); 145 | if(k>0) 146 | { 147 | cublasDgemv ('t', M, k, 1.0, dT, M, &dT[k*M], 1, 0.0, dU, 1); 148 | cublasDgemv ('n', M, k, -1.0, dT, M, dU, 1, 1.0, &dT[k*M], 1); 149 | } 150 | 151 | L[k] = cublasDnrm2(M, &dT[k*M], 1); 152 | cublasDscal(M, 1.0/L[k], &dT[k*M], 1); 153 | 154 | if(fabs(a - L[k]) < er*L[k]) break; 155 | 156 | a = L[k]; 157 | 158 | } 159 | 160 | cublasDger (M, N, - L[k], &dT[k*M], 1, &dP[k*N], 1, dR, M); 161 | 162 | if (verbose) 163 | progressbar_inc(progressBar); 164 | 165 | } 166 | 167 | if (verbose) 168 | progressbar_finish(progressBar); 169 | 170 | for(k=0; k min(M, N) 212 | int K_; 213 | K_ = min(M, N); 214 | if (K == -1 || K > K_) K = K_; 215 | 216 | progressbar* progressBar; 217 | if (verbose) // show a progress bar if verbose is specified 218 | progressBar = progressbar_new("PCA", K); 219 | 220 | 221 | int n, j, k; 222 | 223 | // transfer the host matrix R to device matrix dR 224 | float *dR = 0; 225 | status = cublasAlloc(M*N, sizeof(dR[0]), (void**)&dR); 226 | 227 | if(status != CUBLAS_STATUS_SUCCESS) 228 | { 229 | std::runtime_error( "! cuda memory allocation error (dR)\n"); 230 | } 231 | 232 | status = cublasSetMatrix(M, N, sizeof(R[0]), R, M, dR, M); 233 | if(status != CUBLAS_STATUS_SUCCESS) 234 | { 235 | std::runtime_error( "! 
cuda access error (write dR)\n"); 236 | } 237 | 238 | // allocate device memory for T, P 239 | float *dT = 0; 240 | status = cublasAlloc(M*K, sizeof(dT[0]), (void**)&dT); 241 | if(status != CUBLAS_STATUS_SUCCESS) 242 | { 243 | std::runtime_error( "! cuda memory allocation error (dT)\n"); 244 | } 245 | 246 | float *dP = 0; 247 | status = cublasAlloc(N*K, sizeof(dP[0]), (void**)&dP); 248 | if(status != CUBLAS_STATUS_SUCCESS) 249 | { 250 | std::runtime_error( "! cuda memory allocation error (dP)\n"); 251 | } 252 | 253 | // allocate memory for eigenvalues 254 | float *L; 255 | L = (float*)malloc(K * sizeof(L[0]));; 256 | if(L == 0) 257 | { 258 | std::runtime_error( "! memory allocation error: T\n"); 259 | } 260 | 261 | // mean center the data 262 | float *dU = 0; 263 | status = cublasAlloc(M, sizeof(dU[0]), (void**)&dU); 264 | if(status != CUBLAS_STATUS_SUCCESS) 265 | { 266 | std::runtime_error( "! cuda memory allocation error (dU)\n"); 267 | } 268 | 269 | cublasScopy(M, &dR[0], 1, dU, 1); 270 | for(n=1; n0) 290 | { 291 | cublasSgemv ('t', N, k, 1.0, dP, N, &dP[k*N], 1, 0.0, dU, 1); 292 | cublasSgemv ('n', N, k, -1.0, dP, N, dU, 1, 1.0, &dP[k*N], 1); 293 | } 294 | cublasSscal (N, 1.0/cublasSnrm2(N, &dP[k*N], 1), &dP[k*N], 1); 295 | cublasSgemv ('n', M, N, 1.0, dR, M, &dP[k*N], 1, 0.0, &dT[k*M], 1); 296 | if(k>0) 297 | { 298 | cublasSgemv ('t', M, k, 1.0, dT, M, &dT[k*M], 1, 0.0, dU, 1); 299 | cublasSgemv ('n', M, k, -1.0, dT, M, dU, 1, 1.0, &dT[k*M], 1); 300 | } 301 | 302 | L[k] = cublasSnrm2(M, &dT[k*M], 1); 303 | cublasSscal(M, 1.0/L[k], &dT[k*M], 1); 304 | 305 | if(fabs(a - L[k]) < er*L[k]) break; 306 | 307 | a = L[k]; 308 | 309 | } 310 | 311 | cublasSger (M, N, - L[k], &dT[k*M], 1, &dP[k*N], 1, dR, M); 312 | 313 | if (verbose) 314 | progressbar_inc(progressBar); 315 | 316 | } 317 | 318 | if (verbose) 319 | progressbar_finish(progressBar); 320 | 321 | for(k=0; k