├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── kMeansCuda.cu ├── kMeansCuda.h ├── main.cpp └── nbproject ├── Makefile-Debug.mk ├── Makefile-Release.mk ├── Makefile-impl.mk ├── Makefile-variables.mk ├── Package-Debug.bash ├── Package-Release.bash ├── configurations.xml ├── private ├── Makefile-variables.mk ├── configurations.xml ├── launcher.properties └── private.xml └── project.xml /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | 6 | # Compiled Dynamic libraries 7 | *.so 8 | *.dylib 9 | 10 | # Compiled Static libraries 11 | *.lai 12 | *.la 13 | *.a 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2005 Wei-keng Liao 4 | Copyright (c) 2011 Serban Giuroiu 5 | Copyright (c) 2013 PHAM Hoai Vu 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | this software and associated documentation files (the "Software"), to deal in 9 | the Software without restriction, including without limitation the rights to 10 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 11 | the Software, and to permit persons to whom the Software is furnished to do so, 12 | subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 19 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 20 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 21 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # There exist several targets which are by default empty and which can be 3 | # used for execution of your targets. These targets are usually executed 4 | # before and after some main targets. They are: 5 | # 6 | # .build-pre: called before 'build' target 7 | # .build-post: called after 'build' target 8 | # .clean-pre: called before 'clean' target 9 | # .clean-post: called after 'clean' target 10 | # .clobber-pre: called before 'clobber' target 11 | # .clobber-post: called after 'clobber' target 12 | # .all-pre: called before 'all' target 13 | # .all-post: called after 'all' target 14 | # .help-pre: called before 'help' target 15 | # .help-post: called after 'help' target 16 | # 17 | # Targets beginning with '.' are not intended to be called on their own. 18 | # 19 | # Main targets can be executed directly, and they are: 20 | # 21 | # build build a specific configuration 22 | # clean remove built files from a configuration 23 | # clobber remove all built files 24 | # all build all configurations 25 | # help print help mesage 26 | # 27 | # Targets .build-impl, .clean-impl, .clobber-impl, .all-impl, and 28 | # .help-impl are implemented in nbproject/makefile-impl.mk. 29 | # 30 | # Available make variables: 31 | # 32 | # CND_BASEDIR base directory for relative paths 33 | # CND_DISTDIR default top distribution directory (build artifacts) 34 | # CND_BUILDDIR default top build directory (object files, ...) 
35 | # CONF name of current configuration 36 | # CND_PLATFORM_${CONF} platform name (current configuration) 37 | # CND_ARTIFACT_DIR_${CONF} directory of build artifact (current configuration) 38 | # CND_ARTIFACT_NAME_${CONF} name of build artifact (current configuration) 39 | # CND_ARTIFACT_PATH_${CONF} path to build artifact (current configuration) 40 | # CND_PACKAGE_DIR_${CONF} directory of package (current configuration) 41 | # CND_PACKAGE_NAME_${CONF} name of package (current configuration) 42 | # CND_PACKAGE_PATH_${CONF} path to package (current configuration) 43 | # 44 | # NOCDDL 45 | 46 | 47 | # Environment 48 | MKDIR=mkdir 49 | CP=cp 50 | CCADMIN=CCadmin 51 | 52 | 53 | # build 54 | build: .build-post 55 | 56 | .build-pre: 57 | # Add your pre 'build' code here... 58 | 59 | .build-post: .build-impl 60 | # Add your post 'build' code here... 61 | 62 | 63 | # clean 64 | clean: .clean-post 65 | 66 | .clean-pre: 67 | # Add your pre 'clean' code here... 68 | 69 | .clean-post: .clean-impl 70 | # Add your post 'clean' code here... 71 | 72 | 73 | # clobber 74 | clobber: .clobber-post 75 | 76 | .clobber-pre: 77 | # Add your pre 'clobber' code here... 78 | 79 | .clobber-post: .clobber-impl 80 | # Add your post 'clobber' code here... 81 | 82 | 83 | # all 84 | all: .all-post 85 | 86 | .all-pre: 87 | # Add your pre 'all' code here... 88 | 89 | .all-post: .all-impl 90 | # Add your post 'all' code here... 91 | 92 | 93 | # build tests 94 | build-tests: .build-tests-post 95 | 96 | .build-tests-pre: 97 | # Add your pre 'build-tests' code here... 98 | 99 | .build-tests-post: .build-tests-impl 100 | # Add your post 'build-tests' code here... 101 | 102 | 103 | # run tests 104 | test: .test-post 105 | 106 | .test-pre: build-tests 107 | # Add your pre 'test' code here... 108 | 109 | .test-post: .test-impl 110 | # Add your post 'test' code here... 111 | 112 | 113 | # help 114 | help: .help-post 115 | 116 | .help-pre: 117 | # Add your pre 'help' code here... 
118 | 119 | .help-post: .help-impl 120 | # Add your post 'help' code here... 121 | 122 | 123 | 124 | # include project implementation makefile 125 | include nbproject/Makefile-impl.mk 126 | 127 | # include project make variables 128 | include nbproject/Makefile-variables.mk 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | kmeans_cuda 2 | =========== 3 | 4 | CUDA implementation of k-means 5 | 6 | The original version of k-means in CUDA was made available by Serban Giuroiu at https://github.com/serban/kmeans. 7 | 8 | However Serban used pointer of pointers to represent a 2D matrix, which might not be very convenient in some cases. Moreover in my application, I have the data matrix on the device memory already, and the matrix is stored in column major order (to be used in CUBLAS and other CUDA libraries). Therefore I made some changes to Serban's implementation, concretely: 9 | 10 | 1. The function now works with column major matrix stored in device memory, and the result is also stored in device memory. This reduces the overhead caused by transposing the matrix in Serban's code, and makes it easier to integrate k-means in other applications. 11 | 2. A simple CUDA kernel is added for updating the cluster centroids after each iteration. This reduces the overhead caused by multiple memory transfers at each iteration. However I was lazy and this kernel (called `update_cluster`) has not been well optimized. 12 | 3. The `membership` array can be set to `NULL` when calling the function if you don't want to have it in the results. 13 | 4. Added a parameter for the maximum number of k-means iterations. 14 | 15 | With the new kernel, the program seems to be faster. I already included a simple test case and benchmark in `main.cpp`, you can compile and run it yourself. Serban's original version is also included. 
16 | -------------------------------------------------------------------------------- /kMeansCuda.cu: -------------------------------------------------------------------------------- 1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 2 | /* File: cuda_kmeans.cu (CUDA version) */ 3 | /* Description: Implementation of simple k-means clustering algorithm */ 4 | /* This program takes an array of N data objects, each with */ 5 | /* M coordinates and performs a k-means clustering given a */ 6 | /* user-provided value of the number of clusters (K). The */ 7 | /* clustering results are saved in 2 arrays: */ 8 | /* 1. a returned array of size [K][N] indicating the center */ 9 | /* coordinates of K clusters */ 10 | /* 2. membership[N] stores the cluster center ids, each */ 11 | /* corresponding to the cluster a data object is assigned */ 12 | /* */ 13 | /* Author: Wei-keng Liao */ 14 | /* ECE Department, Northwestern University */ 15 | /* email: wkliao@ece.northwestern.edu */ 16 | /* Copyright, 2005, Wei-keng Liao */ 17 | /* */ 18 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 19 | 20 | // Copyright (c) 2005 Wei-keng Liao 21 | // Copyright (c) 2011 Serban Giuroiu 22 | // Copyright (c) 2013 Vu Pham 23 | // 24 | // Permission is hereby granted, free of charge, to any person obtaining a copy 25 | // of this software and associated documentation files (the "Software"), to deal 26 | // in the Software without restriction, including without limitation the rights 27 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 28 | // copies of the Software, and to permit persons to whom the Software is 29 | // furnished to do so, subject to the following conditions: 30 | // 31 | // The above copyright notice and this permission notice shall be included in 32 | // all copies or substantial portions of the Software. 
33 | // 34 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 35 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 36 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 37 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 38 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 39 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 40 | // THE SOFTWARE. 41 | 42 | // ----------------------------------------------------------------------------- 43 | 44 | #include 45 | #include 46 | #include 47 | 48 | #include "kMeansCuda.h" 49 | 50 | namespace cuda 51 | { 52 | 53 | void get_kernel_config_given_ratios(int sz1, int sz2, dim3& szGrid, dim3& szBlock 54 | , int& rowPerThread, int& colPerThread 55 | , int nThreadXRatio, int nThreadYRatio) 56 | { 57 | szBlock.x = std::min(sz1, nThreadXRatio); 58 | szBlock.y = std::min(sz2, nThreadYRatio); 59 | szBlock.z = 1; 60 | szGrid.x = szGrid.y = szGrid.z = 1; 61 | colPerThread = rowPerThread = 1; 62 | 63 | if (sz1 > nThreadXRatio || sz2 > nThreadYRatio) 64 | { 65 | int ratio = sz1/nThreadXRatio, k; 66 | for (k = 1; (1 << k) <= ratio; ++k) 67 | { 68 | rowPerThread = (2 << (k/2)); 69 | } 70 | //rowPerThread = 2 << (int)(std::log(std::sqrt((float)sz1/nThreadX))/std::log((float)2)); 71 | szGrid.x = (sz1 + szBlock.x*rowPerThread - 1) / (szBlock.x*rowPerThread); 72 | 73 | ratio = sz2/nThreadYRatio; 74 | for (k = 1; (1 << k) <= ratio; ++k) 75 | { 76 | colPerThread = (2 << (k/2)); 77 | } 78 | //colPerThread = 2 << (int)(std::log(std::sqrt((float)sz2/nThreadY))/std::log((float)2)); 79 | szGrid.y = (sz2 + szBlock.y*colPerThread - 1) / (szBlock.y*colPerThread); 80 | } 81 | assert(szGrid.x*szBlock.x*rowPerThread >= sz1); 82 | assert(szGrid.y*szBlock.y*colPerThread >= sz2); 83 | } 84 | 85 | void get_kernel_config(int sz1, int sz2, dim3& szGrid, dim3& szBlock 86 | , int& 
rowPerThread, int& colPerThread) 87 | { 88 | // CUDA 2.x: maximum 1024 threads/block. CUDA < 2.x: 512 threads/block 89 | 90 | int nThreadX, nThreadY; 91 | if (sz1 / sz2 >= 2) 92 | { 93 | nThreadX = 64; nThreadY = 16; 94 | } 95 | else if (sz2 / sz1 >= 2) 96 | { 97 | nThreadX = 16; nThreadY = 64; 98 | } 99 | else 100 | { 101 | nThreadX = nThreadY = 32; 102 | } 103 | get_kernel_config_given_ratios(sz1, sz2, szGrid, szBlock 104 | , rowPerThread, colPerThread, nThreadX, nThreadY); 105 | } 106 | 107 | /******************************************************************************/ 108 | 109 | 110 | static inline int nextPowerOfTwo(int n) { 111 | n--; 112 | 113 | n = n >> 1 | n; 114 | n = n >> 2 | n; 115 | n = n >> 4 | n; 116 | n = n >> 8 | n; 117 | n = n >> 16 | n; 118 | // n = n >> 32 | n; // For 64-bit ints 119 | 120 | return ++n; 121 | } 122 | 123 | /*----< euclid_dist_2() >----------------------------------------------------*/ 124 | /* square of Euclid distance between two multi-dimensional points */ 125 | __host__ __device__ inline static 126 | float euclid_dist_2(int numCoords, 127 | int numObjs, 128 | int numClusters, 129 | float *objects, // [numCoords][numObjs] 130 | float *clusters, // [numCoords][numClusters] 131 | int objectId, 132 | int clusterId) 133 | { 134 | int i; 135 | float ans=0.0; 136 | 137 | for (i = 0; i < numCoords; i++) { 138 | ans += (objects[numObjs * i + objectId] - clusters[numClusters * i + clusterId]) * 139 | (objects[numObjs * i + objectId] - clusters[numClusters * i + clusterId]); 140 | } 141 | 142 | return(ans); 143 | } 144 | 145 | /*----< find_nearest_cluster() >---------------------------------------------*/ 146 | __global__ static 147 | void find_nearest_cluster(int numCoords, 148 | int numObjs, 149 | int numClusters, 150 | float *objects, // [numCoords][numObjs] 151 | float *deviceClusters, // [numCoords][numClusters] 152 | int *membership, // [numObjs] 153 | int *intermediates) 154 | { 155 | extern __shared__ char sharedMemory[]; 
156 | 157 | // The type chosen for membershipChanged must be large enough to support 158 | // reductions! There are blockDim.x elements, one for each thread in the 159 | // block. See numThreadsPerClusterBlock in cuda_kmeans(). 160 | unsigned char *membershipChanged = (unsigned char *)sharedMemory; 161 | #if BLOCK_SHARED_MEM_OPTIMIZATION 162 | float *clusters = (float *)(sharedMemory + blockDim.x); 163 | #else 164 | float *clusters = deviceClusters; 165 | #endif 166 | 167 | membershipChanged[threadIdx.x] = 0; 168 | 169 | #if BLOCK_SHARED_MEM_OPTIMIZATION 170 | // BEWARE: We can overrun our shared memory here if there are too many 171 | // clusters or too many coordinates! For reference, a Tesla C1060 has 16 172 | // KiB of shared memory per block, and a GeForce GTX 480 has 48 KiB of 173 | // shared memory per block. 174 | for (int i = threadIdx.x; i < numClusters; i += blockDim.x) { 175 | for (int j = 0; j < numCoords; j++) { 176 | clusters[numClusters * j + i] = deviceClusters[numClusters * j + i]; 177 | } 178 | } 179 | __syncthreads(); 180 | #endif 181 | 182 | int objectId = blockDim.x * blockIdx.x + threadIdx.x; 183 | 184 | if (objectId < numObjs) { 185 | int index, i; 186 | float dist, min_dist; 187 | 188 | /* find the cluster id that has min distance to object */ 189 | index = 0; 190 | min_dist = euclid_dist_2(numCoords, numObjs, numClusters, 191 | objects, clusters, objectId, 0); 192 | 193 | for (i=1; i 0; s >>= 1) { 214 | if (threadIdx.x < s) { 215 | membershipChanged[threadIdx.x] += 216 | membershipChanged[threadIdx.x + s]; 217 | } 218 | __syncthreads(); 219 | } 220 | 221 | if (threadIdx.x == 0) { 222 | intermediates[blockIdx.x] = membershipChanged[0]; 223 | } 224 | } 225 | } 226 | 227 | __global__ static 228 | void compute_delta(int *deviceIntermediates, 229 | int numIntermediates, // The actual number of intermediates 230 | int numIntermediates2) // The next power of two 231 | { 232 | // The number of elements in this array should be equal to 233 | // 
numIntermediates2, the number of threads launched. It *must* be a power 234 | // of two! 235 | extern __shared__ unsigned int intermediates[]; 236 | 237 | // Copy global intermediate values into shared memory. 238 | intermediates[threadIdx.x] = 239 | (threadIdx.x < numIntermediates) ? deviceIntermediates[threadIdx.x] : 0; 240 | 241 | __syncthreads(); 242 | 243 | // numIntermediates2 *must* be a power of two! 244 | for (unsigned int s = numIntermediates2 / 2; s > 0; s >>= 1) { 245 | if (threadIdx.x < s) { 246 | intermediates[threadIdx.x] += intermediates[threadIdx.x + s]; 247 | } 248 | __syncthreads(); 249 | } 250 | 251 | if (threadIdx.x == 0) { 252 | deviceIntermediates[0] = intermediates[0]; 253 | } 254 | } 255 | 256 | #define malloc2D(name, xDim, yDim, type) do { \ 257 | name = (type **)malloc(xDim * sizeof(type *)); \ 258 | assert(name != NULL); \ 259 | name[0] = (type *)malloc(xDim * yDim * sizeof(type)); \ 260 | assert(name[0] != NULL); \ 261 | for (size_t i = 1; i < xDim; i++) \ 262 | name[i] = name[i-1] + yDim; \ 263 | } while (0) 264 | 265 | 266 | /*----< cuda_kmeans() >-------------------------------------------------------*/ 267 | // 268 | // ---------------------------------------- 269 | // DATA LAYOUT 270 | // 271 | // objects [numObjs][numCoords] 272 | // clusters [numClusters][numCoords] 273 | // dimObjects [numCoords][numObjs] 274 | // dimClusters [numCoords][numClusters] 275 | // newClusters [numCoords][numClusters] 276 | // deviceObjects [numCoords][numObjs] 277 | // deviceClusters [numCoords][numClusters] 278 | // ---------------------------------------- 279 | // 280 | /* return an array of cluster centers of size [numClusters][numCoords] */ 281 | float** kMeansHost(float **objects, /* in: [numObjs][numCoords] */ 282 | int numCoords, /* no. features */ 283 | int numObjs, /* no. objects */ 284 | int numClusters, /* no. 
clusters */ 285 | float threshold, /* % objects change membership */ 286 | int *membership, /* out: [numObjs] */ 287 | int *loop_iterations) 288 | { 289 | int i, j, index, loop=0; 290 | int *newClusterSize; /* [numClusters]: no. objects assigned in each 291 | new cluster */ 292 | float delta; /* % of objects change their clusters */ 293 | float **dimObjects; 294 | float **clusters; /* out: [numClusters][numCoords] */ 295 | float **dimClusters; 296 | float **newClusters; /* [numCoords][numClusters] */ 297 | 298 | float *deviceObjects; 299 | float *deviceClusters; 300 | int *deviceMembership; 301 | int *deviceIntermediates; 302 | 303 | // Copy objects given in [numObjs][numCoords] layout to new 304 | // [numCoords][numObjs] layout 305 | malloc2D(dimObjects, numCoords, numObjs, float); 306 | for (i = 0; i < numCoords; i++) { 307 | for (j = 0; j < numObjs; j++) { 308 | dimObjects[i][j] = objects[j][i]; 309 | } 310 | } 311 | 312 | /* pick first numClusters elements of objects[] as initial cluster centers*/ 313 | malloc2D(dimClusters, numCoords, numClusters, float); 314 | for (i = 0; i < numCoords; i++) { 315 | for (j = 0; j < numClusters; j++) { 316 | dimClusters[i][j] = dimObjects[i][j]; 317 | } 318 | } 319 | 320 | /* initialize membership[] */ 321 | for (i=0; i deviceProp.sharedMemPerBlock) { 348 | err("WARNING: Your CUDA hardware has insufficient block shared memory. " 349 | "You need to recompile with BLOCK_SHARED_MEM_OPTIMIZATION=0. 
" 350 | "See the README for details.\n"); 351 | } 352 | #else 353 | const unsigned int clusterBlockSharedDataSize = 354 | numThreadsPerClusterBlock * sizeof(unsigned char); 355 | #endif 356 | 357 | const unsigned int numReductionThreads = 358 | nextPowerOfTwo(numClusterBlocks); 359 | const unsigned int reductionBlockSharedDataSize = 360 | numReductionThreads * sizeof(unsigned int); 361 | 362 | CHECK_CUDA(cudaMalloc(&deviceObjects, numObjs*numCoords*sizeof(float))); 363 | CHECK_CUDA(cudaMalloc(&deviceClusters, numClusters*numCoords*sizeof(float))); 364 | CHECK_CUDA(cudaMalloc(&deviceMembership, numObjs*sizeof(int))); 365 | CHECK_CUDA(cudaMalloc(&deviceIntermediates, numReductionThreads*sizeof(unsigned int))); 366 | 367 | CHECK_CUDA(cudaMemcpy(deviceObjects, dimObjects[0], 368 | numObjs*numCoords*sizeof(float), cudaMemcpyHostToDevice)); 369 | CHECK_CUDA(cudaMemcpy(deviceMembership, membership, 370 | numObjs*sizeof(int), cudaMemcpyHostToDevice)); 371 | 372 | do { 373 | CHECK_CUDA(cudaMemcpy(deviceClusters, dimClusters[0], 374 | numClusters*numCoords*sizeof(float), cudaMemcpyHostToDevice)); 375 | 376 | find_nearest_cluster 377 | <<< numClusterBlocks, numThreadsPerClusterBlock, clusterBlockSharedDataSize >>> 378 | (numCoords, numObjs, numClusters, 379 | deviceObjects, deviceClusters, deviceMembership, deviceIntermediates); 380 | 381 | cudaDeviceSynchronize(); 382 | CHECK_CUDA(cudaGetLastError()); 383 | 384 | compute_delta <<< 1, numReductionThreads, reductionBlockSharedDataSize >>> 385 | (deviceIntermediates, numClusterBlocks, numReductionThreads); 386 | 387 | cudaDeviceSynchronize(); 388 | CHECK_CUDA(cudaGetLastError()); 389 | 390 | int d; 391 | CHECK_CUDA(cudaMemcpy(&d, deviceIntermediates, 392 | sizeof(int), cudaMemcpyDeviceToHost)); 393 | delta = (float)d; 394 | 395 | CHECK_CUDA(cudaMemcpy(membership, deviceMembership, 396 | numObjs*sizeof(int), cudaMemcpyDeviceToHost)); 397 | 398 | for (i=0; i 0) 414 | dimClusters[j][i] = newClusters[j][i] / newClusterSize[i]; 415 
| newClusters[j][i] = 0.0; /* set back to 0 */ 416 | } 417 | newClusterSize[i] = 0; /* set back to 0 */ 418 | } 419 | 420 | delta /= numObjs; 421 | } while (delta > threshold && loop++ < 500); 422 | 423 | *loop_iterations = loop + 1; 424 | 425 | /* allocate a 2D space for returning variable clusters[] (coordinates 426 | of cluster centers) */ 427 | malloc2D(clusters, numClusters, numCoords, float); 428 | for (i = 0; i < numClusters; i++) { 429 | for (j = 0; j < numCoords; j++) { 430 | clusters[i][j] = dimClusters[j][i]; 431 | } 432 | } 433 | 434 | CHECK_CUDA(cudaFree(deviceObjects)); 435 | CHECK_CUDA(cudaFree(deviceClusters)); 436 | CHECK_CUDA(cudaFree(deviceMembership)); 437 | CHECK_CUDA(cudaFree(deviceIntermediates)); 438 | 439 | free(dimObjects[0]); 440 | free(dimObjects); 441 | free(dimClusters[0]); 442 | free(dimClusters); 443 | free(newClusters[0]); 444 | free(newClusters); 445 | free(newClusterSize); 446 | 447 | return clusters; 448 | } 449 | 450 | /******************************************************************************/ 451 | 452 | __global__ static 453 | void update_cluster(const float* objects, const int* membership, float* clusters 454 | , const int nCoords, const int nObjs, const int nClusters 455 | , const int rowPerThread, const int colPerThread) 456 | { 457 | for (int cIdx = 0; cIdx < colPerThread; ++cIdx) 458 | { 459 | int c = cIdx * gridDim.y * blockDim.y + blockIdx.y * blockDim.y + threadIdx.y; 460 | if (c >= nClusters) 461 | break; 462 | 463 | for (int rIdx = 0; rIdx < rowPerThread; ++rIdx) 464 | { 465 | int r = rIdx * gridDim.x * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; 466 | if (r >= nCoords) 467 | break; 468 | 469 | float sumVal(0); 470 | int clusterCount(0); 471 | for (int i = 0; i < nObjs; ++i) 472 | { 473 | if (membership[i] == c) 474 | { 475 | sumVal += objects[r*nObjs + i]; 476 | clusterCount++; 477 | } 478 | } 479 | if (clusterCount > 0) 480 | clusters[nClusters*r+c] = sumVal / clusterCount; 481 | } 482 | } 483 | } 484 
| 485 | __global__ static 486 | void copy_rows(const float* src, const int sz1, const int sz2 487 | , const int copiedRows, float* dest 488 | , const int rowPerThread, const int colPerThread) 489 | { 490 | for (int rIdx = 0; rIdx < rowPerThread; ++rIdx) 491 | { 492 | int r = rIdx * gridDim.x * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; 493 | if (r >= copiedRows) 494 | break; 495 | 496 | for (int cIdx = 0; cIdx < colPerThread; ++cIdx) 497 | { 498 | int c = cIdx * gridDim.y * blockDim.y + blockIdx.y * blockDim.y + threadIdx.y; 499 | if (c >= sz2) 500 | break; 501 | dest[c*copiedRows+r] = src[c*sz1+r]; 502 | } 503 | } 504 | } 505 | 506 | int kMeans(float *deviceObjects, /* in: [numObjs][numCoords] */ 507 | int numCoords, /* no. features */ 508 | int numObjs, /* no. objects */ 509 | int numClusters, /* no. clusters */ 510 | float threshold, /* % objects change membership */ 511 | int maxLoop, /* maximum number of loops */ 512 | int *membership, /* out: [numObjs] */ 513 | float *deviceClusters) 514 | { 515 | int loop(0); 516 | float delta; /* % of objects change their clusters */ 517 | int *deviceMembership; 518 | int *deviceIntermediates; 519 | 520 | CHECK_PARAM(deviceClusters, "deviceClusters cannot be NULL"); 521 | 522 | // To support reduction, numThreadsPerClusterBlock *must* be a power of 523 | // two, and it *must* be no larger than the number of bits that will 524 | // fit into an unsigned char, the type used to keep track of membership 525 | // changes in the kernel. 
526 | const unsigned int numThreadsPerClusterBlock = 128; 527 | const unsigned int numClusterBlocks = 528 | (numObjs + numThreadsPerClusterBlock - 1) / numThreadsPerClusterBlock; 529 | #if BLOCK_SHARED_MEM_OPTIMIZATION 530 | const unsigned int clusterBlockSharedDataSize = 531 | numThreadsPerClusterBlock * sizeof(unsigned char) + 532 | numClusters * numCoords * sizeof(float); 533 | 534 | cudaDeviceProp deviceProp; 535 | int deviceNum; 536 | cudaGetDevice(&deviceNum); 537 | cudaGetDeviceProperties(&deviceProp, deviceNum); 538 | 539 | if (clusterBlockSharedDataSize > deviceProp.sharedMemPerBlock) { 540 | err("WARNING: Your CUDA hardware has insufficient block shared memory. " 541 | "You need to recompile with BLOCK_SHARED_MEM_OPTIMIZATION=0. " 542 | "See the README for details.\n"); 543 | } 544 | #else 545 | const unsigned int clusterBlockSharedDataSize = 546 | numThreadsPerClusterBlock * sizeof(unsigned char); 547 | #endif 548 | 549 | const unsigned int numReductionThreads = nextPowerOfTwo(numClusterBlocks); 550 | const unsigned int reductionBlockSharedDataSize = numReductionThreads * sizeof(unsigned int); 551 | 552 | CHECK_CUDA(cudaMalloc(&deviceMembership, numObjs*sizeof(int))); 553 | CHECK_CUDA(cudaMalloc(&deviceIntermediates, numReductionThreads*sizeof(unsigned int))); 554 | 555 | // initialize membership[] 556 | if (membership) 557 | { 558 | for (int i=0; i>>(deviceObjects, numObjs, numCoords 580 | , numClusters, deviceClusters, rowPerThread, colPerThread); 581 | 582 | do 583 | { 584 | find_nearest_cluster 585 | <<< numClusterBlocks, numThreadsPerClusterBlock, clusterBlockSharedDataSize >>> 586 | (numCoords, numObjs, numClusters, 587 | deviceObjects, deviceClusters, deviceMembership, deviceIntermediates); 588 | 589 | //cudaDeviceSynchronize(); 590 | //CHECK_CUDA(cudaGetLastError()); 591 | 592 | compute_delta <<< 1, numReductionThreads, reductionBlockSharedDataSize >>> 593 | (deviceIntermediates, numClusterBlocks, numReductionThreads); 594 | 595 | 
//cudaDeviceSynchronize(); 596 | //CHECK_CUDA(cudaGetLastError()); 597 | 598 | get_kernel_config(numCoords, numClusters, szGrid, szBlock, rowPerThread, colPerThread); 599 | 600 | update_cluster <<< szGrid, szBlock >>> (deviceObjects, deviceMembership 601 | , deviceClusters, numCoords, numObjs, numClusters, rowPerThread, colPerThread); 602 | 603 | cudaDeviceSynchronize(); 604 | CHECK_CUDA(cudaGetLastError()); 605 | 606 | // inefficient memory transfer 607 | int d; 608 | CHECK_CUDA(cudaMemcpy(&d, deviceIntermediates, 609 | sizeof(int), cudaMemcpyDeviceToHost)); 610 | delta = (float)d/numObjs; 611 | } 612 | while (delta > threshold && loop++ < maxLoop); 613 | 614 | if (membership) 615 | { 616 | CHECK_CUDA(cudaMemcpy(membership, deviceMembership, 617 | numObjs*sizeof(int), cudaMemcpyDeviceToHost)); 618 | } 619 | CHECK_CUDA(cudaFree(deviceMembership)); 620 | CHECK_CUDA(cudaFree(deviceIntermediates)); 621 | 622 | return (loop + 1); 623 | } 624 | 625 | } -------------------------------------------------------------------------------- /kMeansCuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: kMeansCuda.h 3 | * Author: hvpham 4 | * 5 | * Created on December 22, 2013, 12:27 AM 6 | */ 7 | 8 | #ifndef KMEANSCUDA_H 9 | #define KMEANSCUDA_H 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace cuda 16 | { 17 | 18 | inline void checkCudaError(cudaError_t err 19 | , char const * file, unsigned int line) 20 | { 21 | if (err != cudaSuccess) 22 | { 23 | std::stringstream ss; 24 | ss << "CUDA error " << err << " at " << file << ":" << line; 25 | throw std::runtime_error(ss.str()); 26 | } 27 | } 28 | 29 | inline void check(bool bTrue, const char* msg 30 | , char const * file, unsigned int line) 31 | { 32 | if (!bTrue) 33 | { 34 | std::stringstream ss; 35 | ss << "Error: \"" << msg << "\" at " << file << ":" << line; 36 | throw std::runtime_error(ss.str()); 37 | } 38 | } 39 | 40 | #define CHECK_PARAM(x, msg) 
cuda::check((x), (msg), __FILE__, __LINE__) 41 | #define CHECK_CUDA(cudaError) cuda::checkCudaError((cudaError), __FILE__, __LINE__) 42 | 43 | // device memory, column-majored 44 | int kMeans(float *deviceObjects, /* in: [numObjs][numCoords] */ 45 | int numCoords, /* no. features */ 46 | int numObjs, /* no. objects */ 47 | int numClusters, /* no. clusters */ 48 | float threshold, /* % objects change membership */ 49 | int maxLoop, /* maximum number of loops */ 50 | int *membership, /* out: [numObjs] */ 51 | float *deviceClusters); 52 | 53 | // original version: host memory, row-majored 54 | float** kMeansHost(float **objects, /* in: [numObjs][numCoords] */ 55 | int numCoords, /* no. features */ 56 | int numObjs, /* no. objects */ 57 | int numClusters, /* no. clusters */ 58 | float threshold, /* % objects change membership */ 59 | int *membership, /* out: [numObjs] */ 60 | int *loop_iterations); 61 | } 62 | 63 | #endif /* KMEANSCUDA_H */ 64 | 65 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: main.cpp 3 | * Author: hvpham 4 | * 5 | * Created on December 22, 2013, 12:27 AM 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "kMeansCuda.h" 14 | 15 | float* createDataColMajored(int sz1, int sz2, bool cudaMalloc) 16 | { 17 | // I use malloc() here just to make it coherent with createDataRowMajored() 18 | // you should use the C++ way... 
19 | float* arr; 20 | if (cudaMalloc) 21 | CHECK_CUDA(cudaMallocHost(&arr, sz1*sz2*sizeof(float), cudaHostAllocDefault)); 22 | else 23 | arr = (float*)malloc(sz1*sz2*sizeof(float)); 24 | for (int i = 0; i < sz1; ++i) 25 | for (int j = 0; j < sz2; ++j) 26 | { 27 | arr[sz1*j + i] = i*100 + j; 28 | } 29 | return arr; 30 | } 31 | 32 | float** createDataRowMajored(int sz1, int sz2) 33 | { 34 | float** ret = (float**)malloc(sz1*sizeof(float*)); 35 | ret[0] = (float*)malloc(sz1*sz2*sizeof(float)); 36 | for (int i = 1; i < sz1; ++i) 37 | { 38 | ret[i] = ret[i-1] + sz2; 39 | } 40 | for (int i = 0; i < sz1; ++i) 41 | for (int j = 0; j < sz2; ++j) 42 | { 43 | ret[i][j] = i*100 + j; 44 | } 45 | return ret; 46 | } 47 | 48 | float* callkMeans1(float* hostData, int nObjs, int nDim, int nClusters, int*& membership) 49 | { 50 | float* devData, *devClusters, *hostClusters; 51 | CHECK_CUDA(cudaMalloc(&devData, nObjs*nDim*sizeof(float))); 52 | CHECK_CUDA(cudaMemcpy(devData, hostData, nObjs*nDim*sizeof(float), cudaMemcpyHostToDevice)); 53 | CHECK_CUDA(cudaMalloc(&devClusters, nClusters*nDim*sizeof(float))); 54 | if (membership) 55 | membership = new int[nObjs]; 56 | 57 | cuda::kMeans(devData, nDim, nObjs, nClusters, 0, 500, membership, devClusters); 58 | hostClusters = new float[nClusters*nDim*sizeof(float)]; 59 | 60 | CHECK_CUDA(cudaDeviceSynchronize()); 61 | CHECK_CUDA(cudaGetLastError()); 62 | CHECK_CUDA(cudaMemcpy(hostClusters, devClusters, nClusters*nDim*sizeof(float), cudaMemcpyDeviceToHost)); 63 | CHECK_CUDA(cudaFree(devData)); 64 | CHECK_CUDA(cudaFree(devClusters)); 65 | 66 | return hostClusters; 67 | } 68 | 69 | float** callkMeans2(float** hostData, int nObjs, int nDim, int nClusters, int*& membership) 70 | { 71 | int loops; 72 | membership = new int[nObjs]; 73 | return cuda::kMeansHost(hostData, nDim, nObjs, nClusters, 0, membership, &loops); 74 | } 75 | 76 | void checkCorrectness() 77 | { 78 | const int sz1 = 1024, sz2 = 1024, nClusters = 10; 79 | float* dataCm = 
createDataColMajored(sz1, sz2, false); 80 | float** dataRm = createDataRowMajored(sz1, sz2); 81 | int* membership1, *membership2; 82 | float *clusters1, **clusters2, *clusters3; 83 | 84 | clusters1 = callkMeans1(dataCm, sz1, sz2, nClusters, membership1); 85 | clusters2 = callkMeans2(dataRm, sz1, sz2, nClusters, membership2); 86 | 87 | for (int i = 0; i < sz1; ++i) 88 | { 89 | CHECK_PARAM(membership1[i] == membership2[i], "membership"); 90 | //if(membership1[i] != membership2[i]) 91 | // std::cout << "Membership " << i << " " << membership1[i] << " " << membership2[i] << std::endl; 92 | } 93 | 94 | for (int i = 0; i < nClusters; ++i) 95 | for (int j = 0; j < sz2; ++j) 96 | { 97 | CHECK_PARAM(std::abs(clusters1[nClusters*j + i] - clusters2[i][j]) <= 1E-2, "clusters"); 98 | //if(std::abs(clusters1[nClusters*j + i] - clusters2[i][j]) > 1E-2) 99 | // std::cout << "Clusters " << i << " " << j << " " << clusters1[nClusters*j + i] 100 | // << " " << clusters2[i][j] 101 | // << " " << clusters1[nClusters*j + i] - clusters2[i][j] << std::endl; 102 | } 103 | 104 | // membership = NULL is also fine 105 | int* dummyMembership = NULL; 106 | clusters3 = callkMeans1(dataCm, sz1, sz2, nClusters, dummyMembership); 107 | for (int i = 0; i < nClusters; ++i) 108 | for (int j = 0; j < sz2; ++j) 109 | { 110 | CHECK_PARAM(std::abs(clusters3[nClusters*j + i] - clusters2[i][j]) <= 1E-2, "clusters"); 111 | } 112 | 113 | delete[] membership1; 114 | delete[] membership2; 115 | delete[] clusters1; 116 | delete[] clusters3; 117 | free(clusters2[0]); 118 | free(clusters2); 119 | free(dataCm); 120 | free(dataRm[0]); 121 | free(dataRm); 122 | } 123 | 124 | void benchMark() 125 | { 126 | const int sz1 = 1024, sz2 = 1024, nClusters = 10; 127 | float* dataCm = createDataColMajored(sz1, sz2, true); 128 | float** dataRm = createDataRowMajored(sz1, sz2); 129 | int* membership1, *membership2; 130 | float *clusters1, **clusters2; 131 | const int TIMES = 100; 132 | 133 | { 134 | clock_t begin = clock(); 135 
| for (int i = 0; i < TIMES; ++i) 136 | clusters1 = callkMeans1(dataCm, sz1, sz2, nClusters, membership1); /* NOTE(review): each iteration overwrites clusters1/membership1 with what the delete[] calls below imply are freshly allocated buffers, yet only the final pair is released after the loop — the first TIMES-1 results appear to leak; confirm callkMeans1's allocation contract before relying on these timings. */ 137 | double elapsed_secs = double(clock() - begin) / CLOCKS_PER_SEC; 138 | std::cout << "callkMeans1: " << elapsed_secs << " secs" << std::endl; 139 | } 140 | 141 | { 142 | clock_t begin = clock(); 143 | for (int i = 0; i < TIMES; ++i) 144 | clusters2 = callkMeans2(dataRm, sz1, sz2, nClusters, membership2); /* NOTE(review): same apparent per-iteration leak as the callkMeans1 loop above. */ 145 | double elapsed_secs = double(clock() - begin) / CLOCKS_PER_SEC; 146 | std::cout << "callkMeans2: " << elapsed_secs << " secs" << std::endl; 147 | } 148 | 149 | delete[] membership1; 150 | delete[] membership2; 151 | delete[] clusters1; 152 | free(clusters2[0]); 153 | free(clusters2); 154 | CHECK_CUDA(cudaFreeHost(dataCm)); /* released via cudaFreeHost, so dataCm is presumably pinned host memory — unlike the plain free() used for the row-majored buffers. */ 155 | free(dataRm[0]); 156 | free(dataRm); 157 | } 158 | 159 | /* Entry point: first validates that both k-means call paths agree (checkCorrectness), then times them (benchMark); the commented figures below record one observed run. */ int main(int argc, char** argv) 160 | { 161 | checkCorrectness(); 162 | benchMark(); 163 | // callkMeans1: 116.61 secs 164 | // callkMeans2: 143.17 secs 165 | 166 | return 0; 167 | } 168 | 169 | -------------------------------------------------------------------------------- /nbproject/Makefile-Debug.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated Makefile - do not edit! 3 | # 4 | # Edit the Makefile in the project folder instead (../Makefile). Each target 5 | # has a -pre and a -post target defined where you can add customized code. 6 | # 7 | # This makefile implements configuration specific macros and targets. 
8 | 9 | 10 | # Environment 11 | MKDIR=mkdir 12 | CP=cp 13 | GREP=grep 14 | NM=nm 15 | CCADMIN=CCadmin 16 | RANLIB=ranlib 17 | CC=gcc 18 | CCC=nvcc 19 | CXX=nvcc 20 | FC=gfortran 21 | AS=as 22 | 23 | # Macros 24 | CND_PLATFORM=CUDA-Linux-x86 25 | CND_DLIB_EXT=so 26 | CND_CONF=Debug 27 | CND_DISTDIR=dist 28 | CND_BUILDDIR=build 29 | 30 | # Include project Makefile 31 | include Makefile 32 | 33 | # Object Directory 34 | OBJECTDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM} 35 | 36 | # Object Files 37 | OBJECTFILES= \ 38 | ${OBJECTDIR}/kMeansCuda.o \ 39 | ${OBJECTDIR}/main.o 40 | 41 | 42 | # C Compiler Flags 43 | CFLAGS= 44 | 45 | # CC Compiler Flags 46 | CCFLAGS= 47 | CXXFLAGS= 48 | 49 | # Fortran Compiler Flags 50 | FFLAGS= 51 | 52 | # Assembler Flags 53 | ASFLAGS= 54 | 55 | # Link Libraries and Options 56 | LDLIBSOPTIONS= 57 | 58 | # Build Targets 59 | .build-conf: ${BUILD_SUBPROJECTS} 60 | "${MAKE}" -f nbproject/Makefile-${CND_CONF}.mk ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda 61 | 62 | ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda: ${OBJECTFILES} 63 | ${MKDIR} -p ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM} 64 | ${LINK.cc} -o ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda ${OBJECTFILES} ${LDLIBSOPTIONS} 65 | 66 | ${OBJECTDIR}/kMeansCuda.o: kMeansCuda.cu 67 | ${MKDIR} -p ${OBJECTDIR} 68 | $(COMPILE.cc) -g -o ${OBJECTDIR}/kMeansCuda.o kMeansCuda.cu 69 | 70 | ${OBJECTDIR}/main.o: main.cpp 71 | ${MKDIR} -p ${OBJECTDIR} 72 | $(COMPILE.cc) -g -o ${OBJECTDIR}/main.o main.cpp 73 | 74 | # Subprojects 75 | .build-subprojects: 76 | 77 | # Clean Targets 78 | .clean-conf: ${CLEAN_SUBPROJECTS} 79 | ${RM} -r ${CND_BUILDDIR}/${CND_CONF} 80 | ${RM} ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda 81 | 82 | # Subprojects 83 | .clean-subprojects: 84 | -------------------------------------------------------------------------------- /nbproject/Makefile-Release.mk: -------------------------------------------------------------------------------- 1 | # 2 
| # Generated Makefile - do not edit! 3 | # 4 | # Edit the Makefile in the project folder instead (../Makefile). Each target 5 | # has a -pre and a -post target defined where you can add customized code. 6 | # 7 | # This makefile implements configuration specific macros and targets. 8 | 9 | 10 | # Environment 11 | MKDIR=mkdir 12 | CP=cp 13 | GREP=grep 14 | NM=nm 15 | CCADMIN=CCadmin 16 | RANLIB=ranlib 17 | CC=gcc 18 | CCC=nvcc 19 | CXX=nvcc 20 | FC=gfortran 21 | AS=as 22 | 23 | # Macros 24 | CND_PLATFORM=CUDA-Linux-x86 25 | CND_DLIB_EXT=so 26 | CND_CONF=Release 27 | CND_DISTDIR=dist 28 | CND_BUILDDIR=build 29 | 30 | # Include project Makefile 31 | include Makefile 32 | 33 | # Object Directory 34 | OBJECTDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM} 35 | 36 | # Object Files 37 | OBJECTFILES= \ 38 | ${OBJECTDIR}/kMeansCuda.o \ 39 | ${OBJECTDIR}/main.o 40 | 41 | 42 | # C Compiler Flags 43 | CFLAGS= 44 | 45 | # CC Compiler Flags 46 | CCFLAGS= 47 | CXXFLAGS= 48 | 49 | # Fortran Compiler Flags 50 | FFLAGS= 51 | 52 | # Assembler Flags 53 | ASFLAGS= 54 | 55 | # Link Libraries and Options 56 | LDLIBSOPTIONS= 57 | 58 | # Build Targets 59 | .build-conf: ${BUILD_SUBPROJECTS} 60 | "${MAKE}" -f nbproject/Makefile-${CND_CONF}.mk ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda 61 | 62 | ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda: ${OBJECTFILES} 63 | ${MKDIR} -p ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM} 64 | ${LINK.cc} -o ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda ${OBJECTFILES} ${LDLIBSOPTIONS} 65 | 66 | ${OBJECTDIR}/kMeansCuda.o: kMeansCuda.cu 67 | ${MKDIR} -p ${OBJECTDIR} 68 | $(COMPILE.cc) -O2 -o ${OBJECTDIR}/kMeansCuda.o kMeansCuda.cu 69 | 70 | ${OBJECTDIR}/main.o: main.cpp 71 | ${MKDIR} -p ${OBJECTDIR} 72 | $(COMPILE.cc) -O2 -o ${OBJECTDIR}/main.o main.cpp 73 | 74 | # Subprojects 75 | .build-subprojects: 76 | 77 | # Clean Targets 78 | .clean-conf: ${CLEAN_SUBPROJECTS} 79 | ${RM} -r ${CND_BUILDDIR}/${CND_CONF} 80 | ${RM} 
${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda 81 | 82 | # Subprojects 83 | .clean-subprojects: 84 | -------------------------------------------------------------------------------- /nbproject/Makefile-impl.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated Makefile - do not edit! 3 | # 4 | # Edit the Makefile in the project folder instead (../Makefile). Each target 5 | # has a pre- and a post- target defined where you can add customization code. 6 | # 7 | # This makefile implements macros and targets common to all configurations. 8 | # 9 | # NOCDDL 10 | 11 | 12 | # Building and Cleaning subprojects are done by default, but can be controlled with the SUB 13 | # macro. If SUB=no, subprojects will not be built or cleaned. The following macro 14 | # statements set BUILD_SUB-CONF and CLEAN_SUB-CONF to .build-reqprojects-conf 15 | # and .clean-reqprojects-conf unless SUB has the value 'no' 16 | SUB_no=NO 17 | SUBPROJECTS=${SUB_${SUB}} 18 | BUILD_SUBPROJECTS_=.build-subprojects 19 | BUILD_SUBPROJECTS_NO= 20 | BUILD_SUBPROJECTS=${BUILD_SUBPROJECTS_${SUBPROJECTS}} 21 | CLEAN_SUBPROJECTS_=.clean-subprojects 22 | CLEAN_SUBPROJECTS_NO= 23 | CLEAN_SUBPROJECTS=${CLEAN_SUBPROJECTS_${SUBPROJECTS}} 24 | 25 | 26 | # Project Name 27 | PROJECTNAME=kmeans_cuda 28 | 29 | # Active Configuration 30 | DEFAULTCONF=Debug 31 | CONF=${DEFAULTCONF} 32 | 33 | # All Configurations 34 | ALLCONFS=Debug Release 35 | 36 | 37 | # build 38 | .build-impl: .build-pre .validate-impl .depcheck-impl 39 | @#echo "=> Running $@... Configuration=$(CONF)" 40 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .build-conf 41 | 42 | 43 | # clean 44 | .clean-impl: .clean-pre .validate-impl .depcheck-impl 45 | @#echo "=> Running $@... 
Configuration=$(CONF)" 46 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .clean-conf 47 | 48 | 49 | # clobber 50 | .clobber-impl: .clobber-pre .depcheck-impl 51 | @#echo "=> Running $@..." 52 | for CONF in ${ALLCONFS}; \ 53 | do \ 54 | "${MAKE}" -f nbproject/Makefile-$${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .clean-conf; \ 55 | done 56 | 57 | # all 58 | .all-impl: .all-pre .depcheck-impl 59 | @#echo "=> Running $@..." 60 | for CONF in ${ALLCONFS}; \ 61 | do \ 62 | "${MAKE}" -f nbproject/Makefile-$${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .build-conf; \ 63 | done 64 | 65 | # build tests 66 | .build-tests-impl: .build-impl .build-tests-pre 67 | @#echo "=> Running $@... Configuration=$(CONF)" 68 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk SUBPROJECTS=${SUBPROJECTS} .build-tests-conf 69 | 70 | # run tests 71 | .test-impl: .build-tests-impl .test-pre 72 | @#echo "=> Running $@... Configuration=$(CONF)" 73 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk SUBPROJECTS=${SUBPROJECTS} .test-conf 74 | 75 | # dependency checking support 76 | .depcheck-impl: 77 | @echo "# This code depends on make tool being used" >.dep.inc 78 | @if [ -n "${MAKE_VERSION}" ]; then \ 79 | echo "DEPFILES=\$$(wildcard \$$(addsuffix .d, \$${OBJECTFILES}))" >>.dep.inc; \ 80 | echo "ifneq (\$${DEPFILES},)" >>.dep.inc; \ 81 | echo "include \$${DEPFILES}" >>.dep.inc; \ 82 | echo "endif" >>.dep.inc; \ 83 | else \ 84 | echo ".KEEP_STATE:" >>.dep.inc; \ 85 | echo ".KEEP_STATE_FILE:.make.state.\$${CONF}" >>.dep.inc; \ 86 | fi 87 | 88 | # configuration validation 89 | .validate-impl: 90 | @if [ ! -f nbproject/Makefile-${CONF}.mk ]; \ 91 | then \ 92 | echo ""; \ 93 | echo "Error: can not find the makefile for configuration '${CONF}' in project ${PROJECTNAME}"; \ 94 | echo "See 'make help' for details."; \ 95 | echo "Current directory: " `pwd`; \ 96 | echo ""; \ 97 | fi 98 | @if [ ! 
-f nbproject/Makefile-${CONF}.mk ]; \ 99 | then \ 100 | exit 1; \ 101 | fi 102 | 103 | 104 | # help 105 | .help-impl: .help-pre 106 | @echo "This makefile supports the following configurations:" 107 | @echo " ${ALLCONFS}" 108 | @echo "" 109 | @echo "and the following targets:" 110 | @echo " build (default target)" 111 | @echo " clean" 112 | @echo " clobber" 113 | @echo " all" 114 | @echo " help" 115 | @echo "" 116 | @echo "Makefile Usage:" 117 | @echo " make [CONF=] [SUB=no] build" 118 | @echo " make [CONF=] [SUB=no] clean" 119 | @echo " make [SUB=no] clobber" 120 | @echo " make [SUB=no] all" 121 | @echo " make help" 122 | @echo "" 123 | @echo "Target 'build' will build a specific configuration and, unless 'SUB=no'," 124 | @echo " also build subprojects." 125 | @echo "Target 'clean' will clean a specific configuration and, unless 'SUB=no'," 126 | @echo " also clean subprojects." 127 | @echo "Target 'clobber' will remove all built files from all configurations and," 128 | @echo " unless 'SUB=no', also from subprojects." 129 | @echo "Target 'all' will build all configurations and, unless 'SUB=no'," 130 | @echo " also build subprojects." 131 | @echo "Target 'help' prints this message." 132 | @echo "" 133 | 134 | -------------------------------------------------------------------------------- /nbproject/Makefile-variables.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated - do not edit! 
3 | # 4 | # NOCDDL 5 | # 6 | CND_BASEDIR=`pwd` 7 | CND_BUILDDIR=build 8 | CND_DISTDIR=dist 9 | # Debug configuration 10 | CND_PLATFORM_Debug=CUDA-Linux-x86 11 | CND_ARTIFACT_DIR_Debug=dist/Debug/CUDA-Linux-x86 12 | CND_ARTIFACT_NAME_Debug=kmeans_cuda 13 | CND_ARTIFACT_PATH_Debug=dist/Debug/CUDA-Linux-x86/kmeans_cuda 14 | CND_PACKAGE_DIR_Debug=dist/Debug/CUDA-Linux-x86/package 15 | CND_PACKAGE_NAME_Debug=kmeanscuda.tar 16 | CND_PACKAGE_PATH_Debug=dist/Debug/CUDA-Linux-x86/package/kmeanscuda.tar 17 | # Release configuration 18 | CND_PLATFORM_Release=CUDA-Linux-x86 19 | CND_ARTIFACT_DIR_Release=dist/Release/CUDA-Linux-x86 20 | CND_ARTIFACT_NAME_Release=kmeans_cuda 21 | CND_ARTIFACT_PATH_Release=dist/Release/CUDA-Linux-x86/kmeans_cuda 22 | CND_PACKAGE_DIR_Release=dist/Release/CUDA-Linux-x86/package 23 | CND_PACKAGE_NAME_Release=kmeanscuda.tar 24 | CND_PACKAGE_PATH_Release=dist/Release/CUDA-Linux-x86/package/kmeanscuda.tar 25 | # 26 | # include compiler specific variables 27 | # 28 | # dmake command 29 | ROOT:sh = test -f nbproject/private/Makefile-variables.mk || \ 30 | (mkdir -p nbproject/private && touch nbproject/private/Makefile-variables.mk) 31 | # 32 | # gmake command 33 | .PHONY: $(shell test -f nbproject/private/Makefile-variables.mk || (mkdir -p nbproject/private && touch nbproject/private/Makefile-variables.mk)) 34 | # 35 | include nbproject/private/Makefile-variables.mk 36 | -------------------------------------------------------------------------------- /nbproject/Package-Debug.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | # 4 | # Generated - do not edit! 
5 | # 6 | 7 | # Macros 8 | TOP=`pwd` 9 | CND_PLATFORM=CUDA-Linux-x86 10 | CND_CONF=Debug 11 | CND_DISTDIR=dist 12 | CND_BUILDDIR=build 13 | CND_DLIB_EXT=so 14 | NBTMPDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM}/tmp-packaging 15 | TMPDIRNAME=tmp-packaging 16 | OUTPUT_PATH=${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda 17 | OUTPUT_BASENAME=kmeans_cuda 18 | PACKAGE_TOP_DIR=kmeanscuda/ 19 | 20 | # Functions 21 | function checkReturnCode 22 | { 23 | rc=$? 24 | if [ $rc != 0 ] 25 | then 26 | exit $rc 27 | fi 28 | } 29 | function makeDirectory 30 | # $1 directory path 31 | # $2 permission (optional) 32 | { 33 | mkdir -p "$1" 34 | checkReturnCode 35 | if [ "$2" != "" ] 36 | then 37 | chmod $2 "$1" 38 | checkReturnCode 39 | fi 40 | } 41 | function copyFileToTmpDir 42 | # $1 from-file path 43 | # $2 to-file path 44 | # $3 permission 45 | { 46 | cp "$1" "$2" 47 | checkReturnCode 48 | if [ "$3" != "" ] 49 | then 50 | chmod $3 "$2" 51 | checkReturnCode 52 | fi 53 | } 54 | 55 | # Setup 56 | cd "${TOP}" 57 | mkdir -p ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package 58 | rm -rf ${NBTMPDIR} 59 | mkdir -p ${NBTMPDIR} 60 | 61 | # Copy files and create directories and links 62 | cd "${TOP}" 63 | makeDirectory "${NBTMPDIR}/kmeanscuda/bin" 64 | copyFileToTmpDir "${OUTPUT_PATH}" "${NBTMPDIR}/${PACKAGE_TOP_DIR}bin/${OUTPUT_BASENAME}" 0755 65 | 66 | 67 | # Generate tar file 68 | cd "${TOP}" 69 | rm -f ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/kmeanscuda.tar 70 | cd ${NBTMPDIR} 71 | tar -vcf ../../../../${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/kmeanscuda.tar * 72 | checkReturnCode 73 | 74 | # Cleanup 75 | cd "${TOP}" 76 | rm -rf ${NBTMPDIR} 77 | -------------------------------------------------------------------------------- /nbproject/Package-Release.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | # 4 | # Generated - do not edit! 
5 | # 6 | 7 | # Macros 8 | TOP=`pwd` 9 | CND_PLATFORM=CUDA-Linux-x86 10 | CND_CONF=Release 11 | CND_DISTDIR=dist 12 | CND_BUILDDIR=build 13 | CND_DLIB_EXT=so 14 | NBTMPDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM}/tmp-packaging 15 | TMPDIRNAME=tmp-packaging 16 | OUTPUT_PATH=${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/kmeans_cuda 17 | OUTPUT_BASENAME=kmeans_cuda 18 | PACKAGE_TOP_DIR=kmeanscuda/ 19 | 20 | # Functions 21 | function checkReturnCode 22 | { 23 | rc=$? 24 | if [ $rc != 0 ] 25 | then 26 | exit $rc 27 | fi 28 | } 29 | function makeDirectory 30 | # $1 directory path 31 | # $2 permission (optional) 32 | { 33 | mkdir -p "$1" 34 | checkReturnCode 35 | if [ "$2" != "" ] 36 | then 37 | chmod $2 "$1" 38 | checkReturnCode 39 | fi 40 | } 41 | function copyFileToTmpDir 42 | # $1 from-file path 43 | # $2 to-file path 44 | # $3 permission 45 | { 46 | cp "$1" "$2" 47 | checkReturnCode 48 | if [ "$3" != "" ] 49 | then 50 | chmod $3 "$2" 51 | checkReturnCode 52 | fi 53 | } 54 | 55 | # Setup 56 | cd "${TOP}" 57 | mkdir -p ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package 58 | rm -rf ${NBTMPDIR} 59 | mkdir -p ${NBTMPDIR} 60 | 61 | # Copy files and create directories and links 62 | cd "${TOP}" 63 | makeDirectory "${NBTMPDIR}/kmeanscuda/bin" 64 | copyFileToTmpDir "${OUTPUT_PATH}" "${NBTMPDIR}/${PACKAGE_TOP_DIR}bin/${OUTPUT_BASENAME}" 0755 65 | 66 | 67 | # Generate tar file 68 | cd "${TOP}" 69 | rm -f ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/kmeanscuda.tar 70 | cd ${NBTMPDIR} 71 | tar -vcf ../../../../${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/kmeanscuda.tar * 72 | checkReturnCode 73 | 74 | # Cleanup 75 | cd "${TOP}" 76 | rm -rf ${NBTMPDIR} 77 | -------------------------------------------------------------------------------- /nbproject/configurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | kMeansCuda.h 8 | 9 | 12 | 13 | 16 | kMeansCuda.cu 17 | main.cpp 18 | 19 | 23 | 24 | 28 | Makefile 29 | 
30 | 31 | Makefile 32 | 33 | 34 | 35 | CUDA|GNU 36 | false 37 | false 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | CUDA|GNU 51 | false 52 | false 53 | 54 | 55 | 56 | 5 57 | 58 | 59 | 5 60 | 61 | 62 | 5 63 | 64 | 65 | 5 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /nbproject/private/Makefile-variables.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated - do not edit! 3 | # 4 | # NOCDDL 5 | # 6 | # Debug configuration 7 | # Release configuration 8 | -------------------------------------------------------------------------------- /nbproject/private/configurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Makefile 4 | 5 | 6 | 7 | localhost 8 | 2 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | gdb 24 | 25 | 26 | 27 | "${OUTPUT_PATH}" 28 | 29 | "${OUTPUT_PATH}" 30 | 31 | true 32 | 0 33 | 0 34 | 35 | 36 | 37 | 38 | 39 | 40 | localhost 41 | 2 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | gdb 57 | 58 | 59 | 60 | "${OUTPUT_PATH}" 61 | 62 | "${OUTPUT_PATH}" 63 | 64 | true 65 | 0 66 | 0 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /nbproject/private/launcher.properties: -------------------------------------------------------------------------------- 1 | # Launchers File syntax: 2 | # 3 | # [Must-have property line] 4 | # launcher1.runCommand= 5 | # [Optional extra properties] 6 | # launcher1.displayName= 7 | # launcher1.buildCommand= 8 | # launcher1.runDir= 9 | # launcher1.symbolFiles= 10 | # launcher1.env.= 11 | # (If this value is quoted with ` it is handled as a native command which execution result will become the value) 12 | # [Common launcher properties] 13 | # common.runDir= 14 | # (This value is overwritten by a launcher 
specific runDir value if the latter exists) 15 | # common.env.= 16 | # (Environment variables from common launcher are merged with launcher specific variables) 17 | # common.symbolFiles= 18 | # (This value is overwritten by a launcher specific symbolFiles value if the latter exists) 19 | # 20 | # In runDir, symbolFiles and env fields you can use these macros: 21 | # ${PROJECT_DIR} - project directory absolute path 22 | # ${OUTPUT_PATH} - linker output path (relative to project directory path) 23 | # ${OUTPUT_BASENAME}- linker output filename 24 | # ${TESTDIR} - test files directory (relative to project directory path) 25 | # ${OBJECTDIR} - object files directory (relative to project directory path) 26 | # ${CND_DISTDIR} - distribution directory (relative to project directory path) 27 | # ${CND_BUILDDIR} - build directory (relative to project directory path) 28 | # ${CND_PLATFORM} - platform name 29 | # ${CND_CONF} - configuration name 30 | # ${CND_DLIB_EXT} - dynamic library extension 31 | # 32 | # All the project launchers must be listed in the file! 33 | # 34 | # launcher1.runCommand=... 35 | # launcher2.runCommand=... 36 | # ... 37 | # common.runDir=... 
38 | # common.env.KEY=VALUE 39 | 40 | # launcher1.runCommand= -------------------------------------------------------------------------------- /nbproject/private/private.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1 5 | 0 6 | 7 | 8 | 9 | 10 | file:/home/hvpham/code/kmeans_cuda/kMeansCuda.cu 11 | file:/home/hvpham/code/kmeans_cuda/main.cpp 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /nbproject/project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | org.netbeans.modules.cnd.makeproject 4 | 5 | 6 | kMeansCuda 7 | 8 | cpp,cu 9 | h 10 | UTF-8 11 | 12 | 13 | 14 | 15 | Debug 16 | 1 17 | 18 | 19 | Release 20 | 1 21 | 22 | 23 | 24 | false 25 | 26 | 27 | 28 | 29 | --------------------------------------------------------------------------------