├── .gitignore
├── CMakeLists.txt
├── README.rst
├── cuda
    ├── CMakeLists.txt
    ├── cuda-test.cu
    ├── fft.cu
    └── hello-cuda.cu
├── fluidsGL
    ├── CMakeLists.txt
    ├── defines.h
    ├── fluidsGL.cpp
    ├── fluidsGLFW.cpp
    ├── fluidsGL_kernels.cu
    ├── fluidsGL_kernels.h
    ├── fluidsQt.cpp
    ├── glfluids.cpp
    ├── glfluids.h
    ├── helper_timer.h
    ├── mainwindow.cpp
    └── mainwindow.h
├── gears
    ├── CMakeLists.txt
    ├── gears.c
    ├── gears.h
    ├── gears_GLUT.cpp
    ├── gears_Qt.cpp
    ├── gears_SDL.c
    ├── glwidget.cpp
    ├── glwidget.h
    ├── mainwindow.cpp
    └── mainwindow.h
└── thrust
    ├── CMakeLists.txt
    ├── device-vector.cu
    ├── random.cu
    ├── sort.cu
    ├── sum.cu
    ├── transform_reduce.cu
    ├── transformations.cu
    └── version.cu


/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | *.user
3 | *~
4 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.11)
 2 | 
 3 | project(cuda-examples)
 4 | 
 5 | add_subdirectory(cuda)
 6 | 
 7 | add_subdirectory(gears)
 8 | 
 9 | add_subdirectory(thrust)
10 | 
11 | add_subdirectory(fluidsGL)
12 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | CUDA Examples
 2 | ======================
 3 | 
 4 | Some CUDA, CUFFT and OpenGL examples.
 5 | 
 6 | Prerequisites
 7 | --------------
 8 | 
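 9 | First install the prerequisites. The fluidsGL targets additionally require GLEW and GLFW 3 (``fluidsGL/CMakeLists.txt`` marks both as REQUIRED), so add your distribution's packages for them to the commands below.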
10 | 
11 | On Ubuntu
12 | 
13 | ::
14 | 
15 |     sudo apt-get install cmake nvidia-cuda-toolkit freeglut3-dev libxmu-dev libxi-dev libsdl1.2-dev
16 | 
17 | On Arch Linux
18 | 
19 | ::
20 | 
21 |     sudo pacman -S cmake cuda freeglut glu sdl2
22 | 
23 | Build
24 | ------
25 | 
26 | ::
27 | 
28 |     mkdir build
29 |     cd build
30 |     cmake ..
31 |     make
32 |     
33 | 
34 | Run
35 | -------
36 | 
37 | ::
38 | 
39 |     fluidsGL/fluidsGL
40 | 


--------------------------------------------------------------------------------
/cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | find_package(CUDA REQUIRED)
 2 | 
 3 | cuda_add_executable(hello-cuda hello-cuda.cu)
 4 | 
 5 | cuda_add_executable(cuda-test cuda-test.cu)
 6 | 
 7 | cuda_add_executable(fft fft.cu)
 8 | 
 9 | cuda_add_cufft_to_target(fft)
10 | 
11 | 


--------------------------------------------------------------------------------
/cuda/cuda-test.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <cuda.h>
 4 | 
 5 | // Copies the bytes of "Hello CUDA\n" (terminator included) into a, one byte per thread.
 6 | __global__ void print(char *a,int N)
 7 | {
 8 |     const char msg[]="Hello CUDA\n";
 9 |     int idx=blockIdx.x*blockDim.x + threadIdx.x;
10 |     // Bound by N (caller's buffer) and by the message length, so N>12 cannot read past msg.
11 |     if(idx<N && idx<(int)sizeof(msg))
12 |         a[idx]=msg[idx];
13 | }
14 | 
15 | // Host driver: lets print<<<>>> fill a zeroed buffer and echoes it; EXIT_FAILURE on any alloc/CUDA error.
16 | int main(void)
17 | {
18 |     const int N=12;
19 |     const size_t size=N*sizeof(char);
20 |     char *a_d=NULL;
21 |     char *a_h=(char *)calloc(N,sizeof(char));   // zero-filled host buffer
22 |     if(a_h==NULL) { fprintf(stderr,"host allocation failed\n"); return EXIT_FAILURE; }
23 |     cudaError_t err=cudaMalloc((void **)&a_d,size);
24 |     if(err==cudaSuccess) err=cudaMemcpy(a_d,a_h,size,cudaMemcpyHostToDevice);
25 |     if(err==cudaSuccess)
26 |     {
27 |         const int blocksize=4;
28 |         const int nblock=(N+blocksize-1)/blocksize;   // ceil-div: a partial last block is still launched
29 |         print<<<nblock,blocksize>>>(a_d,N);
30 |         err=cudaGetLastError();                        // launch errors are not returned by <<< >>>
31 |     }
32 |     if(err==cudaSuccess) err=cudaMemcpy(a_h,a_d,size,cudaMemcpyDeviceToHost);   // blocking copy
33 |     if(err==cudaSuccess)
34 |         for(int i=0;i<N;i++) printf("%c",a_h[i]);
35 |     else
36 |         fprintf(stderr,"CUDA error: %s\n",cudaGetErrorString(err));
37 |     free(a_h);
38 |     cudaFree(a_d);   // a_d is still NULL if cudaMalloc failed, which cudaFree accepts
39 |     return err==cudaSuccess ? EXIT_SUCCESS : EXIT_FAILURE;
40 | }
41 | 


--------------------------------------------------------------------------------
/cuda/fft.cu:
--------------------------------------------------------------------------------
  1 | /* Example showing the use of CUFFT for fast 1D-convolution using FFT. */
  2 | 
  3 | // includes, system
  4 | #include <stdlib.h>
  5 | #include <stdio.h>
  6 | #include <string.h>
  7 | #include <math.h>
  8 | 
  9 | // includes, project
 10 | #include <cufft.h>
 11 | 
 12 | // Complex data type
 13 | typedef float2 Complex;
 14 | static __device__ __host__ inline Complex ComplexAdd(Complex, Complex);
 15 | static __device__ __host__ inline Complex ComplexScale(Complex, float);
 16 | static __device__ __host__ inline Complex ComplexMul(Complex, Complex);
 17 | static __global__ void ComplexPointwiseMulAndScale(Complex*, const Complex*, int, float);
 18 | 
 19 | // Filtering functions
 20 | void Convolve(const Complex*, int, const Complex*, int, Complex*);
 21 | 
 22 | // Padding functions
 23 | int PadData(const Complex*, Complex**, int,
 24 |             const Complex*, Complex**, int);
 25 | 
 26 | ////////////////////////////////////////////////////////////////////////////////
 27 | // declaration, forward
 28 | void runTest(int argc, char** argv);
 29 | 
 30 | // The filter size is assumed to be a number smaller than the signal size
 31 | #define SIGNAL_SIZE        50
 32 | #define FILTER_KERNEL_SIZE 11
 33 | 
 34 | ////////////////////////////////////////////////////////////////////////////////
 35 | // Program entry point: forwards the command line to runTest()
 36 | ////////////////////////////////////////////////////////////////////////////////
 37 | int main(int argc, char** argv)
 38 | {
 39 |     runTest(argc, argv);
 40 | }
 41 | 
 42 | ////////////////////////////////////////////////////////////////////////////////
 43 | //! Run the CUFFT convolution on the device and compare it with the host reference.
 44 | //! argc/argv are accepted for interface compatibility and not used.
 45 | ////////////////////////////////////////////////////////////////////////////////
 46 | void runTest(int argc, char** argv)
 47 | {
 48 |     printf("[simpleCUFFT] is starting...\n");
 49 | 
 50 |     // Allocate host memory for the signal, the filter and the host reference result
 51 |     Complex* h_signal = (Complex*)malloc(sizeof(Complex) * SIGNAL_SIZE);
 52 |     Complex* h_filter_kernel = (Complex*)malloc(sizeof(Complex) * FILTER_KERNEL_SIZE);
 53 |     Complex* h_convolved_signal_ref = (Complex*)malloc(sizeof(Complex) * SIGNAL_SIZE);
 54 |     if (h_signal == NULL || h_filter_kernel == NULL || h_convolved_signal_ref == NULL) {
 55 |         fprintf(stderr, "Host allocation failed\n");
 56 |         free(h_signal);
 57 |         free(h_filter_kernel);
 58 |         free(h_convolved_signal_ref);
 59 |         return;
 60 |     }
 61 | 
 62 |     // Initialize the signal, then the filter: random real parts, zero imaginary parts
 63 |     for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
 64 |         h_signal[i].x = rand() / (float)RAND_MAX;
 65 |         h_signal[i].y = 0;
 66 |     }
 67 |     for (unsigned int i = 0; i < FILTER_KERNEL_SIZE; ++i) {
 68 |         h_filter_kernel[i].x = rand() / (float)RAND_MAX;
 69 |         h_filter_kernel[i].y = 0;
 70 |     }
 71 | 
 72 |     // Pad signal and filter kernel
 73 |     Complex* h_padded_signal;
 74 |     Complex* h_padded_filter_kernel;
 75 |     int new_size = PadData(h_signal, &h_padded_signal, SIGNAL_SIZE,
 76 |                            h_filter_kernel, &h_padded_filter_kernel, FILTER_KERNEL_SIZE);
 77 |     size_t mem_size = sizeof(Complex) * new_size;
 78 | 
 79 |     // Device buffers. cuda_status / fft_status keep the first failure and gate every later step.
 80 |     Complex* d_signal = NULL;
 81 |     Complex* d_filter_kernel = NULL;
 82 |     cufftHandle plan = 0;
 83 |     cufftResult fft_status = CUFFT_SUCCESS;
 84 |     cudaError_t cuda_status = cudaMalloc((void**)&d_signal, mem_size);
 85 |     if (cuda_status == cudaSuccess)
 86 |         cuda_status = cudaMalloc((void**)&d_filter_kernel, mem_size);
 87 |     if (cuda_status == cudaSuccess)
 88 |         cuda_status = cudaMemcpy(d_signal, h_padded_signal, mem_size, cudaMemcpyHostToDevice);
 89 |     if (cuda_status == cudaSuccess)
 90 |         cuda_status = cudaMemcpy(d_filter_kernel, h_padded_filter_kernel, mem_size, cudaMemcpyHostToDevice);
 91 | 
 92 |     // CUFFT plan
 93 |     bool have_plan = false;
 94 |     if (cuda_status == cudaSuccess) {
 95 |         fft_status = cufftPlan1d(&plan, new_size, CUFFT_C2C, 1);
 96 |         have_plan = (fft_status == CUFFT_SUCCESS);
 97 |     }
 98 | 
 99 |     if (have_plan) {
100 |         // Transform signal and kernel
101 |         printf("Transforming signal cufftExecC2C\n");
102 |         fft_status = cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD);
103 |         if (fft_status == CUFFT_SUCCESS)
104 |             fft_status = cufftExecC2C(plan, (cufftComplex *)d_filter_kernel, (cufftComplex *)d_filter_kernel, CUFFT_FORWARD);
105 |     }
106 |     if (have_plan && fft_status == CUFFT_SUCCESS) {
107 |         // Multiply the coefficients together and normalize the result
108 |         printf("Launching ComplexPointwiseMulAndScale<<< >>>\n");
109 |         ComplexPointwiseMulAndScale<<<32, 256>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size);
110 |         cuda_status = cudaGetLastError();   // launch errors are not returned by <<< >>>
111 |     }
112 |     if (have_plan && fft_status == CUFFT_SUCCESS && cuda_status == cudaSuccess) {
113 |         // Transform signal back
114 |         printf("Transforming signal back cufftExecC2C\n");
115 |         fft_status = cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE);
116 |     }
117 | 
118 |     // Copy device memory to host, reusing the padded signal buffer for the result
119 |     Complex* h_convolved_signal = h_padded_signal;
120 |     if (fft_status == CUFFT_SUCCESS && cuda_status == cudaSuccess)
121 |         cuda_status = cudaMemcpy(h_convolved_signal, d_signal, mem_size, cudaMemcpyDeviceToHost);
122 | 
123 |     if (fft_status != CUFFT_SUCCESS || cuda_status != cudaSuccess) {
124 |         fprintf(stderr, "GPU convolution failed (CUDA: %s, CUFFT status %d)\n",
125 |                 cudaGetErrorString(cuda_status), (int)fft_status);
126 |     } else {
127 |         // Convolve on the host and report the relative L2 error of the first SIGNAL_SIZE GPU samples
128 |         Convolve(h_signal, SIGNAL_SIZE,
129 |                  h_filter_kernel, FILTER_KERNEL_SIZE,
130 |                  h_convolved_signal_ref);
131 |         double err2 = 0.0, ref2 = 0.0;
132 |         for (int i = 0; i < SIGNAL_SIZE; ++i) {
133 |             const double dx = (double)h_convolved_signal[i].x - h_convolved_signal_ref[i].x;
134 |             const double dy = (double)h_convolved_signal[i].y - h_convolved_signal_ref[i].y;
135 |             err2 += dx * dx + dy * dy;
136 |             ref2 += (double)h_convolved_signal_ref[i].x * h_convolved_signal_ref[i].x
137 |                   + (double)h_convolved_signal_ref[i].y * h_convolved_signal_ref[i].y;
138 |         }
139 |         const double rel = ref2 > 0.0 ? sqrt(err2 / ref2) : sqrt(err2);
140 |         // 1e-5 is the tolerance of the upstream simpleCUFFT sample -- TODO confirm it suits the target GPU
141 |         printf("Relative L2 error vs. host reference: %g (%s)\n", rel, rel < 1e-5 ? "PASSED" : "FAILED");
142 |     }
143 | 
144 |     // Destroy CUFFT context (only if the plan was actually created)
145 |     if (have_plan)
146 |         cufftDestroy(plan);
147 | 
148 |     // cleanup memory; free(NULL) and cudaFree(NULL) are no-ops on the failure paths
149 |     free(h_signal);
150 |     free(h_filter_kernel);
151 |     free(h_padded_signal);
152 |     free(h_padded_filter_kernel);
153 |     free(h_convolved_signal_ref);
154 |     cudaFree(d_signal);
155 |     cudaFree(d_filter_kernel);
156 | }
130 | 
131 | // Pad data: zero-extend the signal and wrap the filter around the ends of an equally long buffer
132 | int PadData(const Complex* signal, Complex** padded_signal, int signal_size,
133 |             const Complex* filter_kernel, Complex** padded_filter_kernel, int filter_kernel_size)
134 | {
135 |     const int minRadius = filter_kernel_size / 2;
136 |     const int maxRadius = filter_kernel_size - minRadius;
137 |     const int new_size = signal_size + maxRadius;
138 |     const size_t elem = sizeof(Complex);
139 | 
140 |     // Signal: the original samples followed by zeros up to new_size
141 |     Complex* sig = (Complex*)malloc(elem * new_size);
142 |     memcpy(sig, signal, signal_size * elem);
143 |     memset(sig + signal_size, 0, (new_size - signal_size) * elem);
144 |     *padded_signal = sig;
145 | 
146 |     // Filter: taps [minRadius, size) at the front, taps [0, minRadius) at the back, zeros in between
147 |     Complex* flt = (Complex*)malloc(elem * new_size);
148 |     memcpy(flt, filter_kernel + minRadius, maxRadius * elem);
149 |     memset(flt + maxRadius, 0, (new_size - filter_kernel_size) * elem);
150 |     memcpy(flt + new_size - minRadius, filter_kernel, minRadius * elem);
151 |     *padded_filter_kernel = flt;
152 |     return new_size;
153 | }
154 | 
155 | ////////////////////////////////////////////////////////////////////////////////
156 | // Filtering operations
157 | ////////////////////////////////////////////////////////////////////////////////
158 | 
159 | // Computes the convolution directly on the host (no FFT), one output per input sample
160 | void Convolve(const Complex* signal, int signal_size,
161 |               const Complex* filter_kernel, int filter_kernel_size,
162 |               Complex* filtered_signal)
163 | {
164 |     const int minRadius = filter_kernel_size / 2;
165 |     // Loop over output element indices
166 |     for (int out = 0; out < signal_size; ++out) {
167 |         Complex& acc = filtered_signal[out];
168 |         acc.x = acc.y = 0;
169 |         // Walk the taps from last to first; tap t pairs with input sample out + minRadius - t
170 |         for (int tap = filter_kernel_size - 1; tap >= 0; --tap) {
171 |             const int src = out + minRadius - tap;
172 |             if (src < 0 || src >= signal_size) continue;   // tap falls outside the signal
173 |             acc = ComplexAdd(acc, ComplexMul(signal[src], filter_kernel[tap]));
174 |         }
175 |     }
176 | }
177 | 
178 | ////////////////////////////////////////////////////////////////////////////////
179 | // Complex operations
180 | ////////////////////////////////////////////////////////////////////////////////
181 | 
182 | // Complex addition: component-wise sum a + b
183 | static __device__ __host__ inline Complex ComplexAdd(Complex a, Complex b)
184 | {
185 |     // a arrives by value, so it doubles as the accumulator
186 |     a.x += b.x;
187 |     a.y += b.y;
188 |     return a;
189 | }
190 | 
191 | // Complex scale: multiply both components of a by the real factor s
192 | static __device__ __host__ inline Complex ComplexScale(Complex a, float s)
193 | {
194 |     // a is a by-value copy; scaling it in place leaves the caller's value untouched
195 |     a.x *= s;
196 |     a.y *= s;
197 |     return a;
198 | }
199 | 
200 | // Complex multiplication: (a.x + i*a.y) * (b.x + i*b.y)
201 | static __device__ __host__ inline Complex ComplexMul(Complex a, Complex b)
202 | {
203 |     const float re = a.x * b.x - a.y * b.y;
204 |     const float im = a.x * b.y + a.y * b.x;
205 |     const Complex prod = { re, im };
206 |     return prod;
207 | }
208 | 
209 | // Kernel: a[i] <- scale * (a[i] * b[i]) for every i in [0, size), whatever the 1D launch size
210 | static __global__ void ComplexPointwiseMulAndScale(Complex* a, const Complex* b, int size, float scale)
211 | {
212 |     const int stride = blockDim.x * gridDim.x;   // total number of threads in the launch
213 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
214 |     for (; i < size; i += stride)
215 |         a[i] = ComplexScale(ComplexMul(a[i], b[i]), scale);
216 | }
217 | 


--------------------------------------------------------------------------------
/cuda/hello-cuda.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 | ** Hello World using CUDA
 3 | **
 4 | ** The string "Hello World!" is mangled then restored using a common CUDA idiom
 5 | **
 6 | ** Byron Galbraith
 7 | ** 2009-02-18
 8 | */
 9 | #include <cuda.h>
10 | #include <stdio.h>
11 | 
12 | // Device kernel: each thread restores one character by adding back its global index
13 | __global__ void
14 | helloWorld(char* str)
15 | {
16 |     // flat position of this thread across all blocks of the 1D launch
17 |     const int pos = blockIdx.x * blockDim.x + threadIdx.x;
18 | 
19 |     // no length check here: the launch has to supply exactly one thread per mangled character
20 |     str[pos] = (char)(str[pos] + pos);
21 | }
22 | 
23 | // Host function: returns 0 after printing the restored string, 1 if any CUDA call failed
24 | int
25 | main(int argc, char** argv)
26 | {
27 |     // desired output
28 |     char str[] = "Hello World!";
29 | 
30 |     // mangle contents of output
31 |     // the null character is left intact for simplicity
32 |     for(int i = 0; i < 12; i++)
33 |         str[i] -= i;
34 | 
35 |     // allocate memory on the device and copy the string to it
36 |     char *d_str = NULL;
37 |     size_t size = sizeof(str);
38 |     cudaError_t err = cudaMalloc((void**)&d_str, size);
39 |     if(err == cudaSuccess)
40 |         err = cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
41 | 
42 |     if(err == cudaSuccess) {
43 |         // set the grid and block sizes
44 |         dim3 dimGrid(2);   // one block per word
45 |         dim3 dimBlock(6); // one thread per character
46 |         // invoke the kernel; a failed launch is only visible through cudaGetLastError()
47 |         helloWorld<<< dimGrid, dimBlock >>>(d_str);
48 |         err = cudaGetLastError();
49 |     }
50 |     // retrieve the results from the device (blocking copy)
51 |     if(err == cudaSuccess)
52 |         err = cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
53 |     // free up the allocated memory on the device (d_str is still NULL if cudaMalloc failed)
54 |     cudaFree(d_str);
55 |     if(err != cudaSuccess) {
56 |         fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
57 |         return 1;
58 |     }
59 |     // everyone's favorite part
60 |     printf("%s\n", str);
61 |     return 0;
62 | }
63 | 


--------------------------------------------------------------------------------
/fluidsGL/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | find_package(CUDA REQUIRED)
 2 | find_package(OpenGL REQUIRED)
 3 | find_package(GLEW REQUIRED)
 4 | find_package(GLUT REQUIRED)
 5 | # pkg_check_modules() only exists once PkgConfig has been found, so PkgConfig must be REQUIRED too.
 6 | find_package(PkgConfig REQUIRED)
 7 | pkg_check_modules(GLFW REQUIRED glfw3)
 8 | 
 9 | set(CMAKE_INCLUDE_CURRENT_DIR ON)
10 | 
11 | include_directories(${OPENGL_INCLUDE_DIR} ${GLUT_INCLUDE_DIR} ${GLEW_INCLUDE_DIRS} ${GLFW_INCLUDE_DIRS})
12 | 
13 | # Target older architectures for those who haven't upgraded their graphics card to the latest models.
14 | #set(CUDA_NVCC_FLAGS "-gencode arch=compute_11,code=sm_11 -gencode arch=compute_20,code=sm_20 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_50,code=compute_50 ")
15 | 
16 | cuda_add_library(fluidsGL_kernels
17 |     defines.h
18 |     fluidsGL_kernels.cu
19 | )
20 | cuda_add_cufft_to_target(fluidsGL_kernels)
21 | 
22 | # GLUT based
23 | add_executable(fluidsGL
24 |     helper_timer.h
25 |     fluidsGL_kernels.h
26 |     fluidsGL.cpp
27 | )
28 | target_link_libraries(fluidsGL fluidsGL_kernels ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES} ${GLUT_LIBRARIES})
29 | set_target_properties(fluidsGL PROPERTIES COMPILE_FLAGS "-std=c++11")
30 | 
31 | # GLFW based
32 | add_executable(fluidsGLFW
33 |     fluidsGL_kernels.h
34 |     fluidsGLFW.cpp
35 | )
36 | target_link_libraries(fluidsGLFW fluidsGL_kernels ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES} ${GLFW_LIBRARIES})
37 | set_target_properties(fluidsGLFW PROPERTIES COMPILE_FLAGS "-std=c++11")
38 | 
39 | # Qt based
40 | find_package(Qt5Widgets)
41 | if(Qt5Widgets_FOUND)
42 |     set(CMAKE_AUTOMOC ON)
43 |     find_package(Qt5OpenGL REQUIRED)
44 |     include_directories(${Qt5Widgets_INCLUDE_DIRS} ${Qt5OpenGL_INCLUDE_DIRS})
45 |     add_executable(fluidsQt glfluids.cpp fluidsQt.cpp mainwindow.cpp)
46 |     target_link_libraries(fluidsQt fluidsGL_kernels Qt5::Widgets Qt5::OpenGL ${OPENGL_LIBRARIES} )
47 |     set_target_properties(fluidsQt PROPERTIES COMPILE_FLAGS "-std=c++11")
48 | endif(Qt5Widgets_FOUND)
49 | 


--------------------------------------------------------------------------------
/fluidsGL/defines.h:
--------------------------------------------------------------------------------
 1 | #ifndef DEFINES_H
 2 | #define DEFINES_H
 3 | 
 4 | #define DIM    512       // Square size of solver domain
 5 | #define DS    (DIM*DIM)  // Total domain size
 6 | #define CPADW (DIM/2+1)  // Padded row width for real->complex in-place FFT; presumably counted in complex elements
 7 | #define RPADW (2*(DIM/2+1))  // Twice CPADW; presumably the same padded row counted in real (float) elements
 8 | #define PDS   (DIM*CPADW) // Padded total domain size
 9 | 
10 | #define DT     0.09f     // Delta T for iterative solver
11 | #define VIS    0.0025f   // Viscosity constant
12 | #define FORCE (5.8f*DIM) // Force scale factor
13 | #define FR     4         // Force update radius
14 | 
15 | #define TILEX 64 // Tile width
16 | #define TILEY 64 // Tile height
17 | #define TIDSX 64 // Tids in X
18 | #define TIDSY 4  // Tids in Y
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/fluidsGL/fluidsGL.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 |  * with this source code for terms and conditions that govern your use of
  6 |  * this software. Any use, reproduction, disclosure, or distribution of
  7 |  * this software and related documentation outside the terms of the EULA
  8 |  * is strictly prohibited.
  9 |  *
 10 |  */
 11 | 
 12 | // OpenGL Graphics includes
 13 | #include <GL/glew.h>
 14 | 
 15 | // Includes
 16 | #include <algorithm>
 17 | #include <stdio.h>
 18 | #include <stdlib.h>
 19 | #include <string.h>
 20 | 
 21 | // CUDA standard includes
 22 | #include <cuda_gl_interop.h>
 23 | #include <cuda_runtime.h>
 24 | 
 25 | // CUDA FFT Libraries
 26 | #include <cufft.h>
 27 | 
 28 | #if defined(__APPLE__) || defined(MACOSX)
 29 | #include <GLUT/glut.h>
 30 | #else
 31 | #include <GL/freeglut.h>
 32 | #endif
 33 | 
 34 | #include "helper_timer.h"
 35 | 
 36 | #include "defines.h"
 37 | #include "fluidsGL_kernels.h"
 38 | 
 39 | const char *sSDKname = "fluidsGL";
 40 | // CUDA example code that implements the frequency space version of
 41 | // Jos Stam's paper 'Stable Fluids' in 2D. This application uses the
 42 | // CUDA FFT library (CUFFT) to perform velocity diffusion and to
 43 | // force non-divergence in the velocity field at each time step. It uses
 44 | // CUDA-OpenGL interoperability to update the particle field directly
 45 | // instead of doing a copy to system memory before drawing. Texture is
 46 | // used for automatic bilinear interpolation at the velocity advection step.
 47 | 
 48 | void cleanup(void);
 49 | void reshape(int x, int y);
 50 | 
 51 | // CUFFT plan handle
 52 | cufftHandle planr2c;
 53 | cufftHandle planc2r;
 54 | 
 55 | static float2 *vxfield = NULL;
 56 | static float2 *vyfield = NULL;
 57 | 
 58 | float2 *hvfield = NULL;
 59 | float2 *dvfield = NULL;
 60 | static int wWidth = std::max(512, DIM);
 61 | static int wHeight = std::max(512, DIM);
 62 | 
 63 | static int clicked = 0;
 64 | static int fpsCount = 0;
 65 | static int fpsLimit = 1;
 66 | StopWatchInterface *timer = NULL;
 67 | 
 68 | // Particle data
 69 | static GLuint vbo = 0; // OpenGL vertex buffer object
 70 | static struct cudaGraphicsResource
 71 |     *cuda_vbo_resource;          // handles OpenGL-CUDA exchange
 72 | static float2 *particles = NULL; // particle positions in host memory
 73 | static int lastx = 0, lasty = 0;
 74 | 
 75 | // Texture pitch
 76 | size_t tPitch = 0;
 77 | 
 78 | bool g_bExitESC = false;
 79 | 
 80 | void simulateFluids(void) {
 81 |   // One simulation step: the four solver entry points below are called in this fixed order
 82 |   advectVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM,
 83 |                  DT);
 84 |   diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS, planr2c, planc2r);
 85 |   updateVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM);
 86 |   advectParticles(cuda_vbo_resource, dvfield, DIM, DIM, DT);
 87 | }
 88 | 
 89 | void display(void) {
 90 |   // GLUT display callback: advance the simulation, draw the particle VBO, update the FPS title.
 91 |   sdkStartTimer(&timer);
 92 |   simulateFluids();
 93 | 
 94 |   // render points; the clear colour is set before glClear so that it already applies to the first frame
 95 |   glClearColor(1, 1, 1, 1.0f);
 96 |   glClear(GL_COLOR_BUFFER_BIT);
 97 |   glColor4f(0, 0, 1, 0.5f);
 98 |   glPointSize(1);
 99 |   glEnable(GL_POINT_SMOOTH);
100 |   glEnable(GL_BLEND);
101 |   glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
102 |   glEnableClientState(GL_VERTEX_ARRAY);
103 |   glDisable(GL_DEPTH_TEST);
104 |   glDisable(GL_CULL_FACE);
105 |   glBindBuffer(GL_ARRAY_BUFFER, vbo);
106 |   glVertexPointer(2, GL_FLOAT, 0, NULL);
107 |   glDrawArrays(GL_POINTS, 0, DS);
108 |   glBindBuffer(GL_ARRAY_BUFFER, 0);
109 |   glDisableClientState(GL_VERTEX_ARRAY);
110 |   glDisableClientState(GL_TEXTURE_COORD_ARRAY);
111 |   glDisable(GL_TEXTURE_2D);
112 | 
113 |   // Finish timing before swap buffers to avoid refresh sync
114 |   sdkStopTimer(&timer);
115 |   glutSwapBuffers();
116 | 
117 |   fpsCount++;
118 |   if (fpsCount == fpsLimit) {
119 |     char fps[256];
120 |     // A zero average would give an infinite rate, and casting that to int below is undefined.
121 |     const float avgTime = sdkGetAverageTimerValue(&timer);   // presumably milliseconds, given the /1000
122 |     const float ifps = avgTime > 0.f ? 1.f / (avgTime / 1000.f) : 0.f;
123 |     snprintf(fps, sizeof(fps), "Cuda/GL Stable Fluids (%d x %d): %3.1f fps", DIM, DIM, ifps);
124 |     glutSetWindowTitle(fps);
125 |     fpsCount = 0;
126 |     fpsLimit = (int)std::max(ifps, 1.f);   // refresh the title roughly once per second
127 |     sdkResetTimer(&timer);
128 |   }
129 |   glutPostRedisplay();
130 | }
131 | 
132 | // Returns a pseudo-random float in [0, 1] taken from the C library's rand().
133 | // The sequence therefore depends on the platform's rand() implementation and
134 | // on any srand() call; a self-contained generator (for example a
135 | // middle-square one) would be needed for output that is reproducible
136 | // across platforms.
137 | float myrand(void) {
138 |   return rand() / (float)RAND_MAX;
139 | }
140 | 
141 | void initParticles(float2 *p, int dx, int dy) {
142 |   // Row-major walk over a dx-by-dy grid: cell (col, row) gets one point at ((col + r1) / dx, (row + r2) / dy)
143 |   float2 *cell = p;
144 |   for (int row = 0; row < dy; row++) {
145 |     for (int col = 0; col < dx; col++, cell++) {
146 |       cell->x = (col + 0.5f + (myrand() - 0.5f)) / dx;
147 |       cell->y = (row + 0.5f + (myrand() - 0.5f)) / dy;
148 |     }
149 |   }
150 | }
151 | 
152 | void keyboard(unsigned char key, int x, int y) {   // GLUT keyboard callback; x and y are not used
153 |   switch (key) {
154 |   case 27:   // ESC: set the flag that makes cleanup() reset the device, then quit
155 |     g_bExitESC = true;
156 | #if defined(__APPLE__) || defined(MACOSX)
157 |     exit(EXIT_SUCCESS);
158 | #else
159 |     glutDestroyWindow(glutGetWindow());
160 |     return;
161 | #endif
162 |     break;   // not reached: both branches above leave the function
163 | 
164 |   case 'r':   // reset: zero the velocity field and re-seed the particles
165 |     memset(hvfield, 0, sizeof(float2) * DS);
166 |     cudaMemcpy(dvfield, hvfield, sizeof(float2) * DS, cudaMemcpyHostToDevice);
167 | 
168 |     initParticles(particles, DIM, DIM);
169 | 
170 |     cudaGraphicsUnregisterResource(cuda_vbo_resource);   // unregistered while GL re-uploads the buffer
171 | 
172 |     glBindBuffer(GL_ARRAY_BUFFER, vbo);
173 |     glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * DS, particles,
174 |                  GL_DYNAMIC_DRAW);
175 |     glBindBuffer(GL_ARRAY_BUFFER, 0);
176 | 
177 |     cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
178 |                                  cudaGraphicsMapFlagsNone);   // registered again for the next simulation step
179 |     break;
180 | 
181 |   default:   // every other key is ignored
182 |     break;
183 |   }
184 | }
185 | 
186 | void click(int button, int updown, int x, int y) {   // GLUT mouse-button callback
187 |   lastx = x;   // remember where the drag starts (window pixels)
188 |   lasty = y;
189 |   clicked = !clicked;   // toggles on every button event; button and updown are ignored
190 | }
191 | 
192 | void motion(int x, int y) {
193 |   // Map the previous cursor position from window pixels to solver-grid cells
194 |   const int cellX = (int)((lastx / (float)wWidth) * DIM);
195 |   const int cellY = (int)((lasty / (float)wHeight) * DIM);
196 | 
197 |   // Only cells at least FR away from every border qualify (presumably so the FR-radius patch fits)
198 |   const bool insideX = cellX > FR - 1 && cellX < DIM - FR;
199 |   const bool insideY = cellY > FR - 1 && cellY < DIM - FR;
200 | 
201 |   if (clicked && insideX && insideY) {
202 |     // Cursor displacement since the last handled event, as a fraction of the window size
203 |     const float dragX = (x - lastx) / (float)wWidth;
204 |     const float dragY = (y - lasty) / (float)wHeight;
205 |     addForces(dvfield, DIM, DIM, cellX - FR, cellY - FR, FORCE * DT * dragX,
206 |               FORCE * DT * dragY, FR);
207 |     // lastx/lasty only advance when a force was applied
208 |     lastx = x;
209 |     lasty = y;
210 |   }
211 | 
212 |   glutPostRedisplay();
213 | }
214 | 
215 | void reshape(int x, int y) {   // GLUT reshape callback; x and y are stored as the window size
216 |   wWidth = x;   // read back by motion() to normalise cursor coordinates
217 |   wHeight = y;
218 |   glViewport(0, 0, x, y);
219 |   glMatrixMode(GL_PROJECTION);
220 |   glLoadIdentity();
221 |   glOrtho(0, 1, 1, 0, 0, 1);   // left=0, right=1, bottom=1, top=0: unit square with y growing downwards
222 |   glMatrixMode(GL_MODELVIEW);
223 |   glLoadIdentity();
224 |   glutPostRedisplay();
225 | }
226 | 
227 | void cleanup(void) {   // exit hook: releases the interop handle, texture, buffers, CUFFT plans and timer
228 |   cudaGraphicsUnregisterResource(cuda_vbo_resource);
229 | 
230 |   unbind_texture();
231 |   delete_texture();
232 | 
233 |   // Free all host and device resources
234 |   free(hvfield);
235 |   free(particles);
236 |   cudaFree(dvfield);
237 |   cudaFree(vxfield);
238 |   cudaFree(vyfield);
239 |   cufftDestroy(planr2c);
240 |   cufftDestroy(planc2r);
241 | 
242 |   glBindBuffer(GL_ARRAY_BUFFER, 0);
243 |   glDeleteBuffers(1, &vbo);
244 | 
245 |   sdkDeleteTimer(&timer);
246 | 
247 |   if (g_bExitESC) {   // set only by the ESC branch of keyboard()
248 |     // cudaDeviceReset causes the driver to clean up all state. While
249 |     // not mandatory in normal operation, it is good practice.  It is also
250 |     // needed to ensure correct operation when the application is being
251 |     // profiled. Calling cudaDeviceReset causes all profile data to be
252 |     // flushed before the application exits
253 |     cudaDeviceReset();
254 |   }
255 | }
256 | 
257 | // Creates the GLUT window, registers the callbacks and loads GL entry points; returns true on success.
258 | int initGL(int *argc, char **argv) {
259 |   glutInit(argc, argv);
260 |   // glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
261 |   glutInitWindowSize(wWidth, wHeight);
262 |   glutCreateWindow("Compute Stable Fluids");
263 |   glutDisplayFunc(display);
264 |   glutKeyboardFunc(keyboard);
265 |   glutMouseFunc(click);
266 |   glutMotionFunc(motion);
267 |   glutReshapeFunc(reshape);
268 | 
269 |   const bool glewReady = (glewInit() == GLEW_OK);   // failure is reported only through the return value
270 |   if (!glewReady || !glewIsSupported("GL_ARB_vertex_buffer_object")) {
271 |     fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing.\n");
272 |     fflush(stderr);
273 |     return false;
274 |   }
275 | 
276 |   return true;
277 | }
278 | 
279 | int main(int argc, char **argv) {
280 |   // Sets up GL, host/device buffers, CUFFT plans and the shared VBO, then enters the GLUT main loop.
281 |   // First initialize OpenGL context, so we can properly set the GL for CUDA.
282 |   // This is necessary in order to achieve optimal performance with OpenGL/CUDA
283 |   // interop.
284 |   if (false == initGL(&argc, argv)) {
285 |     exit(EXIT_FAILURE);   // initialisation failed, so do not report success to the caller
286 |   }
287 | 
288 |   // Allocate and initialize host data
289 | 
290 |   sdkCreateTimer(&timer);
291 |   sdkResetTimer(&timer);
292 | 
293 |   hvfield = (float2 *)calloc(DS, sizeof(float2));   // zero-initialised host velocity field
294 |   if (hvfield == NULL) { fprintf(stderr, "Host allocation failed.\n"); exit(EXIT_FAILURE); }
295 | 
296 |   // Allocate and initialize device data
297 |   cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(float2) * DIM, DIM);
298 | 
299 |   cudaMemcpy(dvfield, hvfield, sizeof(float2) * DS, cudaMemcpyHostToDevice);
300 |   // Temporary complex velocity field data
301 |   cudaMalloc((void **)&vxfield, sizeof(float2) * PDS);
302 |   cudaMalloc((void **)&vyfield, sizeof(float2) * PDS);
303 | 
304 |   setup_texture(DIM, DIM);
305 |   bind_texture();
306 | 
307 |   // Create particle array
308 |   particles = (float2 *)calloc(DS, sizeof(float2));
309 |   if (particles == NULL) { fprintf(stderr, "Host allocation failed.\n"); exit(EXIT_FAILURE); }
310 | 
311 |   initParticles(particles, DIM, DIM);
312 | 
313 |   // Create CUFFT transform plan configuration
314 |   cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C);
315 |   cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R);
316 |   // TODO: update kernels to use the new unpadded memory layout for perf
317 |   // rather than the old FFTW-compatible layout
318 |   //    cufftSetCompatibilityMode(planr2c, CUFFT_COMPATIBILITY_FFTW_PADDING);
319 |   //    cufftSetCompatibilityMode(planc2r, CUFFT_COMPATIBILITY_FFTW_PADDING);
320 | 
321 |   glGenBuffers(1, &vbo);
322 |   glBindBuffer(GL_ARRAY_BUFFER, vbo);
323 |   glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * DS, particles,
324 |                GL_DYNAMIC_DRAW);
325 | 
326 |   GLint bsize;
327 |   glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &bsize);
328 |   if ((size_t)bsize != (sizeof(float2) * DS))   // the driver did not allocate the requested size
329 |     goto EXTERR;
330 | 
331 |   glBindBuffer(GL_ARRAY_BUFFER, 0);
332 | 
333 |   cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
334 |                                cudaGraphicsMapFlagsNone);
335 | 
336 | #if defined(__APPLE__) || defined(MACOSX)
337 |   atexit(cleanup);
338 | #else
339 |   glutCloseFunc(cleanup);
340 | #endif
341 |   glutMainLoop();
342 | 
343 |   // cudaDeviceReset causes the driver to clean up all state. While
344 |   // not mandatory in normal operation, it is good practice.  It is also
345 |   // needed to ensure correct operation when the application is being
346 |   // profiled. Calling cudaDeviceReset causes all profile data to be
347 |   // flushed before the application exits
348 |   cudaDeviceReset();
349 |   exit(EXIT_SUCCESS);
350 | 
351 |   return 0;
352 | 
353 | EXTERR:
354 |   printf("Failed to initialize GL extensions.\n");
355 | 
356 |   // cudaDeviceReset causes the driver to clean up all state. While
357 |   // not mandatory in normal operation, it is good practice.  It is also
358 |   // needed to ensure correct operation when the application is being
359 |   // profiled. Calling cudaDeviceReset causes all profile data to be
360 |   // flushed before the application exits
361 |   cudaDeviceReset();
362 |   exit(EXIT_FAILURE);
363 | }
364 | 


--------------------------------------------------------------------------------
/fluidsGL/fluidsGLFW.cpp:
--------------------------------------------------------------------------------
 1 | // OpenGL Graphics includes
 2 | #include <GL/glew.h>
 3 | #include <GLFW/glfw3.h>
 4 | 
 5 | // Includes
 6 | #include <stdlib.h>
 7 | #include <stdio.h>
 8 | #include <string.h>
 9 | #include <algorithm>
10 | 
11 | const char* vertex_shader = R"(
12 | 
13 |     attribute float x;
14 |     attribute float y;
15 | 
16 |     void main() {
17 |         gl_Position = vec4(x, y, 0.0, 1.0);
18 |         gl_PointSize = 1.0;
19 |     }
20 | 
21 | )";
22 | 
23 | // Fragment shader source: paints every fragment opaque red.
24 | const char* fragment_shader = R"(
25 | 
26 |     void main() {
27 |         gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0);
28 |     }
29 | )";
30 | 
31 | 
32 | float myrand(void)   // pseudo-random sample in [0, 1] taken from rand()
33 | {
34 |     return (float)rand() / (float)RAND_MAX;
35 | }
36 | 
37 | // Fills x[] and y[] (dx*dy entries each, row-major) with one point per grid cell, offset by myrand().
38 | void initParticles(float* x, float* y, int dx, int dy)
39 | {
40 |     for (int row = 0; row < dy; ++row)
41 |     {
42 |         for (int col = 0; col < dx; ++col)
43 |         {
44 |             const int cell = row*dx + col;
45 |             x[cell] = (col+0.5f+(myrand() - 0.5f))/dx;
46 |             y[cell] = (row+0.5f+(myrand() - 0.5f))/dy;
47 |         }
48 |     }
49 | }
50 | 
51 | void init(void) {
52 |     // Placeholder: no initialisation is performed yet.
53 | }
54 | 
55 | // Opens a GLFW window with a current GL context, initialises GLEW and runs an (empty) render loop.
56 | int main(void){
57 |     const int width = 512;
58 |     const int height = 512;
59 | 
60 |     if (!glfwInit()) exit(EXIT_FAILURE);
61 |     auto w = glfwCreateWindow(width, height, "Compute Stable Fluids", NULL, NULL);
62 |     if (!w) {
63 |         glfwTerminate();
64 |         exit(EXIT_FAILURE);
65 |     }
66 | 
67 |     // GLEW needs a current context, and glewInit() returns GLEW_OK (0) on success,
68 |     // so the context is bound first and the result is compared against GLEW_OK.
69 |     glfwMakeContextCurrent(w);
70 |     if (glewInit() != GLEW_OK) {
71 |         glfwTerminate();
72 |         exit(EXIT_FAILURE);
73 |     }
74 |     glfwSwapInterval(1);
75 | 
76 |     while (!glfwWindowShouldClose(w)){
77 |         //display();
78 |         glfwSwapBuffers(w);
79 |         glfwPollEvents();
80 |     }
81 |     glfwTerminate();
82 |     return 0;
83 | }
84 | 


--------------------------------------------------------------------------------
/fluidsGL/fluidsGL_kernels.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 |  * with this source code for terms and conditions that govern your use of
  6 |  * this software. Any use, reproduction, disclosure, or distribution of
  7 |  * this software and related documentation outside the terms of the EULA
  8 |  * is strictly prohibited.
  9 |  *
 10 |  */
 11 | 
 12 | #include "defines.h"
 13 | #include "fluidsGL_kernels.h"
 14 | 
 15 | #include <stdio.h>
 16 | #include <stdlib.h>
 17 | 
 18 | #include <cufft.h>          // CUDA FFT Libraries
 19 | //#include <helper_cuda.h>    // Helper functions for CUDA Error handling
 20 | 
 21 | 
 22 | // Texture reference for reading velocity field
 23 | texture<float2, 2> texref;
 24 | static cudaArray *array = NULL;
 25 | 
 26 | void setup_texture(int x, int y)
 27 | {
 28 | 
 29 |     // Wrap mode appears to be the new default
 30 |     texref.filterMode = cudaFilterModeLinear;
 31 |     cudaChannelFormatDesc desc = cudaCreateChannelDesc<float2>();
 32 | 
 33 |     cudaMallocArray(&array, &desc, y, x);
 34 | }
 35 | 
// Binds the file-level CUDA array to the velocity texture reference.
// The return status of the bind call is not checked.
void bind_texture(void)
{
    cudaBindTextureToArray(texref, array);
}
 40 | 
// Detaches the velocity texture reference from whatever it is bound to.
// The return status of the unbind call is not checked.
void unbind_texture(void)
{
    cudaUnbindTexture(texref);
}
 45 | 
 46 | void delete_texture(void)
 47 | {
 48 |     cudaFreeArray(array);
 49 | }
 50 | 
// Copies a pitched device buffer into the CUDA array backing the texture.
//   data  - source device pointer
//   wib   - width of each row to copy, in bytes
//   h     - number of rows to copy
//   pitch - distance in bytes between consecutive source rows
// The copy starts at offset (0, 0) of the array; its status is not checked.
void update_texture(float2 *data, size_t wib, size_t h, size_t pitch)
{
    cudaMemcpy2DToArray(array, 0, 0, data, pitch, wib, h, cudaMemcpyDeviceToDevice);
}
 55 | 
 56 | // Note that these kernels are designed to work with arbitrary
 57 | // domain sizes, not just domains that are multiples of the tile
 58 | // size. Therefore, we have extra code that checks to make sure
 59 | // a given thread location falls within the domain boundaries in
 60 | // both X and Y. Also, the domain is covered by looping over
 61 | // multiple elements in the Y direction, while there is a one-to-one
 62 | // mapping between threads in X and the tile size in X.
 63 | // Nolan Goodnight 9/22/06
 64 | 
 65 | // This method adds constant force vectors to the velocity field
 66 | // stored in 'v' according to v(x,t+1) = v(x,t) + dt * f.
 67 | __global__ void
 68 | addForces_k(float2 *v, int dx, int dy, int spx, int spy, float fx, float fy, int r, size_t pitch)
 69 | {
 70 | 
 71 |     int tx = threadIdx.x;
 72 |     int ty = threadIdx.y;
 73 |     float2 *fj = (float2 *)((char *)v + (ty + spy) * pitch) + tx + spx;
 74 | 
 75 |     float2 vterm = *fj;
 76 |     tx -= r;
 77 |     ty -= r;
 78 |     float s = 1.f / (1.f + tx*tx*tx*tx + ty*ty*ty*ty);
 79 |     vterm.x += s * fx;
 80 |     vterm.y += s * fy;
 81 |     *fj = vterm;
 82 | }
 83 | 
 84 | // This method performs the velocity advection step, where we
 85 | // trace velocity vectors back in time to update each grid cell.
 86 | // That is, v(x,t+1) = v(p(x,-dt),t). Here we perform bilinear
 87 | // interpolation in the velocity space.
 88 | __global__ void
 89 | advectVelocity_k(float2 *v, float *vx, float *vy,
 90 |                  int dx, int pdx, int dy, float dt, int lb)
 91 | {
 92 | 
 93 |     int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
 94 |     int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
 95 |     int p;
 96 | 
 97 |     float2 vterm, ploc;
 98 |     float vxterm, vyterm;
 99 | 
100 |     // gtidx is the domain location in x for this thread
101 |     if (gtidx < dx)
102 |     {
103 |         for (p = 0; p < lb; p++)
104 |         {
105 |             // fi is the domain location in y for this thread
106 |             int fi = gtidy + p;
107 | 
108 |             if (fi < dy)
109 |             {
110 |                 int fj = fi * pdx + gtidx;
111 |                 vterm = tex2D(texref, (float)gtidx, (float)fi);
112 |                 ploc.x = (gtidx + 0.5f) - (dt * vterm.x * dx);
113 |                 ploc.y = (fi + 0.5f) - (dt * vterm.y * dy);
114 |                 vterm = tex2D(texref, ploc.x, ploc.y);
115 |                 vxterm = vterm.x;
116 |                 vyterm = vterm.y;
117 |                 vx[fj] = vxterm;
118 |                 vy[fj] = vyterm;
119 |             }
120 |         }
121 |     }
122 | }
123 | 
// Velocity diffusion and mass-conserving projection in the frequency
// domain. 'vx' and 'vy' are complex-valued arrays holding the Fourier
// coefficients of the velocity field in X and Y and are updated in place.
// Diffusion: v(k,t) = v(k,t) / (1 + visc * dt * k^2), where visc is the
// viscosity and k the wavenumber.
// Projection: v(k,t) = v(k,t) - ((k dot v(k,t)) * k) / k^2, applied for
// every wavenumber with k^2 > 0.
//   dx, dy - extent of the coefficient arrays (dx is also their row length)
//   dt     - time step
//   lb     - number of consecutive rows handled by each thread
__global__ void
diffuseProject_k(float2 *vx, float2 *vy, int dx, int dy, float dt,
                 float visc, int lb)
{

    // Column handled by this thread and the first of its 'lb' rows
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int rowBase = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;

    // Threads to the right of the domain have nothing to do
    if (col >= dx)
    {
        return;
    }

    for (int k = 0; k < lb; k++)
    {
        const int row = rowBase + k;

        // Skip rows below the domain
        if (row >= dy)
        {
            continue;
        }

        const int idx = row * dx + col;
        float2 cx = vx[idx];
        float2 cy = vy[idx];

        // Wavenumber indices: rows past dy/2 map to negative frequencies
        const int kx = col;
        const int ky = (row > dy/2) ? (row - (dy)) : row;

        // Velocity diffusion
        float kk = (float)(kx * kx + ky * ky); // k^2
        float diff = 1.f / (1.f + visc * dt * kk);
        cx.x *= diff;
        cx.y *= diff;
        cy.x *= diff;
        cy.y *= diff;

        // Velocity projection (skipped for the zero wavenumber)
        if (kk > 0.f)
        {
            float rkk = 1.f / kk;
            // Real part of k dot v
            float rkp = (kx * cx.x + ky * cy.x);
            // Imaginary part of k dot v
            float ikp = (kx * cx.y + ky * cy.y);
            cx.x -= rkk * rkp * kx;
            cx.y -= rkk * ikp * kx;
            cy.x -= rkk * rkp * ky;
            cy.y -= rkk * ikp * ky;
        }

        vx[idx] = cx;
        vy[idx] = cy;
    }
}
190 | 
// Writes the velocity field 'v' from the two real arrays 'vx' and 'vy'
// produced by the previous step, scaling every value by 1/(dx*dy) to
// account for an unnormalized FFT.
//   v      - pitched output field, 'pitch' bytes per row
//   vx, vy - input component arrays with row length pdx
//   dx, dy - domain size in cells
//   pdx    - row length, in floats, of vx and vy
//   lb     - number of consecutive rows handled by each thread
//   pitch  - row pitch of 'v' in bytes
__global__ void
updateVelocity_k(float2 *v, float *vx, float *vy,
                 int dx, int pdx, int dy, int lb, size_t pitch)
{

    // Column handled by this thread and the first of its 'lb' rows
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int rowBase = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;

    // Threads to the right of the domain have nothing to do
    if (col >= dx)
    {
        return;
    }

    // Normalization factor for the inverse FFT, identical for every cell
    const float scale = 1.f / (dx * dy);

    for (int k = 0; k < lb; k++)
    {
        const int row = rowBase + k;

        // Skip rows below the domain
        if (row >= dy)
        {
            continue;
        }

        const int in = row * pdx + col;

        float2 scaled;
        scaled.x = vx[in] * scale;
        scaled.y = vy[in] * scale;

        float2 *cell = (float2 *)((char *)v + row * pitch) + col;
        *cell = scaled;
    }
}
231 | 
// Moves every particle along the velocity field by one time step:
// p(t+1) = p(t) + dt * v(p(t)), then wraps the position back into [0, 1).
//   part   - particle positions, dx * dy entries, row length dx
//   v      - pitched velocity field, 'pitch' bytes per row
//   dx, dy - domain size in cells (also the particle grid size)
//   dt     - time step
//   lb     - number of consecutive rows handled by each thread
//   pitch  - row pitch of 'v' in bytes
// The velocity cell is found by scaling the position by (dx, dy). The
// resulting indices are clamped to the domain because a coordinate of
// exactly 1.0 (possible in the initial positions) would otherwise index one
// cell past the end of a row or column.
__global__ void
advectParticles_k(float2 *part, float2 *v, int dx, int dy,
                  float dt, int lb, size_t pitch)
{

    int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
    int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
    int p;

    float2 pterm, vterm;

    // gtidx is the domain location in x for this thread
    if (gtidx < dx)
    {
        for (p = 0; p < lb; p++)
        {
            // fi is the domain location in y for this thread
            int fi = gtidy + p;

            if (fi < dy)
            {
                int fj = fi * dx + gtidx;
                pterm = part[fj];

                // Velocity cell under the particle, kept inside the domain
                int xvi = ((int)(pterm.x * dx));
                int yvi = ((int)(pterm.y * dy));
                xvi = min(max(xvi, 0), dx - 1);
                yvi = min(max(yvi, 0), dy - 1);
                vterm = *((float2 *)((char *)v + yvi * pitch) + xvi);

                // Advance, drop the integer part, shift by one and drop it
                // again so that negative values also end up in [0, 1)
                pterm.x += dt * vterm.x;
                pterm.x = pterm.x - (int)pterm.x;
                pterm.x += 1.f;
                pterm.x = pterm.x - (int)pterm.x;
                pterm.y += dt * vterm.y;
                pterm.y = pterm.y - (int)pterm.y;
                pterm.y += 1.f;
                pterm.y = pterm.y - (int)pterm.y;

                part[fj] = pterm;
            }
        } // Loop over the rows of this thread
    } // If this thread is inside the domain in X
}
277 | 
278 | 
// Host wrapper: launches addForces_k as a single block of
// (2r+1) x (2r+1) threads, passing the global texture pitch along.
// The launch status is not checked.
void addForces(float2 *v, int dx, int dy, int spx, int spy, float fx, float fy, int r)
{
    // One thread per cell of the square of radius r
    const int side = 2*r+1;
    dim3 tids(side, side);

    addForces_k<<<1, tids>>>(v, dx, dy, spx, spy, fx, fy, r, tPitch);
}
286 | 
287 | 
// Host wrapper for the advection step: refreshes the texture from 'v' and
// launches advectVelocity_k over a grid of TILEX x TILEY tiles.
// NOTE(review): the texture upload always covers DIM x DIM cells, whatever
// dx and dy are; confirm callers only pass DIM.
void advectVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy, float dt)
{
    // Tiles needed per axis, rounding up when the size is not a multiple
    int tilesX = dx/TILEX;
    if (dx%TILEX)
    {
        tilesX++;
    }

    int tilesY = dy/TILEY;
    if (dy%TILEY)
    {
        tilesY++;
    }

    dim3 grid(tilesX, tilesY);
    dim3 tids(TIDSX, TIDSY);

    update_texture(v, DIM*sizeof(float2), DIM, tPitch);
    advectVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, dt, TILEY/TIDSY);
}
298 | 
299 | 
// Host wrapper for diffusion and projection: transforms both component
// arrays to the frequency domain in place, runs diffuseProject_k on the
// coefficients, and transforms them back in place.
// Neither the cuFFT status codes nor the launch status are checked.
void diffuseProject(float2 *vx, float2 *vy, int dx, int dy, float dt, float visc,
                    cufftHandle planr2c, cufftHandle planc2r)
{
    // Forward FFT (real to complex, in place)
    cufftExecR2C(planr2c, (cufftReal *)vx, (cufftComplex *)vx);
    cufftExecR2C(planr2c, (cufftReal *)vy, (cufftComplex *)vy);

    // Tiles needed per axis, rounding up when the size is not a multiple
    int tilesX = dx/TILEX;
    if (dx%TILEX)
    {
        tilesX++;
    }

    int tilesY = dy/TILEY;
    if (dy%TILEY)
    {
        tilesY++;
    }

    dim3 grid(tilesX, tilesY, 1);
    dim3 tids(TIDSX, TIDSY, 1);

    diffuseProject_k<<<grid, tids>>>(vx, vy, dx, dy, dt, visc, TILEY/TIDSY);

    // Inverse FFT (complex to real, in place)
    cufftExecC2R(planc2r, (cufftComplex *)vx, (cufftReal *)vx);
    cufftExecC2R(planc2r, (cufftComplex *)vy, (cufftReal *)vy);
}
317 | 
318 | 
// Host wrapper: launches updateVelocity_k over a grid of TILEX x TILEY
// tiles, passing the global texture pitch as the row pitch of 'v'.
// The launch status is not checked.
void updateVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy)
{
    // Tiles needed per axis, rounding up when the size is not a multiple
    int tilesX = dx/TILEX;
    if (dx%TILEX)
    {
        tilesX++;
    }

    int tilesY = dy/TILEY;
    if (dy%TILEY)
    {
        tilesY++;
    }

    dim3 grid(tilesX, tilesY);
    dim3 tids(TIDSX, TIDSY);

    updateVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, TILEY/TIDSY, tPitch);
}
326 | 
327 | 
// Host wrapper for particle advection: maps the registered graphics
// resource, runs advectParticles_k on the mapped buffer, and unmaps it.
// None of the CUDA status codes are checked.
void advectParticles(struct cudaGraphicsResource *cuda_vbo_resource, float2 *v, int dx, int dy, float dt)
{
    // Tiles needed per axis, rounding up when the size is not a multiple
    int tilesX = dx/TILEX;
    if (dx%TILEX)
    {
        tilesX++;
    }

    int tilesY = dy/TILEY;
    if (dy%TILEY)
    {
        tilesY++;
    }

    dim3 grid(tilesX, tilesY);
    dim3 tids(TIDSX, TIDSY);

    // Obtain a device pointer to the particle buffer
    cudaGraphicsMapResources(1, &cuda_vbo_resource, 0);

    float2 *mapped;
    size_t mappedBytes;
    cudaGraphicsResourceGetMappedPointer((void **)&mapped, &mappedBytes,
                                         cuda_vbo_resource);

    advectParticles_k<<<grid, tids>>>(mapped, v, dx, dy, dt, TILEY/TIDSY, tPitch);

    // Release the mapping again
    cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0);
}
344 | 


--------------------------------------------------------------------------------
/fluidsGL/fluidsGL_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef __STABLEFLUIDS_KERNELS_H_
 2 | #define __STABLEFLUIDS_KERNELS_H_
 3 | 
 4 | #include <cuda.h>
 5 | #include <cufft.h>
 6 | 
// Texture pitch: row pitch in bytes of the pitched velocity field. The
// kernel wrappers below pass it to their kernels; it is defined by the
// application, not by the kernel file.
extern size_t tPitch;

// Texture management for the velocity field.
// setup_texture allocates the backing CUDA array, bind/unbind attach and
// detach it from the texture reference, delete_texture frees it, and
// update_texture copies h rows of w bytes from a pitched device buffer
// into it.
void setup_texture(int x, int y);
void bind_texture(void);
void unbind_texture(void);
void delete_texture(void);
void update_texture(float2 *data, size_t w, size_t h, size_t pitch);

// Host wrappers that launch the simulation kernels.
//   addForces       - adds a force (fx, fy) to a square of radius r at (spx, spy)
//   advectVelocity  - advects the velocity field into vx / vy
//   diffuseProject  - diffusion and projection in the frequency domain
//   updateVelocity  - writes the normalized vx / vy back into v
//   advectParticles - moves the particles held in the graphics resource
void addForces(float2 *v, int dx, int dy, int spx, int spy, float fx, float fy, int r);
void advectVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy, float dt);
void diffuseProject(float2 *vx, float2 *vy, int dx, int dy, float dt, float visc, cufftHandle planr2c, cufftHandle planc2r);
void updateVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy);
void advectParticles(struct cudaGraphicsResource *cuda_vbo_resource, float2 *v, int dx, int dy, float dt);
21 | 
22 | #endif
23 | 
24 | 


--------------------------------------------------------------------------------
/fluidsGL/fluidsQt.cpp:
--------------------------------------------------------------------------------
 1 | #include "mainwindow.h"
 2 | 
 3 | #include <QApplication>
 4 | 
 5 | int main(int argc, char *argv[])
 6 | {
 7 |     QApplication app(argc, argv);
 8 |     MainWindow window;
 9 |     window.show();
10 |     return app.exec();
11 | }
12 | 


--------------------------------------------------------------------------------
/fluidsGL/glfluids.cpp:
--------------------------------------------------------------------------------
  1 | #include "glfluids.h"
  2 | 
  3 | #include <QKeyEvent>
  4 | #include <QMouseEvent>
  5 | #include <QTimer>
  6 | 
  7 | // Texture pitch
  8 | size_t tPitch = 0;
  9 | 
 10 | void initParticles(float2 *p, int dx, int dy) {
 11 | 
 12 |   auto myrand = []() -> float { return qrand() / (float)RAND_MAX; };
 13 | 
 14 |   for (int i = 0; i < dy; i++) {
 15 |     for (int j = 0; j < dx; j++) {
 16 |       p[i * dx + j].x = (j + 0.5f + (myrand() - 0.5f)) / dx;
 17 |       p[i * dx + j].y = (i + 0.5f + (myrand() - 0.5f)) / dy;
 18 |     }
 19 |   }
 20 | }
 21 | 
 22 | void GLFluids::simulateFluids(void) {
 23 |   // simulate fluid
 24 |   advectVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM,
 25 |                  DT);
 26 |   diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS, planr2c, planc2r);
 27 |   updateVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM);
 28 |   advectParticles(cuda_vbo_resource, dvfield, DIM, DIM, DT);
 29 | }
 30 | 
 31 | GLFluids::GLFluids(QWidget *parent) : QGLWidget(parent), QGLFunctions() {
 32 |   vbo = 0;
 33 | 
 34 |   wWidth = qMax(512, DIM);
 35 |   wHeight = qMax(512, DIM);
 36 | 
 37 |   hvfield = (float2 *)malloc(sizeof(float2) * DS);
 38 |   memset(hvfield, 0, sizeof(float2) * DS);
 39 | 
 40 |   // Allocate and initialize device data
 41 |   cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(float2) * DIM, DIM);
 42 | 
 43 |   cudaMemcpy(dvfield, hvfield, sizeof(float2) * DS, cudaMemcpyHostToDevice);
 44 |   // Temporary complex velocity field data
 45 |   cudaMalloc((void **)&vxfield, sizeof(float2) * PDS);
 46 |   cudaMalloc((void **)&vyfield, sizeof(float2) * PDS);
 47 | 
 48 |   setup_texture(DIM, DIM);
 49 |   bind_texture();
 50 | 
 51 |   // Create particle array
 52 |   particles = (float2 *)malloc(sizeof(float2) * DS);
 53 |   memset(particles, 0, sizeof(float2) * DS);
 54 |   initParticles(particles, DIM, DIM);
 55 | 
 56 |   // Create CUFFT transform plan configuration
 57 |   cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C);
 58 |   cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R);
 59 | 
 60 |   QTimer *timer = new QTimer(this);
 61 |   connect(timer, &QTimer::timeout, [&]() {
 62 |     simulateFluids();
 63 |     updateGL();
 64 |   });
 65 |   timer->start(0);
 66 | }
 67 | 
 68 | GLFluids::~GLFluids() {
 69 | 
 70 |   cudaGraphicsUnregisterResource(cuda_vbo_resource);
 71 | 
 72 |   unbind_texture();
 73 |   delete_texture();
 74 | 
 75 |   // Free all host and device resources
 76 |   free(hvfield);
 77 |   free(particles);
 78 |   cudaFree(dvfield);
 79 |   cudaFree(vxfield);
 80 |   cudaFree(vyfield);
 81 |   cufftDestroy(planr2c);
 82 |   cufftDestroy(planc2r);
 83 | 
 84 |   glBindBuffer(GL_ARRAY_BUFFER, 0);
 85 |   glDeleteBuffers(1, &vbo);
 86 | }
 87 | 
 88 | void GLFluids::reset() {
 89 |   memset(hvfield, 0, sizeof(float2) * DS);
 90 |   cudaMemcpy(dvfield, hvfield, sizeof(float2) * DS, cudaMemcpyHostToDevice);
 91 | 
 92 |   initParticles(particles, DIM, DIM);
 93 | 
 94 |   glBindBuffer(GL_ARRAY_BUFFER, vbo);
 95 |   glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * DS, particles,
 96 |                GL_DYNAMIC_DRAW);
 97 |   glBindBuffer(GL_ARRAY_BUFFER, 0);
 98 | 
 99 |   cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
100 |                                cudaGraphicsMapFlagsNone);
101 | }
102 | 
103 | void GLFluids::initializeGL() {
104 |   initializeGLFunctions();
105 |   glGenBuffers(1, &vbo);
106 |   glBindBuffer(GL_ARRAY_BUFFER, vbo);
107 |   glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * DS, particles,
108 |                GL_DYNAMIC_DRAW);
109 |   glBindBuffer(GL_ARRAY_BUFFER, 0);
110 |   cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
111 |                                cudaGraphicsMapFlagsNone);
112 | }
113 | 
114 | void GLFluids::paintGL() {
115 |   glClear(GL_COLOR_BUFFER_BIT);
116 |   glClearColor(1, 1, 1, 1.0f);
117 |   glColor4f(0, 0, 1, 0.5f);
118 |   glPointSize(1);
119 |   glEnable(GL_POINT_SMOOTH);
120 |   glEnable(GL_BLEND);
121 |   glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
122 |   glEnableClientState(GL_VERTEX_ARRAY);
123 |   glDisable(GL_DEPTH_TEST);
124 |   glDisable(GL_CULL_FACE);
125 |   glBindBuffer(GL_ARRAY_BUFFER, vbo);
126 |   glVertexPointer(2, GL_FLOAT, 0, NULL);
127 |   glDrawArrays(GL_POINTS, 0, DS);
128 |   glBindBuffer(GL_ARRAY_BUFFER, 0);
129 |   glDisableClientState(GL_VERTEX_ARRAY);
130 |   glDisableClientState(GL_TEXTURE_COORD_ARRAY);
131 |   glDisable(GL_TEXTURE_2D);
132 | }
133 | 
134 | void GLFluids::resizeGL(int x, int y) {
135 |   wWidth = x;
136 |   wHeight = y;
137 |   glViewport(0, 0, x, y);
138 |   glMatrixMode(GL_PROJECTION);
139 |   glLoadIdentity();
140 |   glOrtho(0, 1, 1, 0, 0, 1);
141 |   glMatrixMode(GL_MODELVIEW);
142 |   glLoadIdentity();
143 | }
144 | 
145 | void GLFluids::mousePressEvent(QMouseEvent *event) {
146 |   auto lastPos = event->pos();
147 |   lastx = lastPos.x();
148 |   lasty = lastPos.y();
149 | }
150 | 
151 | void GLFluids::mouseMoveEvent(QMouseEvent *event) {
152 |   int x = event->x();
153 |   int y = event->y();
154 | 
155 |   // Convert motion coordinates to domain
156 |   float fx = (lastx / (float)wWidth);
157 |   float fy = (lasty / (float)wHeight);
158 |   int nx = (int)(fx * DIM);
159 |   int ny = (int)(fy * DIM);
160 | 
161 |   if (event->buttons() & Qt::LeftButton) {
162 |     int ddx = x - lastx;
163 |     int ddy = y - lasty;
164 |     fx = ddx / (float)wWidth;
165 |     fy = ddy / (float)wHeight;
166 |     int spy = ny - FR;
167 |     int spx = nx - FR;
168 |     addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy,
169 |               FR);
170 |   }
171 | 
172 |   lastx = x;
173 |   lasty = y;
174 | }
175 | 


--------------------------------------------------------------------------------
/fluidsGL/glfluids.h:
--------------------------------------------------------------------------------
 1 | #ifndef GLFLUIDS_H
 2 | #define GLFLUIDS_H
 3 | 
 4 | // Qt
 5 | #include <QGLWidget>
 6 | #include <QGLFunctions>
 7 | 
 8 | // CUDA standard includes
 9 | #include <cuda_runtime.h>
10 | #include <cuda_gl_interop.h>
11 | 
12 | // CUDA FFT Libraries
13 | #include <cufft.h>
14 | 
15 | #include "defines.h"
16 | #include "fluidsGL_kernels.h"
17 | 
18 | class GLFluids : public QGLWidget, protected QGLFunctions
19 | {
20 |     Q_OBJECT
21 | 
22 | public:
23 |     GLFluids(QWidget *parent = 0);
24 |     ~GLFluids();
25 |     void reset();
26 | 
27 | protected:
28 | 
29 |     void initializeGL();
30 |     void paintGL();
31 |     void resizeGL(int x, int y);
32 | 
33 |     void mousePressEvent(QMouseEvent *event);
34 |     void mouseMoveEvent(QMouseEvent *event);
35 | 
36 | private:
37 |     void simulateFluids();
38 | 
39 |     float2 *vxfield;
40 |     float2 *vyfield;
41 | 
42 |     float2 *hvfield;
43 |     float2 *dvfield;
44 | 
45 |     int wWidth;
46 |     int wHeight;
47 |     int lastx = 0, lasty = 0;
48 | 
49 |     // Particle data
50 |     GLuint vbo;                 // OpenGL vertex buffer object
51 |     struct cudaGraphicsResource *cuda_vbo_resource; // handles OpenGL-CUDA exchange
52 |     float2 *particles; // particle positions in host memory
53 | 
54 |     // CUFFT plan handle
55 |     cufftHandle planr2c;
56 |     cufftHandle planc2r;
57 | };
58 | 
59 | 
60 | #endif // GLFLUIDS_H
61 | 


--------------------------------------------------------------------------------
/fluidsGL/helper_timer.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 |  * with this source code for terms and conditions that govern your use of
  6 |  * this software. Any use, reproduction, disclosure, or distribution of
  7 |  * this software and related documentation outside the terms of the EULA
  8 |  * is strictly prohibited.
  9 |  *
 10 |  */
 11 | 
 12 | // Helper Timing Functions
 13 | #ifndef HELPER_TIMER_H
 14 | #define HELPER_TIMER_H
 15 | 
 16 | #ifndef EXIT_WAIVED
 17 | #define EXIT_WAIVED 2
 18 | #endif
 19 | 
 20 | // includes, system
 21 | #include <vector>
 22 | 
 23 | // Definition of the StopWatch Interface, this is used if we don't want to use the CUT functions
 24 | // But rather in a self contained class interface
 25 | class StopWatchInterface
 26 | {
 27 |     public:
 28 |         StopWatchInterface() {};
 29 |         virtual ~StopWatchInterface() {};
 30 | 
 31 |     public:
 32 |         //! Start time measurement
 33 |         virtual void start() = 0;
 34 | 
 35 |         //! Stop time measurement
 36 |         virtual void stop() = 0;
 37 | 
 38 |         //! Reset time counters to zero
 39 |         virtual void reset() = 0;
 40 | 
 41 |         //! Time in msec. after start. If the stop watch is still running (i.e. there
 42 |         //! was no call to stop()) then the elapsed time is returned, otherwise the
 43 |         //! time between the last start() and stop call is returned
 44 |         virtual float getTime() = 0;
 45 | 
 46 |         //! Mean time to date based on the number of times the stopwatch has been
 47 |         //! _stopped_ (ie finished sessions) and the current total time
 48 |         virtual float getAverageTime() = 0;
 49 | };
 50 | 
 51 | 
 52 | //////////////////////////////////////////////////////////////////
 53 | // Begin Stopwatch timer class definitions for all OS platforms //
 54 | //////////////////////////////////////////////////////////////////
 55 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
 56 | // includes, system
 57 | #define WINDOWS_LEAN_AND_MEAN
 58 | #include <windows.h>
 59 | #undef min
 60 | #undef max
 61 | 
 62 | //! Windows specific implementation of StopWatch
 63 | class StopWatchWin : public StopWatchInterface
 64 | {
 65 |     public:
 66 |         //! Constructor, default
 67 |         StopWatchWin() :
 68 |             start_time(),     end_time(),
 69 |             diff_time(0.0f),  total_time(0.0f),
 70 |             running(false), clock_sessions(0), freq(0), freq_set(false)
 71 |         {
 72 |             if (! freq_set)
 73 |             {
 74 |                 // helper variable
 75 |                 LARGE_INTEGER temp;
 76 | 
 77 |                 // get the tick frequency from the OS
 78 |                 QueryPerformanceFrequency((LARGE_INTEGER *) &temp);
 79 | 
 80 |                 // convert to type in which it is needed
 81 |                 freq = ((double) temp.QuadPart) / 1000.0;
 82 | 
 83 |                 // rememeber query
 84 |                 freq_set = true;
 85 |             }
 86 |         };
 87 | 
 88 |         // Destructor
 89 |         ~StopWatchWin() { };
 90 | 
 91 |     public:
 92 |         //! Start time measurement
 93 |         inline void start();
 94 | 
 95 |         //! Stop time measurement
 96 |         inline void stop();
 97 | 
 98 |         //! Reset time counters to zero
 99 |         inline void reset();
100 | 
101 |         //! Time in msec. after start. If the stop watch is still running (i.e. there
102 |         //! was no call to stop()) then the elapsed time is returned, otherwise the
103 |         //! time between the last start() and stop call is returned
104 |         inline float getTime();
105 | 
106 |         //! Mean time to date based on the number of times the stopwatch has been
107 |         //! _stopped_ (ie finished sessions) and the current total time
108 |         inline float getAverageTime();
109 | 
110 |     private:
111 |         // member variables
112 | 
113 |         //! Start of measurement
114 |         LARGE_INTEGER  start_time;
115 |         //! End of measurement
116 |         LARGE_INTEGER  end_time;
117 | 
118 |         //! Time difference between the last start and stop
119 |         float  diff_time;
120 | 
121 |         //! TOTAL time difference between starts and stops
122 |         float  total_time;
123 | 
124 |         //! flag if the stop watch is running
125 |         bool running;
126 | 
127 |         //! Number of times clock has been started
128 |         //! and stopped to allow averaging
129 |         int clock_sessions;
130 | 
131 |         //! tick frequency
132 |         double  freq;
133 | 
134 |         //! flag if the frequency has been set
135 |         bool  freq_set;
136 | };
137 | 
138 | // functions, inlined
139 | 
140 | ////////////////////////////////////////////////////////////////////////////////
141 | //! Start time measurement
142 | ////////////////////////////////////////////////////////////////////////////////
143 | inline void
144 | StopWatchWin::start()
145 | {
146 |     QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
147 |     running = true;
148 | }
149 | 
150 | ////////////////////////////////////////////////////////////////////////////////
151 | //! Stop time measurement and increment add to the current diff_time summation
152 | //! variable. Also increment the number of times this clock has been run.
153 | ////////////////////////////////////////////////////////////////////////////////
154 | inline void
155 | StopWatchWin::stop()
156 | {
157 |     QueryPerformanceCounter((LARGE_INTEGER *) &end_time);
158 |     diff_time = (float)
159 |                 (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
160 | 
161 |     total_time += diff_time;
162 |     clock_sessions++;
163 |     running = false;
164 | }
165 | 
166 | ////////////////////////////////////////////////////////////////////////////////
167 | //! Reset the timer to 0. Does not change the timer running state but does
168 | //! recapture this point in time as the current start time if it is running.
169 | ////////////////////////////////////////////////////////////////////////////////
170 | inline void
171 | StopWatchWin::reset()
172 | {
173 |     diff_time = 0;
174 |     total_time = 0;
175 |     clock_sessions = 0;
176 | 
177 |     if (running)
178 |     {
179 |         QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
180 |     }
181 | }
182 | 
183 | 
184 | ////////////////////////////////////////////////////////////////////////////////
185 | //! Time in msec. after start. If the stop watch is still running (i.e. there
186 | //! was no call to stop()) then the elapsed time is returned added to the
187 | //! current diff_time sum, otherwise the current summed time difference alone
188 | //! is returned.
189 | ////////////////////////////////////////////////////////////////////////////////
190 | inline float
191 | StopWatchWin::getTime()
192 | {
193 |     // Return the TOTAL time to date
194 |     float retval = total_time;
195 | 
196 |     if (running)
197 |     {
198 |         LARGE_INTEGER temp;
199 |         QueryPerformanceCounter((LARGE_INTEGER *) &temp);
200 |         retval += (float)
201 |                   (((double)(temp.QuadPart - start_time.QuadPart)) / freq);
202 |     }
203 | 
204 |     return retval;
205 | }
206 | 
207 | ////////////////////////////////////////////////////////////////////////////////
208 | //! Time in msec. for a single run based on the total number of COMPLETED runs
209 | //! and the total time.
210 | ////////////////////////////////////////////////////////////////////////////////
211 | inline float
212 | StopWatchWin::getAverageTime()
213 | {
214 |     return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
215 | }
216 | #else
217 | // Declarations for Stopwatch on Linux and Mac OSX
218 | // includes, system
219 | #include <ctime>
220 | #include <sys/time.h>
221 | 
222 | //! Linux / Mac OSX implementation of StopWatch, based on gettimeofday()
223 | class StopWatchLinux : public StopWatchInterface
224 | {
225 |     public:
226 |         //! Constructor, default: stopped, with all counters at zero
227 |         StopWatchLinux() :
228 |             start_time(), diff_time(0.0), total_time(0.0),
229 |             running(false), clock_sessions(0)
230 |         { };
231 | 
232 |         //! Destructor
233 |         virtual ~StopWatchLinux()
234 |         { };
235 | 
236 |     public:
237 |         //! Start time measurement
238 |         inline void start();
239 | 
240 |         //! Stop time measurement
241 |         inline void stop();
242 | 
243 |         //! Reset time counters to zero
244 |         inline void reset();
245 | 
246 |         //! Total time in msec. accumulated over all completed start()/stop()
247 |         //! sessions. If the stop watch is still running (i.e. there was no call
248 |         //! to stop()) the time elapsed since the last start() is added to it.
249 |         inline float getTime();
250 | 
251 |         //! Mean time to date based on the number of times the stopwatch has been
252 |         //! _stopped_ (ie finished sessions) and the current total time
253 |         inline float getAverageTime();
254 | 
255 |     private:
256 | 
257 |         // helper functions
258 | 
259 |         //! Get difference in msec. between start time and current time
260 |         inline float getDiffTime();
261 | 
262 |     private:
263 | 
264 |         // member variables
265 | 
266 |         //! Start of measurement, as reported by gettimeofday()
267 |         struct timeval  start_time;
268 | 
269 |         //! Time difference in msec. between the last start and stop
270 |         float  diff_time;
271 | 
272 |         //! TOTAL time difference in msec. between starts and stops
273 |         float  total_time;
274 | 
275 |         //! flag if the stop watch is running
276 |         bool running;
277 | 
278 |         //! Number of times clock has been started
279 |         //! and stopped to allow averaging
280 |         int clock_sessions;
281 | };
282 | 
283 | // functions, inlined
284 | 
285 | ////////////////////////////////////////////////////////////////////////////////
286 | //! Begin a session: capture the current time of day and mark the watch running
287 | ////////////////////////////////////////////////////////////////////////////////
288 | inline void
289 | StopWatchLinux::start()
290 | {
291 |     gettimeofday(&start_time, NULL);
292 |     running = true;
293 | }
294 | 
295 | ////////////////////////////////////////////////////////////////////////////////
296 | //! End the current session: store its duration in diff_time, add it to
297 | //! total_time and count one more completed session.
298 | ////////////////////////////////////////////////////////////////////////////////
299 | inline void
300 | StopWatchLinux::stop()
301 | {
302 |     diff_time  = getDiffTime();
303 |     total_time = total_time + diff_time;
304 |     ++clock_sessions;
305 |     running    = false;
306 | }
307 | 
308 | ////////////////////////////////////////////////////////////////////////////////
309 | //! Clear all counters. The running state is left untouched; a running watch
310 | //! takes this point in time as its new start time.
311 | ////////////////////////////////////////////////////////////////////////////////
312 | inline void
313 | StopWatchLinux::reset()
314 | {
315 |     clock_sessions = 0;
316 |     total_time = 0.0f;
317 |     diff_time = 0.0f;
318 | 
319 |     // Only a running watch needs a fresh start time
320 |     if (!running)
321 |         return;
322 |     gettimeofday(&start_time, NULL);
323 | }
324 | 
325 | ////////////////////////////////////////////////////////////////////////////////
326 | //! Total time in msec. to date. While the stop watch is still running (i.e.
327 | //! there was no call to stop()) the time elapsed in the current session is
328 | //! added to the accumulated total_time, otherwise total_time alone
329 | //! is returned.
330 | ////////////////////////////////////////////////////////////////////////////////
331 | inline float
332 | StopWatchLinux::getTime()
333 | {
334 |     // Stopped: the accumulated total is the answer
335 |     if (!running)
336 |     {
337 |         return total_time;
338 |     }
339 | 
340 |     // Running: include the time elapsed in the current session
341 |     const float elapsed = getDiffTime();
342 |     return total_time + elapsed;
343 | }
344 | 
345 | ////////////////////////////////////////////////////////////////////////////////
346 | //! Mean time in msec. of one finished run: the accumulated total time divided
347 | //! by the number of COMPLETED runs, or zero when no run has completed yet.
348 | ////////////////////////////////////////////////////////////////////////////////
349 | inline float
350 | StopWatchLinux::getAverageTime()
351 | {
352 |     return (clock_sessions <= 0) ? 0.0f : (total_time / clock_sessions);
353 | }
354 | ////////////////////////////////////////////////////////////////////////////////
355 | //! Time in msec. elapsed between start_time and the current time of day
356 | ////////////////////////////////////////////////////////////////////////////////
357 | inline float
358 | StopWatchLinux::getDiffTime()
359 | {
360 |     struct timeval now;
361 |     gettimeofday(&now, NULL);
362 | 
363 |     // whole seconds and the microsecond part, each converted to milli-seconds
364 |     const double sec_ms = 1000.0 * (now.tv_sec - start_time.tv_sec);
365 |     return (float)(sec_ms + (0.001 * (now.tv_usec - start_time.tv_usec)));
366 | }
367 | #endif // WIN32
368 | 
369 | ////////////////////////////////////////////////////////////////////////////////
370 | //! Timer functionality exported
371 | 
372 | ////////////////////////////////////////////////////////////////////////////////
373 | //! Create a new timer
374 | //! @return true if a timer has been created, otherwise false
375 | //! @param  timer_interface  receives the pointer to the newly created timer
376 | ////////////////////////////////////////////////////////////////////////////////
377 | inline bool
378 | sdkCreateTimer(StopWatchInterface **timer_interface)
379 | {
380 |     //printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
381 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
382 |     *timer_interface = (StopWatchInterface *)new StopWatchWin();
383 | #else
384 |     *timer_interface = (StopWatchInterface *)new StopWatchLinux();
385 | #endif
386 |     return *timer_interface != NULL;
387 | }
388 | 
389 | 
390 | ////////////////////////////////////////////////////////////////////////////////
391 | //! Delete a timer and set the caller's pointer to NULL
392 | //! @return always true
393 | //! @param  timer_interface  address of the timer pointer; may hold NULL
394 | ////////////////////////////////////////////////////////////////////////////////
395 | inline bool
396 | sdkDeleteTimer(StopWatchInterface **timer_interface)
397 | {
398 |     //printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
399 |     if (*timer_interface != NULL)
400 |     {
401 |         StopWatchInterface *doomed = *timer_interface;
402 |         *timer_interface = NULL;
403 |         delete doomed;
404 |     }
405 |     return true;
406 | }
407 | 
408 | ////////////////////////////////////////////////////////////////////////////////
409 | //! Start the given timer; does nothing when the pointer holds NULL
410 | //! @param timer_interface  address of the timer pointer. Always returns true.
411 | ////////////////////////////////////////////////////////////////////////////////
412 | inline bool
413 | sdkStartTimer(StopWatchInterface **timer_interface)
414 | {
415 |     //printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
416 |     StopWatchInterface *timer = *timer_interface;
417 | 
418 |     if (timer != NULL)
419 |         timer->start();
420 | 
421 |     return true;
422 | }
423 | 
424 | ////////////////////////////////////////////////////////////////////////////////
425 | //! Stop the given timer without resetting it; no-op when the pointer holds NULL
426 | //! @param timer_interface  address of the timer pointer. Always returns true.
427 | ////////////////////////////////////////////////////////////////////////////////
428 | inline bool
429 | sdkStopTimer(StopWatchInterface **timer_interface)
430 | {
431 |     // printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
432 |     StopWatchInterface *timer = *timer_interface;
433 | 
434 |     if (timer != NULL)
435 |         timer->stop();
436 | 
437 |     return true;
438 | }
439 | 
440 | ////////////////////////////////////////////////////////////////////////////////
441 | //! Reset the counters of the given timer; no-op when the pointer holds NULL
442 | //! @param timer_interface  address of the timer pointer. Always returns true.
443 | ////////////////////////////////////////////////////////////////////////////////
444 | inline bool
445 | sdkResetTimer(StopWatchInterface **timer_interface)
446 | {
447 |     // printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
448 |     StopWatchInterface *timer = *timer_interface;
449 | 
450 |     if (timer != NULL)
451 |         timer->reset();
452 | 
453 |     return true;
454 | }
455 | 
456 | ////////////////////////////////////////////////////////////////////////////////
457 | //! Return the average time for timer execution as the total time
458 | //! for the timer divided by the number of completed (stopped) runs the timer
459 | //! has made.
460 | //! Excludes the current running time if the timer is currently running.
461 | //! @param timer_interface  address of the timer pointer; 0.0f if it holds NULL
462 | ////////////////////////////////////////////////////////////////////////////////
463 | inline float
464 | sdkGetAverageTimerValue(StopWatchInterface **timer_interface)
465 | {
466 |     //  printf("sdkGetAverageTimerValue called object %08x\n", (void *)*timer_interface);
467 |     StopWatchInterface *timer = *timer_interface;
468 | 
469 |     // no timer object: report zero
470 |     if (timer == NULL)
471 |     {
472 |         return 0.0f;
473 |     }
474 |     return timer->getAverageTime();
475 | }
476 | 
477 | ////////////////////////////////////////////////////////////////////////////////
478 | //! Total execution time for the timer over all runs since the last reset
479 | //! or timer creation.
480 | //! @param timer_interface  address of the timer pointer; 0.0f if it holds NULL
481 | ////////////////////////////////////////////////////////////////////////////////
482 | inline float
483 | sdkGetTimerValue(StopWatchInterface **timer_interface)
484 | {
485 |     // printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
486 |     StopWatchInterface *timer = *timer_interface;
487 | 
488 |     // no timer object: report zero
489 |     if (timer == NULL)
490 |     {
491 |         return 0.0f;
492 |     }
493 |     return timer->getTime();
494 | }
495 | 
496 | #endif // HELPER_TIMER_H
497 | 


--------------------------------------------------------------------------------
/fluidsGL/mainwindow.cpp:
--------------------------------------------------------------------------------
 1 | #include "mainwindow.h"
 2 | #include "glfluids.h"
 3 | 
 4 | #include <QtWidgets>
 5 | 
 6 | 
 7 | MainWindow::MainWindow()
 8 | {
 9 |     // The fluid view is hosted in a scroll area whose scroll bars are always off
10 |     auto fluidView = new GLFluids();
11 | 
12 |     auto viewArea = new QScrollArea;
13 |     viewArea->setWidget(fluidView);
14 |     viewArea->setWidgetResizable(true);
15 |     viewArea->setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOff);
16 |     viewArea->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOff);
17 |     viewArea->setSizePolicy(QSizePolicy::Ignored, QSizePolicy::Ignored);
18 |     viewArea->setMinimumSize(512, 512);
19 |     setCentralWidget(viewArea);
20 | 
21 |     auto menu = new QMenu(tr("File"), this);
22 |     menuBar()->addMenu(menu);
23 | 
24 |     auto exitItem = menu->addAction(tr("E&xit"));
25 |     exitItem->setShortcuts(QKeySequence::Quit);
26 |     connect(exitItem, &QAction::triggered, this, &QApplication::quit);
27 | 
28 |     auto resetItem = menu->addAction(tr("&Reset"));
29 |     resetItem->setShortcut(Qt::Key_R);
30 |     connect(resetItem, &QAction::triggered, fluidView, &GLFluids::reset);
31 | 
32 | //    fluidView->setFocusPolicy(Qt::StrongFocus);
33 | //    fluidView->setFocus();
34 | 
35 |     setWindowTitle(tr("Qt Fluids"));
36 |     resize(512, 512);
37 | }
38 | 


--------------------------------------------------------------------------------
/fluidsGL/mainwindow.h:
--------------------------------------------------------------------------------
 1 | #ifndef MAINWINDOW_H
 2 | #define MAINWINDOW_H
 3 | 
 4 | #include <QMainWindow>
 5 | 
 6 | // Main window of the Qt fluids example; it only declares a constructor
 7 | class MainWindow : public QMainWindow
 8 | {
 9 |     Q_OBJECT
10 | 
11 | public:
12 |     MainWindow();
13 | };
14 | 
15 | #endif // MAINWINDOW_H
16 | 


--------------------------------------------------------------------------------
/gears/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | find_package(OpenGL REQUIRED)
 2 | # pkg-config support, used below to look for SDL2
 3 | include(FindPkgConfig)
 4 | # Library with the gear rendering code that every front end below links against
 5 | add_library(gears gears.c)
 6 | target_link_libraries(gears ${OPENGL_LIBRARIES} m)
 7 | set_target_properties(gears PROPERTIES COMPILE_FLAGS "-std=c99")
 8 | # GLUT front end, built only when GLUT was found
 9 | find_package(GLUT)
10 | if(GLUT_FOUND)
11 |     include_directories(${OPENGL_INCLUDE_DIR} ${GLUT_INCLUDE_DIR})
12 |     add_executable(gears_GLUT gears_GLUT.cpp)
13 |     target_link_libraries(gears_GLUT ${GLUT_LIBRARIES} gears ${OPENGL_LIBRARIES} m)
14 | endif(GLUT_FOUND)
15 | # SDL2 front end, built only when pkg-config finds sdl2
16 | pkg_search_module(SDL2 sdl2)
17 | if(SDL2_FOUND)
18 |     include_directories(${SDL2_INCLUDE_DIRS})
19 |     add_executable(gears_SDL gears_SDL.c)
20 |     target_link_libraries(gears_SDL ${SDL2_LIBRARIES} gears)
21 | endif(SDL2_FOUND)
22 | # Qt5 front end, built only when Qt5Widgets was found. NOTE(review): FILES_MOC is not set anywhere in this file
23 | find_package(Qt5Widgets)
24 | if(Qt5Widgets_FOUND)
25 |     find_package(Qt5OpenGL REQUIRED)
26 |     set(CMAKE_AUTOMOC TRUE)
27 |     include_directories(${Qt5Widgets_INCLUDE_DIRS} ${Qt5OpenGL_INCLUDE_DIRS})
28 |     add_executable(gears_Qt ${FILES_MOC} gears_Qt.cpp mainwindow.cpp glwidget.cpp)
29 |     target_link_libraries(gears_Qt Qt5::Widgets Qt5::OpenGL gears)
30 |     set_target_properties(gears_Qt PROPERTIES COMPILE_FLAGS "-std=c++11")
31 | endif(Qt5Widgets_FOUND)
32 | 


--------------------------------------------------------------------------------
/gears/gears.c:
--------------------------------------------------------------------------------
  1 | #include "gears.h"
  2 | 
  3 | #include <math.h>
  4 | 
  5 | // Set up lighting and depth testing, and build the three gear display lists into g
  6 | void gears_initialize(Gears *g)
  7 | {
  8 |     static const GLfloat lightPos[4] = { 5.0f, 5.0f, 10.0f, 1.0f };     // position given to GL_LIGHT0
  9 |     static const GLfloat reflectance1[4] = { 0.8f, 0.1f, 0.0f, 1.0f };  // material of gear1
 10 |     static const GLfloat reflectance2[4] = { 0.0f, 0.8f, 0.2f, 1.0f };  // material of gear2
 11 |     static const GLfloat reflectance3[4] = { 0.2f, 0.2f, 1.0f, 1.0f };  // material of gear3
 12 |     // Lighting with a single light source, plus depth testing
 13 |     glLightfv(GL_LIGHT0, GL_POSITION, lightPos);
 14 |     glEnable(GL_LIGHTING);
 15 |     glEnable(GL_LIGHT0);
 16 |     glEnable(GL_DEPTH_TEST);
 17 |     // One display list per gear; the list ids are stored in g
 18 |     g->gear1 = gears_make(reflectance1, 1.0, 4.0, 1.0, 0.7, 20);
 19 |     g->gear2 = gears_make(reflectance2, 0.5, 2.0, 2.0, 0.7, 10);
 20 |     g->gear3 = gears_make(reflectance3, 1.3, 2.0, 0.5, 0.7, 10);
 21 |     // Let GL normalize the normals; clear colour is opaque black
 22 |     glEnable(GL_NORMALIZE);
 23 |     glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
 24 | }
 25 | // Clear the buffers and draw the three gears using the rotation angles stored in g
 26 | void gears_paint(const Gears *g)
 27 | {
 28 |     glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
 29 |     // View rotation; the angles in g are divided by 16 before being passed to glRotated
 30 |     glPushMatrix();
 31 |     glRotated(g->xRot / 16.0, 1.0, 0.0, 0.0);
 32 |     glRotated(g->yRot / 16.0, 0.0, 1.0, 0.0);
 33 |     glRotated(g->zRot / 16.0, 0.0, 0.0, 1.0);
 34 |     // gear2 is drawn with -2 times the angle of gear1, offset by -9
 35 |     gears_draw(g->gear1, -3.0, -2.0, 0.0, g->gear1Rot / 16.0);
 36 |     gears_draw(g->gear2, +3.1, -2.0, 0.0, -2.0 * (g->gear1Rot / 16.0) - 9.0);
 37 |     // gear3 is drawn after a further 90 degree rotation about X, with +2 times the angle of gear1
 38 |     glRotated(+90.0, 1.0, 0.0, 0.0);
 39 |     gears_draw(g->gear3, -3.1, -1.8, -2.2, +2.0 * (g->gear1Rot / 16.0) - 2.0);
 40 |     // Undo the rotations pushed above
 41 |     glPopMatrix();
 42 | }
 43 | // Center a square viewport in the width x height window and reset projection and modelview
 44 | void gears_resize(int width, int height)
 45 | {
 46 |     int side = MIN(width, height);  // edge length of the largest square that fits
 47 |     glViewport((width - side) / 2, (height - side) / 2, side, side);
 48 |     // Perspective frustum; the modelview then translates the scene by -40 along Z
 49 |     glMatrixMode(GL_PROJECTION);
 50 |     glLoadIdentity();
 51 |     glFrustum(-1.0, +1.0, -1.0, 1.0, 5.0, 60.0);
 52 |     glMatrixMode(GL_MODELVIEW);
 53 |     glLoadIdentity();
 54 |     glTranslated(0.0, 0.0, -40.0);
 55 | }
 56 | // Advance gear1 by 2 degrees (2 * 16 steps); wraps at one revolution (360 * 16) so the int counter cannot overflow
 57 | void gears_advance(Gears *g)
 58 | {
 59 |     g->gear1Rot = (g->gear1Rot + 2 * 16) % (360 * 16);
 60 | }
 61 | // Compile a display list holding the geometry of one gear wheel and return the list id
 62 | GLuint gears_make(const GLfloat *reflectance, GLdouble innerRadius,    // reflectance: passed to glMaterialfv
 63 |                   GLdouble outerRadius, GLdouble thickness,            // outerRadius: radius at mid-height of the teeth
 64 |                   GLdouble toothSize, GLint toothCount)                // toothSize: radial height of a tooth
 65 | {
 66 |     const double Pi = 3.14159265358979323846;
 67 | 
 68 |     GLuint list = glGenLists(1);
 69 |     glNewList(list, GL_COMPILE);
 70 |     glMaterialfv(GL_FRONT, GL_AMBIENT_AND_DIFFUSE, reflectance);
 71 | 
 72 |     GLdouble r0 = innerRadius;                       // radius of the centre hole
 73 |     GLdouble r1 = outerRadius - toothSize / 2.0;     // radius at the base of the teeth
 74 |     GLdouble r2 = outerRadius + toothSize / 2.0;     // radius at the tip of the teeth
 75 |     GLdouble delta = (2.0 * Pi / toothCount) / 4.0;  // a quarter of the angle taken by one tooth
 76 |     GLdouble z = thickness / 2.0;                    // the two faces lie at +z and -z
 77 | 
 78 |     glShadeModel(GL_FLAT);
 79 |     // The two flat faces: i == 0 is the face at +z, i == 1 the one at -z
 80 |     for (int i = 0; i < 2; ++i) {
 81 |         GLdouble sign = (i == 0) ? +1.0 : -1.0;
 82 | 
 83 |         glNormal3d(0.0, 0.0, sign);
 84 |         // ring between the hole (r0) and the base of the teeth (r1)
 85 |         glBegin(GL_QUAD_STRIP);
 86 |         for (int j = 0; j <= toothCount; ++j) {
 87 |             GLdouble angle = 2.0 * Pi * j / toothCount;
 88 |             glVertex3d(r0 * cos(angle), r0 * sin(angle), sign * z);
 89 |             glVertex3d(r1 * cos(angle), r1 * sin(angle), sign * z);
 90 |             glVertex3d(r0 * cos(angle), r0 * sin(angle), sign * z);
 91 |             glVertex3d(r1 * cos(angle + 3 * delta), r1 * sin(angle + 3 * delta), sign * z);
 92 |         }
 93 |         glEnd();
 94 |         // one quad per tooth, from the base (r1) out to the tip (r2)
 95 |         glBegin(GL_QUADS);
 96 |         for (int j = 0; j < toothCount; ++j) {
 97 |             GLdouble angle = 2.0 * Pi * j / toothCount;
 98 |             glVertex3d(r1 * cos(angle), r1 * sin(angle), sign * z);
 99 |             glVertex3d(r2 * cos(angle + delta), r2 * sin(angle + delta), sign * z);
100 |             glVertex3d(r2 * cos(angle + 2 * delta), r2 * sin(angle + 2 * delta), sign * z);
101 |             glVertex3d(r1 * cos(angle + 3 * delta), r1 * sin(angle + 3 * delta), sign * z);
102 |         }
103 |         glEnd();
104 |     }
105 |     // Outer rim: the sides of the teeth, each strip segment spanning from +z to -z
106 |     glBegin(GL_QUAD_STRIP);
107 |     for (int i = 0; i < toothCount; ++i) {
108 |         for (int j = 0; j < 2; ++j) {
109 |             GLdouble angle = 2.0 * Pi * (i + j / 2.0) / toothCount;
110 |             GLdouble s1 = r1;
111 |             GLdouble s2 = r2;
112 |             if (j == 1) {  // second half of the tooth: swap base and tip radius
113 |                 GLdouble tmp = s1; s1 = s2; s2 = tmp;
114 |             }
115 | 
116 |             glNormal3d(cos(angle), sin(angle), 0.0);
117 |             glVertex3d(s1 * cos(angle), s1 * sin(angle), +z);
118 |             glVertex3d(s1 * cos(angle), s1 * sin(angle), -z);
119 | 
120 |             glNormal3d(s2 * sin(angle + delta) - s1 * sin(angle),
121 |                        s1 * cos(angle) - s2 * cos(angle + delta), 0.0);
122 |             glVertex3d(s2 * cos(angle + delta), s2 * sin(angle + delta), +z);
123 |             glVertex3d(s2 * cos(angle + delta), s2 * sin(angle + delta), -z);
124 |         }
125 |     }
126 |     glVertex3d(r1, 0.0, +z);  // close the strip at angle 0
127 |     glVertex3d(r1, 0.0, -z);
128 |     glEnd();
129 | 
130 |     glShadeModel(GL_SMOOTH);
131 |     // Wall of the centre hole, smooth shaded, with normals pointing towards the axis
132 |     glBegin(GL_QUAD_STRIP);
133 |     for (int i = 0; i <= toothCount; ++i) {
134 |         GLdouble angle = i * 2.0 * Pi / toothCount;
135 |         glNormal3d(-cos(angle), -sin(angle), 0.0);
136 |         glVertex3d(r0 * cos(angle), r0 * sin(angle), +z);
137 |         glVertex3d(r0 * cos(angle), r0 * sin(angle), -z);
138 |     }
139 |     glEnd();
140 | 
141 |     glEndList();
142 | 
143 |     return list;
144 | }
145 | 
146 | // Draw display list `gear` translated by (dx, dy, dz) and rotated by `angle` about the Z axis
147 | void gears_draw(GLuint gear, GLdouble dx, GLdouble dy, GLdouble dz,
148 |                 GLdouble angle)
149 | {
150 |     glPushMatrix();                   // keep the caller's matrix untouched
151 |     glTranslated(dx, dy, dz);
152 |     glRotated(angle, 0.0, 0.0, 1.0);
153 |     glCallList(gear);
154 |     glPopMatrix();
155 | }
156 | 
157 | // Wrap *angle into the range 0 .. 360 * 16 (both ends included) by adding or removing whole turns
158 | void gears_normalize_angle(int *angle)
159 | {
160 |     int a = *angle;
161 |     for (; a < 0; a += 360 * 16) { }
162 |     for (; a > 360 * 16; a -= 360 * 16) { }
163 |     *angle = a;
164 | }
165 | 


--------------------------------------------------------------------------------
/gears/gears.h:
--------------------------------------------------------------------------------
 1 | #ifndef GEARS_H
 2 | #define GEARS_H
 3 | 
 4 | #ifdef __APPLE__
 5 | #include <OpenGL/gl.h>
 6 | #else
 7 | #include <GL/gl.h>
 8 | #endif
 9 | // State shared by the gears functions declared below
10 | typedef struct {
11 |     GLuint gear1;   // display list ids, assigned by gears_initialize()
12 |     GLuint gear2;
13 |     GLuint gear3;
14 |     int xRot;       // view rotation about X, Y and Z; gears_paint() divides by 16 to get degrees
15 |     int yRot;
16 |     int zRot;
17 |     int gear1Rot;   // animation angle of gear1, same unit as the view rotation
18 | } Gears;
19 | // Function-like min/max; note that an argument may be evaluated twice
20 | #define MIN(a,b) (((a)<(b))?(a):(b))
21 | #define MAX(a,b) (((a)>(b))?(a):(b))
22 | // Set up the GL state and build the three gear display lists
23 | void gears_initialize(Gears *g);
24 | // Draw one frame from the state in g
25 | void gears_paint(const Gears *g);
26 | // Adapt viewport and projection to a new window size
27 | void gears_resize(int width, int height);
28 | // Advance the gear animation by one step
29 | void gears_advance(Gears *g);
30 | // Build the display list of a single gear and return its id
31 | GLuint gears_make(const GLfloat *reflectance, GLdouble innerRadius,
32 |                   GLdouble outerRadius, GLdouble thickness,
33 |                   GLdouble toothSize, GLint toothCount);
34 | // Draw display list `gear` translated by (dx, dy, dz) and rotated by angle about Z
35 | void gears_draw(GLuint gear, GLdouble dx, GLdouble dy, GLdouble dz,
36 |                 GLdouble angle);
37 | // Wrap *angle into the range 0 .. 360 * 16
38 | void gears_normalize_angle(int *angle);
39 | 
40 | #endif // GEARS_H
41 | 


--------------------------------------------------------------------------------
/gears/gears_GLUT.cpp:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <string.h>
 5 | 
 6 | #ifdef __APPLE__
 7 | #include <GLUT/glut.h>
 8 | #else
 9 | #include <GL/glut.h>
10 | #endif
11 | 
12 | extern "C" {
13 | #include "gears.h"
14 | }
15 | 
16 | static Gears g = {0, 0, 0, 0, 0, 0, 0};
17 | 
18 | static void key(unsigned char k, int x, int y) {
19 |   // Keyboard callback: only Escape (code 27) is handled, it ends the program.
20 |   const unsigned char escapeKey = 27;
21 |   if (k != escapeKey) {
22 |     return;
23 |   }
24 |   exit(0);
25 | }
26 | // Display callback: paint the gears, then swap buffers. NOTE(review): names containing "__" are reserved in C++
27 | void __display(void) {
28 |   gears_paint(&g);
29 |   glutSwapBuffers();
30 | }
31 | // Idle callback: advance the animation by one step and request a redraw
32 | void idle(void) {
33 |   gears_advance(&g);
34 |   glutPostRedisplay();
35 | }
36 | 
37 | void visible(int vis) {
38 |   // Visibility callback: register the idle (animation) callback only while
39 |   // the window is visible, otherwise pass a null callback.
40 |   void (*callback)(void) = nullptr;
41 |   if (vis == GLUT_VISIBLE) callback = idle;
42 |   glutIdleFunc(callback);
43 | }
44 | // Entry point of the GLUT front end: open the window, register the callbacks, run the GLUT main loop
45 | int main(int argc, char *argv[]) {
46 |   glutInit(&argc, argv);
47 |   glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
48 |   // GLUT_DEPTH above: gears_initialize() enables GL_DEPTH_TEST, which needs a depth buffer
49 |   glutInitWindowPosition(100, 100);
50 |   glutInitWindowSize(300, 300);
51 |   glutCreateWindow("Gears GLUT");
52 |   // GL state and projection can only be set up once the window exists
53 |   gears_initialize(&g);
54 |   gears_resize(glutGet(GLUT_WINDOW_WIDTH), glutGet(GLUT_WINDOW_HEIGHT));
55 | 
56 |   glutDisplayFunc(__display);
57 |   glutReshapeFunc(gears_resize);
58 |   glutKeyboardFunc(key);
59 |   glutVisibilityFunc(visible);
60 | 
61 |   glutMainLoop();
62 |   return 0;
63 | }
64 | 


--------------------------------------------------------------------------------
/gears/gears_Qt.cpp:
--------------------------------------------------------------------------------
 1 | #include "mainwindow.h"
 2 | 
 3 | #include <QApplication>
 4 | // Entry point of the Qt front end: show the main window and run the Qt event loop
 5 | int main(int argc, char *argv[])
 6 | {
 7 |     QApplication app(argc, argv);
 8 |     MainWindow window;
 9 |     window.show();
10 |     return app.exec();
11 | }
12 | 


--------------------------------------------------------------------------------
/gears/gears_SDL.c:
--------------------------------------------------------------------------------
 1 | #include <SDL.h>
 2 | #include <SDL_opengl.h>
 3 | 
 4 | #include "gears.h"
 5 | 
 6 | /* Entry point of the SDL2 front end: opens a resizable OpenGL window and
 7 |  * animates the gears until a key is pressed or the window is closed.
 8 |  * The (argc, argv) signature is the one SDL expects of main. */
 9 | int main(int argc, char *argv[]){
10 |     (void)argc;
11 |     (void)argv;
12 | 
13 |     if (SDL_Init(SDL_INIT_VIDEO) != 0) {
14 |         SDL_Log("SDL_Init failed: %s", SDL_GetError());
15 |         return 1;
16 |     }
17 | 
18 |     SDL_Window *window = SDL_CreateWindow(
19 |                 "SDL2 Gears",
20 |                 SDL_WINDOWPOS_UNDEFINED,
21 |                 SDL_WINDOWPOS_UNDEFINED,
22 |                 640, 480,
23 |                 SDL_WINDOW_OPENGL|SDL_WINDOW_RESIZABLE
24 |                 );
25 |     if (window == NULL) {
26 |         SDL_Log("SDL_CreateWindow failed: %s", SDL_GetError());
27 |         SDL_Quit();
28 |         return 1;
29 |     }
30 | 
31 |     SDL_GLContext glcontext = SDL_GL_CreateContext(window);
32 |     if (glcontext == NULL) {
33 |         SDL_Log("SDL_GL_CreateContext failed: %s", SDL_GetError());
34 |         SDL_DestroyWindow(window);
35 |         SDL_Quit();
36 |         return 1;
37 |     }
38 | 
39 |     Gears g = {0, 0, 0, 0, 0, 0, 0};
40 | 
41 |     gears_initialize(&g);
42 |     int width; int height;
43 |     SDL_GetWindowSize(window, &width, &height);
44 |     gears_resize(width, height);
45 | 
46 |     int done = 0;
47 |     while(!done) {
48 |         SDL_Event e;
49 |         while(SDL_PollEvent(&e)) {
50 |             switch(e.type) {
51 |                 case SDL_KEYDOWN:   /* any key quits */
52 |                 case SDL_QUIT:
53 |                     done = 1;
54 |                     break;
55 |                 case SDL_WINDOWEVENT:
56 |                     /* the window is resizable: keep viewport and projection in sync */
57 |                     if (e.window.event == SDL_WINDOWEVENT_SIZE_CHANGED) {
58 |                         SDL_GetWindowSize(window, &width, &height);
59 |                         gears_resize(width, height);
60 |                     }
61 |                     break;
62 |                 default:
63 |                     break;
64 |             }
65 |         }
66 |         gears_paint(&g);
67 |         SDL_GL_SwapWindow(window);
68 |         SDL_Delay(10);
69 | 
70 |         gears_advance(&g);
71 |     }
72 | 
73 | 
74 |     SDL_GL_DeleteContext(glcontext);
75 |     SDL_DestroyWindow(window);
76 |     SDL_Quit();
77 | 
78 |     return 0;
79 | }
80 | 


--------------------------------------------------------------------------------
/gears/glwidget.cpp:
--------------------------------------------------------------------------------
 1 | #include "glwidget.h"
 2 | 
 3 | #include <QTimer>
 4 | #include <QMouseEvent>
 5 | 
 6 | #include <math.h>
 7 | // Zero the gear state and start a 20 ms timer that advances and repaints the gears
 8 | GLWidget::GLWidget(QWidget *parent)
 9 |     : QGLWidget(parent)
10 | {
11 |     g = Gears();
12 | 
13 |     auto animationTimer = new QTimer(this);
14 |     connect(animationTimer, &QTimer::timeout, [this]() {
15 |         gears_advance(&g);
16 |         updateGL();
17 |     });
18 |     animationTimer->start(20);
19 | }
20 | // Make this widget's GL context current and delete the three gear display lists
21 | GLWidget::~GLWidget()
22 | {
23 |     makeCurrent();
24 |     const GLuint gearLists[] = {g.gear1, g.gear2, g.gear3};
25 |     for (GLuint list : gearLists)
26 |         glDeleteLists(list, 1);
27 | }
28 | // Set the rotation about X (normalized first); emits xRotationChanged and repaints only on a change
29 | void GLWidget::setXRotation(int angle)
30 | {
31 |     gears_normalize_angle(&angle);
32 |     if (angle == g.xRot)
33 |         return;
34 |     g.xRot = angle;
35 |     emit xRotationChanged(angle);
36 |     updateGL();
37 | }
38 | // Set the rotation about Y (normalized first); emits yRotationChanged and repaints only on a change
39 | void GLWidget::setYRotation(int angle)
40 | {
41 |     gears_normalize_angle(&angle);
42 |     if (angle == g.yRot)
43 |         return;
44 |     g.yRot = angle;
45 |     emit yRotationChanged(angle);
46 |     updateGL();
47 | }
48 | // Set the rotation about Z (normalized first); emits zRotationChanged and repaints only on a change
49 | void GLWidget::setZRotation(int angle)
50 | {
51 |     gears_normalize_angle(&angle);
52 |     if (angle == g.zRot)
53 |         return;
54 |     g.zRot = angle;
55 |     emit zRotationChanged(angle);
56 |     updateGL();
57 | }
58 | // Forwards to gears_initialize() to set up the GL state and the gear display lists
59 | void GLWidget::initializeGL()
60 | {
61 |     gears_initialize(&g);
62 | }
63 | // Forwards to gears_paint() to draw the gears from the current state
64 | void GLWidget::paintGL()
65 | {
66 |     gears_paint(&g);
67 | }
68 | // Forwards the new widget size to gears_resize()
69 | void GLWidget::resizeGL(int width, int height)
70 | {
71 |     gears_resize(width, height);
72 | }
73 | // Remember where the press happened; mouseMoveEvent() measures movement from lastPos
74 | void GLWidget::mousePressEvent(QMouseEvent *event)
75 | {
76 |     lastPos = event->pos();
77 | }
78 | // Dragging rotates the view: left button about X and Y, right button about X and Z
79 | void GLWidget::mouseMoveEvent(QMouseEvent *event)
80 | {
81 |     const QPoint moved = event->pos() - lastPos;
82 |     const Qt::MouseButtons held = event->buttons();
83 | 
84 |     if (held & Qt::LeftButton) {
85 |         setXRotation(g.xRot + 8 * moved.y());
86 |         setYRotation(g.yRot + 8 * moved.x());
87 |     } else if (held & Qt::RightButton) {
88 |         setXRotation(g.xRot + 8 * moved.y());
89 |         setZRotation(g.zRot + 8 * moved.x());
90 |     }
91 |     lastPos = event->pos();
92 | }
93 | 


--------------------------------------------------------------------------------
/gears/glwidget.h:
--------------------------------------------------------------------------------
 1 | #ifndef GLWIDGET_H
 2 | #define GLWIDGET_H
 3 | 
 4 | #include <QGLWidget>
 5 | 
 6 | extern "C" {
 7 | #include "gears.h"
 8 | }
 9 | // QGLWidget that renders the gears and exposes the view rotation through slots and signals
10 | class GLWidget : public QGLWidget
11 | {
12 |     Q_OBJECT
13 | 
14 | public:
15 |     GLWidget(QWidget *parent = 0);
16 |     ~GLWidget();
17 |     // Current view rotation, read straight from the Gears state
18 |     int xRotation() const { return g.xRot; }
19 |     int yRotation() const { return g.yRot; }
20 |     int zRotation() const { return g.zRot; }
21 | 
22 | public slots:
23 |     void setXRotation(int angle);
24 |     void setYRotation(int angle);
25 |     void setZRotation(int angle);
26 | 
27 | signals:
28 |     void xRotationChanged(int angle);
29 |     void yRotationChanged(int angle);
30 |     void zRotationChanged(int angle);
31 | 
32 | protected:
33 |     void initializeGL();
34 |     void paintGL();
35 |     void resizeGL(int width, int height);
36 |     void mousePressEvent(QMouseEvent *event);
37 |     void mouseMoveEvent(QMouseEvent *event);
38 | 
39 | private:
40 |     Gears g;         // display list ids and rotation angles
41 |     QPoint lastPos;  // mouse position of the last press or move event
42 | };
43 | 
44 | #endif // GLWIDGET_H
45 | 


--------------------------------------------------------------------------------
/gears/mainwindow.cpp:
--------------------------------------------------------------------------------
 1 | #include "glwidget.h"
 2 | #include "mainwindow.h"
 3 | 
 4 | #include <QtWidgets>
 5 | 
 6 | 
 7 | MainWindow::MainWindow()
 8 | {
 9 |     // Gear view, hosted in a scroll area whose scroll bars are always off
10 |     auto gearsView = new GLWidget;
11 | 
12 |     auto viewArea = new QScrollArea;
13 |     viewArea->setWidget(gearsView);
14 |     viewArea->setWidgetResizable(true);
15 |     viewArea->setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOff);
16 |     viewArea->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOff);
17 |     viewArea->setSizePolicy(QSizePolicy::Ignored, QSizePolicy::Ignored);
18 |     viewArea->setMinimumSize(50, 50);
19 |     // Builds a slider over 0 .. 360 * 16 that is connected both ways to one rotation axis
20 |     auto makeAxisSlider = [&](void (GLWidget::*changedSignal)(int),
21 |                               void (GLWidget::*setterSlot)(int)) -> QSlider* {
22 |         auto axisSlider = new QSlider(Qt::Horizontal);
23 |         axisSlider->setRange(0, 360 * 16);
24 |         axisSlider->setSingleStep(16);
25 |         axisSlider->setPageStep(15 * 16);
26 |         axisSlider->setTickInterval(15 * 16);
27 |         axisSlider->setTickPosition(QSlider::TicksRight);
28 |         connect(axisSlider, &QSlider::valueChanged, gearsView, setterSlot);
29 |         connect(gearsView, changedSignal, axisSlider, &QSlider::setValue);
30 | 
31 |         return axisSlider;
32 |     };
33 | 
34 |     auto xSlider = makeAxisSlider(&GLWidget::xRotationChanged, &GLWidget::setXRotation);
35 |     auto ySlider = makeAxisSlider(&GLWidget::yRotationChanged, &GLWidget::setYRotation);
36 |     auto zSlider = makeAxisSlider(&GLWidget::zRotationChanged, &GLWidget::setZRotation);
37 | 
38 |     // Actions and menus
39 |     auto exitAction = new QAction(tr("E&xit"), this);
40 |     exitAction->setShortcuts(QKeySequence::Quit);
41 |     connect(exitAction, &QAction::triggered, this, &MainWindow::close);
42 | 
43 |     auto aboutQtAction = new QAction(tr("About &Qt"), this);
44 |     connect(aboutQtAction, &QAction::triggered, qApp, &QApplication::aboutQt);
45 | 
46 |     auto fileMenu = menuBar()->addMenu(tr("&File"));
47 |     fileMenu->addSeparator();
48 |     fileMenu->addAction(exitAction);
49 | 
50 |     auto helpMenu = menuBar()->addMenu(tr("&Help"));
51 |     helpMenu->addAction(aboutQtAction);
52 |     // Central widget: the gear view stacked above the three sliders
53 |     auto column = new QVBoxLayout;
54 |     column->addWidget(viewArea);
55 |     column->addWidget(xSlider);
56 |     column->addWidget(ySlider);
57 |     column->addWidget(zSlider);
58 | 
59 |     auto container = new QWidget;
60 |     setCentralWidget(container);
61 |     container->setLayout(column);
62 |     // Initial slider values; the connections made above pass them on to the gear view
63 |     xSlider->setValue(15 * 16);
64 |     ySlider->setValue(345 * 16);
65 |     zSlider->setValue(0 * 16);
66 | 
67 |     setWindowTitle(tr("Qt Gears"));
68 |     resize(400, 300);
69 | }
70 | 


--------------------------------------------------------------------------------
/gears/mainwindow.h:
--------------------------------------------------------------------------------
 1 | #ifndef MAINWINDOW_H
 2 | #define MAINWINDOW_H
 3 | 
 4 | #include <QMainWindow>
 5 | 
 6 | // Main window of the Qt gears example; it only declares a constructor
 7 | class MainWindow : public QMainWindow
 8 | {
 9 |     Q_OBJECT
10 | 
11 | public:
12 |     MainWindow();
13 | };
14 | 
15 | #endif // MAINWINDOW_H
16 | 


--------------------------------------------------------------------------------
/thrust/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | find_package(CUDA REQUIRED)
 2 | # On Apple platforms the C++ flags select libstdc++ as the standard library
 3 | if(APPLE)
 4 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
 5 | endif()
 6 | # One executable per Thrust example source file
 7 | cuda_add_executable(version version.cu)
 8 | 
 9 | cuda_add_executable(device-vector device-vector.cu)
10 | 
11 | cuda_add_executable(transformations transformations.cu)
12 | 
13 | cuda_add_executable(transform_reduce transform_reduce.cu)
14 | 
15 | cuda_add_executable(sum sum.cu)
16 | 
17 | cuda_add_executable(sort sort.cu)
18 | 
19 | cuda_add_executable(random random.cu)
20 | 


--------------------------------------------------------------------------------
/thrust/device-vector.cu:
--------------------------------------------------------------------------------
 1 | #include <thrust/host_vector.h>
 2 | #include <thrust/device_vector.h>
 3 | 
 4 | #include <iostream>
 5 | 
 6 | int main(void) {
 7 | 
 8 |     // H is a host vector that starts out with four integers
 9 |     const int initial[4] = {14, 20, 38, 46};
10 |     thrust::host_vector<int> H(initial, initial + 4);
11 | 
12 |     std::cout << "H has size " << H.size() << std::endl;
13 | 
14 |     // print contents of H
15 |     for(int idx = 0; idx < H.size(); ++idx)
16 |         std::cout << "H[" << idx << "] = " << H[idx] << std::endl;
17 | 
18 |     // shrink H to its first two elements
19 |     H.resize(2);
20 | 
21 |     std::cout << "H now has size " << H.size() << std::endl;
22 | 
23 |     // constructing device_vector D from H copies the elements
24 |     thrust::device_vector<int> D = H;
25 | 
26 |     // elements of D can be assigned individually
27 |     D[0] = 99;
28 |     D[1] = 88;
29 | 
30 |     // print contents of D
31 |     for(int idx = 0; idx < D.size(); ++idx)
32 |         std::cout << "D[" << idx << "] = " << D[idx] << std::endl;
33 | 
34 |     // H and D are destroyed automatically when main returns
35 |     return 0;
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/thrust/random.cu:
--------------------------------------------------------------------------------
 1 | #include <thrust/host_vector.h>
 2 | #include <thrust/device_vector.h>
 3 | #include <thrust/generate.h>
 4 | #include <thrust/reduce.h>
 5 | #include <thrust/functional.h>
 6 | #include <algorithm>
 7 | #include <cstdlib>
 8 | #include <iostream>
 9 | 
10 | // Sums 100 rand() values on the device and prints the total.
11 | int main(void)
12 | {
13 |   // generate random data serially on the host
14 |   thrust::host_vector<int> h_vec(100);
15 |   std::generate(h_vec.begin(), h_vec.end(), rand);
16 | 
17 |   // transfer to device
18 |   thrust::device_vector<int> d_vec = h_vec;
19 | 
20 |   // Accumulate in long long: every rand() result may be as large as RAND_MAX,
21 |   // so the sum of 100 of them can exceed what an int holds. The type of the
22 |   // initial value (0LL) selects the accumulator type used by thrust::reduce.
23 |   long long x = thrust::reduce(d_vec.begin(), d_vec.end(), 0LL, thrust::plus<long long>());
24 |   std::cout << x << std::endl;
25 |   return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/thrust/sort.cu:
--------------------------------------------------------------------------------
 1 | #include <thrust/host_vector.h>
 2 | #include <thrust/device_vector.h>
 3 | #include <thrust/generate.h>
 4 | #include <thrust/sort.h>
 5 | #include <thrust/copy.h>
 6 | #include <algorithm>
 7 | #include <cstdlib>
 8 | 
 9 | int main(void)
10 | {
11 |   // 32 << 15 == 1,048,576 keys (about 1M), filled serially on the host with rand()
12 |   const int key_count = 32 << 15;
13 |   thrust::host_vector<int> h_vec(key_count);
14 |   std::generate(h_vec.begin(), h_vec.end(), rand);
15 | 
16 |   // upload the keys to the device
17 |   thrust::device_vector<int> d_vec(h_vec);
18 | 
19 |   // sort on the device (earlier comment quoted 846M keys/s on a GeForce GTX 480; not verified here)
20 |   thrust::sort(d_vec.begin(), d_vec.end());
21 | 
22 |   // download the sorted keys back into h_vec
23 |   thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
24 |   return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/thrust/sum.cu:
--------------------------------------------------------------------------------
 1 | #include <thrust/host_vector.h>
 2 | #include <thrust/device_vector.h>
 3 | #include <thrust/generate.h>
 4 | #include <thrust/reduce.h>
 5 | #include <thrust/functional.h>
 6 | #include <thrust/random.h>
 7 | #include <iostream>
 8 | 
 9 | // Returns the next value drawn from a uniform_int_distribution<int>(0, 9999).
10 | // Engine and distribution are function-local statics; the engine is
11 | // default-constructed and never reseeded in this file.
12 | int my_rand(void)
13 | {
14 |   static thrust::default_random_engine rng;
15 |   static thrust::uniform_int_distribution<int> dist(0, 9999);
16 |   return dist(rng);
17 | }
18 | 
19 | // Fills a host vector with my_rand() values, sums them on the device and
20 | // prints the result.
21 | int main(void)
22 | {
23 |   // generate random data on the host
24 |   thrust::host_vector<int> h_vec(100);
25 |   thrust::generate(h_vec.begin(), h_vec.end(), my_rand);
26 | 
27 |   // transfer to device
28 |   thrust::device_vector<int> d_vec = h_vec;
29 | 
30 |   // initial value of the reduction
31 |   int init = 0;
32 | 
33 |   // binary operation used to reduce values
34 |   thrust::plus<int> binary_op;
35 | 
36 |   // compute sum on the device (100 values drawn with bounds 0 and 9999, so an int is wide enough)
37 |   int sum = thrust::reduce(d_vec.begin(), d_vec.end(), init, binary_op);
38 | 
39 |   // print the sum
40 |   std::cout << "sum is " << sum << std::endl;
41 | 
42 |   return 0;
43 | }
44 | 


--------------------------------------------------------------------------------
/thrust/transform_reduce.cu:
--------------------------------------------------------------------------------
 1 | #include <thrust/transform_reduce.h>
 2 | #include <thrust/functional.h>
 3 | #include <thrust/device_vector.h>
 4 | #include <thrust/host_vector.h>
 5 | #include <cmath>
 6 | #include <iostream>
 7 | 
 8 | // square<T> computes the square of a number f(x) -> x*x
 9 | // (callable from both host and device code)
10 | template <typename T>
11 | struct square
12 | {
13 |     __host__ __device__
14 |     T operator()(const T& x) const {
15 |         return x * x;
16 |     }
17 | };
18 | 
19 | // Computes the Euclidean norm of (1, 2, 3, 4) and prints it.
20 | int main(void)
21 | {
22 |     // initialize host array; float literals avoid double -> float conversions
23 |     const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
24 | 
25 |     // transfer to device
26 |     thrust::device_vector<float> d_x(x, x + 4);
27 | 
28 |     // setup arguments
29 |     square<float>       unary_op;
30 |     thrust::plus<float> binary_op;
31 |     float init = 0.0f;
32 | 
33 |     // sum of squares via transform_reduce, then the square root on the host
34 |     float sum_of_squares = thrust::transform_reduce(d_x.begin(), d_x.end(), unary_op, init, binary_op);
35 |     float norm = std::sqrt(sum_of_squares);
36 | 
37 |     std::cout << norm << std::endl;
38 | 
39 |     return 0;
40 | }
41 | 

--------------------------------------------------------------------------------
/thrust/transformations.cu:
--------------------------------------------------------------------------------
 1 | #include <thrust/device_vector.h>
 2 | #include <thrust/transform.h>
 3 | #include <thrust/sequence.h>
 4 | #include <thrust/copy.h>
 5 | #include <thrust/fill.h>
 6 | #include <thrust/replace.h>
 7 | #include <thrust/functional.h>
 8 | #include <iostream>
 9 | 
10 | // Binary functor returning a * x + y for a scale factor fixed at construction.
11 | struct saxpy_functor
12 | {
13 |     const float a;  // scale factor applied to the first operand
14 | 
15 |     saxpy_functor(float factor) : a(factor) {}
16 | 
17 |     // usable from host and device code
18 |     __host__ __device__
19 |     float operator()(const float& x, const float& y) const { return a * x + y; }
20 | };
21 | 
22 | void saxpy_fast(float A, thrust::device_vector<float>& X, thrust::device_vector<float>& Y)
23 | {
24 |     const saxpy_functor axpy(A);  // carries the scale factor A into the transform
25 |     thrust::transform(X.begin(), X.end(), Y.begin(), Y.begin(), axpy);  // Y <- A * X + Y in one pass
26 | }
27 | 
28 | void saxpy_slow(float A, thrust::device_vector<float>& X, thrust::device_vector<float>& Y)
29 | {
30 |     // Same result as saxpy_fast, built from three separate passes
31 |     // (fill, multiply, add) through a temporary vector as long as X.
32 |     thrust::device_vector<float> scratch(X.size());
33 |     thrust::fill(scratch.begin(), scratch.end(), A);  // scratch <- A
34 | 
35 |     thrust::multiplies<float> times;
36 |     thrust::transform(X.begin(), X.end(), scratch.begin(), scratch.begin(), times);  // scratch <- A * X
37 | 
38 |     thrust::plus<float> add;
39 |     thrust::transform(scratch.begin(), scratch.end(), Y.begin(), Y.begin(), add);  // Y <- A * X + Y
40 | }
41 | 
42 | int main(void)
43 | {
44 |     // the three integer vectors share one length
45 |     const int count = 10;
46 |     thrust::device_vector<int> X(count), Y(count), Z(count);
47 | 
48 |     // X <- 0, 1, 2, 3, ...
49 |     thrust::sequence(X.begin(), X.end());
50 | 
51 |     // Y <- -X (overwritten again by the modulus step below)
52 |     thrust::negate<int> negate_op;
53 |     thrust::transform(X.begin(), X.end(), Y.begin(), negate_op);
54 | 
55 |     // Z <- 2 in every element
56 |     thrust::fill(Z.begin(), Z.end(), 2);
57 | 
58 |     // Y <- X mod Z, i.e. X mod 2
59 |     thrust::modulus<int> mod_op;
60 |     thrust::transform(X.begin(), X.end(), Z.begin(), Y.begin(), mod_op);
61 | 
62 |     // every 1 in Y becomes 10
63 |     thrust::replace(Y.begin(), Y.end(), 1, 10);
64 | 
65 |     // write Y to stdout, one value per line
66 |     std::ostream_iterator<int> out(std::cout, "\n");
67 |     thrust::copy(Y.begin(), Y.end(), out);
68 | 
69 |     // float operands for the two saxpy variants
70 |     thrust::device_vector<float> x(10);
71 |     thrust::device_vector<float> y(10);
72 | 
73 |     saxpy_fast(2.0, x, y);  // y <- 2.0 * x + y
74 | 
75 |     saxpy_slow(0.5, x, y);  // y <- 0.5 * x + y
76 | 
77 |     return 0;
78 | }
79 | 


--------------------------------------------------------------------------------
/thrust/version.cu:
--------------------------------------------------------------------------------
 1 | #include <thrust/version.h>
 2 | #include <iostream>
 3 | 
 4 | int main(void)
 5 | {
 6 |   // Thrust version numbers come from macros defined in <thrust/version.h>
 7 |   const int major = THRUST_MAJOR_VERSION, minor = THRUST_MINOR_VERSION;
 8 | 
 9 |   // print them as "Thrust v<major>.<minor>"
10 |   std::cout << "Thrust v" << major << "." << minor << std::endl;
11 |   return 0;
12 | }
13 | 


--------------------------------------------------------------------------------