├── .gitignore ├── CMakeLists.txt ├── README.rst ├── cuda ├── CMakeLists.txt ├── cuda-test.cu ├── fft.cu └── hello-cuda.cu ├── fluidsGL ├── CMakeLists.txt ├── defines.h ├── fluidsGL.cpp ├── fluidsGLFW.cpp ├── fluidsGL_kernels.cu ├── fluidsGL_kernels.h ├── fluidsQt.cpp ├── glfluids.cpp ├── glfluids.h ├── helper_timer.h ├── mainwindow.cpp └── mainwindow.h ├── gears ├── CMakeLists.txt ├── gears.c ├── gears.h ├── gears_GLUT.cpp ├── gears_Qt.cpp ├── gears_SDL.c ├── glwidget.cpp ├── glwidget.h ├── mainwindow.cpp └── mainwindow.h └── thrust ├── CMakeLists.txt ├── device-vector.cu ├── random.cu ├── sort.cu ├── sum.cu ├── transform_reduce.cu ├── transformations.cu └── version.cu /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | *.user 3 | *~ 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11) 2 | 3 | project(cuda-examples) 4 | 5 | add_subdirectory(cuda) 6 | 7 | add_subdirectory(gears) 8 | 9 | add_subdirectory(thrust) 10 | 11 | add_subdirectory(fluidsGL) 12 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | CUDA Examples 2 | ====================== 3 | 4 | Some CUDA, CUFFT and OpenGL examples. 5 | 6 | Prerequisites 7 | -------------- 8 | 9 | First install the prerequisites 10 | 11 | On Ubuntu 12 | 13 | :: 14 | 15 | sudo apt-get install cmake nvidia-cuda-toolkit freeglut3-dev libxmu-dev libxi-dev libsdl1.2-dev 16 | 17 | On Arch Linux 18 | 19 | :: 20 | 21 | sudo pacman -S cmake cuda freeglut glu sdl2 22 | 23 | Build 24 | ------ 25 | 26 | :: 27 | 28 | mkdir build 29 | cd build 30 | cmake .. 
31 | make 32 | 33 | 34 | Run 35 | ------- 36 | 37 | :: 38 | 39 | fluidsGL/fluidsGL 40 | -------------------------------------------------------------------------------- /cuda/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(CUDA REQUIRED) 2 | 3 | cuda_add_executable(hello-cuda hello-cuda.cu) 4 | 5 | cuda_add_executable(cuda-test cuda-test.cu) 6 | 7 | cuda_add_executable(fft fft.cu) 8 | 9 | cuda_add_cufft_to_target(fft) 10 | 11 | -------------------------------------------------------------------------------- /cuda/cuda-test.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | __global__ void print(char *a,int N) 6 | { 7 | char p[12]="Hello CUDA\n"; 8 | int idx=blockIdx.x*blockDim.x + threadIdx.x; 9 | if(idx>>(a_d,N) 30 | ; 31 | cudaMemcpy(a_h,a_d,sizeof(char)*N,cudaMemcpyDeviceToHost) 32 | ; 33 | for(int i=0;i 5 | #include 6 | #include 7 | #include 8 | 9 | // includes, project 10 | #include 11 | 12 | // Complex data type 13 | typedef float2 Complex; 14 | static __device__ __host__ inline Complex ComplexAdd(Complex, Complex); 15 | static __device__ __host__ inline Complex ComplexScale(Complex, float); 16 | static __device__ __host__ inline Complex ComplexMul(Complex, Complex); 17 | static __global__ void ComplexPointwiseMulAndScale(Complex*, const Complex*, int, float); 18 | 19 | // Filtering functions 20 | void Convolve(const Complex*, int, const Complex*, int, Complex*); 21 | 22 | // Padding functions 23 | int PadData(const Complex*, Complex**, int, 24 | const Complex*, Complex**, int); 25 | 26 | //////////////////////////////////////////////////////////////////////////////// 27 | // declaration, forward 28 | void runTest(int argc, char** argv); 29 | 30 | // The filter size is assumed to be a number smaller than the signal size 31 | #define SIGNAL_SIZE 50 32 | #define FILTER_KERNEL_SIZE 11 33 | 34 | 
//////////////////////////////////////////////////////////////////////////////// 35 | // Program main 36 | //////////////////////////////////////////////////////////////////////////////// 37 | int main(int argc, char** argv) 38 | { 39 | runTest(argc, argv); 40 | } 41 | 42 | //////////////////////////////////////////////////////////////////////////////// 43 | //! Run a simple test for CUDA 44 | //////////////////////////////////////////////////////////////////////////////// 45 | void runTest(int argc, char** argv) 46 | { 47 | printf("[simpleCUFFT] is starting...\n"); 48 | 49 | // Allocate host memory for the signal 50 | Complex* h_signal = (Complex*)malloc(sizeof(Complex) * SIGNAL_SIZE); 51 | // Initialize the memory for the signal 52 | for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) { 53 | h_signal[i].x = rand() / (float)RAND_MAX; 54 | h_signal[i].y = 0; 55 | } 56 | 57 | // Allocate host memory for the filter 58 | Complex* h_filter_kernel = (Complex*)malloc(sizeof(Complex) * FILTER_KERNEL_SIZE); 59 | // Initialize the memory for the filter 60 | for (unsigned int i = 0; i < FILTER_KERNEL_SIZE; ++i) { 61 | h_filter_kernel[i].x = rand() / (float)RAND_MAX; 62 | h_filter_kernel[i].y = 0; 63 | } 64 | 65 | // Pad signal and filter kernel 66 | Complex* h_padded_signal; 67 | Complex* h_padded_filter_kernel; 68 | int new_size = PadData(h_signal, &h_padded_signal, SIGNAL_SIZE, 69 | h_filter_kernel, &h_padded_filter_kernel, FILTER_KERNEL_SIZE); 70 | int mem_size = sizeof(Complex) * new_size; 71 | 72 | // Allocate device memory for signal 73 | Complex* d_signal; 74 | cudaMalloc((void**)&d_signal, mem_size); 75 | // Copy host memory to device 76 | cudaMemcpy(d_signal, h_padded_signal, mem_size, 77 | cudaMemcpyHostToDevice); 78 | 79 | // Allocate device memory for filter kernel 80 | Complex* d_filter_kernel; 81 | cudaMalloc((void**)&d_filter_kernel, mem_size); 82 | 83 | // Copy host memory to device 84 | cudaMemcpy(d_filter_kernel, h_padded_filter_kernel, mem_size, 85 | 
cudaMemcpyHostToDevice); 86 | 87 | // CUFFT plan 88 | cufftHandle plan; 89 | cufftPlan1d(&plan, new_size, CUFFT_C2C, 1); 90 | 91 | // Transform signal and kernel 92 | printf("Transforming signal cufftExecC2C\n"); 93 | cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD); 94 | cufftExecC2C(plan, (cufftComplex *)d_filter_kernel, (cufftComplex *)d_filter_kernel, CUFFT_FORWARD); 95 | 96 | // Multiply the coefficients together and normalize the result 97 | printf("Launching ComplexPointwiseMulAndScale<<< >>>\n"); 98 | ComplexPointwiseMulAndScale<<<32, 256>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size); 99 | 100 | // Transform signal back 101 | printf("Transforming signal back cufftExecC2C\n"); 102 | cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE); 103 | 104 | // Copy device memory to host 105 | Complex* h_convolved_signal = h_padded_signal; 106 | cudaMemcpy(h_convolved_signal, d_signal, mem_size, 107 | cudaMemcpyDeviceToHost); 108 | 109 | // Allocate host memory for the convolution result 110 | Complex* h_convolved_signal_ref = (Complex*)malloc(sizeof(Complex) * SIGNAL_SIZE); 111 | 112 | // Convolve on the host 113 | Convolve(h_signal, SIGNAL_SIZE, 114 | h_filter_kernel, FILTER_KERNEL_SIZE, 115 | h_convolved_signal_ref); 116 | 117 | //Destroy CUFFT context 118 | cufftDestroy(plan); 119 | 120 | // cleanup memory 121 | free(h_signal); 122 | free(h_filter_kernel); 123 | free(h_padded_signal); 124 | free(h_padded_filter_kernel); 125 | free(h_convolved_signal_ref); 126 | cudaFree(d_signal); 127 | cudaFree(d_filter_kernel); 128 | 129 | } 130 | 131 | // Pad data 132 | int PadData(const Complex* signal, Complex** padded_signal, int signal_size, 133 | const Complex* filter_kernel, Complex** padded_filter_kernel, int filter_kernel_size) 134 | { 135 | int minRadius = filter_kernel_size / 2; 136 | int maxRadius = filter_kernel_size - minRadius; 137 | int new_size = signal_size + maxRadius; 138 | 139 | 
// Pad signal 140 | Complex* new_data = (Complex*)malloc(sizeof(Complex) * new_size); 141 | memcpy(new_data + 0, signal, signal_size * sizeof(Complex)); 142 | memset(new_data + signal_size, 0, (new_size - signal_size) * sizeof(Complex)); 143 | *padded_signal = new_data; 144 | 145 | // Pad filter 146 | new_data = (Complex*)malloc(sizeof(Complex) * new_size); 147 | memcpy(new_data + 0, filter_kernel + minRadius, maxRadius * sizeof(Complex)); 148 | memset(new_data + maxRadius, 0, (new_size - filter_kernel_size) * sizeof(Complex)); 149 | memcpy(new_data + new_size - minRadius, filter_kernel, minRadius * sizeof(Complex)); 150 | *padded_filter_kernel = new_data; 151 | 152 | return new_size; 153 | } 154 | 155 | //////////////////////////////////////////////////////////////////////////////// 156 | // Filtering operations 157 | //////////////////////////////////////////////////////////////////////////////// 158 | 159 | // Computes convolution on the host 160 | void Convolve(const Complex* signal, int signal_size, 161 | const Complex* filter_kernel, int filter_kernel_size, 162 | Complex* filtered_signal) 163 | { 164 | int minRadius = filter_kernel_size / 2; 165 | int maxRadius = filter_kernel_size - minRadius; 166 | // Loop over output element indices 167 | for (int i = 0; i < signal_size; ++i) { 168 | filtered_signal[i].x = filtered_signal[i].y = 0; 169 | // Loop over convolution indices 170 | for (int j = - maxRadius + 1; j <= minRadius; ++j) { 171 | int k = i + j; 172 | if (k >= 0 && k < signal_size) 173 | filtered_signal[i] = ComplexAdd(filtered_signal[i], ComplexMul(signal[k], filter_kernel[minRadius - j])); 174 | } 175 | } 176 | } 177 | 178 | //////////////////////////////////////////////////////////////////////////////// 179 | // Complex operations 180 | //////////////////////////////////////////////////////////////////////////////// 181 | 182 | // Complex addition 183 | static __device__ __host__ inline Complex ComplexAdd(Complex a, Complex b) 184 | { 185 | Complex 
c; 186 | c.x = a.x + b.x; 187 | c.y = a.y + b.y; 188 | return c; 189 | } 190 | 191 | // Complex scale 192 | static __device__ __host__ inline Complex ComplexScale(Complex a, float s) 193 | { 194 | Complex c; 195 | c.x = s * a.x; 196 | c.y = s * a.y; 197 | return c; 198 | } 199 | 200 | // Complex multiplication 201 | static __device__ __host__ inline Complex ComplexMul(Complex a, Complex b) 202 | { 203 | Complex c; 204 | c.x = a.x * b.x - a.y * b.y; 205 | c.y = a.x * b.y + a.y * b.x; 206 | return c; 207 | } 208 | 209 | // Complex pointwise multiplication 210 | static __global__ void ComplexPointwiseMulAndScale(Complex* a, const Complex* b, int size, float scale) 211 | { 212 | const int numThreads = blockDim.x * gridDim.x; 213 | const int threadID = blockIdx.x * blockDim.x + threadIdx.x; 214 | for (int i = threadID; i < size; i += numThreads) 215 | a[i] = ComplexScale(ComplexMul(a[i], b[i]), scale); 216 | } 217 | -------------------------------------------------------------------------------- /cuda/hello-cuda.cu: -------------------------------------------------------------------------------- 1 | /* 2 | ** Hello World using CUDA 3 | ** 4 | ** The string "Hello World!" 
is mangled then restored using a common CUDA idiom 5 | ** 6 | ** Byron Galbraith 7 | ** 2009-02-18 8 | */ 9 | #include 10 | #include 11 | 12 | // Device kernel 13 | __global__ void 14 | helloWorld(char* str) 15 | { 16 | // determine where in the thread grid we are 17 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 18 | 19 | // unmangle output 20 | str[idx] += idx; 21 | } 22 | 23 | // Host function 24 | int 25 | main(int argc, char** argv) 26 | { 27 | int i; 28 | 29 | // desired output 30 | char str[] = "Hello World!"; 31 | 32 | // mangle contents of output 33 | // the null character is left intact for simplicity 34 | for(i = 0; i < 12; i++) 35 | str[i] -= i; 36 | 37 | // allocate memory on the device 38 | char *d_str; 39 | size_t size = sizeof(str); 40 | cudaMalloc((void**)&d_str, size); 41 | 42 | // copy the string to the device 43 | cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice); 44 | 45 | // set the grid and block sizes 46 | dim3 dimGrid(2); // one block per word 47 | dim3 dimBlock(6); // one thread per character 48 | 49 | // invoke the kernel 50 | helloWorld<<< dimGrid, dimBlock >>>(d_str); 51 | 52 | // retrieve the results from the device 53 | cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost); 54 | 55 | // free up the allocated memory on the device 56 | cudaFree(d_str); 57 | 58 | // everyone's favorite part 59 | printf("%s\n", str); 60 | 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /fluidsGL/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(CUDA REQUIRED) 2 | find_package(OpenGL REQUIRED) 3 | find_package(GLEW REQUIRED) 4 | find_package(GLUT REQUIRED) 5 | find_package(PkgConfig) 6 | pkg_check_modules(GLFW REQUIRED glfw3) 7 | 8 | 9 | set(CMAKE_INCLUDE_CURRENT_DIR ON) 10 | 11 | include_directories(${OPENGL_INCLUDE_DIR} ${GLUT_INCLUDE_DIR}) 12 | 13 | # Target older architectures for those who haven't upgraded their graphics 
card to the latest models. 14 | #set(CUDA_NVCC_FLAGS "-gencode arch=compute_11,code=sm_11 -gencode arch=compute_20,code=sm_20 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_50,code=compute_50 ") 15 | 16 | cuda_add_library(fluidsGL_kernels 17 | defines.h 18 | fluidsGL_kernels.cu 19 | ) 20 | cuda_add_cufft_to_target(fluidsGL_kernels) 21 | 22 | # GLUT based 23 | add_executable(fluidsGL 24 | helper_timer.h 25 | fluidsGL_kernels.h 26 | fluidsGL.cpp 27 | ) 28 | target_link_libraries(fluidsGL fluidsGL_kernels ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES} ${GLUT_LIBRARIES}) 29 | set_target_properties(fluidsGL PROPERTIES COMPILE_FLAGS "-std=c++11") 30 | 31 | # GLFW based 32 | add_executable(fluidsGLFW 33 | fluidsGL_kernels.h 34 | fluidsGLFW.cpp 35 | ) 36 | target_link_libraries(fluidsGLFW fluidsGL_kernels ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES} ${GLFW_LIBRARIES}) 37 | set_target_properties(fluidsGLFW PROPERTIES COMPILE_FLAGS "-std=c++11") 38 | 39 | # Qt based 40 | find_package(Qt5Widgets) 41 | if(Qt5Widgets_FOUND) 42 | set(CMAKE_AUTOMOC ON) 43 | find_package(Qt5OpenGL REQUIRED) 44 | include_directories(${Qt5Widgets_INCLUDE_DIRS} ${Qt5OpenGL_INCLUDE_DIRS}) 45 | add_executable(fluidsQt glfluids.cpp fluidsQt.cpp mainwindow.cpp) 46 | target_link_libraries(fluidsQt fluidsGL_kernels Qt5::Widgets Qt5::OpenGL ${OPENGL_LIBRARIES} ) 47 | set_target_properties(fluidsQt PROPERTIES COMPILE_FLAGS "-std=c++11") 48 | endif(Qt5Widgets_FOUND) 49 | -------------------------------------------------------------------------------- /fluidsGL/defines.h: -------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H 2 | #define DEFINES_H 3 | 4 | #define DIM 512 // Square size of solver domain 5 | #define DS (DIM*DIM) // Total domain size 6 | #define CPADW (DIM/2+1) // Padded width for real->complex in-place FFT 7 | #define RPADW (2*(DIM/2+1)) // Padded 
width for real->complex in-place FFT 8 | #define PDS (DIM*CPADW) // Padded total domain size 9 | 10 | #define DT 0.09f // Delta T for iterative solver 11 | #define VIS 0.0025f // Viscosity constant 12 | #define FORCE (5.8f*DIM) // Force scale factor 13 | #define FR 4 // Force update radius 14 | 15 | #define TILEX 64 // Tile width 16 | #define TILEY 64 // Tile height 17 | #define TIDSX 64 // Tids in X 18 | #define TIDSY 4 // Tids in Y 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /fluidsGL/fluidsGL.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | // OpenGL Graphics includes 13 | #include 14 | 15 | // Includes 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | // CUDA standard includes 22 | #include 23 | #include 24 | 25 | // CUDA FFT Libraries 26 | #include 27 | 28 | #if defined(__APPLE__) || defined(MACOSX) 29 | #include 30 | #else 31 | #include 32 | #endif 33 | 34 | #include "helper_timer.h" 35 | 36 | #include "defines.h" 37 | #include "fluidsGL_kernels.h" 38 | 39 | const char *sSDKname = "fluidsGL"; 40 | // CUDA example code that implements the frequency space version of 41 | // Jos Stam's paper 'Stable Fluids' in 2D. This application uses the 42 | // CUDA FFT library (CUFFT) to perform velocity diffusion and to 43 | // force non-divergence in the velocity field at each time step. 
It uses 44 | // CUDA-OpenGL interoperability to update the particle field directly 45 | // instead of doing a copy to system memory before drawing. Texture is 46 | // used for automatic bilinear interpolation at the velocity advection step. 47 | 48 | void cleanup(void); 49 | void reshape(int x, int y); 50 | 51 | // CUFFT plan handle 52 | cufftHandle planr2c; 53 | cufftHandle planc2r; 54 | 55 | static float2 *vxfield = NULL; 56 | static float2 *vyfield = NULL; 57 | 58 | float2 *hvfield = NULL; 59 | float2 *dvfield = NULL; 60 | static int wWidth = std::max(512, DIM); 61 | static int wHeight = std::max(512, DIM); 62 | 63 | static int clicked = 0; 64 | static int fpsCount = 0; 65 | static int fpsLimit = 1; 66 | StopWatchInterface *timer = NULL; 67 | 68 | // Particle data 69 | static GLuint vbo = 0; // OpenGL vertex buffer object 70 | static struct cudaGraphicsResource 71 | *cuda_vbo_resource; // handles OpenGL-CUDA exchange 72 | static float2 *particles = NULL; // particle positions in host memory 73 | static int lastx = 0, lasty = 0; 74 | 75 | // Texture pitch 76 | size_t tPitch = 0; 77 | 78 | bool g_bExitESC = false; 79 | 80 | void simulateFluids(void) { 81 | // simulate fluid 82 | advectVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM, 83 | DT); 84 | diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS, planr2c, planc2r); 85 | updateVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM); 86 | advectParticles(cuda_vbo_resource, dvfield, DIM, DIM, DT); 87 | } 88 | 89 | void display(void) { 90 | 91 | sdkStartTimer(&timer); 92 | simulateFluids(); 93 | 94 | // render points 95 | glClear(GL_COLOR_BUFFER_BIT); 96 | glClearColor(1, 1, 1, 1.0f); 97 | glColor4f(0, 0, 1, 0.5f); 98 | glPointSize(1); 99 | glEnable(GL_POINT_SMOOTH); 100 | glEnable(GL_BLEND); 101 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); 102 | glEnableClientState(GL_VERTEX_ARRAY); 103 | glDisable(GL_DEPTH_TEST); 104 | glDisable(GL_CULL_FACE); 105 | 
glBindBuffer(GL_ARRAY_BUFFER, vbo); 106 | glVertexPointer(2, GL_FLOAT, 0, NULL); 107 | glDrawArrays(GL_POINTS, 0, DS); 108 | glBindBuffer(GL_ARRAY_BUFFER, 0); 109 | glDisableClientState(GL_VERTEX_ARRAY); 110 | glDisableClientState(GL_TEXTURE_COORD_ARRAY); 111 | glDisable(GL_TEXTURE_2D); 112 | 113 | // Finish timing before swap buffers to avoid refresh sync 114 | sdkStopTimer(&timer); 115 | glutSwapBuffers(); 116 | 117 | fpsCount++; 118 | 119 | if (fpsCount == fpsLimit) { 120 | char fps[256]; 121 | float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f); 122 | sprintf(fps, "Cuda/GL Stable Fluids (%d x %d): %3.1f fps", DIM, DIM, ifps); 123 | glutSetWindowTitle(fps); 124 | fpsCount = 0; 125 | fpsLimit = (int)std::max(ifps, 1.f); 126 | sdkResetTimer(&timer); 127 | } 128 | 129 | glutPostRedisplay(); 130 | } 131 | 132 | // very simple von neumann middle-square prng. can't use rand() in -qatest 133 | // mode because its implementation varies across platforms which makes testing 134 | // for consistency in the important parts of this program difficult. 
135 | float myrand(void) { 136 | static int seed = 72191; 137 | char sq[22]; 138 | return rand() / (float)RAND_MAX; 139 | } 140 | 141 | void initParticles(float2 *p, int dx, int dy) { 142 | int i, j; 143 | 144 | for (i = 0; i < dy; i++) { 145 | for (j = 0; j < dx; j++) { 146 | p[i * dx + j].x = (j + 0.5f + (myrand() - 0.5f)) / dx; 147 | p[i * dx + j].y = (i + 0.5f + (myrand() - 0.5f)) / dy; 148 | } 149 | } 150 | } 151 | 152 | void keyboard(unsigned char key, int x, int y) { 153 | switch (key) { 154 | case 27: 155 | g_bExitESC = true; 156 | #if defined(__APPLE__) || defined(MACOSX) 157 | exit(EXIT_SUCCESS); 158 | #else 159 | glutDestroyWindow(glutGetWindow()); 160 | return; 161 | #endif 162 | break; 163 | 164 | case 'r': 165 | memset(hvfield, 0, sizeof(float2) * DS); 166 | cudaMemcpy(dvfield, hvfield, sizeof(float2) * DS, cudaMemcpyHostToDevice); 167 | 168 | initParticles(particles, DIM, DIM); 169 | 170 | cudaGraphicsUnregisterResource(cuda_vbo_resource); 171 | 172 | glBindBuffer(GL_ARRAY_BUFFER, vbo); 173 | glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * DS, particles, 174 | GL_DYNAMIC_DRAW); 175 | glBindBuffer(GL_ARRAY_BUFFER, 0); 176 | 177 | cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, 178 | cudaGraphicsMapFlagsNone); 179 | break; 180 | 181 | default: 182 | break; 183 | } 184 | } 185 | 186 | void click(int button, int updown, int x, int y) { 187 | lastx = x; 188 | lasty = y; 189 | clicked = !clicked; 190 | } 191 | 192 | void motion(int x, int y) { 193 | // Convert motion coordinates to domain 194 | float fx = (lastx / (float)wWidth); 195 | float fy = (lasty / (float)wHeight); 196 | int nx = (int)(fx * DIM); 197 | int ny = (int)(fy * DIM); 198 | 199 | if (clicked && nx < DIM - FR && nx > FR - 1 && ny < DIM - FR && ny > FR - 1) { 200 | int ddx = x - lastx; 201 | int ddy = y - lasty; 202 | fx = ddx / (float)wWidth; 203 | fy = ddy / (float)wHeight; 204 | int spy = ny - FR; 205 | int spx = nx - FR; 206 | addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, 
FORCE * DT * fy, 207 | FR); 208 | lastx = x; 209 | lasty = y; 210 | } 211 | 212 | glutPostRedisplay(); 213 | } 214 | 215 | void reshape(int x, int y) { 216 | wWidth = x; 217 | wHeight = y; 218 | glViewport(0, 0, x, y); 219 | glMatrixMode(GL_PROJECTION); 220 | glLoadIdentity(); 221 | glOrtho(0, 1, 1, 0, 0, 1); 222 | glMatrixMode(GL_MODELVIEW); 223 | glLoadIdentity(); 224 | glutPostRedisplay(); 225 | } 226 | 227 | void cleanup(void) { 228 | cudaGraphicsUnregisterResource(cuda_vbo_resource); 229 | 230 | unbind_texture(); 231 | delete_texture(); 232 | 233 | // Free all host and device resources 234 | free(hvfield); 235 | free(particles); 236 | cudaFree(dvfield); 237 | cudaFree(vxfield); 238 | cudaFree(vyfield); 239 | cufftDestroy(planr2c); 240 | cufftDestroy(planc2r); 241 | 242 | glBindBuffer(GL_ARRAY_BUFFER, 0); 243 | glDeleteBuffers(1, &vbo); 244 | 245 | sdkDeleteTimer(&timer); 246 | 247 | if (g_bExitESC) { 248 | // cudaDeviceReset causes the driver to clean up all state. While 249 | // not mandatory in normal operation, it is good practice. It is also 250 | // needed to ensure correct operation when the application is being 251 | // profiled. 
Calling cudaDeviceReset causes all profile data to be 252 | // flushed before the application exits 253 | cudaDeviceReset(); 254 | } 255 | } 256 | 257 | int initGL(int *argc, char **argv) { 258 | glutInit(argc, argv); 259 | // glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); 260 | glutInitWindowSize(wWidth, wHeight); 261 | glutCreateWindow("Compute Stable Fluids"); 262 | glutDisplayFunc(display); 263 | glutKeyboardFunc(keyboard); 264 | glutMouseFunc(click); 265 | glutMotionFunc(motion); 266 | glutReshapeFunc(reshape); 267 | 268 | glewInit(); 269 | 270 | if (!glewIsSupported("GL_ARB_vertex_buffer_object")) { 271 | fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing."); 272 | fflush(stderr); 273 | return false; 274 | } 275 | 276 | return true; 277 | } 278 | 279 | int main(int argc, char **argv) { 280 | 281 | // First initialize OpenGL context, so we can properly set the GL for CUDA. 282 | // This is necessary in order to achieve optimal performance with OpenGL/CUDA 283 | // interop. 
284 | if (false == initGL(&argc, argv)) { 285 | exit(EXIT_SUCCESS); 286 | } 287 | 288 | // Allocate and initialize host data 289 | 290 | sdkCreateTimer(&timer); 291 | sdkResetTimer(&timer); 292 | 293 | hvfield = (float2 *)malloc(sizeof(float2) * DS); 294 | memset(hvfield, 0, sizeof(float2) * DS); 295 | 296 | // Allocate and initialize device data 297 | cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(float2) * DIM, DIM); 298 | 299 | cudaMemcpy(dvfield, hvfield, sizeof(float2) * DS, cudaMemcpyHostToDevice); 300 | // Temporary complex velocity field data 301 | cudaMalloc((void **)&vxfield, sizeof(float2) * PDS); 302 | cudaMalloc((void **)&vyfield, sizeof(float2) * PDS); 303 | 304 | setup_texture(DIM, DIM); 305 | bind_texture(); 306 | 307 | // Create particle array 308 | particles = (float2 *)malloc(sizeof(float2) * DS); 309 | memset(particles, 0, sizeof(float2) * DS); 310 | 311 | initParticles(particles, DIM, DIM); 312 | 313 | // Create CUFFT transform plan configuration 314 | cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C); 315 | cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R); 316 | // TODO: update kernels to use the new unpadded memory layout for perf 317 | // rather than the old FFTW-compatible layout 318 | // cufftSetCompatibilityMode(planr2c, CUFFT_COMPATIBILITY_FFTW_PADDING); 319 | // cufftSetCompatibilityMode(planc2r, CUFFT_COMPATIBILITY_FFTW_PADDING); 320 | 321 | glGenBuffers(1, &vbo); 322 | glBindBuffer(GL_ARRAY_BUFFER, vbo); 323 | glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * DS, particles, 324 | GL_DYNAMIC_DRAW); 325 | 326 | GLint bsize; 327 | glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &bsize); 328 | if (bsize != (sizeof(float2) * DS)) 329 | goto EXTERR; 330 | 331 | glBindBuffer(GL_ARRAY_BUFFER, 0); 332 | 333 | cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, 334 | cudaGraphicsMapFlagsNone); 335 | 336 | #if defined(__APPLE__) || defined(MACOSX) 337 | atexit(cleanup); 338 | #else 339 | glutCloseFunc(cleanup); 340 | #endif 341 | glutMainLoop(); 
342 | 343 | // cudaDeviceReset causes the driver to clean up all state. While 344 | // not mandatory in normal operation, it is good practice. It is also 345 | // needed to ensure correct operation when the application is being 346 | // profiled. Calling cudaDeviceReset causes all profile data to be 347 | // flushed before the application exits 348 | cudaDeviceReset(); 349 | exit(EXIT_SUCCESS); 350 | 351 | return 0; 352 | 353 | EXTERR: 354 | printf("Failed to initialize GL extensions.\n"); 355 | 356 | // cudaDeviceReset causes the driver to clean up all state. While 357 | // not mandatory in normal operation, it is good practice. It is also 358 | // needed to ensure correct operation when the application is being 359 | // profiled. Calling cudaDeviceReset causes all profile data to be 360 | // flushed before the application exits 361 | cudaDeviceReset(); 362 | exit(EXIT_FAILURE); 363 | } 364 | -------------------------------------------------------------------------------- /fluidsGL/fluidsGLFW.cpp: -------------------------------------------------------------------------------- 1 | // OpenGL Graphics includes 2 | #include 3 | #include 4 | 5 | // Includes 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | const char* vertex_shader = R"( 12 | 13 | attribute float x; 14 | attribute float y; 15 | 16 | void main() { 17 | gl_Position = vec4(x, y, 0.0, 1.0); 18 | gl_PointSize = 1.0; 19 | } 20 | 21 | )"; 22 | 23 | const char* fragment_shader = R"( 24 | 25 | void main() { 26 | gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0); 27 | } 28 | 29 | )"; 30 | 31 | 32 | float myrand(void) 33 | { 34 | return rand()/(float)RAND_MAX; 35 | } 36 | 37 | void initParticles(float* x, float* y, int dx, int dy) 38 | { 39 | int i, j; 40 | 41 | for (i = 0; i < dy; i++) 42 | { 43 | for (j = 0; j < dx; j++) 44 | { 45 | x[i*dx+j] = (j+0.5f+(myrand() - 0.5f))/dx; 46 | y[i*dx+j] = (i+0.5f+(myrand() - 0.5f))/dy; 47 | } 48 | } 49 | } 50 | 51 | void init(void) { 52 | 53 | } 54 | 55 | int main(void){ 56 | 
57 | int width = 512; 58 | int height = 512; 59 | 60 | if (!glfwInit()) exit(EXIT_FAILURE); 61 | 62 | auto w = glfwCreateWindow(width, height, "Compute Stable Fluids", NULL, NULL); 63 | if (!w) { 64 | glfwTerminate(); 65 | exit(EXIT_FAILURE); 66 | } 67 | 68 | // GLEW can only load GL entry points once a context is current on this thread. 69 | glfwMakeContextCurrent(w); 70 | glfwSwapInterval(1); 71 | 72 | // glewInit() returns GLEW_OK (0) on success, so compare against it explicitly. 73 | if (glewInit() != GLEW_OK) { 74 | glfwTerminate(); 75 | exit(EXIT_FAILURE); 76 | } 77 | 78 | while (!glfwWindowShouldClose(w)){ 79 | //display(); 80 | glfwSwapBuffers(w); 81 | glfwPollEvents(); 82 | } 83 | 84 | glfwTerminate(); 85 | 86 | return 0; 87 | 88 | } 89 | -------------------------------------------------------------------------------- /fluidsGL/fluidsGL_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | #include "defines.h" 13 | #include "fluidsGL_kernels.h" 14 | 15 | #include 16 | #include 17 | 18 | #include // CUDA FFT Libraries 19 | //#include // Helper functions for CUDA Error handling 20 | 21 | 22 | // Texture reference for reading velocity field 23 | texture texref; 24 | static cudaArray *array = NULL; 25 | 26 | void setup_texture(int x, int y) 27 | { 28 | 29 | // Wrap mode appears to be the new default 30 | texref.filterMode = cudaFilterModeLinear; 31 | cudaChannelFormatDesc desc = cudaCreateChannelDesc(); 32 | 33 | cudaMallocArray(&array, &desc, y, x); 34 | } 35 | 36 | void bind_texture(void) 37 | { 38 | cudaBindTextureToArray(texref, array); 39 | } 40 | 41 | void unbind_texture(void) 42 | { 43 | cudaUnbindTexture(texref); 44 | } 45 | 46 | void delete_texture(void) 47 | { 48 | cudaFreeArray(array); 49 | } 50 | 51 | void update_texture(float2 *data, size_t wib, size_t h, size_t pitch) 52 | { 53 | cudaMemcpy2DToArray(array, 0, 0, data, pitch, wib, h, cudaMemcpyDeviceToDevice); 54 | } 55 | 56 | // Note that these kernels are designed to work with arbitrary 57 | // domain sizes, not just domains that are multiples of the tile 58 | // size. Therefore, we have extra code that checks to make sure 59 | // a given thread location falls within the domain boundaries in 60 | // both X and Y. Also, the domain is covered by looping over 61 | // multiple elements in the Y direction, while there is a one-to-one 62 | // mapping between threads in X and the tile size in X. 63 | // Nolan Goodnight 9/22/06 64 | 65 | // This method adds constant force vectors to the velocity field 66 | // stored in 'v' according to v(x,t+1) = v(x,t) + dt * f. 
67 | __global__ void 68 | addForces_k(float2 *v, int dx, int dy, int spx, int spy, float fx, float fy, int r, size_t pitch) 69 | { 70 | 71 | int tx = threadIdx.x; 72 | int ty = threadIdx.y; 73 | float2 *fj = (float2 *)((char *)v + (ty + spy) * pitch) + tx + spx; 74 | 75 | float2 vterm = *fj; 76 | tx -= r; 77 | ty -= r; 78 | float s = 1.f / (1.f + tx*tx*tx*tx + ty*ty*ty*ty); 79 | vterm.x += s * fx; 80 | vterm.y += s * fy; 81 | *fj = vterm; 82 | } 83 | 84 | // This method performs the velocity advection step, where we 85 | // trace velocity vectors back in time to update each grid cell. 86 | // That is, v(x,t+1) = v(p(x,-dt),t). Here we perform bilinear 87 | // interpolation in the velocity space. 88 | __global__ void 89 | advectVelocity_k(float2 *v, float *vx, float *vy, 90 | int dx, int pdx, int dy, float dt, int lb) 91 | { 92 | 93 | int gtidx = blockIdx.x * blockDim.x + threadIdx.x; 94 | int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb; 95 | int p; 96 | 97 | float2 vterm, ploc; 98 | float vxterm, vyterm; 99 | 100 | // gtidx is the domain location in x for this thread 101 | if (gtidx < dx) 102 | { 103 | for (p = 0; p < lb; p++) 104 | { 105 | // fi is the domain location in y for this thread 106 | int fi = gtidy + p; 107 | 108 | if (fi < dy) 109 | { 110 | int fj = fi * pdx + gtidx; 111 | vterm = tex2D(texref, (float)gtidx, (float)fi); 112 | ploc.x = (gtidx + 0.5f) - (dt * vterm.x * dx); 113 | ploc.y = (fi + 0.5f) - (dt * vterm.y * dy); 114 | vterm = tex2D(texref, ploc.x, ploc.y); 115 | vxterm = vterm.x; 116 | vyterm = vterm.y; 117 | vx[fj] = vxterm; 118 | vy[fj] = vyterm; 119 | } 120 | } 121 | } 122 | } 123 | 124 | // This method performs velocity diffusion and forces mass conservation 125 | // in the frequency domain. The inputs 'vx' and 'vy' are complex-valued 126 | // arrays holding the Fourier coefficients of the velocity field in 127 | // X and Y. 
// Diffusion in this space takes a simple form described as:
// v(k,t) = v(k,t) / (1 + visc * dt * k^2), where visc is the viscosity,
// and k is the wavenumber. The projection step forces the Fourier
// velocity vectors to be orthogonal to the vectors for each
// wavenumber: v(k,t) = v(k,t) - ((k dot v(k,t)) * k) / k^2.
//
//   vx, vy : complex coefficient arrays, updated in place
//   dx, dy : extents of the coefficient arrays ('dx' is also the row stride)
//   dt     : time step
//   visc   : viscosity
//   lb     : number of consecutive rows handled by each thread
__global__ void
diffuseProject_k(float2 *vx, float2 *vy, int dx, int dy, float dt,
                 float visc, int lb)
{
    // Column handled by this thread and the first of its 'lb' rows.
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int firstRow = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;

    // Threads to the right of the domain have nothing to do.
    if (col >= dx)
    {
        return;
    }

    for (int step = 0; step < lb; step++)
    {
        const int row = firstRow + step;

        // Rows below the domain are skipped.
        if (row >= dy)
        {
            continue;
        }

        const int idx = row * dx + col;
        float2 xterm = vx[idx];
        float2 yterm = vy[idx];

        // Compute the index of the wavenumber based on the
        // data order produced by a standard NN FFT.
        const int iix = col;
        const int iiy = (row > dy / 2) ? (row - dy) : row;

        // Velocity diffusion: damp every coefficient by 1 / (1 + visc*dt*k^2).
        float kk = (float)(iix * iix + iiy * iiy); // k^2
        float diff = 1.f / (1.f + visc * dt * kk);
        xterm.x *= diff;
        xterm.y *= diff;
        yterm.x *= diff;
        yterm.y *= diff;

        // Velocity projection; skipped for k^2 == 0 to avoid the division.
        if (kk > 0.f)
        {
            float rkk = 1.f / kk;
            // Real portion of velocity projection
            float rkp = (iix * xterm.x + iiy * yterm.x);
            // Imaginary portion of velocity projection
            float ikp = (iix * xterm.y + iiy * yterm.y);
            xterm.x -= rkk * rkp * iix;
            xterm.y -= rkk * ikp * iix;
            yterm.x -= rkk * rkp * iiy;
            yterm.y -= rkk * ikp * iiy;
        }

        vx[idx] = xterm;
        vy[idx] = yterm;
    }
}

// This method updates the velocity field 'v' using the two complex
// arrays from the previous step: 'vx' and 'vy'. Here we scale the
// real components by 1/(dx*dy) to account for an unnormalized FFT.
//   v      : pitched output velocity field
//   vx, vy : real-valued X and Y components, row stride 'pdx' elements
//   dx, dy : domain extents in cells
//   lb     : number of consecutive rows handled by each thread
//   pitch  : byte distance between consecutive rows of 'v'
__global__ void
updateVelocity_k(float2 *v, float *vx, float *vy,
                 int dx, int pdx, int dy, int lb, size_t pitch)
{
    // Column handled by this thread and the first of its 'lb' rows.
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int firstRow = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;

    // Outside the domain in X: nothing to write.
    if (col >= dx)
    {
        return;
    }

    // Normalize the result of the inverse FFT
    const float scale = 1.f / (dx * dy);

    for (int step = 0; step < lb; step++)
    {
        const int row = firstRow + step;

        // Outside the domain in Y: skip this row.
        if (row >= dy)
        {
            continue;
        }

        const int src = row * pdx + col;

        float2 nvterm;
        nvterm.x = vx[src] * scale;
        nvterm.y = vy[src] * scale;

        // 'v' is pitched, so the row start is computed in bytes.
        float2 *dst = (float2 *)((char *)v + row * pitch) + col;
        *dst = nvterm;
    }
}

// This method updates the particles by moving particle positions
// according to the velocity field and time step. That is, for each
// particle: p(t+1) = p(t) + dt * v(p(t)).
//   part   : particle positions, one float2 per grid cell, row stride 'dx'
//   v      : pitched velocity field
//   dx, dy : domain extents in cells
//   dt     : time step
//   lb     : number of consecutive rows handled by each thread
//   pitch  : byte distance between consecutive rows of 'v'
__global__ void
advectParticles_k(float2 *part, float2 *v, int dx, int dy,
                  float dt, int lb, size_t pitch)
{

    int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
    int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
    int p;

    // gtidx is the domain location in x for this thread
    float2 pterm, vterm;

    if (gtidx < dx)
    {
        for (p = 0; p < lb; p++)
        {
            // fi is the domain location in y for this thread
            int fi = gtidy + p;

            if (fi < dy)
            {
                int fj = fi * dx + gtidx;
                pterm = part[fj];

                // Cell containing the particle. The index is clamped so that a
                // coordinate of exactly 1.0 (or a slightly negative one) cannot
                // address a cell outside the velocity field.
                int xvi = min(max((int)(pterm.x * dx), 0), dx - 1);
                int yvi = min(max((int)(pterm.y * dy), 0), dy - 1);
                vterm = *((float2 *)((char *)v + yvi * pitch) + xvi);

                // Move the particle, then wrap each coordinate back into [0, 1):
                // drop the integer part, shift by one to make it positive, and
                // drop the integer part again.
                pterm.x += dt * vterm.x;
                pterm.x = pterm.x - (int)pterm.x;
                pterm.x += 1.f;
                pterm.x = pterm.x - (int)pterm.x;
                pterm.y += dt * vterm.y;
                pterm.y = pterm.y - (int)pterm.y;
                pterm.y += 1.f;
                pterm.y = pterm.y - (int)pterm.y;

                part[fj] = pterm;
            }
        } // If this thread is inside the domain in Y
    } // If this thread is inside the domain in X
}


// Applies a force stencil of radius 'r' whose top-left cell is (spx, spy).
// Launches one block of (2*r+1) x (2*r+1) threads.
void addForces(float2 *v, int dx, int dy, int spx, int spy, float fx, float fy, int r)
{

    dim3 tids(2*r+1, 2*r+1);

    addForces_k<<<1, tids>>>(v, dx, dy, spx, spy, fx, fy, r, tPitch);
}


// Refreshes the velocity texture from 'v' and runs the advection kernel.
// The launch configuration was stripped by the extraction ("<<>>"); it is
// restored from the 'grid' and 'tids' variables declared just above the launch.
void advectVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy, float dt)
{
    // One block per TILEX x TILEY tile, rounded up to cover the whole domain.
    dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1));

    dim3 tids(TIDSX, TIDSY);

    update_texture(v, DIM*sizeof(float2), DIM, tPitch);
    advectVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, dt, TILEY/TIDSY);

}


// Transforms 'vx' and 'vy' to the frequency domain in place, applies
// diffusion and projection there, and transforms them back.
void diffuseProject(float2 *vx, float2 *vy, int dx, int dy, float dt, float visc,
                    cufftHandle planr2c, cufftHandle planc2r)
{
    // Forward FFT
    cufftExecR2C(planr2c, (cufftReal *)vx, (cufftComplex *)vx);
    cufftExecR2C(planr2c, (cufftReal *)vy, (cufftComplex *)vy);

    uint3 grid = make_uint3((dx/TILEX)+(!(dx%TILEX)?0:1),
                            (dy/TILEY)+(!(dy%TILEY)?0:1), 1);
    uint3 tids = make_uint3(TIDSX, TIDSY, 1);

    // Launch configuration restored from 'grid' and 'tids' above.
    diffuseProject_k<<<grid, tids>>>(vx, vy, dx, dy, dt, visc, TILEY/TIDSY);

    // Inverse FFT
    cufftExecC2R(planc2r, (cufftComplex *)vx, (cufftReal *)vx);
    cufftExecC2R(planc2r, (cufftComplex *)vy, (cufftReal *)vy);
}


// Writes the normalized inverse-FFT result back into the pitched field 'v'.
void updateVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy)
{
    dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1));
    dim3 tids(TIDSX, TIDSY);

    // Launch configuration restored from 'grid' and 'tids' above.
    updateVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, TILEY/TIDSY, tPitch);
}


// Maps the OpenGL vertex buffer, advects the particles stored in it with the
// velocity field 'v', and unmaps the buffer again.
void advectParticles(struct cudaGraphicsResource *cuda_vbo_resource, float2 *v, int dx, int dy, float dt)
{
    dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1));
    dim3 tids(TIDSX, TIDSY);

    float2 *p;
    cudaGraphicsMapResources(1, &cuda_vbo_resource, 0);

    size_t num_bytes;
    cudaGraphicsResourceGetMappedPointer((void **)&p, &num_bytes,
                                         cuda_vbo_resource);

    // Launch configuration restored from 'grid' and 'tids' above.
    advectParticles_k<<<grid, tids>>>(p, v, dx, dy, dt, TILEY/TIDSY, tPitch);

    cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0);
}

--------------------------------------------------------------------------------
/fluidsGL/fluidsGL_kernels.h:
--------------------------------------------------------------------------------
#ifndef __STABLEFLUIDS_KERNELS_H_
#define __STABLEFLUIDS_KERNELS_H_

// NOTE(review): the two header names here were lost in extraction. The ones
// below are inferred from the types this header uses (float2,
// cudaGraphicsResource, cufftHandle) -- confirm against the repository.
#include <cuda_runtime.h>
#include <cufft.h>

// Texture pitch
extern size_t tPitch;

// Texture management for the velocity field
void setup_texture(int x, int y);
void bind_texture(void);
void unbind_texture(void);
void delete_texture(void);
void update_texture(float2 *data, size_t w, size_t h, size_t pitch);

// Simulation steps (host-side wrappers around the kernels)
void addForces(float2 *v, int dx, int dy, int spx, int spy, float fx, float fy, int r);
void advectVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy, float dt);
void diffuseProject(float2 *vx, float2 *vy, int dx, int dy, float dt, float visc, cufftHandle planr2c, cufftHandle planc2r);
void updateVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy);
void advectParticles(struct cudaGraphicsResource *cuda_vbo_resource, float2 *v, int dx, int dy, float dt);

#endif


--------------------------------------------------------------------------------
/fluidsGL/fluidsQt.cpp:
--------------------------------------------------------------------------------
#include "mainwindow.h"

// NOTE(review): header name lost in extraction; <QApplication> is inferred
// from the QApplication object created in main().
#include <QApplication>

// Program entry point: shows the main window and runs the Qt event loop.
int main(int argc, char *argv[])
{
    QApplication app(argc, argv);
    MainWindow window;
    window.show();
    return app.exec();
}

--------------------------------------------------------------------------------
/fluidsGL/glfluids.cpp:
--------------------------------------------------------------------------------
#include "glfluids.h"

// NOTE(review): the three header names here were lost in extraction. The
// headers below are inferred from what this file uses (QMouseEvent, QTimer,
// malloc/free, memset) -- confirm against the repository.
#include <QMouseEvent>
#include <QTimer>
#include <cstdlib>
#include <cstring>

// Texture pitch
size_t tPitch = 0;

// Copies a tightly packed DIM x DIM host field into the pitched device field.
// A flat cudaMemcpy is only correct when the pitch returned by
// cudaMallocPitch() equals the row width, so the copy is done row-aware.
// NOTE(review): assumes the host buffer holds at least DIM * DIM elements
// (i.e. DS >= DIM * DIM) -- confirm in defines.h.
static void uploadVelocityField(float2 *dst, const float2 *src) {
  cudaMemcpy2D(dst, tPitch, src, sizeof(float2) * DIM, sizeof(float2) * DIM,
               DIM, cudaMemcpyHostToDevice);
}

// Places one particle in every grid cell, at a random position inside the
// cell, with coordinates normalized by the domain extents.
void initParticles(float2 *p, int dx, int dy) {

  auto myrand = []() -> float { return qrand() / (float)RAND_MAX; };

  for (int i = 0; i < dy; i++) {
    for (int j = 0; j < dx; j++) {
      p[i * dx + j].x = (j + 0.5f + (myrand() - 0.5f)) / dx;
      p[i * dx + j].y = (i + 0.5f + (myrand() - 0.5f)) / dy;
    }
  }
}

// Runs one simulation step: advection, diffusion/projection, velocity update
// and particle advection, in that order.
void GLFluids::simulateFluids(void) {
  // simulate fluid
  advectVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM,
                 DT);
  diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS, planr2c, planc2r);
  updateVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM);
  advectParticles(cuda_vbo_resource, dvfield, DIM, DIM, DT);
}

// Allocates host and device buffers, sets up the velocity texture and the
// FFT plans, and starts a zero-interval timer that drives the simulation.
GLFluids::GLFluids(QWidget *parent) : QGLWidget(parent), QGLFunctions() {
  vbo = 0;
  // Nothing is registered with CUDA until initializeGL() has created the VBO.
  cuda_vbo_resource = nullptr;

  wWidth = qMax(512, DIM);
  wHeight = qMax(512, DIM);

  hvfield = (float2 *)malloc(sizeof(float2) * DS);
  memset(hvfield, 0, sizeof(float2) * DS);

  // Allocate and initialize device data
  cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(float2) * DIM, DIM);

  uploadVelocityField(dvfield, hvfield);
  // Temporary complex velocity field data
  cudaMalloc((void **)&vxfield, sizeof(float2) * PDS);
  cudaMalloc((void **)&vyfield, sizeof(float2) * PDS);

  setup_texture(DIM, DIM);
  bind_texture();

  // Create particle array
  particles = (float2 *)malloc(sizeof(float2) * DS);
  memset(particles, 0, sizeof(float2) * DS);
  initParticles(particles, DIM, DIM);

  // Create CUFFT transform plan configuration
  cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C);
  cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R);

  // The timer is a child of this widget, so it is destroyed together with it.
  QTimer *timer = new QTimer(this);
  connect(timer, &QTimer::timeout, [this]() {
    simulateFluids();
    updateGL();
  });
  timer->start(0);
}

// Releases the CUDA/GL interop registration, the texture, and all host and
// device buffers.
GLFluids::~GLFluids() {

  // Only unregister when initializeGL()/reset() actually registered the VBO.
  if (cuda_vbo_resource) {
    cudaGraphicsUnregisterResource(cuda_vbo_resource);
    cuda_vbo_resource = nullptr;
  }

  unbind_texture();
  delete_texture();

  // Free all host and device resources
  free(hvfield);
  free(particles);
  cudaFree(dvfield);
  cudaFree(vxfield);
  cudaFree(vyfield);
  cufftDestroy(planr2c);
  cufftDestroy(planc2r);

  glBindBuffer(GL_ARRAY_BUFFER, 0);
  glDeleteBuffers(1, &vbo);
}

// Clears the velocity field and re-seeds the particles.
void GLFluids::reset() {
  memset(hvfield, 0, sizeof(float2) * DS);
  uploadVelocityField(dvfield, hvfield);

  initParticles(particles, DIM, DIM);

  // Drop the existing CUDA registration before the buffer store is
  // re-specified; registering again without this leaks one registration per
  // reset.
  if (cuda_vbo_resource) {
    cudaGraphicsUnregisterResource(cuda_vbo_resource);
    cuda_vbo_resource = nullptr;
  }

  glBindBuffer(GL_ARRAY_BUFFER, vbo);
  glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * DS, particles,
               GL_DYNAMIC_DRAW);
  glBindBuffer(GL_ARRAY_BUFFER, 0);

  cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
                               cudaGraphicsMapFlagsNone);
}

// Creates the particle VBO, uploads the initial particles and registers the
// buffer with CUDA.
void GLFluids::initializeGL() {
  initializeGLFunctions();
  glGenBuffers(1, &vbo);
  glBindBuffer(GL_ARRAY_BUFFER, vbo);
  glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * DS, particles,
               GL_DYNAMIC_DRAW);
  glBindBuffer(GL_ARRAY_BUFFER, 0);
  cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo,
                               cudaGraphicsMapFlagsNone);
}

// Draws the particles as blended blue points on a white background.
void GLFluids::paintGL() {
  // Set the clear colour before clearing so the first frame is white as well.
  glClearColor(1, 1, 1, 1.0f);
  glClear(GL_COLOR_BUFFER_BIT);
  glColor4f(0, 0, 1, 0.5f);
  glPointSize(1);
  glEnable(GL_POINT_SMOOTH);
  glEnable(GL_BLEND);
  glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
  glEnableClientState(GL_VERTEX_ARRAY);
  glDisable(GL_DEPTH_TEST);
  glDisable(GL_CULL_FACE);
  glBindBuffer(GL_ARRAY_BUFFER, vbo);
  glVertexPointer(2, GL_FLOAT, 0, NULL);
  glDrawArrays(GL_POINTS, 0, DS);
  glBindBuffer(GL_ARRAY_BUFFER, 0);
  glDisableClientState(GL_VERTEX_ARRAY);
  glDisableClientState(GL_TEXTURE_COORD_ARRAY);
  glDisable(GL_TEXTURE_2D);
}

// Stores the new window size and sets up a unit-square orthographic
// projection with the origin in the top-left corner.
void GLFluids::resizeGL(int x, int y) {
  wWidth = x;
  wHeight = y;
  glViewport(0, 0, x, y);
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  glOrtho(0, 1, 1, 0, 0, 1);
  glMatrixMode(GL_MODELVIEW);
  glLoadIdentity();
}

// Remembers where a drag started.
void GLFluids::mousePressEvent(QMouseEvent *event) {
  auto lastPos = event->pos();
  lastx = lastPos.x();
  lasty = lastPos.y();
}

// While the left button is held, turns the mouse movement since the previous
// event into a force applied around the previous mouse position.
void GLFluids::mouseMoveEvent(QMouseEvent *event) {
  int x = event->x();
  int y = event->y();

  // Convert motion coordinates to domain
  float fx = (lastx / (float)wWidth);
  float fy = (lasty / (float)wHeight);
  int nx = (int)(fx * DIM);
  int ny = (int)(fy * DIM);

  if (event->buttons() & Qt::LeftButton) {
    int ddx = x - lastx;
    int ddy = y - lasty;
    fx = ddx / (float)wWidth;
    fy = ddy / (float)wHeight;
    int spy = ny - FR;
    int spx = nx - FR;

    // The stencil covers cells spx .. spx + 2*FR (and likewise in y). Apply the
    // force only when all of it lies inside the domain; otherwise the kernel
    // would be asked to touch cells outside the velocity field.
    bool insideX = spx >= 0 && spx + 2 * FR < DIM;
    bool insideY = spy >= 0 && spy + 2 * FR < DIM;
    if (insideX && insideY) {
      addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy,
                FR);
    }
  }

  lastx = x;
  lasty = y;
}

--------------------------------------------------------------------------------
/fluidsGL/glfluids.h:
--------------------------------------------------------------------------------
#ifndef GLFLUIDS_H
#define GLFLUIDS_H

// NOTE(review): every header name in angle brackets below was lost in
// extraction and has been inferred: the Qt headers from the two base classes,
// the CUDA headers from the CUDA/GL interop and cuFFT types used by the
// class -- confirm against the repository.

// Qt
#include <QGLWidget>
#include <QGLFunctions>

// CUDA standard includes
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>

// CUDA FFT Libraries
#include <cufft.h>

#include "defines.h"
#include "fluidsGL_kernels.h"

// OpenGL widget that runs the fluid simulation and renders its particles.
class GLFluids : public QGLWidget, protected QGLFunctions
{
    Q_OBJECT

public:
    GLFluids(QWidget *parent = 0);
    ~GLFluids();
    void reset();

protected:

    void initializeGL();
    void paintGL();
    void resizeGL(int x, int y);

    void mousePressEvent(QMouseEvent *event);
    void mouseMoveEvent(QMouseEvent *event);

private:
    void simulateFluids();

    // Temporary complex velocity data (device memory)
    float2 *vxfield;
    float2 *vyfield;

    float2 *hvfield; // velocity field in host memory
    float2 *dvfield; // pitched velocity field in device memory

    int wWidth;
    int wHeight;
    int lastx = 0, lasty = 0; // previous mouse position

    // Particle data
    GLuint vbo;                 // OpenGL vertex buffer object
    struct cudaGraphicsResource *cuda_vbo_resource; // handles OpenGL-CUDA exchange
    float2 *particles;          // particle positions in host memory

    // CUFFT plan handle
    cufftHandle planr2c;
    cufftHandle planc2r;
};


#endif // GLFLUIDS_H

--------------------------------------------------------------------------------
/fluidsGL/helper_timer.h:
--------------------------------------------------------------------------------
/**
 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | // Helper Timing Functions 13 | #ifndef HELPER_TIMER_H 14 | #define HELPER_TIMER_H 15 | 16 | #ifndef EXIT_WAIVED 17 | #define EXIT_WAIVED 2 18 | #endif 19 | 20 | // includes, system 21 | #include 22 | 23 | // Definition of the StopWatch Interface, this is used if we don't want to use the CUT functions 24 | // But rather in a self contained class interface 25 | class StopWatchInterface 26 | { 27 | public: 28 | StopWatchInterface() {}; 29 | virtual ~StopWatchInterface() {}; 30 | 31 | public: 32 | //! Start time measurement 33 | virtual void start() = 0; 34 | 35 | //! Stop time measurement 36 | virtual void stop() = 0; 37 | 38 | //! Reset time counters to zero 39 | virtual void reset() = 0; 40 | 41 | //! Time in msec. after start. If the stop watch is still running (i.e. there 42 | //! was no call to stop()) then the elapsed time is returned, otherwise the 43 | //! time between the last start() and stop call is returned 44 | virtual float getTime() = 0; 45 | 46 | //! Mean time to date based on the number of times the stopwatch has been 47 | //! _stopped_ (ie finished sessions) and the current total time 48 | virtual float getAverageTime() = 0; 49 | }; 50 | 51 | 52 | ////////////////////////////////////////////////////////////////// 53 | // Begin Stopwatch timer class definitions for all OS platforms // 54 | ////////////////////////////////////////////////////////////////// 55 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 56 | // includes, system 57 | #define WINDOWS_LEAN_AND_MEAN 58 | #include 59 | #undef min 60 | #undef max 61 | 62 | //! 
Windows specific implementation of StopWatch 63 | class StopWatchWin : public StopWatchInterface 64 | { 65 | public: 66 | //! Constructor, default 67 | StopWatchWin() : 68 | start_time(), end_time(), 69 | diff_time(0.0f), total_time(0.0f), 70 | running(false), clock_sessions(0), freq(0), freq_set(false) 71 | { 72 | if (! freq_set) 73 | { 74 | // helper variable 75 | LARGE_INTEGER temp; 76 | 77 | // get the tick frequency from the OS 78 | QueryPerformanceFrequency((LARGE_INTEGER *) &temp); 79 | 80 | // convert to type in which it is needed 81 | freq = ((double) temp.QuadPart) / 1000.0; 82 | 83 | // rememeber query 84 | freq_set = true; 85 | } 86 | }; 87 | 88 | // Destructor 89 | ~StopWatchWin() { }; 90 | 91 | public: 92 | //! Start time measurement 93 | inline void start(); 94 | 95 | //! Stop time measurement 96 | inline void stop(); 97 | 98 | //! Reset time counters to zero 99 | inline void reset(); 100 | 101 | //! Time in msec. after start. If the stop watch is still running (i.e. there 102 | //! was no call to stop()) then the elapsed time is returned, otherwise the 103 | //! time between the last start() and stop call is returned 104 | inline float getTime(); 105 | 106 | //! Mean time to date based on the number of times the stopwatch has been 107 | //! _stopped_ (ie finished sessions) and the current total time 108 | inline float getAverageTime(); 109 | 110 | private: 111 | // member variables 112 | 113 | //! Start of measurement 114 | LARGE_INTEGER start_time; 115 | //! End of measurement 116 | LARGE_INTEGER end_time; 117 | 118 | //! Time difference between the last start and stop 119 | float diff_time; 120 | 121 | //! TOTAL time difference between starts and stops 122 | float total_time; 123 | 124 | //! flag if the stop watch is running 125 | bool running; 126 | 127 | //! Number of times clock has been started 128 | //! and stopped to allow averaging 129 | int clock_sessions; 130 | 131 | //! tick frequency 132 | double freq; 133 | 134 | //! 
flag if the frequency has been set 135 | bool freq_set; 136 | }; 137 | 138 | // functions, inlined 139 | 140 | //////////////////////////////////////////////////////////////////////////////// 141 | //! Start time measurement 142 | //////////////////////////////////////////////////////////////////////////////// 143 | inline void 144 | StopWatchWin::start() 145 | { 146 | QueryPerformanceCounter((LARGE_INTEGER *) &start_time); 147 | running = true; 148 | } 149 | 150 | //////////////////////////////////////////////////////////////////////////////// 151 | //! Stop time measurement and increment add to the current diff_time summation 152 | //! variable. Also increment the number of times this clock has been run. 153 | //////////////////////////////////////////////////////////////////////////////// 154 | inline void 155 | StopWatchWin::stop() 156 | { 157 | QueryPerformanceCounter((LARGE_INTEGER *) &end_time); 158 | diff_time = (float) 159 | (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq); 160 | 161 | total_time += diff_time; 162 | clock_sessions++; 163 | running = false; 164 | } 165 | 166 | //////////////////////////////////////////////////////////////////////////////// 167 | //! Reset the timer to 0. Does not change the timer running state but does 168 | //! recapture this point in time as the current start time if it is running. 169 | //////////////////////////////////////////////////////////////////////////////// 170 | inline void 171 | StopWatchWin::reset() 172 | { 173 | diff_time = 0; 174 | total_time = 0; 175 | clock_sessions = 0; 176 | 177 | if (running) 178 | { 179 | QueryPerformanceCounter((LARGE_INTEGER *) &start_time); 180 | } 181 | } 182 | 183 | 184 | //////////////////////////////////////////////////////////////////////////////// 185 | //! Time in msec. after start. If the stop watch is still running (i.e. there 186 | //! was no call to stop()) then the elapsed time is returned added to the 187 | //! 
current diff_time sum, otherwise the current summed time difference alone 188 | //! is returned. 189 | //////////////////////////////////////////////////////////////////////////////// 190 | inline float 191 | StopWatchWin::getTime() 192 | { 193 | // Return the TOTAL time to date 194 | float retval = total_time; 195 | 196 | if (running) 197 | { 198 | LARGE_INTEGER temp; 199 | QueryPerformanceCounter((LARGE_INTEGER *) &temp); 200 | retval += (float) 201 | (((double)(temp.QuadPart - start_time.QuadPart)) / freq); 202 | } 203 | 204 | return retval; 205 | } 206 | 207 | //////////////////////////////////////////////////////////////////////////////// 208 | //! Time in msec. for a single run based on the total number of COMPLETED runs 209 | //! and the total time. 210 | //////////////////////////////////////////////////////////////////////////////// 211 | inline float 212 | StopWatchWin::getAverageTime() 213 | { 214 | return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f; 215 | } 216 | #else 217 | // Declarations for Stopwatch on Linux and Mac OSX 218 | // includes, system 219 | #include 220 | #include 221 | 222 | //! Windows specific implementation of StopWatch 223 | class StopWatchLinux : public StopWatchInterface 224 | { 225 | public: 226 | //! Constructor, default 227 | StopWatchLinux() : 228 | start_time(), diff_time(0.0), total_time(0.0), 229 | running(false), clock_sessions(0) 230 | { }; 231 | 232 | // Destructor 233 | virtual ~StopWatchLinux() 234 | { }; 235 | 236 | public: 237 | //! Start time measurement 238 | inline void start(); 239 | 240 | //! Stop time measurement 241 | inline void stop(); 242 | 243 | //! Reset time counters to zero 244 | inline void reset(); 245 | 246 | //! Time in msec. after start. If the stop watch is still running (i.e. there 247 | //! was no call to stop()) then the elapsed time is returned, otherwise the 248 | //! time between the last start() and stop call is returned 249 | inline float getTime(); 250 | 251 | //! 
Mean time to date based on the number of times the stopwatch has been 252 | //! _stopped_ (ie finished sessions) and the current total time 253 | inline float getAverageTime(); 254 | 255 | private: 256 | 257 | // helper functions 258 | 259 | //! Get difference between start time and current time 260 | inline float getDiffTime(); 261 | 262 | private: 263 | 264 | // member variables 265 | 266 | //! Start of measurement 267 | struct timeval start_time; 268 | 269 | //! Time difference between the last start and stop 270 | float diff_time; 271 | 272 | //! TOTAL time difference between starts and stops 273 | float total_time; 274 | 275 | //! flag if the stop watch is running 276 | bool running; 277 | 278 | //! Number of times clock has been started 279 | //! and stopped to allow averaging 280 | int clock_sessions; 281 | }; 282 | 283 | // functions, inlined 284 | 285 | //////////////////////////////////////////////////////////////////////////////// 286 | //! Start time measurement 287 | //////////////////////////////////////////////////////////////////////////////// 288 | inline void 289 | StopWatchLinux::start() 290 | { 291 | gettimeofday(&start_time, 0); 292 | running = true; 293 | } 294 | 295 | //////////////////////////////////////////////////////////////////////////////// 296 | //! Stop time measurement and increment add to the current diff_time summation 297 | //! variable. Also increment the number of times this clock has been run. 298 | //////////////////////////////////////////////////////////////////////////////// 299 | inline void 300 | StopWatchLinux::stop() 301 | { 302 | diff_time = getDiffTime(); 303 | total_time += diff_time; 304 | running = false; 305 | clock_sessions++; 306 | } 307 | 308 | //////////////////////////////////////////////////////////////////////////////// 309 | //! Reset the timer to 0. Does not change the timer running state but does 310 | //! recapture this point in time as the current start time if it is running. 
311 | //////////////////////////////////////////////////////////////////////////////// 312 | inline void 313 | StopWatchLinux::reset() 314 | { 315 | diff_time = 0; 316 | total_time = 0; 317 | clock_sessions = 0; 318 | 319 | if (running) 320 | { 321 | gettimeofday(&start_time, 0); 322 | } 323 | } 324 | 325 | //////////////////////////////////////////////////////////////////////////////// 326 | //! Time in msec. after start. If the stop watch is still running (i.e. there 327 | //! was no call to stop()) then the elapsed time is returned added to the 328 | //! current diff_time sum, otherwise the current summed time difference alone 329 | //! is returned. 330 | //////////////////////////////////////////////////////////////////////////////// 331 | inline float 332 | StopWatchLinux::getTime() 333 | { 334 | // Return the TOTAL time to date 335 | float retval = total_time; 336 | 337 | if (running) 338 | { 339 | retval += getDiffTime(); 340 | } 341 | 342 | return retval; 343 | } 344 | 345 | //////////////////////////////////////////////////////////////////////////////// 346 | //! Time in msec. for a single run based on the total number of COMPLETED runs 347 | //! and the total time. 348 | //////////////////////////////////////////////////////////////////////////////// 349 | inline float 350 | StopWatchLinux::getAverageTime() 351 | { 352 | return (clock_sessions > 0) ? 
(total_time/clock_sessions) : 0.0f; 353 | } 354 | //////////////////////////////////////////////////////////////////////////////// 355 | 356 | //////////////////////////////////////////////////////////////////////////////// 357 | inline float 358 | StopWatchLinux::getDiffTime() 359 | { 360 | struct timeval t_time; 361 | gettimeofday(&t_time, 0); 362 | 363 | // time difference in milli-seconds 364 | return (float)(1000.0 * (t_time.tv_sec - start_time.tv_sec) 365 | + (0.001 * (t_time.tv_usec - start_time.tv_usec))); 366 | } 367 | #endif // WIN32 368 | 369 | //////////////////////////////////////////////////////////////////////////////// 370 | //! Timer functionality exported 371 | 372 | //////////////////////////////////////////////////////////////////////////////// 373 | //! Create a new timer 374 | //! @return true if a time has been created, otherwise false 375 | //! @param name of the new timer, 0 if the creation failed 376 | //////////////////////////////////////////////////////////////////////////////// 377 | inline bool 378 | sdkCreateTimer(StopWatchInterface **timer_interface) 379 | { 380 | //printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface); 381 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 382 | *timer_interface = (StopWatchInterface *)new StopWatchWin(); 383 | #else 384 | *timer_interface = (StopWatchInterface *)new StopWatchLinux(); 385 | #endif 386 | return (*timer_interface != NULL) ? true : false; 387 | } 388 | 389 | 390 | //////////////////////////////////////////////////////////////////////////////// 391 | //! Delete a timer 392 | //! @return true if a time has been deleted, otherwise false 393 | //! 
@param name of the timer to delete 394 | //////////////////////////////////////////////////////////////////////////////// 395 | inline bool 396 | sdkDeleteTimer(StopWatchInterface **timer_interface) 397 | { 398 | //printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface); 399 | if (*timer_interface) 400 | { 401 | delete *timer_interface; 402 | *timer_interface = NULL; 403 | } 404 | 405 | return true; 406 | } 407 | 408 | //////////////////////////////////////////////////////////////////////////////// 409 | //! Start the timer referenced by \a timer_interface (no-op when it is NULL) 410 | //! @param timer_interface timer to start; the function always returns true 411 | //////////////////////////////////////////////////////////////////////////////// 412 | inline bool 413 | sdkStartTimer(StopWatchInterface **timer_interface) 414 | { 415 | //printf("sdkStartTimer called object %08x\n", (void *)*timer_interface); 416 | if (*timer_interface) 417 | { 418 | (*timer_interface)->start(); 419 | } 420 | 421 | return true; 422 | } 423 | 424 | //////////////////////////////////////////////////////////////////////////////// 425 | //! Stop the timer referenced by \a timer_interface. Does not reset. 426 | //! @param timer_interface timer to stop; the function always returns true 427 | //////////////////////////////////////////////////////////////////////////////// 428 | inline bool 429 | sdkStopTimer(StopWatchInterface **timer_interface) 430 | { 431 | // printf("sdkStopTimer called object %08x\n", (void *)*timer_interface); 432 | if (*timer_interface) 433 | { 434 | (*timer_interface)->stop(); 435 | } 436 | 437 | return true; 438 | } 439 | 440 | //////////////////////////////////////////////////////////////////////////////// 441 | //! Resets the timer's counter. 442 | //! @param timer_interface timer to reset.
443 | //////////////////////////////////////////////////////////////////////////////// 444 | inline bool 445 | sdkResetTimer(StopWatchInterface **timer_interface) 446 | { 447 | // printf("sdkResetTimer called object %08x\n", (void *)*timer_interface); 448 | if (*timer_interface) 449 | { 450 | (*timer_interface)->reset(); 451 | } 452 | 453 | return true; 454 | } 455 | 456 | //////////////////////////////////////////////////////////////////////////////// 457 | //! Return the average time for timer execution as the total time 458 | //! for the timer divided by the number of completed (stopped) runs the timer 459 | //! has made. 460 | //! Excludes the current running time if the timer is currently running. 461 | //! @param timer_interface timer to query; a NULL timer yields 0.0f 462 | //////////////////////////////////////////////////////////////////////////////// 463 | inline float 464 | sdkGetAverageTimerValue(StopWatchInterface **timer_interface) 465 | { 466 | // printf("sdkGetAverageTimerValue called object %08x\n", (void *)*timer_interface); 467 | if (*timer_interface) 468 | { 469 | return (*timer_interface)->getAverageTime(); 470 | } 471 | else 472 | { 473 | return 0.0f; 474 | } 475 | } 476 | 477 | //////////////////////////////////////////////////////////////////////////////// 478 | //! Total execution time for the timer over all runs since the last reset 479 | //! or timer creation. 480 | //! @param timer_interface timer to obtain the value of.
481 | //////////////////////////////////////////////////////////////////////////////// 482 | inline float 483 | sdkGetTimerValue(StopWatchInterface **timer_interface) 484 | { 485 | // printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface); 486 | if (*timer_interface) 487 | { 488 | return (*timer_interface)->getTime(); 489 | } 490 | else 491 | { 492 | return 0.0f; 493 | } 494 | } 495 | 496 | #endif // HELPER_TIMER_H 497 | -------------------------------------------------------------------------------- /fluidsGL/mainwindow.cpp: -------------------------------------------------------------------------------- 1 | #include "mainwindow.h" 2 | #include "glfluids.h" 3 | 4 | #include 5 | 6 | /* Builds the "Qt Fluids" window: a GLFluids view inside a scroll area as the central widget, plus a File menu with Exit and Reset actions. */ 7 | MainWindow::MainWindow() 8 | { 9 | 10 | auto glFluids = new GLFluids(); 11 | 12 | auto glWidgetArea = new QScrollArea; 13 | glWidgetArea->setWidget(glFluids); 14 | glWidgetArea->setWidgetResizable(true); 15 | glWidgetArea->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOff); 16 | glWidgetArea->setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOff); 17 | glWidgetArea->setSizePolicy(QSizePolicy::Ignored, QSizePolicy::Ignored); 18 | glWidgetArea->setMinimumSize(512, 512); 19 | setCentralWidget(glWidgetArea); 20 | 21 | auto fileMenu = new QMenu(tr("File"), this); 22 | menuBar()->addMenu(fileMenu); 23 | 24 | auto quitAction = fileMenu->addAction(tr("E&xit")); 25 | quitAction->setShortcuts(QKeySequence::Quit); 26 | connect(quitAction, &QAction::triggered, this, &QApplication::quit); 27 | 28 | auto resetAction = fileMenu->addAction(tr("&Reset")); 29 | resetAction->setShortcut(Qt::Key_R); 30 | connect(resetAction, &QAction::triggered, glFluids, &GLFluids::reset); 31 | 32 | // glFluids->setFocusPolicy(Qt::StrongFocus); 33 | // glFluids->setFocus(); 34 | 35 | setWindowTitle(tr("Qt Fluids")); 36 | resize(512, 512); 37 | } 38 | -------------------------------------------------------------------------------- /fluidsGL/mainwindow.h: 
-------------------------------------------------------------------------------- 1 | #ifndef MAINWINDOW_H 2 | #define MAINWINDOW_H 3 | 4 | #include 5 | 6 | /* Main window class of the fluids demo; it declares only a default constructor. */ 7 | class MainWindow : public QMainWindow 8 | { 9 | Q_OBJECT 10 | 11 | public: 12 | MainWindow(); 13 | }; 14 | 15 | #endif // MAINWINDOW_H 16 | -------------------------------------------------------------------------------- /gears/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(OpenGL REQUIRED) 2 | 3 | include(FindPkgConfig) 4 | 5 | add_library(gears gears.c) 6 | target_link_libraries(gears ${OPENGL_LIBRARIES} m) 7 | set_target_properties(gears PROPERTIES COMPILE_FLAGS "-std=c99") 8 | 9 | find_package(GLUT) 10 | if(GLUT_FOUND) 11 | include_directories(${OPENGL_INCLUDE_DIR} ${GLUT_INCLUDE_DIR}) 12 | add_executable(gears_GLUT gears_GLUT.cpp) 13 | target_link_libraries(gears_GLUT ${GLUT_LIBRARIES} gears ${OPENGL_LIBRARIES} m) 14 | endif(GLUT_FOUND) 15 | 16 | pkg_search_module(SDL2 sdl2) 17 | if(SDL2_FOUND) 18 | include_directories(${SDL2_INCLUDE_DIRS}) 19 | add_executable(gears_SDL gears_SDL.c) 20 | target_link_libraries(gears_SDL ${SDL2_LIBRARIES} gears) 21 | endif(SDL2_FOUND) 22 | 23 | find_package(Qt5Widgets) 24 | if(Qt5Widgets_FOUND) 25 | find_package(Qt5OpenGL REQUIRED) 26 | set(CMAKE_AUTOMOC TRUE) 27 | include_directories(${Qt5Widgets_INCLUDE_DIRS} ${Qt5OpenGL_INCLUDE_DIRS}) 28 | add_executable(gears_Qt ${FILES_MOC} gears_Qt.cpp mainwindow.cpp glwidget.cpp) 29 | target_link_libraries(gears_Qt Qt5::Widgets Qt5::OpenGL gears) 30 | set_target_properties(gears_Qt PROPERTIES COMPILE_FLAGS "-std=c++11") 31 | endif(Qt5Widgets_FOUND) 32 | -------------------------------------------------------------------------------- /gears/gears.c: -------------------------------------------------------------------------------- 1 | #include "gears.h" 2 | 3 | #include 4 | 5 | /* Enables lighting and depth testing, builds the three gear display lists with gears_make and stores their ids in *g, then sets a black clear color. */ 6 | void gears_initialize(Gears *g) 7 | { 8 | static const GLfloat lightPos[4] = { 5.0f, 
5.0f, 10.0f, 1.0f }; 9 | static const GLfloat reflectance1[4] = { 0.8f, 0.1f, 0.0f, 1.0f }; 10 | static const GLfloat reflectance2[4] = { 0.0f, 0.8f, 0.2f, 1.0f }; 11 | static const GLfloat reflectance3[4] = { 0.2f, 0.2f, 1.0f, 1.0f }; 12 | 13 | glLightfv(GL_LIGHT0, GL_POSITION, lightPos); 14 | glEnable(GL_LIGHTING); 15 | glEnable(GL_LIGHT0); 16 | glEnable(GL_DEPTH_TEST); 17 | 18 | g->gear1 = gears_make(reflectance1, 1.0, 4.0, 1.0, 0.7, 20); 19 | g->gear2 = gears_make(reflectance2, 0.5, 2.0, 2.0, 0.7, 10); 20 | g->gear3 = gears_make(reflectance3, 1.3, 2.0, 0.5, 0.7, 10); 21 | 22 | glEnable(GL_NORMALIZE); 23 | glClearColor(0.0f, 0.0f, 0.0f, 1.0f); 24 | } 25 | /* Clears the color and depth buffers and draws the three gears; every stored rotation value is divided by 16.0 before being passed to glRotated / gears_draw. */ 26 | void gears_paint(const Gears *g) 27 | { 28 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); 29 | 30 | glPushMatrix(); 31 | glRotated(g->xRot / 16.0, 1.0, 0.0, 0.0); 32 | glRotated(g->yRot / 16.0, 0.0, 1.0, 0.0); 33 | glRotated(g->zRot / 16.0, 0.0, 0.0, 1.0); 34 | 35 | gears_draw(g->gear1, -3.0, -2.0, 0.0, g->gear1Rot / 16.0); 36 | gears_draw(g->gear2, +3.1, -2.0, 0.0, -2.0 * (g->gear1Rot / 16.0) - 9.0); 37 | 38 | glRotated(+90.0, 1.0, 0.0, 0.0); 39 | gears_draw(g->gear3, -3.1, -1.8, -2.2, +2.0 * (g->gear1Rot / 16.0) - 2.0); 40 | 41 | glPopMatrix(); 42 | } 43 | /* Sets a centered square viewport whose side is MIN(width, height), a frustum projection, and a model-view matrix translated by -40 along z. */ 44 | void gears_resize(int width, int height) 45 | { 46 | int side = MIN(width, height); 47 | glViewport((width - side) / 2, (height - side) / 2, side, side); 48 | 49 | glMatrixMode(GL_PROJECTION); 50 | glLoadIdentity(); 51 | glFrustum(-1.0, +1.0, -1.0, 1.0, 5.0, 60.0); 52 | glMatrixMode(GL_MODELVIEW); 53 | glLoadIdentity(); 54 | glTranslated(0.0, 0.0, -40.0); 55 | } 56 | /* Advances the animation by adding 2 * 16 to gear1Rot; the value is not wrapped here. */ 57 | void gears_advance(Gears *g) 58 | { 59 | g->gear1Rot += 2 * 16; 60 | } 61 | /* Compiles the geometry of one gear into a new display list and returns the list id. */ 62 | GLuint gears_make(const GLfloat *reflectance, GLdouble innerRadius, 63 | GLdouble outerRadius, GLdouble thickness, 64 | GLdouble toothSize, GLint toothCount) 65 | { 66 | const double Pi = 3.14159265358979323846; 67 | 68 | GLuint list = glGenLists(1); 69 | glNewList(list, GL_COMPILE); 70 | 
glMaterialfv(GL_FRONT, GL_AMBIENT_AND_DIFFUSE, reflectance); 71 | 72 | GLdouble r0 = innerRadius; 73 | GLdouble r1 = outerRadius - toothSize / 2.0; 74 | GLdouble r2 = outerRadius + toothSize / 2.0; 75 | GLdouble delta = (2.0 * Pi / toothCount) / 4.0; 76 | GLdouble z = thickness / 2.0; 77 | /* r0: bore radius; r1 / r2: outerRadius minus / plus half the tooth size; delta: a quarter of the angle covered by one tooth; z: half the thickness */ 78 | glShadeModel(GL_FLAT); 79 | /* i == 0 emits the face at +z, i == 1 the face at -z */ 80 | for (int i = 0; i < 2; ++i) { 81 | GLdouble sign = (i == 0) ? +1.0 : -1.0; 82 | 83 | glNormal3d(0.0, 0.0, sign); 84 | 85 | glBegin(GL_QUAD_STRIP); 86 | for (int j = 0; j <= toothCount; ++j) { 87 | GLdouble angle = 2.0 * Pi * j / toothCount; 88 | glVertex3d(r0 * cos(angle), r0 * sin(angle), sign * z); 89 | glVertex3d(r1 * cos(angle), r1 * sin(angle), sign * z); 90 | glVertex3d(r0 * cos(angle), r0 * sin(angle), sign * z); 91 | glVertex3d(r1 * cos(angle + 3 * delta), r1 * sin(angle + 3 * delta), sign * z); 92 | } 93 | glEnd(); 94 | 95 | glBegin(GL_QUADS); 96 | for (int j = 0; j < toothCount; ++j) { 97 | GLdouble angle = 2.0 * Pi * j / toothCount; 98 | glVertex3d(r1 * cos(angle), r1 * sin(angle), sign * z); 99 | glVertex3d(r2 * cos(angle + delta), r2 * sin(angle + delta), sign * z); 100 | glVertex3d(r2 * cos(angle + 2 * delta), r2 * sin(angle + 2 * delta), sign * z); 101 | glVertex3d(r1 * cos(angle + 3 * delta), r1 * sin(angle + 3 * delta), sign * z); 102 | } 103 | glEnd(); 104 | } 105 | /* strip of (+z, -z) vertex pairs that alternates between radius r1 and r2 around the rim */ 106 | glBegin(GL_QUAD_STRIP); 107 | for (int i = 0; i < toothCount; ++i) { 108 | for (int j = 0; j < 2; ++j) { 109 | GLdouble angle = 2.0 * Pi * (i + j / 2.0) / toothCount; 110 | GLdouble s1 = r1; 111 | GLdouble s2 = r2; 112 | if (j == 1) { 113 | GLdouble tmp = s1; s1 = s2; s2 = tmp; 114 | } 115 | 116 | glNormal3d(cos(angle), sin(angle), 0.0); 117 | glVertex3d(s1 * cos(angle), s1 * sin(angle), +z); 118 | glVertex3d(s1 * cos(angle), s1 * sin(angle), -z); 119 | 120 | glNormal3d(s2 * sin(angle + delta) - s1 * sin(angle), 121 | s1 * cos(angle) - s2 * cos(angle + delta), 0.0); 122 | glVertex3d(s2 * cos(angle + delta), s2 * sin(angle + delta), +z); 123 | glVertex3d(s2 
* cos(angle + delta), s2 * sin(angle + delta), -z); 124 | } 125 | } 126 | glVertex3d(r1, 0.0, +z); 127 | glVertex3d(r1, 0.0, -z); 128 | glEnd(); 129 | 130 | glShadeModel(GL_SMOOTH); 131 | /* bore wall: strip at radius r0 whose normals point toward the gear axis */ 132 | glBegin(GL_QUAD_STRIP); 133 | for (int i = 0; i <= toothCount; ++i) { 134 | GLdouble angle = i * 2.0 * Pi / toothCount; 135 | glNormal3d(-cos(angle), -sin(angle), 0.0); 136 | glVertex3d(r0 * cos(angle), r0 * sin(angle), +z); 137 | glVertex3d(r0 * cos(angle), r0 * sin(angle), -z); 138 | } 139 | glEnd(); 140 | 141 | glEndList(); 142 | 143 | return list; 144 | } 145 | 146 | /* Calls display list `gear` translated by (dx, dy, dz) and rotated by `angle` about the z axis; the matrix is saved and restored around the call. */ 147 | void gears_draw(GLuint gear, GLdouble dx, GLdouble dy, GLdouble dz, 148 | GLdouble angle) 149 | { 150 | glPushMatrix(); 151 | glTranslated(dx, dy, dz); 152 | glRotated(angle, 0.0, 0.0, 1.0); 153 | glCallList(gear); 154 | glPopMatrix(); 155 | } 156 | 157 | /* Wraps *angle into the range 0 .. 360 * 16 inclusive by adding or subtracting 360 * 16 (a value of exactly 360 * 16 is left unchanged). */ 158 | void gears_normalize_angle(int *angle) 159 | { 160 | while (*angle < 0) 161 | *angle += 360 * 16; 162 | while (*angle > 360 * 16) 163 | *angle -= 360 * 16; 164 | } 165 | -------------------------------------------------------------------------------- /gears/gears.h: -------------------------------------------------------------------------------- 1 | #ifndef GEARS_H 2 | #define GEARS_H 3 | 4 | #ifdef __APPLE__ 5 | #include 6 | #else 7 | #include 8 | #endif 9 | /* Scene state: ids of the three gear display lists, the view rotation around x/y/z and the rotation of gear 1. */ 10 | typedef struct { 11 | GLuint gear1; 12 | GLuint gear2; 13 | GLuint gear3; 14 | int xRot; 15 | int yRot; 16 | int zRot; 17 | int gear1Rot; 18 | } Gears; 19 | /* NOTE(review): function-like macros; each argument may be evaluated twice. */ 20 | #define MIN(a,b) (((a)<(b))?(a):(b)) 21 | #define MAX(a,b) (((a)>(b))?(a):(b)) 22 | 23 | void gears_initialize(Gears *g); 24 | 25 | void gears_paint(const Gears *g); 26 | 27 | void gears_resize(int width, int height); 28 | 29 | void gears_advance(Gears *g); 30 | 31 | GLuint gears_make(const GLfloat *reflectance, GLdouble innerRadius, 32 | GLdouble outerRadius, GLdouble thickness, 33 | GLdouble toothSize, GLint toothCount); 34 | 35 | void gears_draw(GLuint gear, GLdouble dx, GLdouble dy, GLdouble dz, 36 | GLdouble angle); 37 | 
38 | void gears_normalize_angle(int *angle); 39 | 40 | #endif // GEARS_H 41 | -------------------------------------------------------------------------------- /gears/gears_GLUT.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #ifdef __APPLE__ 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | extern "C" { 13 | #include "gears.h" 14 | } 15 | /* Scene state shared by all GLUT callbacks below. */ 16 | static Gears g = {0, 0, 0, 0, 0, 0, 0}; 17 | /* Keyboard callback: Escape (key code 27) exits the process, every other key is ignored. */ 18 | static void key(unsigned char k, int x, int y) { 19 | switch (k) { 20 | case 27: /* Escape */ 21 | exit(0); 22 | default: 23 | return; 24 | } 25 | } 26 | /* Display callback: paints the scene and swaps buffers. NOTE(review): identifiers containing a double underscore are reserved in C++; consider renaming. */ 27 | void __display(void) { 28 | gears_paint(&g); 29 | glutSwapBuffers(); 30 | } 31 | /* Idle callback: advances the animation and requests a redraw. */ 32 | void idle(void) { 33 | gears_advance(&g); 34 | glutPostRedisplay(); 35 | } 36 | /* Visibility callback: installs the idle callback only while the window is visible. */ 37 | void visible(int vis) { 38 | if (vis == GLUT_VISIBLE) { 39 | glutIdleFunc(idle); 40 | } else { 41 | glutIdleFunc(nullptr); 42 | } 43 | } 44 | /* Creates a 300x300 double-buffered window, initializes the gears scene, registers the callbacks and enters the GLUT main loop. */ 45 | int main(int argc, char *argv[]) { 46 | glutInit(&argc, argv); 47 | glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE); 48 | 49 | glutInitWindowPosition(100, 100); 50 | glutInitWindowSize(300, 300); 51 | glutCreateWindow("Gears GLUT"); 52 | 53 | gears_initialize(&g); 54 | gears_resize(glutGet(GLUT_WINDOW_WIDTH), glutGet(GLUT_WINDOW_HEIGHT)); 55 | 56 | glutDisplayFunc(__display); 57 | glutReshapeFunc(gears_resize); 58 | glutKeyboardFunc(key); 59 | glutVisibilityFunc(visible); 60 | 61 | glutMainLoop(); 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /gears/gears_Qt.cpp: -------------------------------------------------------------------------------- 1 | #include "mainwindow.h" 2 | 3 | #include 4 | /* Qt entry point: shows a MainWindow and runs the application event loop. */ 5 | int main(int argc, char *argv[]) 6 | { 7 | QApplication app(argc, argv); 8 | MainWindow window; 9 | window.show(); 10 | return app.exec(); 11 | } 12 | -------------------------------------------------------------------------------- /gears/gears_SDL.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "gears.h" 5 | /* SDL2 front end: opens a 640x480 resizable OpenGL window and animates the gears until a key is pressed or the window is closed. NOTE(review): the results of SDL_Init, SDL_CreateWindow and SDL_GL_CreateContext are not checked. */ 6 | int main(){ 7 | 8 | SDL_Init(SDL_INIT_VIDEO); 9 | 10 | SDL_Window *window = SDL_CreateWindow( 11 | "SDL2 Gears", 12 | SDL_WINDOWPOS_UNDEFINED, 13 | SDL_WINDOWPOS_UNDEFINED, 14 | 640, 480, 15 | SDL_WINDOW_OPENGL|SDL_WINDOW_RESIZABLE 16 | ); 17 | 18 | SDL_GLContext glcontext = SDL_GL_CreateContext(window); 19 | 20 | Gears g = {0, 0, 0, 0, 0, 0, 0}; 21 | 22 | gears_initialize(&g); 23 | int width; int height; 24 | SDL_GetWindowSize(window, &width, &height); 25 | gears_resize(width, height); 26 | /* main loop: drain pending events, paint, swap, wait 10 ms, advance the animation */ 27 | int done = 0; 28 | while(!done) { 29 | SDL_Event e; 30 | while(SDL_PollEvent(&e)) { 31 | switch(e.type) { 32 | case SDL_KEYDOWN: 33 | done = 1; 34 | break; 35 | case SDL_QUIT: 36 | done = 1; 37 | break; 38 | default: 39 | break; 40 | } 41 | } 42 | gears_paint(&g); 43 | SDL_GL_SwapWindow(window); 44 | SDL_Delay(10); 45 | 46 | gears_advance(&g); 47 | } 48 | 49 | 50 | SDL_GL_DeleteContext(glcontext); 51 | SDL_DestroyWindow(window); 52 | SDL_Quit(); 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /gears/glwidget.cpp: -------------------------------------------------------------------------------- 1 | #include "glwidget.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | /* Zero-initializes the scene state and starts a 20 ms timer that advances the gears and repaints the widget. */ 8 | GLWidget::GLWidget(QWidget *parent) 9 | : QGLWidget(parent) 10 | { 11 | g = {0, 0, 0, 0, 0, 0, 0}; 12 | 13 | QTimer *timer = new QTimer(this); 14 | connect(timer, &QTimer::timeout, [&](){ 15 | gears_advance(&g); 16 | updateGL(); 17 | }); 18 | timer->start(20); 19 | } 20 | /* Makes the widget's GL context current and deletes the three gear display lists. */ 21 | GLWidget::~GLWidget() 22 | { 23 | makeCurrent(); 24 | glDeleteLists(g.gear1, 1); 25 | glDeleteLists(g.gear2, 1); 26 | glDeleteLists(g.gear3, 1); 27 | } 28 | /* Each rotation setter normalizes the angle and, only if the value changed, stores it, emits the matching *RotationChanged signal and repaints. */ 29 | void GLWidget::setXRotation(int angle) 30 | { 31 | gears_normalize_angle(&angle); 32 | if (angle != g.xRot) { 33 | g.xRot = angle; 34 | emit xRotationChanged(angle); 35 
| updateGL(); 36 | } 37 | } 38 | 39 | void GLWidget::setYRotation(int angle) 40 | { 41 | gears_normalize_angle(&angle); 42 | if (angle != g.yRot) { 43 | g.yRot = angle; 44 | emit yRotationChanged(angle); 45 | updateGL(); 46 | } 47 | } 48 | 49 | void GLWidget::setZRotation(int angle) 50 | { 51 | gears_normalize_angle(&angle); 52 | if (angle != g.zRot) { 53 | g.zRot = angle; 54 | emit zRotationChanged(angle); 55 | updateGL(); 56 | } 57 | } 58 | /* The three GL hooks below forward directly to the shared gears_* routines. */ 59 | void GLWidget::initializeGL() 60 | { 61 | gears_initialize(&g); 62 | } 63 | 64 | void GLWidget::paintGL() 65 | { 66 | gears_paint(&g); 67 | } 68 | 69 | void GLWidget::resizeGL(int width, int height) 70 | { 71 | gears_resize(width, height); 72 | } 73 | /* Remembers where the drag started. */ 74 | void GLWidget::mousePressEvent(QMouseEvent *event) 75 | { 76 | lastPos = event->pos(); 77 | } 78 | /* Dragging with the left button changes the X and Y rotation, with the right button the X and Z rotation; each pixel of movement adds 8 units. */ 79 | void GLWidget::mouseMoveEvent(QMouseEvent *event) 80 | { 81 | int dx = event->x() - lastPos.x(); 82 | int dy = event->y() - lastPos.y(); 83 | 84 | if (event->buttons() & Qt::LeftButton) { 85 | setXRotation(g.xRot + 8 * dy); 86 | setYRotation(g.yRot + 8 * dx); 87 | } else if (event->buttons() & Qt::RightButton) { 88 | setXRotation(g.xRot + 8 * dy); 89 | setZRotation(g.zRot + 8 * dx); 90 | } 91 | lastPos = event->pos(); 92 | } 93 | -------------------------------------------------------------------------------- /gears/glwidget.h: -------------------------------------------------------------------------------- 1 | #ifndef GLWIDGET_H 2 | #define GLWIDGET_H 3 | 4 | #include 5 | 6 | extern "C" { 7 | #include "gears.h" 8 | } 9 | /* QGLWidget that renders the gears scene and exposes its three view rotations through getters, slots and change signals. */ 10 | class GLWidget : public QGLWidget 11 | { 12 | Q_OBJECT 13 | 14 | public: 15 | GLWidget(QWidget *parent = 0); 16 | ~GLWidget(); 17 | 18 | int xRotation() const { return g.xRot; } 19 | int yRotation() const { return g.yRot; } 20 | int zRotation() const { return g.zRot; } 21 | 22 | public slots: 23 | void setXRotation(int angle); 24 | void setYRotation(int angle); 25 | void setZRotation(int angle); 26 | 27 | signals: 28 | void xRotationChanged(int 
angle); 29 | void yRotationChanged(int angle); 30 | void zRotationChanged(int angle); 31 | 32 | protected: 33 | void initializeGL(); 34 | void paintGL(); 35 | void resizeGL(int width, int height); 36 | void mousePressEvent(QMouseEvent *event); 37 | void mouseMoveEvent(QMouseEvent *event); 38 | 39 | private: 40 | Gears g; 41 | QPoint lastPos; 42 | }; 43 | 44 | #endif // GLWIDGET_H 45 | -------------------------------------------------------------------------------- /gears/mainwindow.cpp: -------------------------------------------------------------------------------- 1 | #include "glwidget.h" 2 | #include "mainwindow.h" 3 | 4 | #include 5 | 6 | /* Builds the "Qt Gears" window: the GL view in a scroll area, three rotation sliders wired to it in both directions, and File / Help menus. */ 7 | MainWindow::MainWindow() 8 | { 9 | 10 | auto glWidget = new GLWidget; 11 | 12 | auto glWidgetArea = new QScrollArea; 13 | glWidgetArea->setWidget(glWidget); 14 | glWidgetArea->setWidgetResizable(true); 15 | glWidgetArea->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOff); 16 | glWidgetArea->setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOff); 17 | glWidgetArea->setSizePolicy(QSizePolicy::Ignored, QSizePolicy::Ignored); 18 | glWidgetArea->setMinimumSize(50, 50); 19 | /* helper: creates a horizontal slider with range 0 .. 360 * 16 and connects it to the given GLWidget signal/slot pair */ 20 | auto createSlider = [&](void (GLWidget::*changedSignal)(int), 21 | void (GLWidget::*setterSlot)(int)) -> QSlider* { 22 | QSlider *slider = new QSlider(Qt::Horizontal); 23 | slider->setRange(0, 360 * 16); 24 | slider->setSingleStep(16); 25 | slider->setPageStep(15 * 16); 26 | slider->setTickInterval(15 * 16); 27 | slider->setTickPosition(QSlider::TicksRight); 28 | connect(slider, &QSlider::valueChanged, glWidget, setterSlot); 29 | connect(glWidget, changedSignal, slider, &QSlider::setValue); 30 | 31 | return slider; 32 | }; 33 | 34 | auto xSlider = createSlider(&GLWidget::xRotationChanged, &GLWidget::setXRotation); 35 | auto ySlider = createSlider(&GLWidget::yRotationChanged, &GLWidget::setYRotation); 36 | auto zSlider = createSlider(&GLWidget::zRotationChanged, &GLWidget::setZRotation); 37 | 38 | // Create actions and menus 39 | auto exitAct = new 
QAction(tr("E&xit"), this); 40 | exitAct->setShortcuts(QKeySequence::Quit); 41 | connect(exitAct, &QAction::triggered, this, &MainWindow::close); 42 | 43 | auto aboutQtAct = new QAction(tr("About &Qt"), this); 44 | connect(aboutQtAct, &QAction::triggered, qApp, &QApplication::aboutQt); 45 | 46 | auto fileMenu = menuBar()->addMenu(tr("&File")); 47 | fileMenu->addSeparator(); 48 | fileMenu->addAction(exitAct); 49 | 50 | auto helpMenu = menuBar()->addMenu(tr("&Help")); 51 | helpMenu->addAction(aboutQtAct); 52 | /* stack the GL view above the three sliders */ 53 | auto centralLayout = new QVBoxLayout; 54 | centralLayout->addWidget(glWidgetArea); 55 | centralLayout->addWidget(xSlider); 56 | centralLayout->addWidget(ySlider); 57 | centralLayout->addWidget(zSlider); 58 | 59 | auto centralWidget = new QWidget; 60 | setCentralWidget(centralWidget); 61 | centralWidget->setLayout(centralLayout); 62 | /* initial slider positions, expressed in the sliders' own 0 .. 360 * 16 range */ 63 | xSlider->setValue(15 * 16); 64 | ySlider->setValue(345 * 16); 65 | zSlider->setValue(0 * 16); 66 | 67 | setWindowTitle(tr("Qt Gears")); 68 | resize(400, 300); 69 | } 70 | -------------------------------------------------------------------------------- /gears/mainwindow.h: -------------------------------------------------------------------------------- 1 | #ifndef MAINWINDOW_H 2 | #define MAINWINDOW_H 3 | 4 | #include 5 | 6 | 7 | class MainWindow : public QMainWindow 8 | { 9 | Q_OBJECT 10 | 11 | public: 12 | MainWindow(); 13 | }; 14 | 15 | #endif // MAINWINDOW_H 16 | -------------------------------------------------------------------------------- /thrust/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(CUDA REQUIRED) 2 | 3 | if(APPLE) 4 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++") 5 | endif() 6 | 7 | cuda_add_executable(version version.cu) 8 | 9 | cuda_add_executable(device-vector device-vector.cu) 10 | 11 | cuda_add_executable(transformations transformations.cu) 12 | 13 | cuda_add_executable(transform_reduce transform_reduce.cu) 14 | 15 | 
cuda_add_executable(sum sum.cu) 16 | 17 | cuda_add_executable(sort sort.cu) 18 | 19 | cuda_add_executable(random random.cu) 20 | -------------------------------------------------------------------------------- /thrust/device-vector.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(void) { 7 | 8 | thrust::host_vector H(4); 9 | 10 | H[0] = 14; 11 | H[1] = 20; 12 | H[2] = 38; 13 | H[3] = 46; 14 | 15 | std::cout << "H has size " << H.size() << std::endl; 16 | 17 | // print contents of H 18 | for(int i = 0; i < H.size(); i++) 19 | std::cout << "H[" << i << "] = " << H[i] << std::endl; 20 | 21 | // shrink H to two elements 22 | H.resize(2); 23 | 24 | std::cout << "H now has size " << H.size() << std::endl; 25 | 26 | // Copy host_vector H to device_vector D 27 | thrust::device_vector D = H; 28 | 29 | // elements of D can be modified from host code 30 | D[0] = 99; 31 | D[1] = 88; 32 | 33 | // print contents of D 34 | for(int i = 0; i < D.size(); i++) 35 | std::cout << "D[" << i << "] = " << D[i] << std::endl; 36 | 37 | // H and D are automatically deleted when the function returns 38 | return 0; 39 | 40 | } 41 | -------------------------------------------------------------------------------- /thrust/random.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int main(void) 10 | { 11 | // generate 100 random values on the host with rand() 12 | thrust::host_vector h_vec(100); 13 | std::generate(h_vec.begin(), h_vec.end(), rand); 14 | 15 | // transfer to device and compute sum; NOTE(review): the sum of 100 rand() values may exceed INT_MAX when RAND_MAX is large - confirm 16 | thrust::device_vector d_vec = h_vec; 17 | int x = thrust::reduce(d_vec.begin(), d_vec.end(), 0, thrust::plus()); 18 | std::cout << x << std::endl; 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /thrust/sort.cu: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int main(void) 10 | { 11 | // generate 1M (32 << 15 = 1,048,576) random numbers serially 12 | thrust::host_vector h_vec(32 << 15); 13 | std::generate(h_vec.begin(), h_vec.end(), rand); 14 | 15 | // transfer data to the device 16 | thrust::device_vector d_vec = h_vec; 17 | 18 | // sort data on the device (846M keys per second on GeForce GTX 480) 19 | thrust::sort(d_vec.begin(), d_vec.end()); 20 | 21 | // transfer data back to host 22 | thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /thrust/sum.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | int my_rand(void) 9 | { 10 | static thrust::default_random_engine rng; 11 | static thrust::uniform_int_distribution dist(0, 9999); 12 | return dist(rng); 13 | } 14 | 15 | int main(void) 16 | { 17 | // generate 100 random values on the host with my_rand 18 | thrust::host_vector h_vec(100); 19 | thrust::generate(h_vec.begin(), h_vec.end(), my_rand); 20 | 21 | // transfer to device 22 | thrust::device_vector d_vec = h_vec; 23 | 24 | // initial value of the reduction 25 | int init = 0; 26 | 27 | // binary operation used to reduce values 28 | thrust::plus binary_op; 29 | 30 | // compute sum on the device 31 | int sum = thrust::reduce(d_vec.begin(), d_vec.end(), init, binary_op); 32 | 33 | // print the sum 34 | std::cout << "sum is " << sum << std::endl; 35 | 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /thrust/transform_reduce.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 
| // square computes the square of a number f(x) -> x*x 8 | template 9 | struct square 10 | { 11 | __host__ __device__ 12 | T operator()(const T& x) const { 13 | return x * x; 14 | } 15 | }; 16 | 17 | int main(void) 18 | { 19 | // initialize host array 20 | float x[4]; 21 | x[0] = 1.0; 22 | x[1] = 2.0; 23 | x[2] = 3.0; 24 | x[3] = 4.0; 25 | 26 | // transfer to device 27 | thrust::device_vector d_x(x, x + 4); 28 | 29 | // setup arguments 30 | square unary_op; 31 | thrust::plus binary_op; 32 | float init = 0; 33 | 34 | // compute the norm: square root of the reduced sum of squares 35 | float norm = std::sqrt( thrust::transform_reduce(d_x.begin(), d_x.end(), unary_op, init, binary_op) ); 36 | 37 | std::cout << norm << std::endl; 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /thrust/transformations.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | struct saxpy_functor 11 | { 12 | const float a; 13 | 14 | saxpy_functor(float _a) : a(_a) {} 15 | 16 | __host__ __device__ 17 | float operator()(const float& x, const float& y) const { 18 | return a * x + y; 19 | } 20 | }; 21 | 22 | void saxpy_fast(float A, thrust::device_vector& X, thrust::device_vector& Y) 23 | { 24 | // Y <- A * X + Y in a single transform call 25 | thrust::transform(X.begin(), X.end(), Y.begin(), Y.begin(), saxpy_functor(A)); 26 | } 27 | 28 | void saxpy_slow(float A, thrust::device_vector& X, thrust::device_vector& Y) 29 | { 30 | thrust::device_vector temp(X.size()); 31 | 32 | // temp <- A 33 | thrust::fill(temp.begin(), temp.end(), A); 34 | 35 | // temp <- A * X 36 | thrust::transform(X.begin(), X.end(), temp.begin(), temp.begin(), thrust::multiplies()); 37 | 38 | // Y <- A * X + Y 39 | thrust::transform(temp.begin(), temp.end(), Y.begin(), Y.begin(), thrust::plus()); 40 | } 41 | 42 | int main(void) 43 | { 44 | // allocate three device_vectors with 10 elements 45 | 
thrust::device_vector X(10); 46 | thrust::device_vector Y(10); 47 | thrust::device_vector Z(10); 48 | 49 | // initialize X to 0,1,2,3, .... 50 | thrust::sequence(X.begin(), X.end()); 51 | 52 | // compute Y = -X (Y is overwritten again by the modulus transform below) 53 | thrust::transform(X.begin(), X.end(), Y.begin(), thrust::negate()); 54 | 55 | // fill Z with twos 56 | thrust::fill(Z.begin(), Z.end(), 2); 57 | 58 | // compute Y = X mod 2 59 | thrust::transform(X.begin(), X.end(), Z.begin(), Y.begin(), thrust::modulus()); 60 | 61 | // replace all the ones in Y with tens 62 | thrust::replace(Y.begin(), Y.end(), 1, 10); 63 | 64 | // print Y 65 | thrust::copy(Y.begin(), Y.end(), std::ostream_iterator(std::cout, "\n")); 66 | 67 | 68 | thrust::device_vector x(10); 69 | thrust::device_vector y(10); 70 | 71 | // y <- 2.0 * x + y (single-pass variant) 72 | saxpy_fast(2.0, x, y); 73 | 74 | // y <- 0.5 * x + y (variant using a temporary vector) 75 | saxpy_slow(0.5, x, y); 76 | 77 | return 0; 78 | } 79 | -------------------------------------------------------------------------------- /thrust/version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(void) 5 | { 6 | int major = THRUST_MAJOR_VERSION; 7 | int minor = THRUST_MINOR_VERSION; 8 | 9 | std::cout << "Thrust v" << major << "." << minor << std::endl; 10 | 11 | return 0; 12 | } 13 | --------------------------------------------------------------------------------