├── CodingAssignments ├── basic_matrix_mul │ ├── README.md │ └── basic_matrix_mul.cu ├── mat_vec_mul │ ├── README.md │ └── mat_vec_mul.cu ├── tiled_matrix_mul_shared_mem │ └── README.md └── vector_add │ ├── README.md │ └── vector_add.cu ├── Colab_Test ├── Intro_to_CUDA_C_Part_1-Student.zip └── README.md ├── Get_GPU_Properties ├── Makefile ├── Makefile_gmatch_cu ├── Makefile_gpu_props_cu ├── common │ ├── book.h │ ├── common_functions.h │ ├── constants_32bp.h │ ├── db_file_names.h │ ├── gpu_arch_constants.h │ └── qry_file_names.h ├── get_device_properties └── get_device_properties.cu ├── README.md ├── TechDocs ├── NVIDIA-Turing-Architecture-Whitepaper.pdf └── nsight_profiler_explained.pdf └── common └── GL ├── glext.h └── glut.h /CodingAssignments/basic_matrix_mul/README.md: -------------------------------------------------------------------------------- 1 | ## Basic Matrix Multiplication 2 | 3 | *This program performs matrix-matrix multiplication with each thread calculating only one output element* 4 | 5 | --- 6 |
7 | 8 | In the host code: 9 | 10 | - We allocate memory for the input and output matrices. 11 | - We initialize the input matrices with random values. 12 | - We launch the kernel. 13 | - We copy the output matrix to the host. 14 | - And finally we free both host and device memory. 15 | 16 | In the kernel function we check if the thread is within the bounds of the matrix. 17 | 18 | If the thread is within the bounds we calculate the output element by multiplying the input elements of the two matrices residing in the global memory and we store the result in the output matrix. 19 | 20 | For each iteration in the for-loop the kernel function has to perform two load operations and two arithmetic operations. This deteriorates the performance of the kernel due to long-latency load operations. -------------------------------------------------------------------------------- /CodingAssignments/basic_matrix_mul/basic_matrix_mul.cu: -------------------------------------------------------------------------------- 1 | /*/ 2 | * 3 | * This program implements matrix-matrix multiplication in its simplest form.
4 | * 5 | * Compile with: 6 | * nvcc basic_matrix_mul.cu 7 | * 8 | * Run with: 9 | * ./a.out 10 | * 11 | /*/ 12 | 13 | #include 14 | 15 | #define DEBUG 16 | 17 | // Compute A * B 18 | // A and B can have arbitrary dimensions 19 | // Sgemm stands for single precision general matrix-matrix multiply 20 | __global__ void sgemm(float *A, float *B, float *C, int numARows, int numAColumns, 21 | int numBRows, int numBColumns) { 22 | int row = blockIdx.y * blockDim.y + threadIdx.y; 23 | int col = blockIdx.x * blockDim.x + threadIdx.x; 24 | 25 | if (row >= numARows || col >= numBColumns) return; 26 | 27 | float sum = 0; 28 | for (int k = 0; k < numAColumns; k++) 29 | sum += A[row * numAColumns + k] * B[k * numBColumns + col]; 30 | 31 | C[row * numBColumns + col] = sum; 32 | } 33 | 34 | 35 | int main(int argc, char **argv) { 36 | 37 | float *hostA, *hostB, *hostC; 38 | float *deviceA, *deviceB, *deviceC; 39 | int numARows, numAColumns; 40 | int numBRows, numBColumns; 41 | int numCRows, numCColumns; 42 | 43 | if (argc != 5){ 44 | printf("Usage: ./a.out \n"); 45 | return 1; 46 | } 47 | 48 | numARows = atoi(argv[1]); 49 | numAColumns = atoi(argv[2]); 50 | numBRows = atoi(argv[3]); 51 | numBColumns = atoi(argv[4]); 52 | 53 | numCRows = numARows; 54 | numCColumns = numBColumns; 55 | 56 | if(numAColumns != numBRows) { 57 | printf("Number of columns in A must be the same as the number of rows in B\n"); 58 | return 1; 59 | } 60 | 61 | // Allocate memory on host 62 | hostA = (float *) malloc(numARows * numAColumns * sizeof(float)); 63 | hostB = (float *) malloc(numBRows * numBColumns * sizeof(float)); 64 | hostC = (float *) malloc(numCRows * numCColumns * sizeof(float)); 65 | 66 | // Allocate memory on device 67 | cudaMalloc((void **) &deviceA, numARows * numAColumns * sizeof(float)); 68 | cudaMalloc((void **) &deviceB, numBRows * numBColumns * sizeof(float)); 69 | cudaMalloc((void **) &deviceC, numCRows * numCColumns * sizeof(float)); 70 | 71 | // Initialize host memory 72 | 
srand(time(NULL)); 73 | for (int i = 0; i < numARows; i++) 74 | for (int j = 0; j < numAColumns; j++) 75 | hostA[i * numAColumns + j] = rand() / (float) RAND_MAX; 76 | 77 | for (int i = 0; i < numBRows; i++) 78 | for (int j = 0; j < numBColumns; j++) 79 | hostB[i * numBColumns + j] = rand() / (float) RAND_MAX; 80 | 81 | #ifdef DEBUG 82 | // Show input matrices 83 | printf("A:\n"); 84 | for (int i = 0; i < numARows; i++) { 85 | for (int j = 0; j < numAColumns; j++) 86 | printf("%f ", hostA[i * numAColumns + j]); 87 | printf("\n"); 88 | } 89 | printf("\n"); 90 | 91 | printf("B:\n"); 92 | for (int i = 0; i < numBRows; i++) { 93 | for (int j = 0; j < numBColumns; j++) 94 | printf("%f ", hostB[i * numBColumns + j]); 95 | printf("\n"); 96 | } 97 | printf("\n"); 98 | #endif 99 | 100 | // Copy host memory to device 101 | cudaMemcpy(deviceA, hostA, numARows * numAColumns * sizeof(float), cudaMemcpyHostToDevice); 102 | cudaMemcpy(deviceB, hostB, numBRows * numBColumns * sizeof(float), cudaMemcpyHostToDevice); 103 | 104 | // Launch kernel 105 | dim3 blockDim(16, 16); 106 | dim3 gridDim(ceil((float)numCColumns / blockDim.x), ceil((float)numCRows / blockDim.y)); 107 | sgemm<<>>(deviceA, deviceB, deviceC, 108 | numARows, numAColumns, 109 | numBRows, numBColumns); 110 | 111 | // Copy device memory to host 112 | cudaMemcpy(hostC, deviceC, numCRows * numCColumns * sizeof(float), cudaMemcpyDeviceToHost); 113 | 114 | #ifdef DEBUG 115 | // Print results 116 | printf("C:\n"); 117 | for (int i = 0; i < numCRows; i++) { 118 | for (int j = 0; j < numCColumns; j++) 119 | printf("%f ", hostC[i * numCColumns + j]); 120 | printf("\n"); 121 | } 122 | #endif 123 | 124 | // Free memory 125 | free(hostA); 126 | free(hostB); 127 | free(hostC); 128 | cudaFree(deviceA); 129 | cudaFree(deviceB); 130 | cudaFree(deviceC); 131 | 132 | return 0; 133 | } -------------------------------------------------------------------------------- /CodingAssignments/mat_vec_mul/README.md: 
-------------------------------------------------------------------------------- 1 | ## Matrix-Vector Multiplication 2 | 3 | *Code for Exercise 2 from [here](../../exercises/README.MD)* 4 | 5 | --- 6 | 7 | The host code: 8 | 9 | - Allocates memory for the input and output matrices on host and initializes the memory. 10 | - Allocates memory for the input and output matrices on device and copies the input matrices to the device. 11 | - Launches the kernel. 12 | - Copies the output matrix from the device to the host and prints the results. 13 | - Frees the memory on the device and host. 14 | 15 | 16 | The kernel **mat_vec_mul** uses a thread for each element of the output matrix. It uses a *for-loop* and each thread iterates over a row of the B matrix and the C vector to compute the result for the A matrix. The load operations for the B matrix are not coalesced and the kernel will be underutilized due to its high latency load operations. -------------------------------------------------------------------------------- /CodingAssignments/mat_vec_mul/mat_vec_mul.cu: --------------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>

//#define DEBUG

// Abort with a readable message when a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)


// Matrix-vector product A = B * C.
//   A : output vector, n elements
//   B : input matrix, n x n, row-major
//   C : input vector, n elements
// Launch layout: 1D grid of 1D blocks, one thread per element of A.
__global__
void mat_vec_mul(float *A, const float *B, const float *C, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) return;   // grid is rounded up to whole blocks

    // Row start computed in size_t so i * n cannot overflow int.
    const float *row = B + (size_t)i * n;

    // Accumulate in a register and store once, instead of a global-memory
    // read-modify-write of A[i] on every iteration.
    float sum = 0.0f;
    for (int k = 0; k < n; k++)
    {
        sum += row[k] * C[k];
    }
    A[i] = sum;
}


// Host driver: builds an n x n matrix and an n-vector of ones, multiplies them
// on the device and (with DEBUG) prints the resulting vector.
// Returns 0 on success, 1 on bad arguments or host allocation failure.
int main(int argc, char **argv)
{
    if (argc != 2){
        printf("Usage: ./a.out <n>\n");
        return 1;
    }

    const int n = atoi(argv[1]);
    if (n <= 0) {
        // atoi yields 0 for non-numeric input; n <= 0 gives an empty launch.
        printf("n must be a positive integer\n");
        return 1;
    }

    const size_t vecBytes = (size_t)n * sizeof(float);
    const size_t matBytes = (size_t)n * n * sizeof(float);

    // Allocate memory on host
    float *h_A = (float *)malloc(vecBytes);
    float *h_B = (float *)malloc(matBytes);
    float *h_C = (float *)malloc(vecBytes);
    if (h_A == NULL || h_B == NULL || h_C == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        free(h_A);
        free(h_B);
        free(h_C);
        return 1;
    }

    // Initialize host memory
    for (int i = 0; i < n; i++)
    {
        h_C[i] = 1;
        for (int j = 0; j < n; j++)
        {
            h_B[(size_t)i * n + j] = 1;
        }
    }

    // Allocate memory on device
    float *d_A, *d_B, *d_C;
    CUDA_CHECK(cudaMalloc((void **)&d_A, vecBytes));
    CUDA_CHECK(cudaMalloc((void **)&d_B, matBytes));
    CUDA_CHECK(cudaMalloc((void **)&d_C, vecBytes));

    // Copy host memory to device memory
    CUDA_CHECK(cudaMemcpy(d_C, h_C, vecBytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_B, h_B, matBytes, cudaMemcpyHostToDevice));

    // Launch the kernel: 128 threads per block, grid rounded up (ceil-div)
    dim3 dimBlock(128);
    dim3 dimGrid((n + dimBlock.x - 1) / dimBlock.x);
    mat_vec_mul<<<dimGrid, dimBlock>>>(d_A, d_B, d_C, n);
    CUDA_CHECK(cudaGetLastError());   // launch-configuration errors

    // Copy device memory to host memory (blocking; also surfaces kernel faults)
    CUDA_CHECK(cudaMemcpy(h_A, d_A, vecBytes, cudaMemcpyDeviceToHost));

#ifdef DEBUG
    // Print the result
    for (int i = 0; i < n; i++)
    {
        printf("%f ", h_A[i]);
    }
    printf("\n");
#endif

    // Free device memory
    CUDA_CHECK(cudaFree(d_A));
    CUDA_CHECK(cudaFree(d_B));
    CUDA_CHECK(cudaFree(d_C));

    // Free host memory
    free(h_A);
    free(h_B);
    free(h_C);

    return 0;
}
-------------------------------------------------------------------------------- /CodingAssignments/tiled_matrix_mul_shared_mem/README.md: -------------------------------------------------------------------------------- 1 | ## Tiled Matrix Multiplication Using Shared Memory 2 | 3 | *This program performs matrix-matrix multiplication using shared memory to reduce the number of load operations.* 4 | 5 | --- 6 |
7 | 8 | In the host code: 9 | - We allocate memory for the input and output matrices. 10 | - We initialize the input matrices with random values. 11 | - We copy the input matrices to the device. 12 | - We launch the kernel. 13 | - We copy the output matrix to the host. 14 | - And finally we free both host and device memory. 15 | 16 | In the kernel function first we declare the shared memory for the matrix multiplication and compute the row and column that each thread will load from. 17 | 18 | The number of iterations (phases) in the for-loop is equal to the number of columns of the A matrix (which is also the number of rows of the B matrix) divided by the TILE_WIDTH. 19 | 20 | The number of rows of the A matrix and the number of columns of the B matrix are handled by the number of blocks in the grid. 21 | 22 | Inside the for loop we check if the load operation is performed within both the A and B matrices. 23 | - If it is we load the corresponding element. 24 | - If it is not we assign the value to 0 so that the result will not be corrupted with random values. 25 | 26 | After that we synchronize the threads so that the dot product is computed only after all the threads have finished loading the values and no random values are left in the shared memory. 27 | 28 | Then we accumulate the dot product and we synchronize the threads again so that no thread will start loading new values (in the next iteration) before the dot product is computed. 29 | 30 | Finally if the current thread is within the output matrix bound we store the computed element to the output matrix. -------------------------------------------------------------------------------- /CodingAssignments/vector_add/README.md: -------------------------------------------------------------------------------- 1 | ## Vector Add 2 | 3 | *This program performs vector addition* 4 | 5 | --- 6 |
7 | 8 | In the host code: 9 | 10 | - We allocate memory for the input and output vectors. 11 | - We initialize the input vectors with random values. 12 | - We launch the kernel. 13 | - We copy the output vector to the host. 14 | - And finally we free both host and device memory. 15 | 16 | The kernel first checks if the thread's i variable is inside the vector bounds and then performs the add operation between the input vectors and stores the output to the output vector. -------------------------------------------------------------------------------- /CodingAssignments/vector_add/vector_add.cu: --------------------------------------------------------------------------------
/*/
 *
 * A program that implements vector addition
 *
 * Compile with:
 * nvcc vector_add.cu
 *
 * Run with:
 * ./a.out <blockSize> <inputLength>
 *
/*/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define DEBUG

// Abort with a readable message when a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Element-wise vector addition: out[i] = in1[i] + in2[i] for 0 <= i < n.
// Launch layout: 1D grid of 1D blocks, one thread per element.
__global__
void vec_add(const int *in1, const int *in2, int *out, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid is rounded up to whole blocks, so tail threads must bail out.
    if (i >= n) return;

    out[i] = in1[i] + in2[i];
}


// Host driver: reads the block size and vector length from the command line,
// fills two vectors with random values in [0, 99], adds them on the device and
// (with DEBUG) prints every sum.
// Returns 0 on success, 1 on bad arguments or host allocation failure.
int main(int argc, char* argv[])
{
    if (argc != 3)
    {
        printf("Usage: %s <blockSize> <inputLength>\n", argv[0]);
        return 1;
    }

    const long blockSizeArg   = strtol(argv[1], NULL, 10);
    const long inputLengthArg = strtol(argv[2], NULL, 10);

    // strtol yields 0 for non-numeric input; zero/negative values would give a
    // division by zero below or an invalid launch configuration.
    if (blockSizeArg <= 0 || blockSizeArg > INT_MAX ||
        inputLengthArg <= 0 || inputLengthArg > INT_MAX)
    {
        printf("blockSize and inputLength must be positive integers\n");
        return 1;
    }

    const int blockSize   = (int)blockSizeArg;
    const int inputLength = (int)inputLengthArg;
    const size_t bytes    = (size_t)inputLength * sizeof(int);

    // Allocate host memory for the input and output data
    int *hostInput1 = (int*)malloc(bytes);
    int *hostInput2 = (int*)malloc(bytes);
    int *hostOutput = (int*)malloc(bytes);
    if (hostInput1 == NULL || hostInput2 == NULL || hostOutput == NULL)
    {
        fprintf(stderr, "Host memory allocation failed\n");
        free(hostInput1);
        free(hostInput2);
        free(hostOutput);
        return 1;
    }

    // Allocate device memory for the input and output data
    int *deviceInput1, *deviceInput2, *deviceOutput;
    CUDA_CHECK(cudaMalloc((void**) &deviceInput1, bytes));
    CUDA_CHECK(cudaMalloc((void**) &deviceInput2, bytes));
    CUDA_CHECK(cudaMalloc((void**) &deviceOutput, bytes));

    // Initialize the host vectors
    srand(time(NULL));
    for (int i = 0; i < inputLength; i++)
    {
        hostInput1[i] = rand() % 100;
        hostInput2[i] = rand() % 100;
    }

    // Copy the host input data to the device
    CUDA_CHECK(cudaMemcpy(deviceInput1, hostInput1, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(deviceInput2, hostInput2, bytes, cudaMemcpyHostToDevice));

    // Launch the kernel: grid rounded up (ceil-div) so every element is covered.
    // A blockSize above the device limit is reported by cudaGetLastError().
    const unsigned int gridSize =
        (unsigned int)(((size_t)inputLength + blockSize - 1) / blockSize);
    vec_add<<<gridSize, blockSize>>>(deviceInput1, deviceInput2, deviceOutput, inputLength);
    CUDA_CHECK(cudaGetLastError());

    // Copy the device output data to the host (blocking; also surfaces kernel faults)
    CUDA_CHECK(cudaMemcpy(hostOutput, deviceOutput, bytes, cudaMemcpyDeviceToHost));

#ifdef DEBUG
    // Print the results
    for (int i = 0; i < inputLength; i++)
        printf("%d + %d = %d\n", hostInput1[i], hostInput2[i], hostOutput[i]);
#endif

    // Free the device memory
    CUDA_CHECK(cudaFree(deviceInput1));
    CUDA_CHECK(cudaFree(deviceInput2));
    CUDA_CHECK(cudaFree(deviceOutput));

    // Free the host memory
    free(hostInput1);
    free(hostInput2);
    free(hostOutput);

    return 0;
}
-------------------------------------------------------------------------------- /Colab_Test/Intro_to_CUDA_C_Part_1-Student.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenoRobotics/CUDA_C_and_GPU/87dadf7d66311aa0db03ff163fafa7c545cc5298/Colab_Test/Intro_to_CUDA_C_Part_1-Student.zip -------------------------------------------------------------------------------- /Colab_Test/README.md: -------------------------------------------------------------------------------- 1 | # Test Colab Jupyter Notebook Setup and Usage for: Part I of the Intro to CUDA C/C++ and GPU Arch. Course 2 | 3 | ## Steps 4 | 5 | 1) Download Zip file.
6 | 2) Unzip and extract the "Intro_toCUDA_C_Part_1" folder to your Google Drive 7 | 3) Double click on the Intro_to_CUDA_C_PART_1.ipynb (Jupyter Notebook) file in the folder. 8 | 4) Follow the steps in the Notebook. 9 | 10 | Good luck! 11 | -------------------------------------------------------------------------------- /Get_GPU_Properties/Makefile: -------------------------------------------------------------------------------- 1 | # -lcurand -lcublas -lcusparse -lcufft -lnpp -lcudart 2 | 3 | get_device_properties : get_device_properties.cu 4 | nvcc -I. -I/usr/local/cuda-5.0/include/ get_device_properties.cu -o get_device_properties 5 | 6 | 7 | clean: 8 | rm -f *.o *~ core .depend get_device_properties 9 | 10 | depend .depend dep: 11 | $(CC) $(CFLAGS) -M *.c > $@ 12 | 13 | 14 | ifeq (.depend,$(wildcard .depend)) 15 | include .depend 16 | endif 17 | -------------------------------------------------------------------------------- /Get_GPU_Properties/Makefile_gmatch_cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenoRobotics/CUDA_C_and_GPU/87dadf7d66311aa0db03ff163fafa7c545cc5298/Get_GPU_Properties/Makefile_gmatch_cu -------------------------------------------------------------------------------- /Get_GPU_Properties/Makefile_gpu_props_cu: -------------------------------------------------------------------------------- 1 | # -lcurand -lcublas -lcusparse -lcufft -lnpp -lcudart 2 | 3 | get_device_properties : get_device_properties.cu 4 | nvcc -I.
-I/usr/local/cuda/include/ get_device_properties.cu -o get_device_properties 5 | 6 | 7 | clean: 8 | rm -f *.o *~ core .depend get_device_properties 9 | 10 | depend .depend dep: 11 | $(CC) $(CFLAGS) -M *.c > $@ 12 | 13 | 14 | ifeq (.depend,$(wildcard .depend)) 15 | include .depend 16 | endif 17 | -------------------------------------------------------------------------------- /Get_GPU_Properties/common/book.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and 5 | * proprietary rights in and to this software and related documentation. 6 | * Any use, reproduction, disclosure, or distribution of this software 7 | * and related documentation without an express license agreement from 8 | * NVIDIA Corporation is strictly prohibited. 9 | * 10 | * Please refer to the applicable NVIDIA end user license agreement (EULA) 11 | * associated with this source code for terms and conditions that govern 12 | * your use of this NVIDIA software. 
13 | * 14 | */ 15 | 16 | 17 | #ifndef __BOOK_H__ 18 | #define __BOOK_H__ 19 | #include 20 | #include 21 | //#include 22 | #include 23 | 24 | 25 | static void HandleError( cudaError_t err, 26 | const char *file, 27 | int line ) { 28 | if (err != cudaSuccess) { 29 | printf( "%s in %s at line %d\n", cudaGetErrorString( err ), 30 | file, line ); 31 | exit( EXIT_FAILURE ); 32 | } 33 | } 34 | #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) 35 | 36 | 37 | #define HANDLE_NULL( a ) {if (a == NULL) { \ 38 | printf( "Host memory failed in %s at line %d\n", \ 39 | __FILE__, __LINE__ ); \ 40 | exit( EXIT_FAILURE );}} 41 | /* 42 | template< typename T > 43 | void swap( T& a, T& b ) { 44 | T t = a; 45 | a = b; 46 | b = t; 47 | } 48 | */ 49 | 50 | void* big_random_block( int size ) { 51 | int i=0; 52 | unsigned char *data = (unsigned char*)malloc( size ); 53 | HANDLE_NULL( data ); 54 | for (i=0; i 360) hue -= 360; 75 | else if (hue < 0) hue += 360; 76 | 77 | if (hue < 60) 78 | return (unsigned char)(255 * (n1 + (n2-n1)*hue/60)); 79 | if (hue < 180) 80 | return (unsigned char)(255 * n2); 81 | if (hue < 240) 82 | return (unsigned char)(255 * (n1 + (n2-n1)*(240-hue)/60)); 83 | return (unsigned char)(255 * n1); 84 | } 85 | 86 | /* 87 | __global__ void float_to_color( unsigned char *optr, 88 | const float *outSrc ) { 89 | // map from threadIdx/BlockIdx to pixel position 90 | int x = threadIdx.x + blockIdx.x * blockDim.x; 91 | int y = threadIdx.y + blockIdx.y * blockDim.y; 92 | int offset = x + y * blockDim.x * gridDim.x; 93 | 94 | float l = outSrc[offset]; 95 | float s = 1; 96 | int h = (180 + (int)(360.0f * outSrc[offset])) % 360; 97 | float m1, m2; 98 | 99 | if (l <= 0.5f) 100 | m2 = l * (1 + s); 101 | else 102 | m2 = l + s - l * s; 103 | m1 = 2 * l - m2; 104 | 105 | optr[offset*4 + 0] = value( m1, m2, h+120 ); 106 | optr[offset*4 + 1] = value( m1, m2, h ); 107 | optr[offset*4 + 2] = value( m1, m2, h -120 ); 108 | optr[offset*4 + 3] = 255; 109 | } 110 | 111 | 
__global__ void float_to_color( uchar4 *optr, 112 | const float *outSrc ) { 113 | 114 | // map from threadIdx/BlockIdx to pixel position 115 | int x = threadIdx.x + blockIdx.x * blockDim.x; 116 | int y = threadIdx.y + blockIdx.y * blockDim.y; 117 | int offset = x + y * blockDim.x * gridDim.x; 118 | 119 | float l = outSrc[offset]; 120 | float s = 1; 121 | int h = (180 + (int)(360.0f * outSrc[offset])) % 360; 122 | float m1, m2; 123 | 124 | if (l <= 0.5f) 125 | m2 = l * (1 + s); 126 | else 127 | m2 = l + s - l * s; 128 | m1 = 2 * l - m2; 129 | 130 | optr[offset].x = value( m1, m2, h+120 ); 131 | optr[offset].y = value( m1, m2, h ); 132 | optr[offset].z = value( m1, m2, h -120 ); 133 | optr[offset].w = 255; 134 | } 135 | 136 | */ 137 | 138 | #if _WIN32 139 | //Windows threads. 140 | #include 141 | 142 | typedef HANDLE CUTThread; 143 | typedef unsigned (WINAPI *CUT_THREADROUTINE)(void *); 144 | 145 | #define CUT_THREADPROC unsigned WINAPI 146 | #define CUT_THREADEND return 0 147 | 148 | #else 149 | //POSIX threads. 150 | #include 151 | 152 | typedef pthread_t CUTThread; 153 | typedef void *(*CUT_THREADROUTINE)(void *); 154 | 155 | #define CUT_THREADPROC void 156 | #define CUT_THREADEND 157 | #endif 158 | 159 | //Create thread. 160 | CUTThread start_thread( CUT_THREADROUTINE, void *data ); 161 | 162 | //Wait for thread to finish. 163 | void end_thread( CUTThread thread ); 164 | 165 | //Destroy thread. 166 | void destroy_thread( CUTThread thread ); 167 | 168 | //Wait for multiple threads. 
169 | void wait_for_threads( const CUTThread *threads, int num ); 170 | 171 | #if _WIN32 172 | //Create thread 173 | CUTThread start_thread(CUT_THREADROUTINE func, void *data){ 174 | return CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, data, 0, NULL); 175 | } 176 | 177 | //Wait for thread to finish 178 | void end_thread(CUTThread thread){ 179 | WaitForSingleObject(thread, INFINITE); 180 | CloseHandle(thread); 181 | } 182 | 183 | //Destroy thread 184 | void destroy_thread( CUTThread thread ){ 185 | TerminateThread(thread, 0); 186 | CloseHandle(thread); 187 | } 188 | 189 | //Wait for multiple threads 190 | void wait_for_threads(const CUTThread * threads, int num){ 191 | WaitForMultipleObjects(num, threads, true, INFINITE); 192 | 193 | for(int i = 0; i < num; i++) 194 | CloseHandle(threads[i]); 195 | } 196 | 197 | #else 198 | //Create thread 199 | CUTThread start_thread(CUT_THREADROUTINE func, void * data){ 200 | pthread_t thread; 201 | pthread_create(&thread, NULL, func, data); 202 | return thread; 203 | } 204 | 205 | //Wait for thread to finish 206 | void end_thread(CUTThread thread){ 207 | pthread_join(thread, NULL); 208 | } 209 | 210 | //Destroy thread 211 | void destroy_thread( CUTThread thread ){ 212 | pthread_cancel(thread); 213 | } 214 | 215 | //Wait for multiple threads 216 | void wait_for_threads(const CUTThread * threads, int num){ 217 | int i = 0; 218 | 219 | for(i = 0; i < num; i++) 220 | end_thread( threads[i] ); 221 | } 222 | 223 | #endif 224 | 225 | 226 | 227 | 228 | #endif // __BOOK_H__ 229 | -------------------------------------------------------------------------------- /Get_GPU_Properties/common/common_functions.h: -------------------------------------------------------------------------------- 1 | // File: common_functions.h 2 | 3 | 4 | //Shared Functions 5 | 6 | long skip_header_data(FILE *); 7 | int power(int , int ); 8 | unsigned int hash_algorithm(unsigned int , int, unsigned int ); 9 | unsigned int hash_algorithm_32bp(long long , int, 
unsigned int); 10 | long long sequence_reverse_complement(long long , int ); 11 | bool check_for_valid_nucleotide(char); 12 | unsigned int nucleotide_to_uint(char); 13 | long long bp_string_to_uint(char *string, int x_mer_size); 14 | 15 | 16 | long skip_header_data(FILE *fin) { 17 | char hdr_string[120]; 18 | char nucleotide; 19 | long current_file_addr = 0; 20 | bool DONE = FALSE; 21 | 22 | current_file_addr = ftell(fin); // Get current file position 23 | 24 | while (!DONE) { 25 | if (fscanf(fin, "%c", &nucleotide) <= 0) 26 | DONE = TRUE; 27 | else if (check_for_valid_nucleotide(nucleotide)) 28 | DONE = TRUE; 29 | else { 30 | fgets(hdr_string , 120 , fin); 31 | current_file_addr = ftell(fin); 32 | } 33 | } 34 | 35 | return current_file_addr; 36 | } 37 | 38 | int power(int x, int y) { 39 | int result = 1; 40 | int i; 41 | 42 | for (i=0; i < y; i++) { 43 | result = result * x; 44 | } 45 | return result; 46 | } 47 | 48 | int log_2(int num) { 49 | 50 | int shift_cnt = 0; 51 | int shift_val = 0; 52 | bool one_found = FALSE; 53 | 54 | shift_val = num; 55 | 56 | while(!one_found) { 57 | shift_val = shift_val >> 1; 58 | shift_cnt += 1; 59 | if (shift_val == 1) 60 | one_found = TRUE; 61 | 62 | } 63 | 64 | return shift_cnt; 65 | 66 | } 67 | 68 | unsigned int hash_algorithm(unsigned int base_x, int x_mer_size, unsigned int bit_mask) { 69 | 70 | unsigned int hashed_array_addr = 0; 71 | unsigned int base_x_xor_upper = 0; 72 | unsigned int base_x_xor_lower = 0; 73 | 74 | /* //2x 75 | base_x_xor_upper = ((base_x >> 0) ^ (base_x >> 9) ^ (base_x >> 19)) & bit_mask; 76 | base_x_xor_lower = ((base_x >> 16) ^ (base_x >> 6) ^ (base_x >> 23)) & bit_mask; 77 | */ 78 | /* //4x 79 | base_x_xor_upper = ((base_x >> 0) ^ (base_x >> 9) ^ (base_x >> 19) ^ (base_x >> 18)) & bit_mask; 80 | base_x_xor_lower = ((base_x >> 16) ^ (base_x >> 6) ^ (base_x >> 23) ) & bit_mask; 81 | */ 82 | base_x_xor_upper = ((base_x >> 0) ^ (base_x >> 8) ^ (base_x >> 19) ^ (base_x >> 18)) & bit_mask; 83 | 
base_x_xor_lower = ((base_x >> 16) ^ (base_x >> 9) ^ (base_x >> 23) ^ (base_x >> 18)) & bit_mask; 84 | 85 | hashed_array_addr = (base_x_xor_lower + base_x_xor_upper) & bit_mask; 86 | 87 | 88 | return hashed_array_addr; //hashed_array_addr 89 | 90 | } 91 | 92 | 93 | unsigned int hash_algorithm_32bp(long long base_x, int x_mer_size, unsigned int bit_mask) { 94 | 95 | unsigned int hashed_array_addr = 0; 96 | unsigned int base_x_xor_upper = 0; 97 | unsigned int base_x_xor_lower = 0; 98 | 99 | 100 | base_x_xor_upper = ( ((base_x >> 44) & 0xf0f) ^ (base_x >> 0) ^ (base_x >> 9) ) & HASH_MASK; 101 | base_x_xor_lower = ( ((base_x >> 51) & 0x5a5) ^ (base_x >> 16) ^ (base_x >> 30) ) & HASH_MASK; 102 | 103 | /* 104 | //original 105 | base_x_xor_upper = ((base_x >> 46) ^ (base_x >> 0) ^ (base_x >> 9) ^ (base_x >> 39)) & bit_mask; 106 | base_x_xor_lower = ((base_x >> 16) ^ (base_x >> 30) ^ (base_x >> 45) ^ (base_x >> 23)) & bit_mask; 107 | */ 108 | hashed_array_addr = (base_x_xor_lower + base_x_xor_upper) & HASH_MASK; 109 | 110 | return hashed_array_addr; 111 | } 112 | 113 | 114 | long long sequence_reverse_complement(long long orig_sequence,int x_mer_size) { 115 | 116 | int i; 117 | long long rev_comp_seq = 0; 118 | unsigned int comp_base[x_mer_size]; //5' Position of original sequence - complemented = index 0 119 | //3' Position of original sequence - complemented = index 15 120 | 121 | for (i=0; i < x_mer_size; i++) { 122 | comp_base[i] = ((orig_sequence >> (((x_mer_size - i) - 1) * 2)) & 3) ^ 3; 123 | } 124 | 125 | for (i= (x_mer_size - 1); i >= 0 ; i--) { 126 | rev_comp_seq = (comp_base[i] << (i * 2)) | rev_comp_seq; 127 | } 128 | 129 | return rev_comp_seq; 130 | } 131 | 132 | 133 | bool check_for_valid_nucleotide(char nucleotide) { 134 | unsigned int uint_nuke = 99; 135 | 136 | switch (nucleotide) { 137 | case 'A' : 138 | uint_nuke = 0; 139 | break; 140 | 141 | case 'a' : 142 | uint_nuke = 0; 143 | break; 144 | 145 | case 'C' : 146 | uint_nuke = 1; 147 | break; 148 | 149 | 
case 'c' : 150 | uint_nuke = 1; 151 | break; 152 | 153 | case 'G' : 154 | uint_nuke = 2; 155 | break; 156 | 157 | case 'g' : 158 | uint_nuke = 2; 159 | break; 160 | 161 | case 'T' : 162 | uint_nuke = 3; 163 | break; 164 | 165 | case 't' : 166 | uint_nuke = 3; 167 | break; 168 | 169 | default : 170 | uint_nuke = 99; 171 | } 172 | 173 | if (uint_nuke < 99) 174 | return TRUE; 175 | else 176 | return FALSE; 177 | } 178 | 179 | unsigned int nucleotide_to_uint(char nucleotide) { 180 | unsigned int uint_nuke; 181 | switch (nucleotide) { 182 | case 'A' : 183 | uint_nuke = 0; 184 | break; 185 | 186 | case 'a' : 187 | uint_nuke = 0; 188 | break; 189 | 190 | case 'C' : 191 | uint_nuke = 1; 192 | break; 193 | 194 | case 'c' : 195 | uint_nuke = 1; 196 | break; 197 | 198 | case 'G' : 199 | uint_nuke = 2; 200 | break; 201 | 202 | case 'g' : 203 | uint_nuke = 2; 204 | break; 205 | 206 | case 'T' : 207 | uint_nuke = 3; 208 | break; 209 | 210 | case 't' : 211 | uint_nuke = 3; 212 | break; 213 | 214 | default : 215 | uint_nuke = 0; 216 | } 217 | return uint_nuke; 218 | } 219 | 220 | 221 | long long bp_string_to_uint(char *string, int x_mer_size) { 222 | long long uint_nuke; 223 | long long sequence = 0; 224 | int i; 225 | 226 | 227 | for (i=0; i < x_mer_size; i++) { 228 | switch (string[i]) { 229 | case 'A' : 230 | uint_nuke = 0; 231 | break; 232 | 233 | case 'a' : 234 | uint_nuke = 0; 235 | break; 236 | 237 | case 'C' : 238 | uint_nuke = 1; 239 | break; 240 | 241 | case 'c' : 242 | uint_nuke = 1; 243 | break; 244 | 245 | case 'G' : 246 | uint_nuke = 2; 247 | break; 248 | 249 | case 'g' : 250 | uint_nuke = 2; 251 | break; 252 | 253 | case 'T' : 254 | uint_nuke = 3; 255 | break; 256 | 257 | case 't' : 258 | uint_nuke = 3; 259 | break; 260 | 261 | default : 262 | uint_nuke = 0; 263 | } 264 | sequence = sequence | (uint_nuke << ((x_mer_size-1-i)*2)); 265 | } 266 | return sequence; 267 | } 268 | 269 | 270 | 271 | 
-------------------------------------------------------------------------------- /Get_GPU_Properties/common/constants_32bp.h: --------------------------------------------------------------------------------
// File: constants_32bp.h

// Constants
// Expression-valued macros are wrapped in parentheses so that they expand
// safely inside larger expressions (e.g. `x % NUM_OF_ENTRIES_IN_PTR_ARRAYS`
// or `n / NUM_OF_RSLT_RECORD_ENTRIES_PER_DB_OFFSET`).
#define NUM_OF_SEQUENCES_IN_CACHE       (16 * 1024) //8 * 1024  // Number of database sequences stored in cache
#define NUM_OF_ENTRIES_IN_PTR_ARRAYS    (8 * 1024)  //4 * 1024
#define HASH_MASK                       0x1fff      //Correlates to TABLE_LENGTH
#define NUM_OF_LUTS_USED                12          // Number of parallel LUTs used to find first occurrence
#define NUM_OF_BPS_PER_QRY_SEQ          32          // Particular to the current algorithm
#define X_MER_SIZE                      NUM_OF_BPS_PER_QRY_SEQ
#define NUM_OF_BITS_FOR_QRY_SEQ_ID      16
#define NUM_OF_BITS_FOR_DB_SEQ_ID       16
#define NUM_OF_BITS_FOR_TOTAL_RSLT_WORD 16          // 13 bits for 1st occurrence offset + 3'b000
#define MAX_HITS                        300         // Max number of hits/ptrs per bin recorded
#define NUM_OF_BITS_PER_NUCLEOTIDE      2
#define NUM_OF_BYTES_PER_WORD           4

#define QRY_SEGMENT_ID_INDICATOR        0x80000000  // Upper 3 bits of the 32 bit data result indicates that
                                                    // the Qry segment ID # can be found in the lower 29 bits
#define DB_SEGMENT_ID_INDICATOR         0xa0000000  // Upper 3 bits of the 32 bit data result indicates that
                                                    // the DB segment ID # can be found in the lower 29 bits
#define ADDITION_SEARCH_REQD_INDICATOR  0xc0000000  // Upper 3 bits of the 32 bit data result indicates that
                                                    // additional search required because # of unique matches is
                                                    // greater than # of unique lookup brams in FPGA. Seq Id is
                                                    // located in the lower 16 bits.

//Visual/Analyzed Report Processing Constants

#define SHOT_GUN_OVERSAMPLE_FACTOR      5           // Temporary variable. This value will be set and passed in main scripts.
#define NUM_OF_WORDS_PER_MATCH          4           // Number of 32 bit words per match
#define OVERSAMPLE_CUSION_FACTOR        5           // Extra entries factor above oversample factor
#define NUM_OF_RSLT_RECORD_ENTRIES_PER_DB_OFFSET (SHOT_GUN_OVERSAMPLE_FACTOR*NUM_OF_WORDS_PER_MATCH*OVERSAMPLE_CUSION_FACTOR)

// Other
//create boolean logic
// `bool` is a built-in keyword in C++/CUDA, where this typedef would not
// compile, so it is only emitted for C translation units.
#if !defined(BOOLEAN) && !defined(__cplusplus)
typedef int bool;
#endif
#define FALSE 0
#define TRUE 1


-------------------------------------------------------------------------------- /Get_GPU_Properties/common/db_file_names.h: --------------------------------------------------------------------------------
//File: db_file_names.h
//
//Used by db_preprocessor.c and gmatch.cu files
//
// Paths are relative to the directory the programs are run from.

const char *BIN_UNIQUE_CNT_FILE = "../db_files/unique_cnt_file.txt";    // Num of Unique Values/bin File
const char *CACHE_PTR_FILE_1    = "../db_files/cache_ptr_file_1.txt";   // Ptrs to "Cache" Memory 1
const char *CACHE_PTR_FILE_2    = "../db_files/cache_ptr_file_2.txt";   // Ptrs to "Cache" Memory 2
const char *CACHE_PTR_FILE_3    = "../db_files/cache_ptr_file_3.txt";   // Ptrs to "Cache" Memory 3
const char *CACHE_PTR_FILE_4    = "../db_files/cache_ptr_file_4.txt";   // Ptrs to "Cache" Memory 4
const char *CACHE_PTR_FILE_5    = "../db_files/cache_ptr_file_5.txt";   // Ptrs to "Cache" Memory 5
const char *CACHE_PTR_FILE_6    = "../db_files/cache_ptr_file_6.txt";   // Ptrs to "Cache" Memory 6
const char *CACHE_PTR_FILE_7    = "../db_files/cache_ptr_file_7.txt";   // Ptrs to "Cache" Memory 7
const char *CACHE_PTR_FILE_8    = "../db_files/cache_ptr_file_8.txt";   // Ptrs to "Cache" Memory 8
const char *CACHE_PTR_FILE_9    = "../db_files/cache_ptr_file_9.txt";   // Ptrs to "Cache" Memory 9
const char *CACHE_PTR_FILE_10   = "../db_files/cache_ptr_file_10.txt";  // Ptrs to "Cache" Memory 10
const char *CACHE_PTR_FILE_11   = "../db_files/cache_ptr_file_11.txt";  // Ptrs to "Cache" Memory 11
const char *CACHE_PTR_FILE_12   = "../db_files/cache_ptr_file_12.txt";  // Ptrs to "Cache" Memory 12
const char *CACHE_PTR_FILE_13   = "../db_files/cache_ptr_file_13.txt";  // Ptrs to "Cache" Memory 13
const char *CACHE_PTR_FILE_14   = "../db_files/cache_ptr_file_14.txt";  // Ptrs to "Cache" Memory 14
const char *CACHE_FILE          = "../db_files/cache_file.txt";         // Database Segment File
const char *CACHE_FILE_RAW      = "../db_files/cache_file_raw.txt";     // Database Segment File
const char *STATS_FILE          = "../db_files/stats.txt";
const char *NUM_DB_SEGS         = "../db_files/num_db_segs.txt";        // Number of DB Segments Processed

-------------------------------------------------------------------------------- /Get_GPU_Properties/common/gpu_arch_constants.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenoRobotics/CUDA_C_and_GPU/87dadf7d66311aa0db03ff163fafa7c545cc5298/Get_GPU_Properties/common/gpu_arch_constants.h -------------------------------------------------------------------------------- /Get_GPU_Properties/common/qry_file_names.h: --------------------------------------------------------------------------------
//File: qry_file_names.h
//
//Used by bp_to_hex_converter.c and gmatch.cu files
//
// Paths are relative to the directory the programs are run from.

const char *QUERY_RAW = "../qry_files/query_data_bp_format.txt";
const char *QUERY_HEX = "../qry_files/query_data_hex_format.txt";
const char *QUERY_CNT = "../qry_files/query_sequence_count.txt";

-------------------------------------------------------------------------------- /Get_GPU_Properties/get_device_properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenoRobotics/CUDA_C_and_GPU/87dadf7d66311aa0db03ff163fafa7c545cc5298/Get_GPU_Properties/get_device_properties -------------------------------------------------------------------------------- /Get_GPU_Properties/get_device_properties.cu:
--------------------------------------------------------------------------------
//************************************************
// Name: get_device_properties.cu
// Copyright: ZenoMachines, LLC
// Author: Peter J. Zeno
// Date: 06/08/11
// Description: Gets/Displays all GPUs on the computer
// along with their properties.
//
//************************************************
// USAGE:
// ./get_device_properties
//

#include "./common/book.h"

//output files
//const char *OUTFILE = "./common/gpu_arch_constants.h";
//const char *MAKEFILE = "Makefile_gmatch_cu";

//file handles
//FILE *fout;
//FILE *fmake;

// Maps a boolean device flag to the label printed in the report.
static const char *enabledLabel( int flag ) {
    return flag ? "Enabled" : "Disabled";
}

// Reads one integer attribute of `device` through cudaDeviceGetAttribute.
// Failures go through HANDLE_ERROR (declared in book.h, not visible here;
// presumably it reports the error and terminates -- confirm).
static int deviceAttribute( cudaDeviceAttr attr, int device ) {
    int value = 0;
    HANDLE_ERROR( cudaDeviceGetAttribute( &value, attr, device ) ) ;
    return value;
}

// Enumerates every CUDA device and prints its general, memory and
// multiprocessor properties to stdout. Takes no arguments; returns 0.
//
// Clock rates, copy-overlap and kernel-timeout flags are read with
// cudaDeviceGetAttribute rather than the cudaDeviceProp fields clockRate,
// memoryClockRate, deviceOverlap and kernelExecTimeoutEnabled. Those fields
// are deprecated and, as far as I know, absent from the newest toolkits'
// struct (TODO confirm), whereas the attribute queries return the same
// values on old and new toolkits.
int main( void ) {

    cudaDeviceProp prop;   // kept at function scope: the disabled code below reads it after the loop
    int count = 0;
    int i = 0;
    /*
    //open output files
    if ((fout = fopen(OUTFILE,"w+")) == NULL)
        printf("Cannot open %s for writing",OUTFILE);
    if ((fmake = fopen(MAKEFILE,"w+")) == NULL)
        printf("Cannot open %s for writing",MAKEFILE);
    */
    HANDLE_ERROR( cudaGetDeviceCount( &count ) ) ;

    // Say so explicitly instead of exiting with no output at all.
    if (count == 0) {
        printf( "No CUDA devices found.\n" ) ;
        return 0;
    }

    for (i = 0; i < count; i++) {
        HANDLE_ERROR( cudaGetDeviceProperties( &prop, i ) ) ;

        // Both clock attributes are reported in kilohertz.
        int clockKHz      = deviceAttribute( cudaDevAttrClockRate, i );
        int memClockKHz   = deviceAttribute( cudaDevAttrMemoryClockRate, i );
        int copyOverlap   = deviceAttribute( cudaDevAttrGpuOverlap, i );
        int kernelTimeout = deviceAttribute( cudaDevAttrKernelExecTimeout, i );

        printf( "\n\n" ) ;
        printf( "--- General Information for device %d ---\n\n" , i ) ;
        printf( "Name: %s\n", prop.name ) ;
        printf( "Compute capability: %d.%d\n" , prop.major, prop.minor ) ;
        printf( "Device Clock rate: %d MHz\n" , clockKHz/1000 ) ;
        printf( "Memory Clock rate: %d MHz\n" , memClockKHz/1000 ) ;
        printf( "Device copy overlap: %s\n" , enabledLabel( copyOverlap ) ) ;
        printf( "Kernel execution timeout: %s\n" , enabledLabel( kernelTimeout ) ) ;
        printf( "\n" ) ;

        printf( "--- Memory Information for device %d ---\n\n" , i ) ;
        printf( "Total global mem: %4.1f MBs\n" , (double) prop.totalGlobalMem/(1024 * 1024) ) ;
        printf( "Total constant Mem: %lu KBs\n" , (long unsigned int) prop.totalConstMem/1024 ) ;
        printf( "Max mem pitch: %lu MBs\n", (long unsigned int) prop.memPitch/(1024 * 1024) ) ;
        printf( "Texture Alignment: %lu\n" , (long unsigned int) prop.textureAlignment ) ;
        printf( " \n" ) ;

        printf( "--- MP Information for device %d --- \n\n", i ) ;
        printf( "Multiprocessor count: %d\n" , prop.multiProcessorCount ) ;
        printf( "Shared mem per block: %lu KBs\n", (long unsigned int) prop.sharedMemPerBlock/1024 ) ;
        printf( "Registers per block: %d K\n", prop.regsPerBlock/1024 ) ;
        printf( "Threads in warp: %d\n", prop.warpSize ) ;
        printf( "Max threads per block: %d\n" , prop.maxThreadsPerBlock ) ;
        printf( "Max threads per MP: %d\n" , prop.maxThreadsPerMultiProcessor);
        printf( "Max thread dimensions: (%d, %d, %d) \n" ,
                prop.maxThreadsDim[0] , prop.maxThreadsDim[1] ,
                prop.maxThreadsDim[2] ) ;
        printf( "Max grid dimensions: (%d, %d, %d) \n" ,
                prop.maxGridSize[0] , prop.maxGridSize[1] ,
                prop.maxGridSize[2] ) ;
        printf( " \n" ) ;
    }
    /*
    //Create Makefile for gmatch
    fprintf(fmake,"# Makefile for gmatch.cu program. \n");
    fprintf(fmake,"# Created by get_device_properties program. \n\n\n");
    fprintf(fmake,"gmatch : gmatch.cu \n");
    fprintf(fmake," nvcc -I. -I/usr/local/cuda/include/ -I/usr/local/cuda/include/crt/ -L/usr/local/cuda/lib64/ -lcuda --ptxas-options=-v -arch=sm_%d%d gmatch.cu -o gmatch \n\n\n", prop.major, prop.minor);
    fprintf(fmake,"clean: \n");
    fprintf(fmake," rm -f *.o *~ core .depend gmatch \n\n\n");
    fprintf(fmake,"depend .depend dep: \n");
    fprintf(fmake," $(CC) $(CFLAGS) -M *.c > $@ \n\n\n");

    //Create GPU Specific Constants header file
    fprintf(fout,"//GPU Specific Constants Header File. \n");
    fprintf(fout,"// \n");
    fprintf(fout,"//Created by get_device_properties program. \n\n\n");
    fprintf(fout,"const int blocksPerGrid = %d;\n" , prop.multiProcessorCount);
    fprintf(fout,"const int max_num_of_threads_per_block = %d;\n" , prop.maxThreadsPerBlock);
    fprintf(fout,"\n\n");


    //close output files
    fclose(fout);
    fclose(fmake);
    */
    return 0;
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Intro to CUDA C and GPU Architecture Course - 6 hr and extended versions 2 | 3 | Note: Book used for this course is "Programming Massively Parallel Processors - A Hands-on Approach" 4 | The current newest edition is the 4th edition. However, there is a free PDF of for the 4th edition. So, feel free 5 | to use the free PDF found here: 6 | 7 | http://gpu.di.unimi.it/books/PMPP-3rd-Edition.pdf 8 | 9 | ## Links to 3rd edition and materials: 10 | 11 | https://shop.elsevier.com/books/programming-massively-parallel-processors/kirk/978-0-12-811986-0 12 | 13 | ## Book resources root links: 14 | https://booksite.elsevier.com/9780128119860/ 15 | https://booksite.elsevier.com/9780128119860/lecture.php (Extra Lecture Slides) 16 | 17 | ## Labs for Course link: 18 | https://github.com/R100001/Programming-Massively-Parallel-Processors/tree/master 19 | 20 | ## CUDA C++ Programming Guide link: 21 | https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html 22 | 23 | 24 | 25 | # Course Outline: 26 | - nVidia GPU Architecture to Support CUDA 27 | - Intro to CUDA C and Host program format 28 | - CUDA threads, blocks, and indexing 29 | - nvcc compiler 30 | - kernel launch 31 | - memory management 32 | - kernel and host code synchronization 33 | - Tensor Cores: Architecture and NN Application 34 | - CUDA Memory Hierarchy 35 | - Shared memory and thread synchronization 36 | DRAM Circuit Operation
Considerations: Access Types, Latency Caused by Non-Batch Fetches 37 | - Performance Considerations 38 | - Brief Coverage of PyTorch with CUDA, cuDNN, and cuVSLAM 39 | 40 | The course uses Jupyter Notebook - Colab. If you have a GPU on your computer and wish to use a different application or command line execution, please feel free to do so. 41 | 42 | * Follow these directions to get acquainted with running CUDA code on the Jupyter Notebook platform: \ 43 | https://www.geeksforgeeks.org/how-to-run-cuda-c-c-on-jupyter-notebook-in-google-colaboratory/ 44 | 45 | 46 | ## Key Course Takeaways 47 | - Why Nvidia GPU Architectures Changed to General Purpose Processing Architectures (CUDA Arch) 48 | - CPU vs GPU Hardware Architecture: Key differences in unit processor's complexity and why. 49 | - GPU Hardware Basic Components Used For CUDA General Purpose Processing 50 | - GPU Hardware to Software Vocabulary Mapping/Translation 51 | - Block and Thread Level Indexing Concept (through Lecture and Programming Homework Problems). 52 | - Memory Hierarchy 53 | - DRAM Circuit Operation Considerations: Access Types, Latency Caused by Non-Batch Fetches 54 | - Memory Coalescing vs Non-Coalesced Access Pattern Impact on Performance 55 | - Performance Considerations 56 | - CUDA, Numba, Cupy, Tensorflow, Pytorch relations 57 | 58 | ## Prerequisites 59 | - Working Knowledge of C 60 | - Exposure to Basic Computer Architecture 61 | 62 | ## GPU Access for Gaining Programming Experience 63 | Methods: 64 | 1) NVIDIA GPU installed on your own computer (via Windows, Linux, or Mac OS) 65 | 2) Use of Google Colab-Notebook through your web browser to gain free access to a GPU via a cloud service.
66 | 67 | ## Installing CUDA on Windows 68 | https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html 69 | 70 | ## Verify CUDA Install (RHEL or Ubuntu) and Toolkit 71 | https://xcat-docs.readthedocs.io/en/stable/advanced/gpu/nvidia/verify_cuda_install.html 72 | 73 | ## Other Links of Possible Interest or Reference 74 | https://pytorch.org/docs/stable/notes/cuda.html 75 | 76 | 77 | 78 | # Signup Instructions 79 | Information about signing up for the 5hr courses I offer (The Zeno Institute of Robotics and Artificial Intelligence) is as follows: 80 | 81 | You can purchase some of these courses through the [ZenoRobotics.com](https://www.zenorobotics.com/courses) website. Other payment options are Venmo, PayPal, Zelle, or Cash App. There is a $5/course savings for using Zelle. Please contact me for payment details for non-website methods. 82 | 83 | Once you pay, I will send you a link to the booking calendar where you can set up your times. 84 | 85 | When selecting your hour slots, please choose only a one-hour block for the first meeting. This will give me a chance to find out about your HW & SW setup, point out links to get you started, etc. Please limit any single-day session to 2 hours max to give you time to absorb the concepts and do some programming/homework problems. Also, you don’t have to book all 5 hr time slots at once. You can select them as time progresses if you wish. 86 | 87 | 88 | 89 | # Additional Learning Resources Links 90 | 91 | ## YouTube 92 | Tom Nurkkala - Video talks for various Computer Science courses at Taylor University: 93 | 94 | - CUDA Hardware \ 95 | https://www.youtube.com/watch?v=kUqkOAU84bA 96 | 97 | - Intro to GPU Programming \ 98 | https://www.youtube.com/watch?v=G-EimI4q-TQ 99 | 100 | CUDA University Courses 101 | 102 | University of Illinois : Current Course: ECE408/CS483 103 | Taught by Professor Wen-mei W. Hwu and David Kirk, NVIDIA CUDA Scientist.
\ 104 | https://developer.nvidia.com/educators/existing-courses#2 105 | 106 | Other: 107 | 108 | - Data Access Pattern Matters: How CUDA Programming Works | GTC 2022 (6:55 and on) \ 109 | https://www.youtube.com/watch?v=n6M8R8-PlnE 110 | 111 | - Tutorial: CUDA programming in Python with numba and cupy: \ 112 | https://www.youtube.com/watch?v=9bBsvpg-Xlk 113 | 114 | 115 | ## Code Links 116 | 117 | - CUDA Samples \ 118 | https://github.com/nvidia/cuda-samples 119 | 120 | - Programming-Massively-Parallel-Processors Learning Material (Reading/Images, Exercises, & Labs) \ 121 | https://github.com/R100001/Programming-Massively-Parallel-Processors/tree/master 122 | 123 | - CUDA Concepts Cheat Sheet \ 124 | https://kdm.icm.edu.pl/Tutorials/GPU-intro/introduction.en/ 125 | 126 | ## Colab 127 | 128 | - How to Use a GPU In Google Colab \ 129 | https://www.geeksforgeeks.org/how-to-use-gpu-in-google-colab/ \ 130 | https://www.geeksforgeeks.org/how-to-run-cuda-c-c-on-jupyter-notebook-in-google-colaboratory/ 131 | 132 | - How to Use Colab \ 133 | https://www.geeksforgeeks.org/how-to-use-google-colab/ 134 | 135 | - How to use GPU acceleration in PyTorch \ 136 | https://www.geeksforgeeks.org/how-to-use-gpu-acceleration-in-pytorch/ 137 | 138 | - Colab Site \ 139 | https://colab.research.google.com 140 | 141 | - Example CUDA GPU Use Github/Notebook \ 142 | https://colab.research.google.com/github/ShimaaElabd/CUDA-GPU-Contrast-Enhancement/blob/master/CUDA_GPU.ipynb#scrollTo=mgH5HreZ2WS9 143 | 144 | - Example: GPU calculation in python with Cupy and Numba \ 145 | https://colab.research.google.com/drive/15IDLiUMRJbKqZUZPccyigudINCD5uZ71?usp=sharing 146 | 147 | 148 | ## PTX and SASS 149 | 150 | - Parallel Thread Execution (PTX) \ 151 | https://docs.nvidia.com/cuda/parallel-thread-execution/index.html 152 | 153 | - PTX and SASS Assembly Debugging \ 154 | https://docs.nvidia.com/gameworks/content/developertools/desktop/ptx_sass_assembly_debugging.htm 155 | 156 | 157 | ## PyCUDA 158 | 159 | 
https://pypi.org/project/pycuda/ 160 | 161 | 162 | ## Cupy 163 | 164 | - About \ 165 | https://cupy.dev/ 166 | 167 | - Interoperability \ 168 | https://docs.cupy.dev/en/stable/user_guide/interoperability.html 169 | 170 | 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /TechDocs/NVIDIA-Turing-Architecture-Whitepaper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenoRobotics/CUDA_C_and_GPU/87dadf7d66311aa0db03ff163fafa7c545cc5298/TechDocs/NVIDIA-Turing-Architecture-Whitepaper.pdf -------------------------------------------------------------------------------- /TechDocs/nsight_profiler_explained.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZenoRobotics/CUDA_C_and_GPU/87dadf7d66311aa0db03ff163fafa7c545cc5298/TechDocs/nsight_profiler_explained.pdf -------------------------------------------------------------------------------- /common/GL/glut.h: -------------------------------------------------------------------------------- 1 | #ifndef __glut_h__ 2 | #define __glut_h__ 3 | 4 | /* Copyright (c) Mark J. Kilgard, 1994, 1995, 1996, 1998. */ 5 | 6 | /* This program is freely distributable without licensing fees and is 7 | provided without guarantee or warrantee expressed or implied. This 8 | program is -not- in the public domain. */ 9 | 10 | #if defined(_WIN32) 11 | 12 | /* GLUT 3.7 now tries to avoid including 13 | to avoid name space pollution, but Win32's 14 | needs APIENTRY and WINGDIAPI defined properly. */ 15 | # if 0 16 | /* This would put tons of macros and crap in our clean name space. 
*/ 17 | # define WIN32_LEAN_AND_MEAN 18 | # include 19 | # else 20 | /* XXX This is from Win32's */ 21 | # ifndef APIENTRY 22 | # define GLUT_APIENTRY_DEFINED 23 | # if (_MSC_VER >= 800) || defined(_STDCALL_SUPPORTED) || defined(__BORLANDC__) || defined(__LCC__) 24 | # define APIENTRY __stdcall 25 | # else 26 | # define APIENTRY 27 | # endif 28 | # endif 29 | /* XXX This is from Win32's */ 30 | # ifndef CALLBACK 31 | # if (defined(_M_MRX000) || defined(_M_IX86) || defined(_M_ALPHA) || defined(_M_PPC)) && !defined(MIDL_PASS) || defined(__LCC__) 32 | # define CALLBACK __stdcall 33 | # else 34 | # define CALLBACK 35 | # endif 36 | # endif 37 | /* XXX Hack for lcc compiler. It doesn't support __declspec(dllimport), just __stdcall. */ 38 | # if defined( __LCC__ ) 39 | # undef WINGDIAPI 40 | # define WINGDIAPI __stdcall 41 | # else 42 | /* XXX This is from Win32's and */ 43 | # ifndef WINGDIAPI 44 | # define GLUT_WINGDIAPI_DEFINED 45 | # define WINGDIAPI __declspec(dllimport) 46 | # endif 47 | # endif 48 | /* XXX This is from Win32's */ 49 | # ifndef _WCHAR_T_DEFINED 50 | typedef unsigned short wchar_t; 51 | # define _WCHAR_T_DEFINED 52 | # endif 53 | # endif 54 | 55 | /* To disable automatic library usage for GLUT, define GLUT_NO_LIB_PRAGMA 56 | in your compile preprocessor options. */ 57 | # if !defined(GLUT_BUILDING_LIB) && !defined(GLUT_NO_LIB_PRAGMA) 58 | # pragma comment (lib, "winmm.lib") /* link with Windows MultiMedia lib */ 59 | /* To enable automatic SGI OpenGL for Windows library usage for GLUT, 60 | define GLUT_USE_SGI_OPENGL in your compile preprocessor options. 
*/ 61 | # ifdef GLUT_USE_SGI_OPENGL 62 | # pragma comment (lib, "opengl.lib") /* link with SGI OpenGL for Windows lib */ 63 | # pragma comment (lib, "glu.lib") /* link with SGI OpenGL Utility lib */ 64 | # pragma comment (lib, "glut.lib") /* link with Win32 GLUT for SGI OpenGL lib */ 65 | # else 66 | # pragma comment (lib, "opengl32.lib") /* link with Microsoft OpenGL lib */ 67 | # pragma comment (lib, "glu32.lib") /* link with Microsoft OpenGL Utility lib */ 68 | # pragma comment (lib, "glut32.lib") /* link with Win32 GLUT lib */ 69 | # endif 70 | # endif 71 | 72 | /* To disable supression of annoying warnings about floats being promoted 73 | to doubles, define GLUT_NO_WARNING_DISABLE in your compile preprocessor 74 | options. */ 75 | # ifndef GLUT_NO_WARNING_DISABLE 76 | # pragma warning (disable:4244) /* Disable bogus VC++ 4.2 conversion warnings. */ 77 | # pragma warning (disable:4305) /* VC++ 5.0 version of above warning. */ 78 | # endif 79 | 80 | /* Win32 has an annoying issue where there are multiple C run-time 81 | libraries (CRTs). If the executable is linked with a different CRT 82 | from the GLUT DLL, the GLUT DLL will not share the same CRT static 83 | data seen by the executable. In particular, atexit callbacks registered 84 | in the executable will not be called if GLUT calls its (different) 85 | exit routine). GLUT is typically built with the 86 | "/MD" option (the CRT with multithreading DLL support), but the Visual 87 | C++ linker default is "/ML" (the single threaded CRT). 88 | 89 | One workaround to this issue is requiring users to always link with 90 | the same CRT as GLUT is compiled with. That requires users supply a 91 | non-standard option. GLUT 3.7 has its own built-in workaround where 92 | the executable's "exit" function pointer is covertly passed to GLUT. 93 | GLUT then calls the executable's exit function pointer to ensure that 94 | any "atexit" calls registered by the application are called if GLUT 95 | needs to exit. 
96 | 97 | Note that the __glut*WithExit routines should NEVER be called directly. 98 | To avoid the atexit workaround, #define GLUT_DISABLE_ATEXIT_HACK. */ 99 | 100 | /* XXX This is from Win32's */ 101 | # if !defined(_MSC_VER) && !defined(__cdecl) 102 | /* Define __cdecl for non-Microsoft compilers. */ 103 | # define __cdecl 104 | # define GLUT_DEFINED___CDECL 105 | # endif 106 | # ifndef _CRTIMP 107 | # ifdef _NTSDK 108 | /* Definition compatible with NT SDK */ 109 | # define _CRTIMP 110 | # else 111 | /* Current definition */ 112 | # ifdef _DLL 113 | # define _CRTIMP __declspec(dllimport) 114 | # else 115 | # define _CRTIMP 116 | # endif 117 | # endif 118 | # define GLUT_DEFINED__CRTIMP 119 | # endif 120 | 121 | /* GLUT API entry point declarations for Win32. */ 122 | # ifdef GLUT_BUILDING_LIB 123 | # define GLUTAPI __declspec(dllexport) 124 | # else 125 | # ifdef _DLL 126 | # define GLUTAPI __declspec(dllimport) 127 | # else 128 | # define GLUTAPI extern 129 | # endif 130 | # endif 131 | 132 | /* GLUT callback calling convention for Win32. */ 133 | # define GLUTCALLBACK __cdecl 134 | 135 | #endif /* _WIN32 */ 136 | 137 | #include 138 | #include 139 | 140 | #ifdef __cplusplus 141 | extern "C" { 142 | #endif 143 | 144 | #if defined(_WIN32) 145 | # ifndef GLUT_BUILDING_LIB 146 | extern _CRTIMP void __cdecl exit(int); 147 | # endif 148 | #else 149 | /* non-Win32 case. */ 150 | /* Define APIENTRY and CALLBACK to nothing if we aren't on Win32. */ 151 | # define APIENTRY 152 | # define GLUT_APIENTRY_DEFINED 153 | # define CALLBACK 154 | /* Define GLUTAPI and GLUTCALLBACK as below if we aren't on Win32. */ 155 | # define GLUTAPI extern 156 | # define GLUTCALLBACK 157 | /* Prototype exit for the non-Win32 case (see above). */ 158 | extern void exit(int); 159 | #endif 160 | 161 | /** 162 | GLUT API revision history: 163 | 164 | GLUT_API_VERSION is updated to reflect incompatible GLUT 165 | API changes (interface changes, semantic changes, deletions, 166 | or additions). 
167 | 168 | GLUT_API_VERSION=1 First public release of GLUT. 11/29/94 169 | 170 | GLUT_API_VERSION=2 Added support for OpenGL/GLX multisampling, 171 | extension. Supports new input devices like tablet, dial and button 172 | box, and Spaceball. Easy to query OpenGL extensions. 173 | 174 | GLUT_API_VERSION=3 glutMenuStatus added. 175 | 176 | GLUT_API_VERSION=4 glutInitDisplayString, glutWarpPointer, 177 | glutBitmapLength, glutStrokeLength, glutWindowStatusFunc, dynamic 178 | video resize subAPI, glutPostWindowRedisplay, glutKeyboardUpFunc, 179 | glutSpecialUpFunc, glutIgnoreKeyRepeat, glutSetKeyRepeat, 180 | glutJoystickFunc, glutForceJoystickFunc (NOT FINALIZED!). 181 | **/ 182 | #ifndef GLUT_API_VERSION /* allow this to be overriden */ 183 | #define GLUT_API_VERSION 3 184 | #endif 185 | 186 | /** 187 | GLUT implementation revision history: 188 | 189 | GLUT_XLIB_IMPLEMENTATION is updated to reflect both GLUT 190 | API revisions and implementation revisions (ie, bug fixes). 191 | 192 | GLUT_XLIB_IMPLEMENTATION=1 mjk's first public release of 193 | GLUT Xlib-based implementation. 11/29/94 194 | 195 | GLUT_XLIB_IMPLEMENTATION=2 mjk's second public release of 196 | GLUT Xlib-based implementation providing GLUT version 2 197 | interfaces. 198 | 199 | GLUT_XLIB_IMPLEMENTATION=3 mjk's GLUT 2.2 images. 4/17/95 200 | 201 | GLUT_XLIB_IMPLEMENTATION=4 mjk's GLUT 2.3 images. 6/?/95 202 | 203 | GLUT_XLIB_IMPLEMENTATION=5 mjk's GLUT 3.0 images. 10/?/95 204 | 205 | GLUT_XLIB_IMPLEMENTATION=7 mjk's GLUT 3.1+ with glutWarpPoitner. 7/24/96 206 | 207 | GLUT_XLIB_IMPLEMENTATION=8 mjk's GLUT 3.1+ with glutWarpPoitner 208 | and video resize. 1/3/97 209 | 210 | GLUT_XLIB_IMPLEMENTATION=9 mjk's GLUT 3.4 release with early GLUT 4 routines. 211 | 212 | GLUT_XLIB_IMPLEMENTATION=11 Mesa 2.5's GLUT 3.6 release. 213 | 214 | GLUT_XLIB_IMPLEMENTATION=12 mjk's GLUT 3.6 release with early GLUT 4 routines + signal handling. 
215 | 216 | GLUT_XLIB_IMPLEMENTATION=13 mjk's GLUT 3.7 beta with GameGLUT support. 217 | 218 | GLUT_XLIB_IMPLEMENTATION=14 mjk's GLUT 3.7 beta with f90gl friend interface. 219 | 220 | GLUT_XLIB_IMPLEMENTATION=15 mjk's GLUT 3.7 beta sync'ed with Mesa 221 | **/ 222 | #ifndef GLUT_XLIB_IMPLEMENTATION /* Allow this to be overriden. */ 223 | #define GLUT_XLIB_IMPLEMENTATION 15 224 | #endif 225 | 226 | /* Display mode bit masks. */ 227 | #define GLUT_RGB 0 228 | #define GLUT_RGBA GLUT_RGB 229 | #define GLUT_INDEX 1 230 | #define GLUT_SINGLE 0 231 | #define GLUT_DOUBLE 2 232 | #define GLUT_ACCUM 4 233 | #define GLUT_ALPHA 8 234 | #define GLUT_DEPTH 16 235 | #define GLUT_STENCIL 32 236 | #if (GLUT_API_VERSION >= 2) 237 | #define GLUT_MULTISAMPLE 128 238 | #define GLUT_STEREO 256 239 | #endif 240 | #if (GLUT_API_VERSION >= 3) 241 | #define GLUT_LUMINANCE 512 242 | #endif 243 | 244 | /* Mouse buttons. */ 245 | #define GLUT_LEFT_BUTTON 0 246 | #define GLUT_MIDDLE_BUTTON 1 247 | #define GLUT_RIGHT_BUTTON 2 248 | 249 | /* Mouse button state. */ 250 | #define GLUT_DOWN 0 251 | #define GLUT_UP 1 252 | 253 | #if (GLUT_API_VERSION >= 2) 254 | /* function keys */ 255 | #define GLUT_KEY_F1 1 256 | #define GLUT_KEY_F2 2 257 | #define GLUT_KEY_F3 3 258 | #define GLUT_KEY_F4 4 259 | #define GLUT_KEY_F5 5 260 | #define GLUT_KEY_F6 6 261 | #define GLUT_KEY_F7 7 262 | #define GLUT_KEY_F8 8 263 | #define GLUT_KEY_F9 9 264 | #define GLUT_KEY_F10 10 265 | #define GLUT_KEY_F11 11 266 | #define GLUT_KEY_F12 12 267 | /* directional keys */ 268 | #define GLUT_KEY_LEFT 100 269 | #define GLUT_KEY_UP 101 270 | #define GLUT_KEY_RIGHT 102 271 | #define GLUT_KEY_DOWN 103 272 | #define GLUT_KEY_PAGE_UP 104 273 | #define GLUT_KEY_PAGE_DOWN 105 274 | #define GLUT_KEY_HOME 106 275 | #define GLUT_KEY_END 107 276 | #define GLUT_KEY_INSERT 108 277 | #endif 278 | 279 | /* Entry/exit state. */ 280 | #define GLUT_LEFT 0 281 | #define GLUT_ENTERED 1 282 | 283 | /* Menu usage state. 
*/ 284 | #define GLUT_MENU_NOT_IN_USE 0 285 | #define GLUT_MENU_IN_USE 1 286 | 287 | /* Visibility state. */ 288 | #define GLUT_NOT_VISIBLE 0 289 | #define GLUT_VISIBLE 1 290 | 291 | /* Window status state. */ 292 | #define GLUT_HIDDEN 0 293 | #define GLUT_FULLY_RETAINED 1 294 | #define GLUT_PARTIALLY_RETAINED 2 295 | #define GLUT_FULLY_COVERED 3 296 | 297 | /* Color index component selection values. */ 298 | #define GLUT_RED 0 299 | #define GLUT_GREEN 1 300 | #define GLUT_BLUE 2 301 | 302 | #if defined(_WIN32) 303 | /* Stroke font constants (use these in GLUT program). */ 304 | #define GLUT_STROKE_ROMAN ((void*)0) 305 | #define GLUT_STROKE_MONO_ROMAN ((void*)1) 306 | 307 | /* Bitmap font constants (use these in GLUT program). */ 308 | #define GLUT_BITMAP_9_BY_15 ((void*)2) 309 | #define GLUT_BITMAP_8_BY_13 ((void*)3) 310 | #define GLUT_BITMAP_TIMES_ROMAN_10 ((void*)4) 311 | #define GLUT_BITMAP_TIMES_ROMAN_24 ((void*)5) 312 | #if (GLUT_API_VERSION >= 3) 313 | #define GLUT_BITMAP_HELVETICA_10 ((void*)6) 314 | #define GLUT_BITMAP_HELVETICA_12 ((void*)7) 315 | #define GLUT_BITMAP_HELVETICA_18 ((void*)8) 316 | #endif 317 | #else 318 | /* Stroke font opaque addresses (use constants instead in source code). */ 319 | GLUTAPI void *glutStrokeRoman; 320 | GLUTAPI void *glutStrokeMonoRoman; 321 | 322 | /* Stroke font constants (use these in GLUT program). */ 323 | #define GLUT_STROKE_ROMAN (&glutStrokeRoman) 324 | #define GLUT_STROKE_MONO_ROMAN (&glutStrokeMonoRoman) 325 | 326 | /* Bitmap font opaque addresses (use constants instead in source code). */ 327 | GLUTAPI void *glutBitmap9By15; 328 | GLUTAPI void *glutBitmap8By13; 329 | GLUTAPI void *glutBitmapTimesRoman10; 330 | GLUTAPI void *glutBitmapTimesRoman24; 331 | GLUTAPI void *glutBitmapHelvetica10; 332 | GLUTAPI void *glutBitmapHelvetica12; 333 | GLUTAPI void *glutBitmapHelvetica18; 334 | 335 | /* Bitmap font constants (use these in GLUT program). 
*/ 336 | #define GLUT_BITMAP_9_BY_15 (&glutBitmap9By15) 337 | #define GLUT_BITMAP_8_BY_13 (&glutBitmap8By13) 338 | #define GLUT_BITMAP_TIMES_ROMAN_10 (&glutBitmapTimesRoman10) 339 | #define GLUT_BITMAP_TIMES_ROMAN_24 (&glutBitmapTimesRoman24) 340 | #if (GLUT_API_VERSION >= 3) 341 | #define GLUT_BITMAP_HELVETICA_10 (&glutBitmapHelvetica10) 342 | #define GLUT_BITMAP_HELVETICA_12 (&glutBitmapHelvetica12) 343 | #define GLUT_BITMAP_HELVETICA_18 (&glutBitmapHelvetica18) 344 | #endif 345 | #endif 346 | 347 | /* glutGet parameters. */ 348 | #define GLUT_WINDOW_X ((GLenum) 100) 349 | #define GLUT_WINDOW_Y ((GLenum) 101) 350 | #define GLUT_WINDOW_WIDTH ((GLenum) 102) 351 | #define GLUT_WINDOW_HEIGHT ((GLenum) 103) 352 | #define GLUT_WINDOW_BUFFER_SIZE ((GLenum) 104) 353 | #define GLUT_WINDOW_STENCIL_SIZE ((GLenum) 105) 354 | #define GLUT_WINDOW_DEPTH_SIZE ((GLenum) 106) 355 | #define GLUT_WINDOW_RED_SIZE ((GLenum) 107) 356 | #define GLUT_WINDOW_GREEN_SIZE ((GLenum) 108) 357 | #define GLUT_WINDOW_BLUE_SIZE ((GLenum) 109) 358 | #define GLUT_WINDOW_ALPHA_SIZE ((GLenum) 110) 359 | #define GLUT_WINDOW_ACCUM_RED_SIZE ((GLenum) 111) 360 | #define GLUT_WINDOW_ACCUM_GREEN_SIZE ((GLenum) 112) 361 | #define GLUT_WINDOW_ACCUM_BLUE_SIZE ((GLenum) 113) 362 | #define GLUT_WINDOW_ACCUM_ALPHA_SIZE ((GLenum) 114) 363 | #define GLUT_WINDOW_DOUBLEBUFFER ((GLenum) 115) 364 | #define GLUT_WINDOW_RGBA ((GLenum) 116) 365 | #define GLUT_WINDOW_PARENT ((GLenum) 117) 366 | #define GLUT_WINDOW_NUM_CHILDREN ((GLenum) 118) 367 | #define GLUT_WINDOW_COLORMAP_SIZE ((GLenum) 119) 368 | #if (GLUT_API_VERSION >= 2) 369 | #define GLUT_WINDOW_NUM_SAMPLES ((GLenum) 120) 370 | #define GLUT_WINDOW_STEREO ((GLenum) 121) 371 | #endif 372 | #if (GLUT_API_VERSION >= 3) 373 | #define GLUT_WINDOW_CURSOR ((GLenum) 122) 374 | #endif 375 | #define GLUT_SCREEN_WIDTH ((GLenum) 200) 376 | #define GLUT_SCREEN_HEIGHT ((GLenum) 201) 377 | #define GLUT_SCREEN_WIDTH_MM ((GLenum) 202) 378 | #define GLUT_SCREEN_HEIGHT_MM ((GLenum) 
203) 379 | #define GLUT_MENU_NUM_ITEMS ((GLenum) 300) 380 | #define GLUT_DISPLAY_MODE_POSSIBLE ((GLenum) 400) 381 | #define GLUT_INIT_WINDOW_X ((GLenum) 500) 382 | #define GLUT_INIT_WINDOW_Y ((GLenum) 501) 383 | #define GLUT_INIT_WINDOW_WIDTH ((GLenum) 502) 384 | #define GLUT_INIT_WINDOW_HEIGHT ((GLenum) 503) 385 | #define GLUT_INIT_DISPLAY_MODE ((GLenum) 504) 386 | #if (GLUT_API_VERSION >= 2) 387 | #define GLUT_ELAPSED_TIME ((GLenum) 700) 388 | #endif 389 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 13) 390 | #define GLUT_WINDOW_FORMAT_ID ((GLenum) 123) 391 | #endif 392 | 393 | #if (GLUT_API_VERSION >= 2) 394 | /* glutDeviceGet parameters. */ 395 | #define GLUT_HAS_KEYBOARD ((GLenum) 600) 396 | #define GLUT_HAS_MOUSE ((GLenum) 601) 397 | #define GLUT_HAS_SPACEBALL ((GLenum) 602) 398 | #define GLUT_HAS_DIAL_AND_BUTTON_BOX ((GLenum) 603) 399 | #define GLUT_HAS_TABLET ((GLenum) 604) 400 | #define GLUT_NUM_MOUSE_BUTTONS ((GLenum) 605) 401 | #define GLUT_NUM_SPACEBALL_BUTTONS ((GLenum) 606) 402 | #define GLUT_NUM_BUTTON_BOX_BUTTONS ((GLenum) 607) 403 | #define GLUT_NUM_DIALS ((GLenum) 608) 404 | #define GLUT_NUM_TABLET_BUTTONS ((GLenum) 609) 405 | #endif 406 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 13) 407 | #define GLUT_DEVICE_IGNORE_KEY_REPEAT ((GLenum) 610) 408 | #define GLUT_DEVICE_KEY_REPEAT ((GLenum) 611) 409 | #define GLUT_HAS_JOYSTICK ((GLenum) 612) 410 | #define GLUT_OWNS_JOYSTICK ((GLenum) 613) 411 | #define GLUT_JOYSTICK_BUTTONS ((GLenum) 614) 412 | #define GLUT_JOYSTICK_AXES ((GLenum) 615) 413 | #define GLUT_JOYSTICK_POLL_RATE ((GLenum) 616) 414 | #endif 415 | 416 | #if (GLUT_API_VERSION >= 3) 417 | /* glutLayerGet parameters. 
*/ 418 | #define GLUT_OVERLAY_POSSIBLE ((GLenum) 800) 419 | #define GLUT_LAYER_IN_USE ((GLenum) 801) 420 | #define GLUT_HAS_OVERLAY ((GLenum) 802) 421 | #define GLUT_TRANSPARENT_INDEX ((GLenum) 803) 422 | #define GLUT_NORMAL_DAMAGED ((GLenum) 804) 423 | #define GLUT_OVERLAY_DAMAGED ((GLenum) 805) 424 | 425 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 9) 426 | /* glutVideoResizeGet parameters. */ 427 | #define GLUT_VIDEO_RESIZE_POSSIBLE ((GLenum) 900) 428 | #define GLUT_VIDEO_RESIZE_IN_USE ((GLenum) 901) 429 | #define GLUT_VIDEO_RESIZE_X_DELTA ((GLenum) 902) 430 | #define GLUT_VIDEO_RESIZE_Y_DELTA ((GLenum) 903) 431 | #define GLUT_VIDEO_RESIZE_WIDTH_DELTA ((GLenum) 904) 432 | #define GLUT_VIDEO_RESIZE_HEIGHT_DELTA ((GLenum) 905) 433 | #define GLUT_VIDEO_RESIZE_X ((GLenum) 906) 434 | #define GLUT_VIDEO_RESIZE_Y ((GLenum) 907) 435 | #define GLUT_VIDEO_RESIZE_WIDTH ((GLenum) 908) 436 | #define GLUT_VIDEO_RESIZE_HEIGHT ((GLenum) 909) 437 | #endif 438 | 439 | /* glutUseLayer parameters. */ 440 | #define GLUT_NORMAL ((GLenum) 0) 441 | #define GLUT_OVERLAY ((GLenum) 1) 442 | 443 | /* glutGetModifiers return mask. */ 444 | #define GLUT_ACTIVE_SHIFT 1 445 | #define GLUT_ACTIVE_CTRL 2 446 | #define GLUT_ACTIVE_ALT 4 447 | 448 | /* glutSetCursor parameters. */ 449 | /* Basic arrows. */ 450 | #define GLUT_CURSOR_RIGHT_ARROW 0 451 | #define GLUT_CURSOR_LEFT_ARROW 1 452 | /* Symbolic cursor shapes. */ 453 | #define GLUT_CURSOR_INFO 2 454 | #define GLUT_CURSOR_DESTROY 3 455 | #define GLUT_CURSOR_HELP 4 456 | #define GLUT_CURSOR_CYCLE 5 457 | #define GLUT_CURSOR_SPRAY 6 458 | #define GLUT_CURSOR_WAIT 7 459 | #define GLUT_CURSOR_TEXT 8 460 | #define GLUT_CURSOR_CROSSHAIR 9 461 | /* Directional cursors. */ 462 | #define GLUT_CURSOR_UP_DOWN 10 463 | #define GLUT_CURSOR_LEFT_RIGHT 11 464 | /* Sizing cursors. 
*/ 465 | #define GLUT_CURSOR_TOP_SIDE 12 466 | #define GLUT_CURSOR_BOTTOM_SIDE 13 467 | #define GLUT_CURSOR_LEFT_SIDE 14 468 | #define GLUT_CURSOR_RIGHT_SIDE 15 469 | #define GLUT_CURSOR_TOP_LEFT_CORNER 16 470 | #define GLUT_CURSOR_TOP_RIGHT_CORNER 17 471 | #define GLUT_CURSOR_BOTTOM_RIGHT_CORNER 18 472 | #define GLUT_CURSOR_BOTTOM_LEFT_CORNER 19 473 | /* Inherit from parent window. */ 474 | #define GLUT_CURSOR_INHERIT 100 475 | /* Blank cursor. */ 476 | #define GLUT_CURSOR_NONE 101 477 | /* Fullscreen crosshair (if available). */ 478 | #define GLUT_CURSOR_FULL_CROSSHAIR 102 479 | #endif 480 | 481 | /* GLUT initialization sub-API. */ 482 | GLUTAPI void APIENTRY glutInit(int *argcp, char **argv); 483 | #if defined(_WIN32) && !defined(GLUT_DISABLE_ATEXIT_HACK) 484 | GLUTAPI void APIENTRY __glutInitWithExit(int *argcp, char **argv, void (__cdecl *exitfunc)(int)); 485 | #ifndef GLUT_BUILDING_LIB 486 | /* Forwards argcp/argv to __glutInitWithExit with exit as the exit function; the #define on the next line makes calls to glutInit resolve to this wrapper. Compiled only when _WIN32 is defined and neither GLUT_DISABLE_ATEXIT_HACK nor GLUT_BUILDING_LIB is. */ static void APIENTRY glutInit_ATEXIT_HACK(int *argcp, char **argv) { __glutInitWithExit(argcp, argv, exit); } 487 | #define glutInit glutInit_ATEXIT_HACK 488 | #endif 489 | #endif 490 | GLUTAPI void APIENTRY glutInitDisplayMode(unsigned int mode); 491 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 9) 492 | GLUTAPI void APIENTRY glutInitDisplayString(const char *string); 493 | #endif 494 | GLUTAPI void APIENTRY glutInitWindowPosition(int x, int y); 495 | GLUTAPI void APIENTRY glutInitWindowSize(int width, int height); 496 | GLUTAPI void APIENTRY glutMainLoop(void); 497 | 498 | /* GLUT window sub-API. 
*/ 499 | GLUTAPI int APIENTRY glutCreateWindow(const char *title); 500 | #if defined(_WIN32) && !defined(GLUT_DISABLE_ATEXIT_HACK) 501 | GLUTAPI int APIENTRY __glutCreateWindowWithExit(const char *title, void (__cdecl *exitfunc)(int)); 502 | #ifndef GLUT_BUILDING_LIB 503 | /* Forwards title to __glutCreateWindowWithExit with exit as the exit function and returns its int result; the #define on the next line makes calls to glutCreateWindow resolve to this wrapper. Compiled only when _WIN32 is defined and neither GLUT_DISABLE_ATEXIT_HACK nor GLUT_BUILDING_LIB is. */ static int APIENTRY glutCreateWindow_ATEXIT_HACK(const char *title) { return __glutCreateWindowWithExit(title, exit); } 504 | #define glutCreateWindow glutCreateWindow_ATEXIT_HACK 505 | #endif 506 | #endif 507 | GLUTAPI int APIENTRY glutCreateSubWindow(int win, int x, int y, int width, int height); 508 | GLUTAPI void APIENTRY glutDestroyWindow(int win); 509 | GLUTAPI void APIENTRY glutPostRedisplay(void); 510 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 11) 511 | GLUTAPI void APIENTRY glutPostWindowRedisplay(int win); 512 | #endif 513 | GLUTAPI void APIENTRY glutSwapBuffers(void); 514 | GLUTAPI int APIENTRY glutGetWindow(void); 515 | GLUTAPI void APIENTRY glutSetWindow(int win); 516 | GLUTAPI void APIENTRY glutSetWindowTitle(const char *title); 517 | GLUTAPI void APIENTRY glutSetIconTitle(const char *title); 518 | GLUTAPI void APIENTRY glutPositionWindow(int x, int y); 519 | GLUTAPI void APIENTRY glutReshapeWindow(int width, int height); 520 | GLUTAPI void APIENTRY glutPopWindow(void); 521 | GLUTAPI void APIENTRY glutPushWindow(void); 522 | GLUTAPI void APIENTRY glutIconifyWindow(void); 523 | GLUTAPI void APIENTRY glutShowWindow(void); 524 | GLUTAPI void APIENTRY glutHideWindow(void); 525 | #if (GLUT_API_VERSION >= 3) 526 | GLUTAPI void APIENTRY glutFullScreen(void); 527 | GLUTAPI void APIENTRY glutSetCursor(int cursor); 528 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 9) 529 | GLUTAPI void APIENTRY glutWarpPointer(int x, int y); 530 | #endif 531 | 532 | /* GLUT overlay sub-API. 
*/ 533 | GLUTAPI void APIENTRY glutEstablishOverlay(void); 534 | GLUTAPI void APIENTRY glutRemoveOverlay(void); 535 | GLUTAPI void APIENTRY glutUseLayer(GLenum layer); 536 | GLUTAPI void APIENTRY glutPostOverlayRedisplay(void); 537 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 11) 538 | GLUTAPI void APIENTRY glutPostWindowOverlayRedisplay(int win); 539 | #endif 540 | GLUTAPI void APIENTRY glutShowOverlay(void); 541 | GLUTAPI void APIENTRY glutHideOverlay(void); 542 | #endif 543 | 544 | /* GLUT menu sub-API. */ 545 | GLUTAPI int APIENTRY glutCreateMenu(void (GLUTCALLBACK *func)(int)); 546 | #if defined(_WIN32) && !defined(GLUT_DISABLE_ATEXIT_HACK) 547 | GLUTAPI int APIENTRY __glutCreateMenuWithExit(void (GLUTCALLBACK *func)(int), void (__cdecl *exitfunc)(int)); 548 | #ifndef GLUT_BUILDING_LIB 549 | /* Forwards func to __glutCreateMenuWithExit with exit as the exit function and returns its int result; the #define on the next line makes calls to glutCreateMenu resolve to this wrapper. Compiled only when _WIN32 is defined and neither GLUT_DISABLE_ATEXIT_HACK nor GLUT_BUILDING_LIB is. */ static int APIENTRY glutCreateMenu_ATEXIT_HACK(void (GLUTCALLBACK *func)(int)) { return __glutCreateMenuWithExit(func, exit); } 550 | #define glutCreateMenu glutCreateMenu_ATEXIT_HACK 551 | #endif 552 | #endif 553 | GLUTAPI void APIENTRY glutDestroyMenu(int menu); 554 | GLUTAPI int APIENTRY glutGetMenu(void); 555 | GLUTAPI void APIENTRY glutSetMenu(int menu); 556 | GLUTAPI void APIENTRY glutAddMenuEntry(const char *label, int value); 557 | GLUTAPI void APIENTRY glutAddSubMenu(const char *label, int submenu); 558 | GLUTAPI void APIENTRY glutChangeToMenuEntry(int item, const char *label, int value); 559 | GLUTAPI void APIENTRY glutChangeToSubMenu(int item, const char *label, int submenu); 560 | GLUTAPI void APIENTRY glutRemoveMenuItem(int item); 561 | GLUTAPI void APIENTRY glutAttachMenu(int button); 562 | GLUTAPI void APIENTRY glutDetachMenu(int button); 563 | 564 | /* GLUT window callback sub-API. 
*/ 565 | GLUTAPI void APIENTRY glutDisplayFunc(void (GLUTCALLBACK *func)(void)); 566 | GLUTAPI void APIENTRY glutReshapeFunc(void (GLUTCALLBACK *func)(int width, int height)); 567 | GLUTAPI void APIENTRY glutKeyboardFunc(void (GLUTCALLBACK *func)(unsigned char key, int x, int y)); 568 | GLUTAPI void APIENTRY glutMouseFunc(void (GLUTCALLBACK *func)(int button, int state, int x, int y)); 569 | GLUTAPI void APIENTRY glutMotionFunc(void (GLUTCALLBACK *func)(int x, int y)); 570 | GLUTAPI void APIENTRY glutPassiveMotionFunc(void (GLUTCALLBACK *func)(int x, int y)); 571 | GLUTAPI void APIENTRY glutEntryFunc(void (GLUTCALLBACK *func)(int state)); 572 | GLUTAPI void APIENTRY glutVisibilityFunc(void (GLUTCALLBACK *func)(int state)); 573 | GLUTAPI void APIENTRY glutIdleFunc(void (GLUTCALLBACK *func)(void)); 574 | GLUTAPI void APIENTRY glutTimerFunc(unsigned int millis, void (GLUTCALLBACK *func)(int value), int value); 575 | GLUTAPI void APIENTRY glutMenuStateFunc(void (GLUTCALLBACK *func)(int state)); 576 | #if (GLUT_API_VERSION >= 2) 577 | GLUTAPI void APIENTRY glutSpecialFunc(void (GLUTCALLBACK *func)(int key, int x, int y)); 578 | GLUTAPI void APIENTRY glutSpaceballMotionFunc(void (GLUTCALLBACK *func)(int x, int y, int z)); 579 | GLUTAPI void APIENTRY glutSpaceballRotateFunc(void (GLUTCALLBACK *func)(int x, int y, int z)); 580 | GLUTAPI void APIENTRY glutSpaceballButtonFunc(void (GLUTCALLBACK *func)(int button, int state)); 581 | GLUTAPI void APIENTRY glutButtonBoxFunc(void (GLUTCALLBACK *func)(int button, int state)); 582 | GLUTAPI void APIENTRY glutDialsFunc(void (GLUTCALLBACK *func)(int dial, int value)); 583 | GLUTAPI void APIENTRY glutTabletMotionFunc(void (GLUTCALLBACK *func)(int x, int y)); 584 | GLUTAPI void APIENTRY glutTabletButtonFunc(void (GLUTCALLBACK *func)(int button, int state, int x, int y)); 585 | #if (GLUT_API_VERSION >= 3) 586 | GLUTAPI void APIENTRY glutMenuStatusFunc(void (GLUTCALLBACK *func)(int status, int x, int y)); 587 | GLUTAPI void APIENTRY 
glutOverlayDisplayFunc(void (GLUTCALLBACK *func)(void)); 588 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 9) 589 | GLUTAPI void APIENTRY glutWindowStatusFunc(void (GLUTCALLBACK *func)(int state)); 590 | #endif 591 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 13) 592 | GLUTAPI void APIENTRY glutKeyboardUpFunc(void (GLUTCALLBACK *func)(unsigned char key, int x, int y)); 593 | GLUTAPI void APIENTRY glutSpecialUpFunc(void (GLUTCALLBACK *func)(int key, int x, int y)); 594 | GLUTAPI void APIENTRY glutJoystickFunc(void (GLUTCALLBACK *func)(unsigned int buttonMask, int x, int y, int z), int pollInterval); 595 | #endif 596 | #endif 597 | #endif 598 | 599 | /* GLUT color index sub-API. */ 600 | GLUTAPI void APIENTRY glutSetColor(int, GLfloat red, GLfloat green, GLfloat blue); 601 | GLUTAPI GLfloat APIENTRY glutGetColor(int ndx, int component); 602 | GLUTAPI void APIENTRY glutCopyColormap(int win); 603 | 604 | /* GLUT state retrieval sub-API. */ 605 | GLUTAPI int APIENTRY glutGet(GLenum type); 606 | GLUTAPI int APIENTRY glutDeviceGet(GLenum type); 607 | #if (GLUT_API_VERSION >= 2) 608 | /* GLUT extension support sub-API */ 609 | GLUTAPI int APIENTRY glutExtensionSupported(const char *name); 610 | #endif 611 | #if (GLUT_API_VERSION >= 3) 612 | GLUTAPI int APIENTRY glutGetModifiers(void); 613 | GLUTAPI int APIENTRY glutLayerGet(GLenum type); 614 | #endif 615 | 616 | /* GLUT font sub-API */ 617 | GLUTAPI void APIENTRY glutBitmapCharacter(void *font, int character); 618 | GLUTAPI int APIENTRY glutBitmapWidth(void *font, int character); 619 | GLUTAPI void APIENTRY glutStrokeCharacter(void *font, int character); 620 | GLUTAPI int APIENTRY glutStrokeWidth(void *font, int character); 621 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 9) 622 | GLUTAPI int APIENTRY glutBitmapLength(void *font, const unsigned char *string); 623 | GLUTAPI int APIENTRY glutStrokeLength(void *font, const unsigned char *string); 624 | #endif 625 | 626 | /* GLUT 
pre-built models sub-API */ 627 | GLUTAPI void APIENTRY glutWireSphere(GLdouble radius, GLint slices, GLint stacks); 628 | GLUTAPI void APIENTRY glutSolidSphere(GLdouble radius, GLint slices, GLint stacks); 629 | GLUTAPI void APIENTRY glutWireCone(GLdouble base, GLdouble height, GLint slices, GLint stacks); 630 | GLUTAPI void APIENTRY glutSolidCone(GLdouble base, GLdouble height, GLint slices, GLint stacks); 631 | GLUTAPI void APIENTRY glutWireCube(GLdouble size); 632 | GLUTAPI void APIENTRY glutSolidCube(GLdouble size); 633 | GLUTAPI void APIENTRY glutWireTorus(GLdouble innerRadius, GLdouble outerRadius, GLint sides, GLint rings); 634 | GLUTAPI void APIENTRY glutSolidTorus(GLdouble innerRadius, GLdouble outerRadius, GLint sides, GLint rings); 635 | GLUTAPI void APIENTRY glutWireDodecahedron(void); 636 | GLUTAPI void APIENTRY glutSolidDodecahedron(void); 637 | GLUTAPI void APIENTRY glutWireTeapot(GLdouble size); 638 | GLUTAPI void APIENTRY glutSolidTeapot(GLdouble size); 639 | GLUTAPI void APIENTRY glutWireOctahedron(void); 640 | GLUTAPI void APIENTRY glutSolidOctahedron(void); 641 | GLUTAPI void APIENTRY glutWireTetrahedron(void); 642 | GLUTAPI void APIENTRY glutSolidTetrahedron(void); 643 | GLUTAPI void APIENTRY glutWireIcosahedron(void); 644 | GLUTAPI void APIENTRY glutSolidIcosahedron(void); 645 | 646 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 9) 647 | /* GLUT video resize sub-API. */ 648 | GLUTAPI int APIENTRY glutVideoResizeGet(GLenum param); 649 | GLUTAPI void APIENTRY glutSetupVideoResizing(void); 650 | GLUTAPI void APIENTRY glutStopVideoResizing(void); 651 | GLUTAPI void APIENTRY glutVideoResize(int x, int y, int width, int height); 652 | GLUTAPI void APIENTRY glutVideoPan(int x, int y, int width, int height); 653 | 654 | /* GLUT debugging sub-API. */ 655 | GLUTAPI void APIENTRY glutReportErrors(void); 656 | #endif 657 | 658 | #if (GLUT_API_VERSION >= 4 || GLUT_XLIB_IMPLEMENTATION >= 13) 659 | /* GLUT device control sub-API. 
*/ 660 | /* glutSetKeyRepeat modes. */ 661 | #define GLUT_KEY_REPEAT_OFF 0 662 | #define GLUT_KEY_REPEAT_ON 1 663 | #define GLUT_KEY_REPEAT_DEFAULT 2 664 | 665 | /* Joystick button masks. */ 666 | #define GLUT_JOYSTICK_BUTTON_A 1 667 | #define GLUT_JOYSTICK_BUTTON_B 2 668 | #define GLUT_JOYSTICK_BUTTON_C 4 669 | #define GLUT_JOYSTICK_BUTTON_D 8 670 | 671 | GLUTAPI void APIENTRY glutIgnoreKeyRepeat(int ignore); 672 | GLUTAPI void APIENTRY glutSetKeyRepeat(int repeatMode); 673 | GLUTAPI void APIENTRY glutForceJoystickFunc(void); 674 | 675 | /* GLUT game mode sub-API. */ 676 | /* glutGameModeGet. */ 677 | #define GLUT_GAME_MODE_ACTIVE ((GLenum) 0) 678 | #define GLUT_GAME_MODE_POSSIBLE ((GLenum) 1) 679 | #define GLUT_GAME_MODE_WIDTH ((GLenum) 2) 680 | #define GLUT_GAME_MODE_HEIGHT ((GLenum) 3) 681 | #define GLUT_GAME_MODE_PIXEL_DEPTH ((GLenum) 4) 682 | #define GLUT_GAME_MODE_REFRESH_RATE ((GLenum) 5) 683 | #define GLUT_GAME_MODE_DISPLAY_CHANGED ((GLenum) 6) 684 | 685 | GLUTAPI void APIENTRY glutGameModeString(const char *string); 686 | GLUTAPI int APIENTRY glutEnterGameMode(void); 687 | GLUTAPI void APIENTRY glutLeaveGameMode(void); 688 | GLUTAPI int APIENTRY glutGameModeGet(GLenum mode); 689 | #endif 690 | 691 | #ifdef __cplusplus 692 | } 693 | 694 | #endif 695 | 696 | #ifdef GLUT_APIENTRY_DEFINED 697 | # undef GLUT_APIENTRY_DEFINED 698 | # undef APIENTRY 699 | #endif 700 | 701 | #ifdef GLUT_WINGDIAPI_DEFINED 702 | # undef GLUT_WINGDIAPI_DEFINED 703 | # undef WINGDIAPI 704 | #endif 705 | 706 | #ifdef GLUT_DEFINED___CDECL 707 | # undef GLUT_DEFINED___CDECL 708 | # undef __cdecl 709 | #endif 710 | 711 | #ifdef GLUT_DEFINED__CRTIMP 712 | # undef GLUT_DEFINED__CRTIMP 713 | # undef _CRTIMP 714 | #endif 715 | 716 | #endif /* __glut_h__ */ 717 | --------------------------------------------------------------------------------