├── .gitattributes ├── .gitignore ├── CUDA ├── README.md ├── Tutorial 1 - Hello, CUDA │ └── kernel.cu └── Tutorial 2 - CUDA load image │ ├── Lenna.png │ └── kernel.cu ├── OpenCL ├── README.md ├── Tutorial 1 - Hello, OpenCL │ ├── main.cpp │ └── simple_add.cl └── Tutorial 2 - OpenCL load image │ ├── Lenna.png │ ├── cl_tutorial_2_copy.cl │ └── main.cpp ├── README.md ├── includes └── PNG.h └── vs ├── CUDA ├── README.md ├── Tutorial 1 - Hello, CUDA │ ├── Tutorial 1 - Hello, CUDA.vcxproj │ └── kernel.cu ├── Tutorial 2 - CUDA load image │ ├── Lenna.png │ ├── Tutorial 2 - CUDA load image.vcxproj │ └── kernel.cu └── Tutorial 3 - CUDA basic image filtering │ ├── Lenna.png │ ├── Tutorial 3 - CUDA basic image filtering.vcxproj │ └── kernel.cu ├── OpenCL ├── README.md ├── Tutorial 1 - Hello, OpenCL │ ├── Tutorial 1 - Hello, OpenCL.vcxproj │ ├── Tutorial 1 - Hello, OpenCL.vcxproj.filters │ ├── main.cpp │ └── simple_add.cl ├── Tutorial 2 - OpenCL load image │ ├── Lenna.png │ ├── Tutorial 2 - OpenCL load image.vcxproj │ ├── Tutorial 2 - OpenCL load image.vcxproj.filters │ ├── cl_tutorial_2_copy.cl │ └── main.cpp └── Tutorial 3 - OpenCL basic image filtering │ ├── Lenna.png │ ├── Tutorial 3 - OpenCL basic image filtering.vcxproj │ ├── Tutorial 3 - OpenCL basic image filtering.vcxproj.filters │ ├── cl_tutorial_3_boxFilter.cl │ └── main.cpp ├── OpenCL_CUDA_Tutorials.sln └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | ## Ignore Visual Studio temporary files, build results, and 3 | ## files generated by popular Visual Studio add-ons. 4 | 5 | # User-specific files 6 | *.suo 7 | *.user 8 | *.userosscache 9 | *.sln.docstates 10 | 11 | # User-specific files (MonoDevelop/Xamarin Studio) 12 | *.userprefs 13 | 14 | # Build results 15 | [Dd]ebug/ 16 | [Dd]ebugPublic/ 17 | [Rr]elease/ 18 | [Rr]eleases/ 19 | x64/ 20 | x86/ 21 | build/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | 26 | # Visual Studio 2015 cache/options directory 27 | .vs/ 28 | # Uncomment if you have tasks that create the project's static files in wwwroot 29 | #wwwroot/ 30 | 31 | # MSTest test Results 32 | [Tt]est[Rr]esult*/ 33 | [Bb]uild[Ll]og.* 34 | 35 | # NUNIT 36 | *.VisualState.xml 37 | TestResult.xml 38 | 39 | # Build Results of an ATL Project 40 | [Dd]ebugPS/ 41 | [Rr]eleasePS/ 42 | dlldata.c 43 | 44 | # DNX 45 | project.lock.json 46 | artifacts/ 47 | 48 | *_i.c 49 | *_p.c 50 | *_i.h 51 | *.ilk 52 | *.meta 53 | *.obj 54 | *.pch 55 | *.pdb 56 | *.pgc 57 | *.pgd 58 | *.rsp 59 | *.sbr 60 | *.tlb 61 | *.tli 62 | *.tlh 63 | *.tmp 64 | *.tmp_proj 65 | *.log 66 | *.vspscc 67 | *.vssscc 68 | .builds 69 | *.pidb 70 | *.svclog 71 | *.scc 72 | 73 | # Chutzpah Test files 74 | _Chutzpah* 75 | 76 | # Visual C++ cache files 77 | ipch/ 78 | *.aps 79 | *.ncb 80 | *.opendb 81 | *.opensdf 82 | *.sdf 83 | *.cachefile 84 | 85 | # Visual Studio profiler 86 | *.psess 87 | *.vsp 88 | *.vspx 89 | *.sap 90 | 91 | # TFS 2012 Local Workspace 92 | $tf/ 93 | 94 | # Guidance Automation Toolkit 95 | *.gpState 96 | 97 | # ReSharper is a .NET coding add-in 98 | _ReSharper*/ 99 | *.[Rr]e[Ss]harper 100 | *.DotSettings.user 101 | 102 | # JustCode is a .NET coding add-in 103 | .JustCode 104 | 105 | # TeamCity is a build add-in 106 | _TeamCity* 107 | 108 | # DotCover is a Code 
Coverage Tool 109 | *.dotCover 110 | 111 | # NCrunch 112 | _NCrunch_* 113 | .*crunch*.local.xml 114 | nCrunchTemp_* 115 | 116 | # MightyMoose 117 | *.mm.* 118 | AutoTest.Net/ 119 | 120 | # Web workbench (sass) 121 | .sass-cache/ 122 | 123 | # Installshield output folder 124 | [Ee]xpress/ 125 | 126 | # DocProject is a documentation generator add-in 127 | DocProject/buildhelp/ 128 | DocProject/Help/*.HxT 129 | DocProject/Help/*.HxC 130 | DocProject/Help/*.hhc 131 | DocProject/Help/*.hhk 132 | DocProject/Help/*.hhp 133 | DocProject/Help/Html2 134 | DocProject/Help/html 135 | 136 | # Click-Once directory 137 | publish/ 138 | 139 | # Publish Web Output 140 | *.[Pp]ublish.xml 141 | *.azurePubxml 142 | # TODO: Comment the next line if you want to checkin your web deploy settings 143 | # but database connection strings (with potential passwords) will be unencrypted 144 | *.pubxml 145 | *.publishproj 146 | 147 | # NuGet Packages 148 | *.nupkg 149 | # The packages folder can be ignored because of Package Restore 150 | **/packages/* 151 | # except build/, which is used as an MSBuild target. 
152 | !**/packages/build/ 153 | # Uncomment if necessary however generally it will be regenerated when needed 154 | #!**/packages/repositories.config 155 | 156 | # Windows Azure Build Output 157 | csx/ 158 | *.build.csdef 159 | 160 | # Windows Azure Emulator 161 | ecf/ 162 | rcf/ 163 | 164 | # Windows Store app package directory 165 | AppPackages/ 166 | BundleArtifacts/ 167 | 168 | # Visual Studio cache files 169 | # files ending in .cache can be ignored 170 | *.[Cc]ache 171 | # but keep track of directories ending in .cache 172 | !*.[Cc]ache/ 173 | 174 | # Others 175 | ClientBin/ 176 | [Ss]tyle[Cc]op.* 177 | ~$* 178 | *~ 179 | *.dbmdl 180 | *.dbproj.schemaview 181 | *.pfx 182 | *.publishsettings 183 | node_modules/ 184 | orleans.codegen.cs 185 | 186 | # RIA/Silverlight projects 187 | Generated_Code/ 188 | 189 | # Backup & report files from converting an old project file 190 | # to a newer Visual Studio version. Backup files are not needed, 191 | # because we have git ;-) 192 | _UpgradeReport_Files/ 193 | Backup*/ 194 | UpgradeLog*.XML 195 | UpgradeLog*.htm 196 | 197 | # SQL Server files 198 | *.mdf 199 | *.ldf 200 | 201 | # Business Intelligence projects 202 | *.rdl.data 203 | *.bim.layout 204 | *.bim_*.settings 205 | 206 | # Microsoft Fakes 207 | FakesAssemblies/ 208 | 209 | # GhostDoc plugin setting file 210 | *.GhostDoc.xml 211 | 212 | # Node.js Tools for Visual Studio 213 | .ntvs_analysis.dat 214 | 215 | # Visual Studio 6 build log 216 | *.plg 217 | 218 | # Visual Studio 6 workspace options file 219 | *.opt 220 | 221 | # Visual Studio LightSwitch build output 222 | **/*.HTMLClient/GeneratedArtifacts 223 | **/*.DesktopClient/GeneratedArtifacts 224 | **/*.DesktopClient/ModelManifest.xml 225 | **/*.Server/GeneratedArtifacts 226 | **/*.Server/ModelManifest.xml 227 | _Pvt_Extensions 228 | 229 | # Paket dependency manager 230 | .paket/paket.exe 231 | 232 | # FAKE - F# Make 233 | .fake/ -------------------------------------------------------------------------------- 
/CUDA/README.md: -------------------------------------------------------------------------------- 1 | Source for CUDA tutorials -------------------------------------------------------------------------------- /CUDA/Tutorial 1 - Hello, CUDA/kernel.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | 4 | #include 5 | #include 6 | 7 | __global__ void simple_add(const int *A, const int *B, int *C) 8 | { 9 | C[threadIdx.x] = A[threadIdx.x] + B[threadIdx.x]; 10 | } 11 | 12 | int main(int arg, char* args[]) 13 | { 14 | const int size = 10; 15 | int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 16 | int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }; 17 | int C[size]; 18 | 19 | int *buffer_A = 0; 20 | int *buffer_B = 0; 21 | int *buffer_C = 0; 22 | cudaError_t cudaStatus; 23 | 24 | // Choose which GPU to run on, change this on a multi-GPU system. 25 | cudaStatus = cudaSetDevice(0); 26 | if (cudaStatus != cudaSuccess) 27 | { 28 | std::cout << "No CUDA devices found!" << std::endl; 29 | exit(1); 30 | } 31 | 32 | cudaDeviceProp prop; 33 | cudaGetDeviceProperties(&prop, 0); 34 | 35 | std::cout << "Using device: " << prop.name << std::endl; 36 | 37 | // Allocate GPU buffers for three vectors (two input, one output). 38 | cudaMalloc((void**)&buffer_A, size * sizeof(int)); 39 | cudaMalloc((void**)&buffer_B, size * sizeof(int)); 40 | cudaMalloc((void**)&buffer_C, size * sizeof(int)); 41 | 42 | // Copy input vectors from host memory to GPU buffers. 43 | cudaMemcpy(buffer_A, A, size * sizeof(int), cudaMemcpyHostToDevice); 44 | cudaMemcpy(buffer_B, B, size * sizeof(int), cudaMemcpyHostToDevice); 45 | 46 | // Launch a kernel on the GPU with one thread for each element. 
47 | simple_add<<<1, size>>>(buffer_A, buffer_B, buffer_C); 48 | 49 | // Check for any errors launching the kernel 50 | cudaStatus = cudaGetLastError(); 51 | if (cudaStatus != cudaSuccess) 52 | { 53 | std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl; 54 | cudaFree(buffer_A); 55 | cudaFree(buffer_B); 56 | cudaFree(buffer_C); 57 | exit(1); 58 | } 59 | 60 | // cudaDeviceSynchronize waits for the kernel to finish, and returns 61 | // any errors encountered during the launch. 62 | cudaStatus = cudaDeviceSynchronize(); 63 | if (cudaStatus != cudaSuccess) 64 | { 65 | std::cout << "Could not synchronize device!" << std::endl; 66 | cudaFree(buffer_A); 67 | cudaFree(buffer_B); 68 | cudaFree(buffer_C); 69 | exit(1); 70 | } 71 | 72 | // Copy output vector from GPU buffer to host memory. 73 | cudaStatus = cudaMemcpy(C, buffer_C, size * sizeof(int), cudaMemcpyDeviceToHost); 74 | cudaFree(buffer_A); 75 | cudaFree(buffer_B); 76 | cudaFree(buffer_C); 77 | 78 | if(cudaStatus != cudaSuccess) 79 | { 80 | std::cout << "Could not copy buffer memory to host!" << std::endl; 81 | exit(1); 82 | } 83 | 84 | //Prints the array 85 | std::cout << "Result:" << std::endl; 86 | for (int i = 0; i < size; i++) 87 | { 88 | std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl; 89 | } 90 | 91 | // cudaDeviceReset must be called before exiting in order for profiling and 92 | // tracing tools such as Nsight and Visual Profiler to show complete traces. 93 | cudaStatus = cudaDeviceReset(); 94 | if (cudaStatus != cudaSuccess) 95 | { 96 | std::cout << "Device reset failed!" 
<< std::endl; 97 | exit(1); 98 | } 99 | 100 | return 0; 101 | } -------------------------------------------------------------------------------- /CUDA/Tutorial 2 - CUDA load image/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/CUDA/Tutorial 2 - CUDA load image/Lenna.png -------------------------------------------------------------------------------- /CUDA/Tutorial 2 - CUDA load image/kernel.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | 4 | #include 5 | #include 6 | 7 | #include "PNG.h" 8 | //copies one 4-byte (RGBA) pixel per thread from in to out; x is the block index, y is the thread index within the block 9 | __global__ void copy(const unsigned char* in, unsigned char* out) 10 | { 11 | int x = blockIdx.x; 12 | int y = threadIdx.x; 13 | int width = gridDim.x; //x spans gridDim.x, so that is the row stride; blockDim.x only gave unique, in-bounds indices when gridDim.x == blockDim.x 14 | int index = (x + y * width) * 4; 15 | 16 | //copy each color channel 17 | out[index] = in[index]; 18 | out[index + 1] = in[index + 1]; 19 | out[index + 2] = in[index + 2]; 20 | out[index + 3] = in[index + 3]; 21 | } 22 | 23 | int main(int arg, char* args[]) 24 | { 25 | PNG inPng("Lenna.png"); 26 | PNG outPng; 27 | outPng.Create(inPng.w, inPng.h); 28 | 29 | //store width and height so we can use them for our output image later 30 | const unsigned int w = inPng.w; 31 | const unsigned int h = inPng.h; 32 | //4 because there are 4 color channels R, G, B, and A 33 | int size = w * h * 4; 34 | 35 | unsigned char *in = 0; 36 | unsigned char *out = 0; 37 | cudaError_t cudaStatus; 38 | 39 | // Choose which GPU to run on, change this on a multi-GPU system. 40 | cudaStatus = cudaSetDevice(0); 41 | if (cudaStatus != cudaSuccess) 42 | { 43 | std::cout << "No CUDA devices found!"
<< std::endl; 44 | exit(1); 45 | } 46 | 47 | //prints the device the kernel will be running on 48 | cudaDeviceProp prop; 49 | cudaGetDeviceProperties(&prop, 0); 50 | std::cout << "Using device: " << prop.name << std::endl; 51 | 52 | // Allocate GPU buffers for the images 53 | cudaMalloc((void**)&in, size * sizeof(unsigned char)); 54 | cudaMalloc((void**)&out, size * sizeof(unsigned char)); 55 | 56 | // Copy image data from host memory to GPU buffers. 57 | cudaMemcpy(in, &inPng.data[0], size * sizeof(unsigned char), cudaMemcpyHostToDevice); 58 | 59 | //free the input image because we do not need it anymore 60 | inPng.Free(); 61 | 62 | // Launch a kernel on the GPU with one thread for each element. 63 | copy<<>>(in, out); 64 | 65 | // Check for any errors launching the kernel 66 | cudaStatus = cudaGetLastError(); 67 | if (cudaStatus != cudaSuccess) 68 | { 69 | std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl; 70 | cudaFree(in); 71 | cudaFree(out); 72 | exit(1); 73 | } 74 | 75 | // cudaDeviceSynchronize waits for the kernel to finish, and returns 76 | // any errors encountered during the launch. 77 | cudaStatus = cudaDeviceSynchronize(); 78 | if (cudaStatus != cudaSuccess) 79 | { 80 | std::cout << "Could not synchronize device!" << std::endl; 81 | cudaFree(in); 82 | cudaFree(out); 83 | exit(1); 84 | } 85 | 86 | //temporary array to store the result from CUDA 87 | auto tmp = new unsigned char[w * h * 4]; 88 | // Copy output image from GPU buffer to host memory.
89 | cudaStatus = cudaMemcpy(tmp, out, size * sizeof(unsigned char), cudaMemcpyDeviceToHost); 90 | cudaFree(in); 91 | cudaFree(out); 92 | 93 | //copy the data from the temp array to the png 94 | std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data)); 95 | 96 | //write the image to file 97 | outPng.Save("cuda_tutorial_2.png"); 98 | //free the image's resources since we are done with it 99 | outPng.Free(); 100 | 101 | //free the temp array 102 | delete[] tmp; 103 | 104 | if(cudaStatus != cudaSuccess) 105 | { 106 | std::cout << "Could not copy buffer memory to host!" << std::endl; 107 | exit(1); 108 | } 109 | 110 | // cudaDeviceReset must be called before exiting in order for profiling and 111 | // tracing tools such as Nsight and Visual Profiler to show complete traces. 112 | cudaStatus = cudaDeviceReset(); 113 | if (cudaStatus != cudaSuccess) 114 | { 115 | std::cout << "Device reset failed!" << std::endl; 116 | exit(1); 117 | } 118 | 119 | return 0; 120 | } -------------------------------------------------------------------------------- /OpenCL/README.md: -------------------------------------------------------------------------------- 1 | Source for OpenCL tutorials -------------------------------------------------------------------------------- /OpenCL/Tutorial 1 - Hello, OpenCL/main.cpp: -------------------------------------------------------------------------------- 1 | #define __CL_ENABLE_EXCEPTIONS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | std::string readFile(std::string fileName) 9 | { 10 | std::ifstream t(fileName); 11 | std::string str((std::istreambuf_iterator(t)), std::istreambuf_iterator()); 12 | return str; 13 | } 14 | 15 | int main(int arg, char* args[]) 16 | { 17 | const int size = 10; 18 | int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 19 | int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }; 20 | int C[size]; 21 | 22 | //stl vector to store all of the available platforms 23 | std::vector platforms; 24 | //get all
available platforms 25 | cl::Platform::get(&platforms); 26 | 27 | if (platforms.size() == 0) 28 | { 29 | std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system. 30 | exit(1); 31 | } 32 | 33 | //Create an stl vector to store all of the available devices to use from the first platform. 34 | std::vector devices; 35 | //Get the available devices from the platform. For me the platform for my 980ti is actually the second in the platform list but for simplicity we will use the first one. 36 | platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); 37 | //Set the device to the first device in the platform. You can have more than one device associated with a single platform, for instance if you had two of the same GPUs on your system in SLI or CrossFire. 38 | cl::Device device = devices[0]; 39 | 40 | //This is just helpful to see what device and platform you are using. 41 | std::cout << "Using device: " << device.getInfo() << std::endl; 42 | std::cout << "Using platform: " << platforms[0].getInfo() << std::endl; 43 | 44 | //Finally create the OpenCL context from the device you have chosen. 45 | cl::Context context(device); 46 | 47 | cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * size); 48 | cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * size); 49 | cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * size); 50 | 51 | //A source object for your program 52 | cl::Program::Sources sources; 53 | std::string kernel_code = readFile("simple_add.cl"); 54 | //Add your program source 55 | sources.push_back({ kernel_code.c_str(),kernel_code.length() }); 56 | 57 | //Create your OpenCL program and build it.
58 | cl::Program program(context, sources); 59 | if (program.build({ device }) != CL_SUCCESS) 60 | { 61 | std::cout << " Error building: " << program.getBuildInfo(device) << std::endl;//print the build log to find any issues with your source 62 | exit(1);//Quit if your program doesn't compile 63 | } 64 | 65 | cl::CommandQueue queue(context, device, 0, NULL); 66 | 67 | //Write our buffers that we are adding to our OpenCL device 68 | queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int) * size, A); 69 | queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int) * size, B); 70 | 71 | //Create our Kernel (basically what is the starting point for our OpenCL program) 72 | cl::Kernel simple_add(program, "simple_add"); 73 | //Set our arguments for the kernel 74 | simple_add.setArg(0, buffer_A); 75 | simple_add.setArg(1, buffer_B); 76 | simple_add.setArg(2, buffer_C); 77 | 78 | //Make sure that our queue is done with all of its tasks before continuing 79 | queue.finish(); 80 | 81 | //Create an event that we can use to wait for our program to finish running 82 | cl::Event e; 83 | //This runs our program, the ranges here are the offset, global, local ranges that our code runs in.
84 | queue.enqueueNDRangeKernel(simple_add, cl::NullRange, cl::NDRange(size), cl::NullRange, 0, &e); 85 | 86 | //Waits for our program to finish 87 | e.wait(); 88 | //Reads the output written to our buffer into our final array 89 | queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int) * size, C); 90 | 91 | //prints the array 92 | std::cout << "Result:" << std::endl; 93 | for (int i = 0; i < size; i++) 94 | { 95 | std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl; 96 | } 97 | 98 | return 0; 99 | } -------------------------------------------------------------------------------- /OpenCL/Tutorial 1 - Hello, OpenCL/simple_add.cl: -------------------------------------------------------------------------------- 1 | void kernel simple_add(global const int* A, global const int* B, global int* C) 2 | { 3 | C[get_global_id(0)] = A[get_global_id(0)] + B[get_global_id(0)]; 4 | } -------------------------------------------------------------------------------- /OpenCL/Tutorial 2 - OpenCL load image/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/OpenCL/Tutorial 2 - OpenCL load image/Lenna.png -------------------------------------------------------------------------------- /OpenCL/Tutorial 2 - OpenCL load image/cl_tutorial_2_copy.cl: -------------------------------------------------------------------------------- 1 | const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; //read_imageui only supports nearest filtering; with CLK_FILTER_LINEAR the values it returns are undefined 2 | //copies the pixel at this work-item's (x, y) position from in to out 3 | void kernel copy(__read_only image2d_t in, __write_only image2d_t out) 4 | { 5 | int x = get_global_id(0); 6 | int y = get_global_id(1); 7 | int2 pos = (int2)(x, y); 8 | uint4 pixel = read_imageui(in, smp, pos); 9 | write_imageui(out, pos, pixel); 10 | } -------------------------------------------------------------------------------- /OpenCL/Tutorial 2 - OpenCL load image/main.cpp:
-------------------------------------------------------------------------------- 1 | //#define __CL_ENABLE_EXCEPTIONS 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "PNG.h" 13 | 14 | std::string readFile(std::string fileName) 15 | { 16 | std::ifstream t(fileName); 17 | std::string str((std::istreambuf_iterator(t)), std::istreambuf_iterator()); 18 | return str; 19 | } 20 | 21 | int main(int arg, char* args[]) 22 | { 23 | std::vector platforms; 24 | cl::Platform::get(&platforms); 25 | if (platforms.size() == 0) 26 | { 27 | std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system. 28 | exit(1); 29 | } 30 | std::vector devices; 31 | platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); 32 | cl::Device device = devices[0]; 33 | std::cout << "Using device: " << device.getInfo() << std::endl; 34 | std::cout << "Using platform: " << platforms[0].getInfo() << std::endl; 35 | cl::Context context(device); 36 | 37 | //load our image 38 | PNG inPng("Lenna.png"); 39 | 40 | //store width and height so we can use them for our output image later 41 | const unsigned int w = inPng.w; 42 | const unsigned int h = inPng.h; 43 | 44 | //input image 45 | const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8); 46 | cl::Image2D in(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, format, w, h, 0, &inPng.data[0]); 47 | 48 | //we are done with the image so free up its memory 49 | inPng.Free(); 50 | 51 | //output image 52 | cl::Image2D out(context, CL_MEM_WRITE_ONLY, format, w, h, 0, NULL); 53 | 54 | cl::Program::Sources sources; 55 | std::string kernel_code = readFile("cl_tutorial_2_copy.cl"); 56 | //Add your program source 57 | sources.push_back({ kernel_code.c_str(),kernel_code.length() }); 58 | 59 | //Create your OpenCL program and build it. 
60 | cl::Program program(context, sources); 61 | if (program.build({ device }) != CL_SUCCESS) 62 | { 63 | std::cout << " Error building: " << program.getBuildInfo(device) << std::endl;//print the build log to find any issues with your source 64 | exit(1);//Quit if your program doesn't compile 65 | } 66 | 67 | //set the kernel arguments 68 | cl::Kernel kernelCopy(program, "copy"); 69 | kernelCopy.setArg(0, in); 70 | kernelCopy.setArg(1, out); 71 | 72 | //create command queue 73 | cl::CommandQueue queue(context, device, 0, NULL); 74 | 75 | //execute kernel 76 | queue.enqueueNDRangeKernel(kernelCopy, cl::NullRange, cl::NDRange(w, h), cl::NullRange); 77 | 78 | //wait for kernel to finish 79 | queue.finish(); 80 | 81 | //origin and size of the region to read from our image (I really do not like how the c++ wrapper does this) 82 | cl::size_t<3> origin; 83 | cl::size_t<3> size; 84 | origin[0] = 0; 85 | origin[1] = 0; 86 | origin[2] = 0; 87 | size[0] = w; 88 | size[1] = h; 89 | size[2] = 1; 90 | 91 | //output png 92 | PNG outPng; 93 | //create the image with the same width and height as original 94 | outPng.Create(w, h); 95 | 96 | //temporary array to store the result from opencl 97 | auto tmp = new unsigned char[w * h * 4]; 98 | //CL_TRUE means that it waits for the entire image to be copied before continuing 99 | queue.enqueueReadImage(out, CL_TRUE, origin, size, 0, 0, tmp); 100 | 101 | //copy the data from the temp array to the png 102 | std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data)); 103 | 104 | //write the image to file 105 | outPng.Save("cl_tutorial_2.png"); 106 | //free the image's resources since we are done with it 107 | outPng.Free(); 108 | 109 | //free the temp array 110 | delete[] tmp; 111 | 112 | return 0; 113 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenCL-CUDA-Tutorials 2 | Sources for OpenCL and CUDA
tutorials. 3 | 4 | This is as much a learning experience for me as it will be with you so bear with me. I try to comment my code and explain what I am doing to the best of my ability however I will not be getting too technical. 5 | 6 | I am compiling and testing my code with both Microsoft Visual Studio 2015 Community and MinGW’s g++. 7 | 8 | The point of these tutorials is not to develop parallel algorithms but to gain an understanding of how OpenCL and CUDA work. 9 | -------------------------------------------------------------------------------- /vs/CUDA/README.md: -------------------------------------------------------------------------------- 1 | VS Project files for CUDA tutorials -------------------------------------------------------------------------------- /vs/CUDA/Tutorial 1 - Hello, CUDA/Tutorial 1 - Hello, CUDA.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A} 23 | Tutorial_1___Hello__CUDA 24 | 8.1 25 | 26 | 27 | 28 | Application 29 | true 30 | MultiByte 31 | v120 32 | 33 | 34 | Application 35 | true 36 | MultiByte 37 | v120 38 | 39 | 40 | Application 41 | false 42 | true 43 | MultiByte 44 | v120 45 | 46 | 47 | Application 48 | false 49 | true 50 | MultiByte 51 | v120 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | true 72 | 73 | 74 | true 75 | 76 | 77 | 78 | Level3 79 | Disabled 80 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 81 | 82 | 83 | true 84 | Console 85 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 86 | 87 | 88 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 89 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 90 | 
91 | 92 | 93 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(Include) 94 | 95 | 96 | 97 | 98 | Level3 99 | Disabled 100 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 101 | 102 | 103 | true 104 | Console 105 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 106 | 107 | 108 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 109 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 110 | 111 | 112 | 64 113 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(Include) 114 | 115 | 116 | 117 | 118 | Level3 119 | MaxSpeed 120 | true 121 | true 122 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 123 | 124 | 125 | true 126 | true 127 | true 128 | Console 129 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 130 | 131 | 132 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 133 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 134 | 135 | 136 | 137 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(Include) 138 | 139 | 140 | 141 | 142 | Level3 143 | MaxSpeed 144 | true 145 | true 146 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 147 | 148 | 149 | true 150 | true 151 | true 152 | Console 153 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 154 | 155 | 156 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 157 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 158 | 159 | 160 | 64 161 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(Include) 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 
-------------------------------------------------------------------------------- /vs/CUDA/Tutorial 1 - Hello, CUDA/kernel.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | 4 | #include 5 | #include 6 | 7 | __global__ void simple_add(const int *A, const int *B, int *C) 8 | { 9 | C[threadIdx.x] = A[threadIdx.x] + B[threadIdx.x]; 10 | } 11 | 12 | int main(int arg, char* args[]) 13 | { 14 | const int size = 10; 15 | int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 16 | int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }; 17 | int C[size]; 18 | 19 | int *buffer_A = 0; 20 | int *buffer_B = 0; 21 | int *buffer_C = 0; 22 | cudaError_t cudaStatus; 23 | 24 | // Choose which GPU to run on, change this on a multi-GPU system. 25 | cudaStatus = cudaSetDevice(0); 26 | if (cudaStatus != cudaSuccess) 27 | { 28 | std::cout << "No CUDA devices found!" << std::endl; 29 | exit(1); 30 | } 31 | 32 | cudaDeviceProp prop; 33 | cudaGetDeviceProperties(&prop, 0); 34 | 35 | std::cout << "Using device: " << prop.name << std::endl; 36 | 37 | // Allocate GPU buffers for three vectors (two input, one output). 38 | cudaMalloc((void**)&buffer_A, size * sizeof(int)); 39 | cudaMalloc((void**)&buffer_B, size * sizeof(int)); 40 | cudaMalloc((void**)&buffer_C, size * sizeof(int)); 41 | 42 | // Copy input vectors from host memory to GPU buffers. 43 | cudaMemcpy(buffer_A, A, size * sizeof(int), cudaMemcpyHostToDevice); 44 | cudaMemcpy(buffer_B, B, size * sizeof(int), cudaMemcpyHostToDevice); 45 | 46 | // Launch a kernel on the GPU with one thread for each element. 
47 | simple_add<<<1, size>>>(buffer_A, buffer_B, buffer_C); 48 | 49 | // Check for any errors launching the kernel 50 | cudaStatus = cudaGetLastError(); 51 | if (cudaStatus != cudaSuccess) 52 | { 53 | std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl; 54 | cudaFree(buffer_A); 55 | cudaFree(buffer_B); 56 | cudaFree(buffer_C); 57 | exit(1); 58 | } 59 | 60 | // cudaDeviceSynchronize waits for the kernel to finish, and returns 61 | // any errors encountered during the launch. 62 | cudaStatus = cudaDeviceSynchronize(); 63 | if (cudaStatus != cudaSuccess) 64 | { 65 | std::cout << "Could not synchronize device!" << std::endl; 66 | cudaFree(buffer_A); 67 | cudaFree(buffer_B); 68 | cudaFree(buffer_C); 69 | exit(1); 70 | } 71 | 72 | // Copy output vector from GPU buffer to host memory. 73 | cudaStatus = cudaMemcpy(C, buffer_C, size * sizeof(int), cudaMemcpyDeviceToHost); 74 | cudaFree(buffer_A); 75 | cudaFree(buffer_B); 76 | cudaFree(buffer_C); 77 | 78 | if(cudaStatus != cudaSuccess) 79 | { 80 | std::cout << "Could not copy buffer memory to host!" << std::endl; 81 | exit(1); 82 | } 83 | 84 | //Prints the array 85 | std::cout << "Result:" << std::endl; 86 | for (int i = 0; i < size; i++) 87 | { 88 | std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl; 89 | } 90 | 91 | // cudaDeviceReset must be called before exiting in order for profiling and 92 | // tracing tools such as Nsight and Visual Profiler to show complete traces. 93 | cudaStatus = cudaDeviceReset(); 94 | if (cudaStatus != cudaSuccess) 95 | { 96 | std::cout << "Device reset failed!" 
<< std::endl; 97 | exit(1); 98 | } 99 | 100 | return 0; 101 | } -------------------------------------------------------------------------------- /vs/CUDA/Tutorial 2 - CUDA load image/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/vs/CUDA/Tutorial 2 - CUDA load image/Lenna.png -------------------------------------------------------------------------------- /vs/CUDA/Tutorial 2 - CUDA load image/Tutorial 2 - CUDA load image.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F} 23 | Tutorial_2___CUDA_load_image 24 | 25 | 26 | 27 | Application 28 | true 29 | MultiByte 30 | v120 31 | 32 | 33 | Application 34 | true 35 | MultiByte 36 | v120 37 | 38 | 39 | Application 40 | false 41 | true 42 | MultiByte 43 | v120 44 | 45 | 46 | Application 47 | false 48 | true 49 | MultiByte 50 | v120 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | true 71 | 72 | 73 | true 74 | 75 | 76 | 77 | Level3 78 | Disabled 79 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 80 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(AdditionalIncludeDirectories) 81 | 82 | 83 | true 84 | Console 85 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 86 | 87 | 88 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 89 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 90 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 91 | 92 | 93 | 94 | 95 | 96 | Level3 97 | Disabled 98 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 99 
| E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(AdditionalIncludeDirectories) 100 | 101 | 102 | true 103 | Console 104 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 105 | 106 | 107 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 108 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 109 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 110 | 111 | 112 | 64 113 | 114 | 115 | 116 | 117 | Level3 118 | MaxSpeed 119 | true 120 | true 121 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 122 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(AdditionalIncludeDirectories) 123 | 124 | 125 | true 126 | true 127 | true 128 | Console 129 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 130 | 131 | 132 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 133 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 134 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 135 | 136 | 137 | 138 | 139 | 140 | Level3 141 | MaxSpeed 142 | true 143 | true 144 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 145 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(AdditionalIncludeDirectories) 146 | 147 | 148 | true 149 | true 150 | true 151 | Console 152 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 153 | 154 | 155 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 156 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 157 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 158 | 159 | 160 | 64 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 
-------------------------------------------------------------------------------- /vs/CUDA/Tutorial 2 - CUDA load image/kernel.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | 4 | #include 5 | #include 6 | 7 | #include "PNG.h" 8 | 9 | __global__ void copy(const unsigned char* in, unsigned char* out) 10 | { 11 | int x = blockIdx.x; 12 | int y = threadIdx.x; 13 | int width = blockDim.x; 14 | int index = (x + y * width) * 4; 15 | 16 | //copy each color channel 17 | out[index] = in[index]; 18 | out[index + 1] = in[index + 1]; 19 | out[index + 2] = in[index + 2]; 20 | out[index + 3] = in[index + 3]; 21 | } 22 | 23 | int main(int arg, char* args[]) 24 | { 25 | PNG inPng("Lenna.png"); 26 | PNG outPng; 27 | outPng.Create(inPng.w, inPng.h); 28 | 29 | //store width and height so we can use them for our output image later 30 | const unsigned int w = inPng.w; 31 | const unsigned int h = inPng.h; 32 | //4 because there are 4 color channels R, G, B, and A 33 | int size = w * h * 4; 34 | 35 | unsigned char *in = 0; 36 | unsigned char *out = 0; 37 | cudaError_t cudaStatus; 38 | 39 | // Choose which GPU to run on, change this on a multi-GPU system. 40 | cudaStatus = cudaSetDevice(0); 41 | if (cudaStatus != cudaSuccess) 42 | { 43 | std::cout << "No CUDA devices found!" << std::endl; 44 | exit(1); 45 | } 46 | 47 | //prints the device the kernel will be running on 48 | cudaDeviceProp prop; 49 | cudaGetDeviceProperties(&prop, 0); 50 | std::cout << "Using device: " << prop.name << std::endl; 51 | 52 | // Allocate GPU buffers for the images 53 | cudaMalloc((void**)&in, size * sizeof(unsigned char)); 54 | cudaMalloc((void**)&out, size * sizeof(unsigned char)); 55 | 56 | // Copy image data from host memory to GPU buffers. 
57 | cudaMemcpy(in, &inPng.data[0], size * sizeof(unsigned char), cudaMemcpyHostToDevice); 58 | 59 | //free the input image because we do not need it anymore 60 | inPng.Free(); 61 | 62 | // Launch a kernel on the GPU with one thread for each element. 63 | copy<<>>(in, out); 64 | 65 | // Check for any errors launching the kernel 66 | cudaStatus = cudaGetLastError(); 67 | if (cudaStatus != cudaSuccess) 68 | { 69 | std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl; 70 | cudaFree(in); 71 | cudaFree(out); 72 | exit(1); 73 | } 74 | 75 | // cudaDeviceSynchronize waits for the kernel to finish, and returns 76 | // any errors encountered during the launch. 77 | cudaStatus = cudaDeviceSynchronize(); 78 | if (cudaStatus != cudaSuccess) 79 | { 80 | std::cout << "Could not synchronize device!" << std::endl; 81 | cudaFree(in); 82 | cudaFree(out); 83 | exit(1); 84 | } 85 | 86 | //temporary array to store the result from CUDA 87 | auto tmp = new unsigned char[w * h * 4]; 88 | // Copy output vector from GPU buffer to host memory. 89 | cudaStatus = cudaMemcpy(tmp, out, size * sizeof(unsigned char), cudaMemcpyDeviceToHost); 90 | cudaFree(in); 91 | cudaFree(out); 92 | 93 | //copy the data from the temp array to the png 94 | std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data)); 95 | 96 | //write the image to file 97 | outPng.Save("cuda_tutorial_2.png"); 98 | //free the image's resources since we are done with it 99 | outPng.Free(); 100 | 101 | //free the temp array 102 | delete[] tmp; 103 | 104 | if(cudaStatus != cudaSuccess) 105 | { 106 | std::cout << "Could not copy buffer memory to host!" << std::endl; 107 | exit(1); 108 | } 109 | 110 | // cudaDeviceReset must be called before exiting in order for profiling and 111 | // tracing tools such as Nsight and Visual Profiler to show complete traces. 112 | cudaStatus = cudaDeviceReset(); 113 | if (cudaStatus != cudaSuccess) 114 | { 115 | std::cout << "Device reset failed!"
<< std::endl; 116 | exit(1); 117 | } 118 | 119 | return 0; 120 | } -------------------------------------------------------------------------------- /vs/CUDA/Tutorial 3 - CUDA basic image filtering/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/vs/CUDA/Tutorial 3 - CUDA basic image filtering/Lenna.png -------------------------------------------------------------------------------- /vs/CUDA/Tutorial 3 - CUDA basic image filtering/Tutorial 3 - CUDA basic image filtering.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {20B93A00-1A20-46D6-8841-EB60A002EB08} 23 | Tutorial_3___CUDA_basic_image_filtering 24 | 25 | 26 | 27 | Application 28 | true 29 | MultiByte 30 | v120 31 | 32 | 33 | Application 34 | true 35 | MultiByte 36 | v120 37 | 38 | 39 | Application 40 | false 41 | true 42 | MultiByte 43 | v120 44 | 45 | 46 | Application 47 | false 48 | true 49 | MultiByte 50 | v120 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | true 71 | 72 | 73 | true 74 | 75 | 76 | 77 | Level3 78 | Disabled 79 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 80 | 81 | 82 | true 83 | Console 84 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 85 | 86 | 87 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 88 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 89 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 90 | 91 | 92 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes 93 | 94 | 95 | 96 | 97 | Level3 98 | Disabled 99 | 
WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 100 | 101 | 102 | true 103 | Console 104 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 105 | 106 | 107 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 108 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 109 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 110 | 111 | 112 | 64 113 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes 114 | 115 | 116 | 117 | 118 | Level3 119 | MaxSpeed 120 | true 121 | true 122 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 123 | 124 | 125 | true 126 | true 127 | true 128 | Console 129 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 130 | 131 | 132 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 133 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 134 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 135 | 136 | 137 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes 138 | 139 | 140 | 141 | 142 | Level3 143 | MaxSpeed 144 | true 145 | true 146 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 147 | 148 | 149 | true 150 | true 151 | true 152 | Console 153 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 154 | 155 | 156 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 157 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 158 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 159 | 160 | 161 | 64 162 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 
-------------------------------------------------------------------------------- /vs/CUDA/Tutorial 3 - CUDA basic image filtering/kernel.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | 4 | #include 5 | #include 6 | 7 | #include "PNG.h" 8 | 9 | __global__ void boxFilter(const unsigned char* in, unsigned char* out, const int imageWidth, const int imageHeight, const int halfBoxWidth, const int halfBoxHeight) 10 | { 11 | int x = blockIdx.x; 12 | int y = blockIdx.y; 13 | 14 | int count = 0; 15 | 16 | int index = (x + y * imageWidth) * 4; 17 | 18 | unsigned int total[4] = { 0, 0, 0, 0 }; 19 | 20 | for (int i = -halfBoxWidth; i <= halfBoxWidth; i++) 21 | { 22 | for (int j = -halfBoxHeight; j <= halfBoxHeight; j++) 23 | { 24 | int cx = x + i; 25 | int cy = y + j; 26 | if (cx >= 0 && cy >= 0 && cx < imageWidth && cy < imageHeight) 27 | { 28 | int adjIndex = (cx + cy * imageWidth) * 4; 29 | for (int c = 0; c < 4; c++) 30 | { 31 | total[c] += static_cast(in[adjIndex + c]); 32 | } 33 | count++; 34 | } 35 | } 36 | } 37 | 38 | out[index] = static_cast(total[0] / count); 39 | out[index + 1] = static_cast(total[1] / count); 40 | out[index + 2] = static_cast(total[2] / count); 41 | out[index + 3] = static_cast(total[3] / count); 42 | } 43 | 44 | int main(int arg, char* args[]) 45 | { 46 | int filterWidth = 10; 47 | int filterHeight = 10; 48 | if (arg > 2) 49 | { 50 | filterWidth = std::atoi(args[1]); 51 | filterHeight = std::atoi(args[2]); 52 | } 53 | 54 | PNG inPng("Lenna.png"); 55 | PNG outPng; 56 | outPng.Create(inPng.w, inPng.h); 57 | 58 | //store width and height so we can use them for our output image later 59 | const unsigned int w = inPng.w; 60 | const unsigned int h = inPng.h; 61 | //4 because there are 4 color channels R, G, B, and A 62 | int size = w * h * 4; 63 | 64 | unsigned char *in = 0; 65 | unsigned char *out = 0; 66 | cudaError_t cudaStatus; 67 | 68 | 
// Choose which GPU to run on, change this on a multi-GPU system. 69 | cudaStatus = cudaSetDevice(0); 70 | if (cudaStatus != cudaSuccess) 71 | { 72 | std::cout << "No CUDA devices found!" << std::endl; 73 | exit(1); 74 | } 75 | 76 | //prints the device the kernel will be running on 77 | cudaDeviceProp prop; 78 | cudaGetDeviceProperties(&prop, 0); 79 | std::cout << "Using device: " << prop.name << std::endl; 80 | 81 | // Allocate GPU buffers for the images 82 | cudaMalloc((void**)&in, size * sizeof(unsigned char)); 83 | cudaMalloc((void**)&out, size * sizeof(unsigned char)); 84 | 85 | // Copy image data from host memory to GPU buffers. 86 | cudaMemcpy(in, &inPng.data[0], size * sizeof(unsigned char), cudaMemcpyHostToDevice); 87 | 88 | //free the input image because we do not need it anymore 89 | inPng.Free(); 90 | 91 | // Launch a kernel on the GPU with one thread for each element. 92 | dim3 block_size(w, h); 93 | dim3 grid_size(1); 94 | boxFilter<<>>(in, out, w, h, filterWidth, filterHeight); 95 | 96 | // Check for any errors launching the kernel 97 | cudaStatus = cudaGetLastError(); 98 | if (cudaStatus != cudaSuccess) 99 | { 100 | std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl; 101 | cudaFree(in); 102 | cudaFree(out); 103 | exit(1); 104 | } 105 | 106 | // cudaDeviceSynchronize waits for the kernel to finish, and returns 107 | // any errors encountered during the launch. 108 | cudaStatus = cudaDeviceSynchronize(); 109 | if (cudaStatus != cudaSuccess) 110 | { 111 | std::cout << "Could not synchronize device!" << std::endl; 112 | cudaFree(in); 113 | cudaFree(out); 114 | exit(1); 115 | } 116 | 117 | //temporary array to store the result from CUDA 118 | auto tmp = new unsigned char[w * h * 4]; 119 | // Copy output vector from GPU buffer to host memory.
120 | cudaStatus = cudaMemcpy(tmp, out, size * sizeof(unsigned char), cudaMemcpyDeviceToHost); 121 | cudaFree(in); 122 | cudaFree(out); 123 | 124 | //copy the data from the temp array to the png 125 | std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data)); 126 | 127 | //write the image to file 128 | outPng.Save("cuda_tutorial_3.png"); 129 | //free the image's resources since we are done with it 130 | outPng.Free(); 131 | 132 | //free the temp array 133 | delete[] tmp; 134 | 135 | if (cudaStatus != cudaSuccess) 136 | { 137 | std::cout << "Could not copy buffer memory to host!" << std::endl; 138 | exit(1); 139 | } 140 | 141 | // cudaDeviceReset must be called before exiting in order for profiling and 142 | // tracing tools such as Nsight and Visual Profiler to show complete traces. 143 | cudaStatus = cudaDeviceReset(); 144 | if (cudaStatus != cudaSuccess) 145 | { 146 | std::cout << "Device reset failed!" << std::endl; 147 | exit(1); 148 | } 149 | 150 | return 0; 151 | } -------------------------------------------------------------------------------- /vs/OpenCL/README.md: -------------------------------------------------------------------------------- 1 | VS Project files for OpenCL tutorials -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 1 - Hello, OpenCL/Tutorial 1 - Hello, OpenCL.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B} 23 | Tutorial1HelloOpenCL 24 | 8.1 25 | 26 | 27 | 28 | Application 29 | true 30 | v140 31 | MultiByte 32 | 33 | 34 | Application 35 | false 36 | v140 37 | true 38 | MultiByte 39 | 40 | 41 | Application 42 | true 43 | v140 44 | MultiByte 45 | 46 | 47 | Application 48 | false 49 | v140 50 | true 51 | MultiByte 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | Level3 75 | Disabled 76 | true 77 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 78 | 79 | 80 | OpenCL.lib;%(AdditionalDependencies) 81 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories) 82 | Console 83 | 84 | 85 | copy /y "$(ProjectDir)simple_add.cl" "$(OutDir)" 86 | 87 | 88 | 89 | 90 | Level3 91 | Disabled 92 | true 93 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 94 | 95 | 96 | OpenCL.lib;%(AdditionalDependencies) 97 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories) 98 | Console 99 | 100 | 101 | copy /y "$(ProjectDir)simple_add.cl" "$(OutDir)" 102 | 103 | 104 | 105 | 106 | Level3 107 | MaxSpeed 108 | true 109 | true 110 | true 111 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 112 | 113 | 114 | true 115 | true 116 | OpenCL.lib;%(AdditionalDependencies) 117 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories) 118 | Console 119 | 120 | 121 | copy /y "$(ProjectDir)simple_add.cl" "$(OutDir)" 122 | 123 | 124 | 125 | 126 | Level3 127 | MaxSpeed 128 | true 129 | true 130 | true 131 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 132 | 133 | 134 | true 135 | true 136 | OpenCL.lib;%(AdditionalDependencies) 137 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories) 138 | Console 139 | 140 | 141 | copy /y "$(ProjectDir)simple_add.cl" 
"$(OutDir)" 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | true 150 | true 151 | true 152 | true 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 1 - Hello, OpenCL/Tutorial 1 - Hello, OpenCL.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 1 - Hello, OpenCL/main.cpp: -------------------------------------------------------------------------------- 1 | #define __CL_ENABLE_EXCEPTIONS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | std::string readFile(std::string fileName) 9 | { 10 | std::ifstream t(fileName); 11 | std::string str((std::istreambuf_iterator(t)), std::istreambuf_iterator()); 12 | return str; 13 | } 14 | 15 | int main(int arg, char* args[]) 16 | { 17 | const int size = 10; 18 | int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 19 | int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }; 20 | int C[size]; 21 | 22 | //stl vector to store all of the available platforms 23 | std::vector platforms; 24 | //get all available platforms 25 | cl::Platform::get(&platforms); 26 | 27 | if (platforms.size() == 0) 28 | { 29 | std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system. 30 | exit(1); 31 | } 32 | 33 | //Create a stl vector to store all of the availbe devices to use from the first platform. 34 | std::vector devices; 35 | //Get the available devices from the platform. For me the platform for my 980ti is actually th e second in the platform list but for simplicity we will use the first one. 36 | platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); 37 | //Set the device to the first device in the platform. 
You can have more than one device associated with a single platform, for instance if you had two of the same GPUs on your system in SLI or CrossFire. 38 | cl::Device device = devices[0]; 39 | 40 | //This is just helpful to see what device and platform you are using. 41 | std::cout << "Using device: " << device.getInfo() << std::endl; 42 | std::cout << "Using platform: " << platforms[0].getInfo() << std::endl; 43 | 44 | //Finally create the OpenCL context from the device you have chosen. 45 | cl::Context context(device); 46 | 47 | cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * size); 48 | cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * size); 49 | cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * size); 50 | 51 | //A source object for your program 52 | cl::Program::Sources sources; 53 | std::string kernel_code = readFile("simple_add.cl"); 54 | //Add your program source 55 | sources.push_back({ kernel_code.c_str(),kernel_code.length() }); 56 | 57 | //Create your OpenCL program and build it. 
58 | cl::Program program(context, sources); 59 | if (program.build({ device }) != CL_SUCCESS) 60 | { 61 | std::cout << " Error building: " << program.getBuildInfo(device) << std::endl;//print the build log to find any issues with your source 62 | exit(1);//Quit if your program doesn't compile 63 | } 64 | 65 | cl::CommandQueue queue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL); 66 | 67 | //Write the two input arrays that we are adding together to their buffers on our OpenCL device 68 | queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int) * size, A); 69 | queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int) * size, B); 70 | 71 | //Create our Kernel (basically the entry point of our OpenCL program) 72 | cl::Kernel simple_add(program, "simple_add"); 73 | //Set our arguments for the kernel 74 | simple_add.setArg(0, buffer_A); 75 | simple_add.setArg(1, buffer_B); 76 | simple_add.setArg(2, buffer_C); 77 | 78 | //Make sure that our queue is done with all of its tasks before continuing 79 | queue.finish(); 80 | 81 | //Create an event that we can use to wait for our program to finish running 82 | cl::Event e; 83 | //This runs our program, the ranges here are the offset, global, local ranges that our code runs in.
84 | queue.enqueueNDRangeKernel(simple_add, cl::NullRange, cl::NDRange(size), cl::NullRange, 0, &e); 85 | 86 | //Waits for our program to finish 87 | e.wait(); 88 | //Reads the output written to our buffer into our final array 89 | queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int) * size, C); 90 | 91 | //prints the array 92 | std::cout << "Result:" << std::endl; 93 | for (int i = 0; i < size; i++) 94 | { 95 | std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl; 96 | } 97 | 98 | return 0; 99 | } -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 1 - Hello, OpenCL/simple_add.cl: -------------------------------------------------------------------------------- 1 | void kernel simple_add(global const int* A, global const int* B, global int* C) 2 | { 3 | C[get_global_id(0)] = A[get_global_id(0)] + B[get_global_id(0)]; 4 | } -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 2 - OpenCL load image/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/vs/OpenCL/Tutorial 2 - OpenCL load image/Lenna.png -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 2 - OpenCL load image/Tutorial 2 - OpenCL load image.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | true 27 | true 28 | true 29 | true 30 | 31 | 32 | 33 | 34 | false 35 | true 36 | false 37 | true 38 | false 39 | true 40 | false 41 | true 42 | 43 | 44 | 45 | {D27A5344-59EA-4276-B828-B0768E8ECA82} 46 | Tutorial2OpenCLloadimage 47 | 8.1 48 | 49 | 50 | 51 | Application 52 | true 53 | v140 54 | MultiByte 55 | 56 
| 57 | Application 58 | false 59 | v140 60 | true 61 | MultiByte 62 | 63 | 64 | Application 65 | true 66 | v140 67 | MultiByte 68 | 69 | 70 | Application 71 | false 72 | v140 73 | true 74 | MultiByte 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | Level3 98 | Disabled 99 | true 100 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 101 | 102 | 103 | OpenCL.lib;%(AdditionalDependencies) 104 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories) 105 | 106 | 107 | copy /y "$(ProjectDir)cl_tutorial_2_copy.cl" "$(OutDir)" 108 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 109 | 110 | 111 | 112 | 113 | Level3 114 | Disabled 115 | true 116 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 117 | 118 | 119 | OpenCL.lib;%(AdditionalDependencies) 120 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories) 121 | 122 | 123 | copy /y "$(ProjectDir)cl_tutorial_2_copy.cl" "$(OutDir)" 124 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 125 | 126 | 127 | 128 | 129 | Level3 130 | MaxSpeed 131 | true 132 | true 133 | true 134 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 135 | 136 | 137 | true 138 | true 139 | OpenCL.lib;%(AdditionalDependencies) 140 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories) 141 | 142 | 143 | copy /y "$(ProjectDir)cl_tutorial_2_copy.cl" "$(OutDir)" 144 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 145 | 146 | 147 | 148 | 149 | Level3 150 | MaxSpeed 151 | true 152 | true 153 | true 154 | E:\Users\Jesse 
Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 155 | 156 | 157 | true 158 | true 159 | OpenCL.lib;%(AdditionalDependencies) 160 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories) 161 | 162 | 163 | copy /y "$(ProjectDir)cl_tutorial_2_copy.cl" "$(OutDir)" 164 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 165 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 2 - OpenCL load image/Tutorial 2 - OpenCL load image.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 2 - OpenCL load image/cl_tutorial_2_copy.cl: -------------------------------------------------------------------------------- 1 | const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;//read_imageui only supports nearest filtering; with a linear sampler the values read are undefined 2 | //Copies the pixel at this work-item's (x, y) position from the input image to the output image 3 | void kernel copy(__read_only image2d_t in, __write_only image2d_t out) 4 | { 5 | int x = get_global_id(0); 6 | int y = get_global_id(1); 7 | int2 pos = (int2)(x, y); 8 | uint4 pixel = read_imageui(in, smp, pos); 9 | write_imageui(out, pos, pixel); 10 | } -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 2 - OpenCL load image/main.cpp: -------------------------------------------------------------------------------- 1 | //#define __CL_ENABLE_EXCEPTIONS 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "PNG.h" 13 | 14 | std::string readFile(std::string fileName) 15 | { 16 | std::ifstream t(fileName); 17 | std::string str((std::istreambuf_iterator(t)), std::istreambuf_iterator()); 18 | return str; 19 | } 20 | 21 | int main(int
arg, char* args[]) 22 | { 23 | std::vector platforms; 24 | cl::Platform::get(&platforms); 25 | if (platforms.size() == 0) 26 | { 27 | std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system. 28 | exit(1); 29 | } 30 | std::vector devices; 31 | platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); 32 | cl::Device device = devices[0]; 33 | std::cout << "Using device: " << device.getInfo() << std::endl; 34 | std::cout << "Using platform: " << platforms[0].getInfo() << std::endl; 35 | cl::Context context(device); 36 | 37 | //load our image 38 | PNG inPng("Lenna.png"); 39 | 40 | //store width and height so we can use them for our output image later 41 | const unsigned int w = inPng.w; 42 | const unsigned int h = inPng.h; 43 | 44 | //input image 45 | const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8); 46 | cl::Image2D in(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, format, w, h, 0, &inPng.data[0]); 47 | 48 | //we are done with the image so free up its memory 49 | inPng.Free(); 50 | 51 | //output image 52 | cl::Image2D out(context, CL_MEM_WRITE_ONLY, format, w, h, 0, NULL); 53 | 54 | cl::Program::Sources sources; 55 | std::string kernel_code = readFile("cl_tutorial_2_copy.cl"); 56 | //Add your program source 57 | sources.push_back({ kernel_code.c_str(),kernel_code.length() }); 58 | 59 | //Create your OpenCL program and build it. 
60 | cl::Program program(context, sources); 61 | if (program.build({ device }) != CL_SUCCESS) 62 | { 63 | std::cout << " Error building: " << program.getBuildInfo(device) << std::endl;//print the build log to find any issues with your source 64 | exit(1);//Quit if your program doesn't compile 65 | } 66 | 67 | //set the kernel arguments 68 | cl::Kernel kernelCopy(program, "copy"); 69 | kernelCopy.setArg(0, in); 70 | kernelCopy.setArg(1, out); 71 | 72 | //create command queue 73 | cl::CommandQueue queue(context, device, 0, NULL); 74 | 75 | //execute kernel 76 | queue.enqueueNDRangeKernel(kernelCopy, cl::NullRange, cl::NDRange(w, h), cl::NullRange); 77 | 78 | //wait for kernel to finish 79 | queue.finish(); 80 | 81 | //origin and size of the region to read from our image (I really do not like how the c++ wrapper does this) 82 | cl::size_t<3> origin; 83 | cl::size_t<3> size; 84 | origin[0] = 0; 85 | origin[1] = 0; 86 | origin[2] = 0; 87 | size[0] = w; 88 | size[1] = h; 89 | size[2] = 1; 90 | 91 | //output png 92 | PNG outPng; 93 | //create the image with the same width and height as original 94 | outPng.Create(w, h); 95 | 96 | //temporary array to store the result from opencl 97 | auto tmp = new unsigned char[w * h * 4]; 98 | //CL_TRUE means that it waits for the entire image to be copied before continuing 99 | queue.enqueueReadImage(out, CL_TRUE, origin, size, 0, 0, tmp); 100 | 101 | //copy the data from the temp array to the png 102 | std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data)); 103 | 104 | //write the image to file 105 | outPng.Save("cl_tutorial_2.png"); 106 | //free the image's resources since we are done with it 107 | outPng.Free(); 108 | 109 | //free the temp array 110 | delete[] tmp; 111 | 112 | return 0; 113 | } -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/Lenna.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/Lenna.png -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/Tutorial 3 - OpenCL basic image filtering.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193} 23 | Tutorial3OpenCLbasicimagefiltering 24 | 8.1 25 | 26 | 27 | 28 | Application 29 | true 30 | v140 31 | MultiByte 32 | 33 | 34 | Application 35 | false 36 | v140 37 | true 38 | MultiByte 39 | 40 | 41 | Application 42 | true 43 | v140 44 | MultiByte 45 | 46 | 47 | Application 48 | false 49 | v140 50 | true 51 | MultiByte 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | Level3 75 | Disabled 76 | true 77 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 78 | 79 | 80 | OpenCL.lib;%(AdditionalDependencies) 81 | Console 82 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories) 83 | 84 | 85 | copy /y "$(ProjectDir)cl_tutorial_3_boxFilter.cl" "$(OutDir)" 86 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | Level3 96 | Disabled 97 | true 98 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 99 | 100 | 101 | OpenCL.lib;%(AdditionalDependencies) 102 | Console 103 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories) 104 | 105 | 106 | copy 
/y "$(ProjectDir)cl_tutorial_3_boxFilter.cl" "$(OutDir)" 107 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | Level3 117 | MaxSpeed 118 | true 119 | true 120 | true 121 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 122 | 123 | 124 | true 125 | true 126 | OpenCL.lib;%(AdditionalDependencies) 127 | Console 128 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories) 129 | 130 | 131 | copy /y "$(ProjectDir)cl_tutorial_3_boxFilter.cl" "$(OutDir)" 132 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | Level3 142 | MaxSpeed 143 | true 144 | true 145 | true 146 | E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories) 147 | 148 | 149 | true 150 | true 151 | OpenCL.lib;%(AdditionalDependencies) 152 | Console 153 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories) 154 | 155 | 156 | copy /y "$(ProjectDir)cl_tutorial_3_boxFilter.cl" "$(OutDir)" 157 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)" 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | Document 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/Tutorial 3 - OpenCL basic image filtering.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/cl_tutorial_3_boxFilter.cl: 
-------------------------------------------------------------------------------- 1 | const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; 2 | 3 | void kernel boxFilter(__read_only image2d_t in, __write_only image2d_t out, const int imageWidth, const int imageHeight, const int halfBoxWidth, const int halfBoxHeight) 4 | { 5 | int x = get_global_id(0); 6 | int y = get_global_id(1); 7 | int2 pos = (int2)(x, y); 8 | 9 | uint4 total = {0, 0, 0, 0}; 10 | 11 | int count = 0; 12 | 13 | for(int i = -halfBoxWidth; i <= halfBoxWidth; i++) 14 | { 15 | for(int j = -halfBoxHeight; j <= halfBoxHeight; j++) 16 | { 17 | int2 coord = pos + (int2)(i, j); 18 | if(coord.x >= 0 && coord.y >= 0 && coord.x < imageWidth && coord.y < imageHeight) 19 | { 20 | total += read_imageui(in, smp, pos + (int2)(i, j)); 21 | count++; 22 | } 23 | } 24 | } 25 | write_imageui(out, pos, total / count); 26 | } -------------------------------------------------------------------------------- /vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/main.cpp: -------------------------------------------------------------------------------- 1 | //#define __CL_ENABLE_EXCEPTIONS 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "PNG.h" 14 | 15 | std::string readFile(std::string fileName) 16 | { 17 | std::ifstream t(fileName); 18 | std::string str((std::istreambuf_iterator(t)), std::istreambuf_iterator()); 19 | return str; 20 | } 21 | 22 | int main(int arg, char* args[]) 23 | { 24 | int filterWidth = 10; 25 | int filterHeight = 10; 26 | int platformId = 0; 27 | if (arg > 1) 28 | { 29 | platformId = atoi(args[1]); 30 | } 31 | if (arg > 3) 32 | { 33 | filterWidth = std::atoi(args[2]); 34 | filterHeight = std::atoi(args[3]); 35 | } 36 | 37 | std::vector platforms; 38 | cl::Platform::get(&platforms); 39 | if (platforms.size() == 0) 40 | { 41 | std::cout << "No OpenCL platforms found" << 
std::endl;//This means you do not have an OpenCL compatible platform on your system. 42 | exit(1); 43 | } 44 | std::vector devices; 45 | platforms[platformId].getDevices(CL_DEVICE_TYPE_ALL, &devices); 46 | cl::Device device = devices[0]; 47 | std::cout << "Using device: " << device.getInfo() << std::endl; 48 | std::cout << "Using platform: " << platforms[platformId].getInfo() << std::endl; 49 | cl::Context context(device); 50 | 51 | //load our image 52 | PNG inPng("Lenna.png"); 53 | 54 | //store width and height so we can use them for our output image later 55 | const unsigned int w = inPng.w; 56 | const unsigned int h = inPng.h; 57 | 58 | //input image 59 | const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8); 60 | cl::Image2D in(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, format, w, h, 0, &inPng.data[0]); 61 | 62 | //we are done with the image so free up its memory 63 | inPng.Free(); 64 | 65 | //output image 66 | cl::Image2D out(context, CL_MEM_WRITE_ONLY, format, w, h, 0, NULL); 67 | 68 | cl::Program::Sources sources; 69 | std::string kernel_code = readFile("cl_tutorial_3_boxFilter.cl"); 70 | //Add your program source 71 | sources.push_back({ kernel_code.c_str(),kernel_code.length() }); 72 | 73 | //Create your OpenCL program and build it. 
74 | cl::Program program(context, sources); 75 | if (program.build({ device }) != CL_SUCCESS) 76 | { 77 | std::cout << " Error building: " << program.getBuildInfo(device) << std::endl;//print the build log to find any issues with your source 78 | exit(1);//Quit if your program doesn't compile 79 | } 80 | 81 | //create command queue 82 | cl::CommandQueue queue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL); 83 | 84 | //set the kernel arguments 85 | cl::Kernel kernelboxFilter(program, "boxFilter"); 86 | kernelboxFilter.setArg(0, in); 87 | kernelboxFilter.setArg(1, out); 88 | kernelboxFilter.setArg(2, w); 89 | kernelboxFilter.setArg(3, h); 90 | kernelboxFilter.setArg(4, filterWidth); 91 | kernelboxFilter.setArg(5, filterHeight); 92 | 93 | cl::Event timer; 94 | 95 | //execute kernel 96 | queue.enqueueNDRangeKernel(kernelboxFilter, cl::NullRange, cl::NDRange(w, h), cl::NullRange, NULL, &timer); 97 | 98 | //wait for kernel to finish 99 | timer.wait(); 100 | 101 | cl_ulong time_start, time_end; 102 | double total_time; 103 | 104 | time_start = timer.getProfilingInfo(); 105 | time_end = timer.getProfilingInfo(); 106 | total_time = time_end - time_start; 107 | 108 | printf("\nExecution time in milliseconds = %0.3f ms\n", (total_time / 1000000.0)); 109 | 110 | //start and end coordinates for reading our image (I really do not like how the c++ wrapper does this) 111 | cl::size_t<3> origin; 112 | cl::size_t<3> size; 113 | origin[0] = 0; 114 | origin[1] = 0; 115 | origin[2] = 0; 116 | size[0] = w; 117 | size[1] = h; 118 | size[2] = 1; 119 | 120 | //output png 121 | PNG outPng; 122 | //create the image with the same width and height as original 123 | outPng.Create(w, h); 124 | 125 | //temporary array to store the result from opencl 126 | auto tmp = new unsigned char[w * h * 4]; 127 | //CL_TRUE means that it waits for the entire image to be copied before continuing 128 | queue.enqueueReadImage(out, CL_TRUE, origin, size, 0, 0, tmp); 129 | 130 | //copy the data from the temp 
array to the png 131 | std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data)); 132 | 133 | //write the image to file 134 | outPng.Save("cl_tutorial_3.png"); 135 | //free the iamge's resources since we are done with it 136 | outPng.Free(); 137 | 138 | //free the temp array 139 | delete[] tmp; 140 | 141 | return 0; 142 | } -------------------------------------------------------------------------------- /vs/OpenCL_CUDA_Tutorials.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.24720.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 1 - Hello, OpenCL", "OpenCL\Tutorial 1 - Hello, OpenCL\Tutorial 1 - Hello, OpenCL.vcxproj", "{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 1 - Hello, CUDA", "CUDA\Tutorial 1 - Hello, CUDA\Tutorial 1 - Hello, CUDA.vcxproj", "{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 2 - OpenCL load image", "OpenCL\Tutorial 2 - OpenCL load image\Tutorial 2 - OpenCL load image.vcxproj", "{D27A5344-59EA-4276-B828-B0768E8ECA82}" 11 | EndProject 12 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 2 - CUDA load image", "CUDA\Tutorial 2 - CUDA load image\Tutorial 2 - CUDA load image.vcxproj", "{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}" 13 | EndProject 14 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CUDA", "CUDA", "{6A139FCC-9678-4A85-A12C-8C8E74B80EA5}" 15 | EndProject 16 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "OpenCL", "OpenCL", "{6677E274-9760-4F0B-B5AD-6A9254FB02CA}" 17 | EndProject 18 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 3 - OpenCL basic image filtering", "OpenCL\Tutorial 3 - OpenCL basic image filtering\Tutorial 
3 - OpenCL basic image filtering.vcxproj", "{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}" 19 | EndProject 20 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 3 - CUDA basic image filtering", "CUDA\Tutorial 3 - CUDA basic image filtering\Tutorial 3 - CUDA basic image filtering.vcxproj", "{20B93A00-1A20-46D6-8841-EB60A002EB08}" 21 | EndProject 22 | Global 23 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 24 | Debug|x64 = Debug|x64 25 | Debug|x86 = Debug|x86 26 | Release|x64 = Release|x64 27 | Release|x86 = Release|x86 28 | EndGlobalSection 29 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 30 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Debug|x64.ActiveCfg = Debug|x64 31 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Debug|x64.Build.0 = Debug|x64 32 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Debug|x86.ActiveCfg = Debug|Win32 33 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Debug|x86.Build.0 = Debug|Win32 34 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Release|x64.ActiveCfg = Release|x64 35 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Release|x64.Build.0 = Release|x64 36 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Release|x86.ActiveCfg = Release|Win32 37 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Release|x86.Build.0 = Release|Win32 38 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Debug|x64.ActiveCfg = Debug|x64 39 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Debug|x64.Build.0 = Debug|x64 40 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Debug|x86.ActiveCfg = Debug|Win32 41 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Debug|x86.Build.0 = Debug|Win32 42 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Release|x64.ActiveCfg = Release|x64 43 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Release|x64.Build.0 = Release|x64 44 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Release|x86.ActiveCfg = Release|Win32 45 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Release|x86.Build.0 = Release|Win32 46 | {D27A5344-59EA-4276-B828-B0768E8ECA82}.Debug|x64.ActiveCfg = Debug|x64 47 | 
{D27A5344-59EA-4276-B828-B0768E8ECA82}.Debug|x64.Build.0 = Debug|x64 48 | {D27A5344-59EA-4276-B828-B0768E8ECA82}.Debug|x86.ActiveCfg = Debug|Win32 49 | {D27A5344-59EA-4276-B828-B0768E8ECA82}.Debug|x86.Build.0 = Debug|Win32 50 | {D27A5344-59EA-4276-B828-B0768E8ECA82}.Release|x64.ActiveCfg = Release|x64 51 | {D27A5344-59EA-4276-B828-B0768E8ECA82}.Release|x64.Build.0 = Release|x64 52 | {D27A5344-59EA-4276-B828-B0768E8ECA82}.Release|x86.ActiveCfg = Release|Win32 53 | {D27A5344-59EA-4276-B828-B0768E8ECA82}.Release|x86.Build.0 = Release|Win32 54 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Debug|x64.ActiveCfg = Debug|x64 55 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Debug|x64.Build.0 = Debug|x64 56 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Debug|x86.ActiveCfg = Debug|Win32 57 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Debug|x86.Build.0 = Debug|Win32 58 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Release|x64.ActiveCfg = Release|x64 59 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Release|x64.Build.0 = Release|x64 60 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Release|x86.ActiveCfg = Release|Win32 61 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Release|x86.Build.0 = Release|Win32 62 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Debug|x64.ActiveCfg = Debug|x64 63 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Debug|x64.Build.0 = Debug|x64 64 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Debug|x86.ActiveCfg = Debug|Win32 65 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Debug|x86.Build.0 = Debug|Win32 66 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Release|x64.ActiveCfg = Release|x64 67 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Release|x64.Build.0 = Release|x64 68 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Release|x86.ActiveCfg = Release|Win32 69 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Release|x86.Build.0 = Release|Win32 70 | {20B93A00-1A20-46D6-8841-EB60A002EB08}.Debug|x64.ActiveCfg = Debug|x64 71 | {20B93A00-1A20-46D6-8841-EB60A002EB08}.Debug|x64.Build.0 = Debug|x64 72 | 
{20B93A00-1A20-46D6-8841-EB60A002EB08}.Debug|x86.ActiveCfg = Debug|Win32 73 | {20B93A00-1A20-46D6-8841-EB60A002EB08}.Debug|x86.Build.0 = Debug|Win32 74 | {20B93A00-1A20-46D6-8841-EB60A002EB08}.Release|x64.ActiveCfg = Release|x64 75 | {20B93A00-1A20-46D6-8841-EB60A002EB08}.Release|x64.Build.0 = Release|x64 76 | {20B93A00-1A20-46D6-8841-EB60A002EB08}.Release|x86.ActiveCfg = Release|Win32 77 | {20B93A00-1A20-46D6-8841-EB60A002EB08}.Release|x86.Build.0 = Release|Win32 78 | EndGlobalSection 79 | GlobalSection(SolutionProperties) = preSolution 80 | HideSolutionNode = FALSE 81 | EndGlobalSection 82 | GlobalSection(NestedProjects) = preSolution 83 | {E68A3975-ACAB-4517-A5A4-3940BCF4CF9B} = {6677E274-9760-4F0B-B5AD-6A9254FB02CA} 84 | {35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A} = {6A139FCC-9678-4A85-A12C-8C8E74B80EA5} 85 | {D27A5344-59EA-4276-B828-B0768E8ECA82} = {6677E274-9760-4F0B-B5AD-6A9254FB02CA} 86 | {365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F} = {6A139FCC-9678-4A85-A12C-8C8E74B80EA5} 87 | {2B217AC4-8B92-4F2F-BF7A-658D56EFC193} = {6677E274-9760-4F0B-B5AD-6A9254FB02CA} 88 | {20B93A00-1A20-46D6-8841-EB60A002EB08} = {6A139FCC-9678-4A85-A12C-8C8E74B80EA5} 89 | EndGlobalSection 90 | EndGlobal 91 | -------------------------------------------------------------------------------- /vs/README.md: -------------------------------------------------------------------------------- 1 | Visual Studio 2015 Community project files. 2 | 3 | Please note that these projects will not compile as downloaded: you will have to change the additional include directories and additional library directories of each project to point to your installations of OpenCL and CUDA. --------------------------------------------------------------------------------