├── .gitattributes
├── .gitignore
├── CUDA
    ├── README.md
    ├── Tutorial 1 - Hello, CUDA
    │   └── kernel.cu
    └── Tutorial 2 - CUDA load image
    │   ├── Lenna.png
    │   └── kernel.cu
├── OpenCL
    ├── README.md
    ├── Tutorial 1 - Hello, OpenCL
    │   ├── main.cpp
    │   └── simple_add.cl
    └── Tutorial 2 - OpenCL load image
    │   ├── Lenna.png
    │   ├── cl_tutorial_2_copy.cl
    │   └── main.cpp
├── README.md
├── includes
    └── PNG.h
└── vs
    ├── CUDA
        ├── README.md
        ├── Tutorial 1 - Hello, CUDA
        │   ├── Tutorial 1 - Hello, CUDA.vcxproj
        │   └── kernel.cu
        ├── Tutorial 2 - CUDA load image
        │   ├── Lenna.png
        │   ├── Tutorial 2 - CUDA load image.vcxproj
        │   └── kernel.cu
        └── Tutorial 3 - CUDA basic image filtering
        │   ├── Lenna.png
        │   ├── Tutorial 3 - CUDA basic image filtering.vcxproj
        │   └── kernel.cu
    ├── OpenCL
        ├── README.md
        ├── Tutorial 1 - Hello, OpenCL
        │   ├── Tutorial 1 - Hello, OpenCL.vcxproj
        │   ├── Tutorial 1 - Hello, OpenCL.vcxproj.filters
        │   ├── main.cpp
        │   └── simple_add.cl
        ├── Tutorial 2 - OpenCL load image
        │   ├── Lenna.png
        │   ├── Tutorial 2 - OpenCL load image.vcxproj
        │   ├── Tutorial 2 - OpenCL load image.vcxproj.filters
        │   ├── cl_tutorial_2_copy.cl
        │   └── main.cpp
        └── Tutorial 3 - OpenCL basic image filtering
        │   ├── Lenna.png
        │   ├── Tutorial 3 - OpenCL basic image filtering.vcxproj
        │   ├── Tutorial 3 - OpenCL basic image filtering.vcxproj.filters
        │   ├── cl_tutorial_3_boxFilter.cl
        │   └── main.cpp
    ├── OpenCL_CUDA_Tutorials.sln
    └── README.md


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | ## Ignore Visual Studio temporary files, build results, and
  3 | ## files generated by popular Visual Studio add-ons.
  4 | 
  5 | # User-specific files
  6 | *.suo
  7 | *.user
  8 | *.userosscache
  9 | *.sln.docstates
 10 | 
 11 | # User-specific files (MonoDevelop/Xamarin Studio)
 12 | *.userprefs
 13 | 
 14 | # Build results
 15 | [Dd]ebug/
 16 | [Dd]ebugPublic/
 17 | [Rr]elease/
 18 | [Rr]eleases/
 19 | x64/
 20 | x86/
 21 | build/
 22 | bld/
 23 | [Bb]in/
 24 | [Oo]bj/
 25 | 
 26 | # Visual Studio 2015 cache/options directory
 27 | .vs/
 28 | # Uncomment if you have tasks that create the project's static files in wwwroot
 29 | #wwwroot/
 30 | 
 31 | # MSTest test Results
 32 | [Tt]est[Rr]esult*/
 33 | [Bb]uild[Ll]og.*
 34 | 
 35 | # NUNIT
 36 | *.VisualState.xml
 37 | TestResult.xml
 38 | 
 39 | # Build Results of an ATL Project
 40 | [Dd]ebugPS/
 41 | [Rr]eleasePS/
 42 | dlldata.c
 43 | 
 44 | # DNX
 45 | project.lock.json
 46 | artifacts/
 47 | 
 48 | *_i.c
 49 | *_p.c
 50 | *_i.h
 51 | *.ilk
 52 | *.meta
 53 | *.obj
 54 | *.pch
 55 | *.pdb
 56 | *.pgc
 57 | *.pgd
 58 | *.rsp
 59 | *.sbr
 60 | *.tlb
 61 | *.tli
 62 | *.tlh
 63 | *.tmp
 64 | *.tmp_proj
 65 | *.log
 66 | *.vspscc
 67 | *.vssscc
 68 | .builds
 69 | *.pidb
 70 | *.svclog
 71 | *.scc
 72 | 
 73 | # Chutzpah Test files
 74 | _Chutzpah*
 75 | 
 76 | # Visual C++ cache files
 77 | ipch/
 78 | *.aps
 79 | *.ncb
 80 | *.opendb
 81 | *.opensdf
 82 | *.sdf
 83 | *.cachefile
 84 | 
 85 | # Visual Studio profiler
 86 | *.psess
 87 | *.vsp
 88 | *.vspx
 89 | *.sap
 90 | 
 91 | # TFS 2012 Local Workspace
 92 | $tf/
 93 | 
 94 | # Guidance Automation Toolkit
 95 | *.gpState
 96 | 
 97 | # ReSharper is a .NET coding add-in
 98 | _ReSharper*/
 99 | *.[Rr]e[Ss]harper
100 | *.DotSettings.user
101 | 
102 | # JustCode is a .NET coding add-in
103 | .JustCode
104 | 
105 | # TeamCity is a build add-in
106 | _TeamCity*
107 | 
108 | # DotCover is a Code Coverage Tool
109 | *.dotCover
110 | 
111 | # NCrunch
112 | _NCrunch_*
113 | .*crunch*.local.xml
114 | nCrunchTemp_*
115 | 
116 | # MightyMoose
117 | *.mm.*
118 | AutoTest.Net/
119 | 
120 | # Web workbench (sass)
121 | .sass-cache/
122 | 
123 | # Installshield output folder
124 | [Ee]xpress/
125 | 
126 | # DocProject is a documentation generator add-in
127 | DocProject/buildhelp/
128 | DocProject/Help/*.HxT
129 | DocProject/Help/*.HxC
130 | DocProject/Help/*.hhc
131 | DocProject/Help/*.hhk
132 | DocProject/Help/*.hhp
133 | DocProject/Help/Html2
134 | DocProject/Help/html
135 | 
136 | # Click-Once directory
137 | publish/
138 | 
139 | # Publish Web Output
140 | *.[Pp]ublish.xml
141 | *.azurePubxml
142 | # TODO: Comment the next line if you want to checkin your web deploy settings 
143 | # but database connection strings (with potential passwords) will be unencrypted
144 | *.pubxml
145 | *.publishproj
146 | 
147 | # NuGet Packages
148 | *.nupkg
149 | # The packages folder can be ignored because of Package Restore
150 | **/packages/*
151 | # except build/, which is used as an MSBuild target.
152 | !**/packages/build/
153 | # Uncomment if necessary however generally it will be regenerated when needed
154 | #!**/packages/repositories.config
155 | 
156 | # Windows Azure Build Output
157 | csx/
158 | *.build.csdef
159 | 
160 | # Windows Azure Emulator
161 | ecf/
162 | rcf/
163 | 
164 | # Windows Store app package directory
165 | AppPackages/
166 | BundleArtifacts/
167 | 
168 | # Visual Studio cache files
169 | # files ending in .cache can be ignored
170 | *.[Cc]ache
171 | # but keep track of directories ending in .cache
172 | !*.[Cc]ache/
173 | 
174 | # Others
175 | ClientBin/
176 | [Ss]tyle[Cc]op.*
177 | ~$*
178 | *~
179 | *.dbmdl
180 | *.dbproj.schemaview
181 | *.pfx
182 | *.publishsettings
183 | node_modules/
184 | orleans.codegen.cs
185 | 
186 | # RIA/Silverlight projects
187 | Generated_Code/
188 | 
189 | # Backup & report files from converting an old project file
190 | # to a newer Visual Studio version. Backup files are not needed,
191 | # because we have git ;-)
192 | _UpgradeReport_Files/
193 | Backup*/
194 | UpgradeLog*.XML
195 | UpgradeLog*.htm
196 | 
197 | # SQL Server files
198 | *.mdf
199 | *.ldf
200 | 
201 | # Business Intelligence projects
202 | *.rdl.data
203 | *.bim.layout
204 | *.bim_*.settings
205 | 
206 | # Microsoft Fakes
207 | FakesAssemblies/
208 | 
209 | # GhostDoc plugin setting file
210 | *.GhostDoc.xml
211 | 
212 | # Node.js Tools for Visual Studio
213 | .ntvs_analysis.dat
214 | 
215 | # Visual Studio 6 build log
216 | *.plg
217 | 
218 | # Visual Studio 6 workspace options file
219 | *.opt
220 | 
221 | # Visual Studio LightSwitch build output
222 | **/*.HTMLClient/GeneratedArtifacts
223 | **/*.DesktopClient/GeneratedArtifacts
224 | **/*.DesktopClient/ModelManifest.xml
225 | **/*.Server/GeneratedArtifacts
226 | **/*.Server/ModelManifest.xml
227 | _Pvt_Extensions
228 | 
229 | # Paket dependency manager
230 | .paket/paket.exe
231 | 
232 | # FAKE - F# Make
233 | .fake/


--------------------------------------------------------------------------------
/CUDA/README.md:
--------------------------------------------------------------------------------
1 | Source for CUDA tutorials


--------------------------------------------------------------------------------
/CUDA/Tutorial 1 - Hello, CUDA/kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "cuda_runtime.h"
  2 | #include "device_launch_parameters.h"
  3 | 
  4 | #include <stdio.h>
  5 | #include <iostream>
  6 | 
  7 | __global__ void simple_add(const int *A, const int *B, int *C) { // element-wise C = A + B, one thread per element
  8 | 	const unsigned int i = threadIdx.x; // index of the element this thread handles
  9 | 	C[i] = A[i] + B[i];
 10 | }
 11 | 
 12 | // Adds two 10-element integer vectors on the GPU and prints each sum.
 13 | // Returns 0 on success; on any CUDA error it prints a message and exits with status 1.
 14 | int main(int arg, char* args[])
 15 | {
 16 | 	const int size = 10;
 17 | 	const size_t bytes = size * sizeof(int);
 18 | 	int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
 19 | 	int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 };
 20 | 	int C[size];
 21 | 
 22 | 	int *buffer_A = 0;
 23 | 	int *buffer_B = 0;
 24 | 	int *buffer_C = 0;
 25 | 	cudaError_t cudaStatus;
 26 | 
 27 | 	// Choose which GPU to run on, change this on a multi-GPU system.
 28 | 	cudaStatus = cudaSetDevice(0);
 29 | 	if (cudaStatus != cudaSuccess)
 30 | 	{
 31 | 		std::cout << "No CUDA devices found!" << std::endl;
 32 | 		exit(1);
 33 | 	}
 34 | 
 35 | 	// Print the device name only when the query succeeded; prop is not filled in otherwise.
 36 | 	cudaDeviceProp prop;
 37 | 	if (cudaGetDeviceProperties(&prop, 0) == cudaSuccess)
 38 | 	{
 39 | 		std::cout << "Using device: " << prop.name << std::endl;
 40 | 	}
 41 | 
 42 | 	// Allocate GPU buffers for three vectors (two input, one output).
 43 | 	// Each step runs only if the previous one succeeded, so cudaStatus keeps the first error.
 44 | 	cudaStatus = cudaMalloc((void**)&buffer_A, bytes);
 45 | 	if (cudaStatus == cudaSuccess) cudaStatus = cudaMalloc((void**)&buffer_B, bytes);
 46 | 	if (cudaStatus == cudaSuccess) cudaStatus = cudaMalloc((void**)&buffer_C, bytes);
 47 | 	if (cudaStatus != cudaSuccess)
 48 | 	{
 49 | 		std::cout << "Could not allocate device memory: " << cudaGetErrorString(cudaStatus) << std::endl;
 50 | 		// Buffers that were never allocated are still null, which cudaFree accepts.
 51 | 		cudaFree(buffer_A);
 52 | 		cudaFree(buffer_B);
 53 | 		cudaFree(buffer_C);
 54 | 		exit(1);
 55 | 	}
 56 | 
 57 | 	// Copy input vectors from host memory to GPU buffers.
 58 | 	cudaStatus = cudaMemcpy(buffer_A, A, bytes, cudaMemcpyHostToDevice);
 59 | 	if (cudaStatus == cudaSuccess) cudaStatus = cudaMemcpy(buffer_B, B, bytes, cudaMemcpyHostToDevice);
 60 | 	if (cudaStatus != cudaSuccess)
 61 | 	{
 62 | 		std::cout << "Could not copy host memory to buffers: " << cudaGetErrorString(cudaStatus) << std::endl;
 63 | 		cudaFree(buffer_A);
 64 | 		cudaFree(buffer_B);
 65 | 		cudaFree(buffer_C);
 66 | 		exit(1);
 67 | 	}
 68 | 
 69 | 	// Launch a kernel on the GPU with one thread for each element.
 70 | 	simple_add<<<1, size>>>(buffer_A, buffer_B, buffer_C);
 71 | 
 72 | 	// Check for any errors launching the kernel
 73 | 	cudaStatus = cudaGetLastError();
 74 | 	if (cudaStatus != cudaSuccess)
 75 | 	{
 76 | 		std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl;
 77 | 		cudaFree(buffer_A);
 78 | 		cudaFree(buffer_B);
 79 | 		cudaFree(buffer_C);
 80 | 		exit(1);
 81 | 	}
 82 | 
 83 | 	// cudaDeviceSynchronize waits for the kernel to finish, and returns
 84 | 	// any errors encountered while it was running.
 85 | 	cudaStatus = cudaDeviceSynchronize();
 86 | 	if (cudaStatus != cudaSuccess)
 87 | 	{
 88 | 		std::cout << "Could not synchronize device!" << std::endl;
 89 | 		cudaFree(buffer_A);
 90 | 		cudaFree(buffer_B);
 91 | 		cudaFree(buffer_C);
 92 | 		exit(1);
 93 | 	}
 94 | 
 95 | 	// Copy output vector from GPU buffer to host memory.
 96 | 	// The device buffers are released before the status check so both outcomes free them.
 97 | 	cudaStatus = cudaMemcpy(C, buffer_C, bytes, cudaMemcpyDeviceToHost);
 98 | 	cudaFree(buffer_A);
 99 | 	cudaFree(buffer_B);
100 | 	cudaFree(buffer_C);
101 | 
102 | 	if (cudaStatus != cudaSuccess)
103 | 	{
104 | 		std::cout << "Could not copy buffer memory to host!" << std::endl;
105 | 		exit(1);
106 | 	}
107 | 
108 | 	// Prints the array
109 | 	std::cout << "Result:" << std::endl;
110 | 	for (int i = 0; i < size; i++)
111 | 	{
112 | 		std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl;
113 | 	}
114 | 
115 | 	// cudaDeviceReset must be called before exiting in order for profiling and
116 | 	// tracing tools such as Nsight and Visual Profiler to show complete traces.
117 | 	cudaStatus = cudaDeviceReset();
118 | 	if (cudaStatus != cudaSuccess)
119 | 	{
120 | 		std::cout << "Device reset failed!" << std::endl;
121 | 		exit(1);
122 | 	}
123 | 
124 | 	return 0;
125 | }


--------------------------------------------------------------------------------
/CUDA/Tutorial 2 - CUDA load image/Lenna.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/CUDA/Tutorial 2 - CUDA load image/Lenna.png


--------------------------------------------------------------------------------
/CUDA/Tutorial 2 - CUDA load image/kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "cuda_runtime.h"
  2 | #include "device_launch_parameters.h"
  3 | 
  4 | #include <stdio.h>
  5 | #include <iostream>
  6 | 
  7 | #include "PNG.h"
  8 | 
  9 | __global__ void copy(const unsigned char* in, unsigned char* out)
 10 | {	//copies one 4-byte pixel per thread; written for a <<<width, height>>> launch
 11 | 	int x = blockIdx.x;		//column: one block per image column
 12 | 	int y = threadIdx.x;	//row: one thread per image row
 13 | 	int width = gridDim.x;	//row stride is the number of blocks (the width); blockDim.x would be the height
 14 | 	int index = (x + y * width) * 4;
 15 | 
 16 | 	//copy each color channel
 17 | 	out[index] = in[index];
 18 | 	out[index + 1] = in[index + 1];
 19 | 	out[index + 2] = in[index + 2];
 20 | 	out[index + 3] = in[index + 3];
 21 | }
 22 | 
 23 | // Loads Lenna.png, copies its pixels through the GPU with the copy kernel and
 24 | // writes the result to cuda_tutorial_2.png.
 25 | // Returns 0 on success; on any CUDA error it prints a message and exits with status 1.
 26 | int main(int arg, char* args[])
 27 | {
 28 | 	PNG inPng("Lenna.png");
 29 | 	PNG outPng;
 30 | 	outPng.Create(inPng.w, inPng.h);
 31 | 
 32 | 	//store width and height so we can use them for our output image later
 33 | 	const unsigned int w = inPng.w;
 34 | 	const unsigned int h = inPng.h;
 35 | 	//4 because there are 4 color channels R, G, B, and A
 36 | 	//computed as size_t so large images do not overflow an int
 37 | 	const size_t size = (size_t)w * h * 4;
 38 | 
 39 | 	unsigned char *in = 0;
 40 | 	unsigned char *out = 0;
 41 | 	cudaError_t cudaStatus;
 42 | 
 43 | 	// Choose which GPU to run on, change this on a multi-GPU system.
 44 | 	cudaStatus = cudaSetDevice(0);
 45 | 	if (cudaStatus != cudaSuccess)
 46 | 	{
 47 | 		std::cout << "No CUDA devices found!" << std::endl;
 48 | 		exit(1);
 49 | 	}
 50 | 
 51 | 	//prints the device the kernel will be running on (only if the query succeeded)
 52 | 	cudaDeviceProp prop;
 53 | 	if (cudaGetDeviceProperties(&prop, 0) == cudaSuccess)
 54 | 	{
 55 | 		std::cout << "Using device: " << prop.name << std::endl;
 56 | 	}
 57 | 
 58 | 	// Allocate GPU buffers for the images
 59 | 	cudaStatus = cudaMalloc((void**)&in, size);
 60 | 	if (cudaStatus == cudaSuccess) cudaStatus = cudaMalloc((void**)&out, size);
 61 | 	if (cudaStatus != cudaSuccess)
 62 | 	{
 63 | 		std::cout << "Could not allocate device memory: " << cudaGetErrorString(cudaStatus) << std::endl;
 64 | 		//a buffer that was never allocated is still null, which cudaFree accepts
 65 | 		cudaFree(in);
 66 | 		cudaFree(out);
 67 | 		exit(1);
 68 | 	}
 69 | 
 70 | 	// Copy image data from host memory to GPU buffers.
 71 | 	cudaStatus = cudaMemcpy(in, &inPng.data[0], size, cudaMemcpyHostToDevice);
 72 | 
 73 | 	//free the input image because we do not need it anymore
 74 | 	inPng.Free();
 75 | 
 76 | 	if (cudaStatus != cudaSuccess)
 77 | 	{
 78 | 		std::cout << "Could not copy host memory to buffers: " << cudaGetErrorString(cudaStatus) << std::endl;
 79 | 		cudaFree(in);
 80 | 		cudaFree(out);
 81 | 		exit(1);
 82 | 	}
 83 | 
 84 | 	// Launch the kernel with one block per column and one thread per row.
 85 | 	// A height above the device's threads-per-block limit is reported by the launch check below.
 86 | 	copy<<<w, h>>>(in, out);
 87 | 
 88 | 	// Check for any errors launching the kernel
 89 | 	cudaStatus = cudaGetLastError();
 90 | 	if (cudaStatus != cudaSuccess)
 91 | 	{
 92 | 		std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl;
 93 | 		cudaFree(in);
 94 | 		cudaFree(out);
 95 | 		exit(1);
 96 | 	}
 97 | 
 98 | 	// cudaDeviceSynchronize waits for the kernel to finish, and returns
 99 | 	// any errors encountered while it was running.
100 | 	cudaStatus = cudaDeviceSynchronize();
101 | 	if (cudaStatus != cudaSuccess)
102 | 	{
103 | 		std::cout << "Could not synchronize device!" << std::endl;
104 | 		cudaFree(in);
105 | 		cudaFree(out);
106 | 		exit(1);
107 | 	}
108 | 
109 | 	//temporary array to store the result from CUDA
110 | 	auto tmp = new unsigned char[size];
111 | 	// Copy output image from GPU buffer to host memory.
112 | 	cudaStatus = cudaMemcpy(tmp, out, size, cudaMemcpyDeviceToHost);
113 | 	cudaFree(in);
114 | 	cudaFree(out);
115 | 
116 | 	//check the copy before saving so a failed copy never writes a garbage image
117 | 	if (cudaStatus != cudaSuccess)
118 | 	{
119 | 		std::cout << "Could not copy buffer memory to host!" << std::endl;
120 | 		delete[] tmp;
121 | 		exit(1);
122 | 	}
123 | 
124 | 	//copy the data from the temp array to the png
125 | 	std::copy(&tmp[0], &tmp[size], std::back_inserter(outPng.data));
126 | 
127 | 	//write the image to file
128 | 	outPng.Save("cuda_tutorial_2.png");
129 | 	//free the image's resources since we are done with it
130 | 	outPng.Free();
131 | 
132 | 	//free the temp array
133 | 	delete[] tmp;
134 | 
135 | 	// cudaDeviceReset must be called before exiting in order for profiling and
136 | 	// tracing tools such as Nsight and Visual Profiler to show complete traces.
137 | 	cudaStatus = cudaDeviceReset();
138 | 	if (cudaStatus != cudaSuccess)
139 | 	{
140 | 		std::cout << "Device reset failed!" << std::endl;
141 | 		exit(1);
142 | 	}
143 | 
144 | 	return 0;
145 | }


--------------------------------------------------------------------------------
/OpenCL/README.md:
--------------------------------------------------------------------------------
1 | Source for OpenCL tutorials


--------------------------------------------------------------------------------
/OpenCL/Tutorial 1 - Hello, OpenCL/main.cpp:
--------------------------------------------------------------------------------
 1 | #define __CL_ENABLE_EXCEPTIONS
 2 | #include <CL/cl.hpp>
 3 | #include <fstream>
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <iterator>
 7 | 
 8 | std::string readFile(std::string fileName)
 9 | {
10 | 	//returns the whole file as one string; no error is reported if the file cannot be opened
11 | 	std::ifstream source(fileName);
12 | 	return std::string(std::istreambuf_iterator<char>(source), std::istreambuf_iterator<char>());
13 | }
14 | 
15 | int main(int arg, char* args[])
16 | {
17 | 	const int size = 10;
18 | 	int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
19 | 	int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 };
20 | 	int C[size];
21 | 
22 | 	//__CL_ENABLE_EXCEPTIONS is defined, so the C++ wrapper reports failures by throwing cl::Error instead of returning error codes
23 | 	try
24 | 	{
25 | 		//get all available platforms
26 | 		std::vector<cl::Platform> platforms;
27 | 		cl::Platform::get(&platforms);
28 | 		if (platforms.size() == 0)
29 | 		{
30 | 			std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system.
31 | 			exit(1);
32 | 		}
33 | 
34 | 		//Get the devices of the first platform and use the first one. Your GPU may live on a different platform, but for simplicity we take the first.
35 | 		std::vector<cl::Device> devices;
36 | 		platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
37 | 		if (devices.empty())
38 | 		{
39 | 			std::cout << "No OpenCL devices found" << std::endl;
40 | 			exit(1);
41 | 		}
42 | 		cl::Device device = devices[0];
43 | 		std::cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
44 | 		std::cout << "Using platform: " << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
45 | 		//Create the OpenCL context from the chosen device, then one buffer per array
46 | 		cl::Context context(device);
47 | 		cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * size);
48 | 		cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * size);
49 | 		cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * size);
50 | 
51 | 		//Load the kernel source and add it to the program sources
52 | 		std::string kernel_code = readFile("simple_add.cl");
53 | 		cl::Program::Sources sources;
54 | 		sources.push_back({ kernel_code.c_str(),kernel_code.length() });
55 | 
56 | 		//Create your OpenCL program and build it. A failed build throws, so catch it here where the build log is available.
57 | 		cl::Program program(context, sources);
58 | 		try
59 | 		{
60 | 			program.build({ device });
61 | 		}
62 | 		catch (const cl::Error&)
63 | 		{
64 | 			std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;//print the build log to find any issues with your source
65 | 			exit(1);//Quit if your program doesn't compile
66 | 		}
67 | 
68 | 		//Write our input arrays to the device; CL_TRUE makes each write block until it is done
69 | 		cl::CommandQueue queue(context, device, 0, NULL);
70 | 		queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int) * size, A);
71 | 		queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int) * size, B);
72 | 
73 | 		//Create our Kernel (the entry point of our OpenCL program) and set its arguments
74 | 		cl::Kernel simple_add(program, "simple_add");
75 | 		simple_add.setArg(0, buffer_A);
76 | 		simple_add.setArg(1, buffer_B);
77 | 		simple_add.setArg(2, buffer_C);
78 | 
79 | 		//Run the kernel over size work-items (offset, global and local ranges) and wait on its event
80 | 		cl::Event e;
81 | 		queue.enqueueNDRangeKernel(simple_add, cl::NullRange, cl::NDRange(size), cl::NullRange, NULL, &e);
82 | 		e.wait();
83 | 		//Reads the output written to our buffer into our final array
84 | 		queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int) * size, C);
85 | 	}
86 | 	catch (const cl::Error& err)
87 | 	{
88 | 		std::cout << "OpenCL error: " << err.what() << " (" << err.err() << ")" << std::endl;
89 | 		exit(1);
90 | 	}
91 | 	//prints the array
92 | 	std::cout << "Result:" << std::endl;
93 | 	for (int i = 0; i < size; i++)
94 | 	{
95 | 		std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl;
96 | 	}
97 | 
98 | 	return 0;
99 | }


--------------------------------------------------------------------------------
/OpenCL/Tutorial 1 - Hello, OpenCL/simple_add.cl:
--------------------------------------------------------------------------------
1 | //Element-wise addition: each work-item adds the pair of elements at its global id.
2 | void kernel simple_add(global const int* A, global const int* B, global int* C)
3 | {
4 | 	const size_t i = get_global_id(0);
5 | 	C[i] = A[i] + B[i];
6 | }


--------------------------------------------------------------------------------
/OpenCL/Tutorial 2 - OpenCL load image/Lenna.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/OpenCL/Tutorial 2 - OpenCL load image/Lenna.png


--------------------------------------------------------------------------------
/OpenCL/Tutorial 2 - OpenCL load image/cl_tutorial_2_copy.cl:
--------------------------------------------------------------------------------
 1 | //read_imageui with integer coordinates only returns defined values with CLK_FILTER_NEAREST, so linear filtering must not be used here
 2 | const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
 3 | 
 4 | //copies the pixel at this work-item's (x, y) global id from in to out
 5 | void kernel copy(__read_only image2d_t in, __write_only image2d_t out)
 6 | {
 7 | 	int x = get_global_id(0);
 8 | 	int y = get_global_id(1);
 9 | 	int2 pos = (int2)(x, y);
10 | 	uint4 pixel = read_imageui(in, smp, pos);
11 | 	write_imageui(out, pos, pixel);
12 | }


--------------------------------------------------------------------------------
/OpenCL/Tutorial 2 - OpenCL load image/main.cpp:
--------------------------------------------------------------------------------
  1 | //#define __CL_ENABLE_EXCEPTIONS
  2 | #include <CL/cl.hpp>
  3 | 
  4 | #include <string>
  5 | #include <sstream>
  6 | #include <ostream>
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | #include <iostream>
 10 | #include <fstream>
 11 | 
 12 | #include "PNG.h"
 13 | 
 14 | std::string readFile(std::string fileName)
 15 | {
 16 | 	//reads every character of the file into one string; a file that cannot be opened is not reported
 17 | 	std::ifstream file(fileName);
 18 | 	return std::string(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>());
 19 | }
 20 | 
 21 | int main(int arg, char* args[])
 22 | {
 23 | 	std::vector<cl::Platform> platforms;
 24 | 	cl::Platform::get(&platforms);
 25 | 	if (platforms.size() == 0)
 26 | 	{
 27 | 		std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system.
 28 | 		exit(1);
 29 | 	}
 30 | 	std::vector<cl::Device> devices;
 31 | 	platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
 32 | 	cl::Device device = devices[0];
 33 | 	std::cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
 34 | 	std::cout << "Using platform: " << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
 35 | 	cl::Context context(device);
 36 | 
 37 | 	//load our image
 38 | 	PNG inPng("Lenna.png");
 39 | 
 40 | 	//store width and height so we can use them for our output image later
 41 | 	const unsigned int w = inPng.w;
 42 | 	const unsigned int h = inPng.h;
 43 | 
 44 | 	//input image
 45 | 	const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8);
 46 | 	cl::Image2D in(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, format, w, h, 0, &inPng.data[0]);
 47 | 
 48 | 	//we are done with the image so free up its memory
 49 | 	inPng.Free();
 50 | 
 51 | 	//output image
 52 | 	cl::Image2D out(context, CL_MEM_WRITE_ONLY, format, w, h, 0, NULL);
 53 | 
 54 | 	cl::Program::Sources sources;
 55 | 	std::string kernel_code = readFile("cl_tutorial_2_copy.cl");
 56 | 	//Add your program source
 57 | 	sources.push_back({ kernel_code.c_str(),kernel_code.length() });
 58 | 
 59 | 	//Create your OpenCL program and build it.
 60 | 	cl::Program program(context, sources);
 61 | 	if (program.build({ device }) != CL_SUCCESS)
 62 | 	{
 63 | 		std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;//print the build log to find any issues with your source
 64 | 		exit(1);//Quit if your program doesn't compile
 65 | 	}
 66 | 
 67 | 	//set the kernel arguments
 68 | 	cl::Kernel kernelCopy(program, "copy");
 69 | 	kernelCopy.setArg(0, in);
 70 | 	kernelCopy.setArg(1, out);
 71 | 
 72 | 	//create command queue
 73 | 	cl::CommandQueue queue(context, device, 0, NULL);
 74 | 
 75 | 	//execute kernel
 76 | 	queue.enqueueNDRangeKernel(kernelCopy, cl::NullRange, cl::NDRange(w, h), cl::NullRange);
 77 | 
 78 | 	//wait for kernel to finish
 79 | 	queue.finish();
 80 | 
 81 | 	//start and end coordinates for reading our image (I really do not like how the c++ wrapper does this)
 82 | 	cl::size_t<3> origin;
 83 | 	cl::size_t<3> size;
 84 | 	origin[0] = 0;
 85 | 	origin[1] = 0;
 86 | 	origin[2] = 0;
 87 | 	size[0] = w;
 88 | 	size[1] = h;
 89 | 	size[2] = 1;
 90 | 
 91 | 	//output png
 92 | 	PNG outPng;
 93 | 	//create the image with the same width and height as original
 94 | 	outPng.Create(w, h);
 95 | 
 96 | 	//temporary array to store the result from opencl
 97 | 	auto tmp = new unsigned char[w * h * 4];
 98 | 	//CL_TRUE means that it waits for the entire image to be copied before continuing
 99 | 	queue.enqueueReadImage(out, CL_TRUE, origin, size, 0, 0, tmp);
100 | 
101 | 	//copy the data from the temp array to the png
102 | 	std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data));
103 | 
104 | 	//write the image to file
105 | 	outPng.Save("cl_tutorial_2.png");
106 | 	//free the iamge's resources since we are done with it
107 | 	outPng.Free();
108 | 
109 | 	//free the temp array
110 | 	delete[] tmp;
111 | 
112 | 	return 0;
113 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OpenCL-CUDA-Tutorials
2 | Sources for OpenCL and CUDA tutorials.
3 | 
4 | This is as much a learning experience for me as it will be for you, so bear with me. I try to comment my code and explain what I am doing to the best of my ability; however, I will not be getting too technical.
5 | 
6 | I am compiling and testing my code with both Microsoft Visual Studio 2015 Community and MinGW’s g++.
7 | 
8 | The point of these tutorials is not to develop parallel algorithms but to gain an understanding of how OpenCL and CUDA work.
9 | 


--------------------------------------------------------------------------------
/vs/CUDA/README.md:
--------------------------------------------------------------------------------
1 | VS Project files for CUDA tutorials


--------------------------------------------------------------------------------
/vs/CUDA/Tutorial 1 - Hello, CUDA/Tutorial 1 - Hello, CUDA.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}</ProjectGuid>
 23 |     <RootNamespace>Tutorial_1___Hello__CUDA</RootNamespace>
 24 |     <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
 25 |   </PropertyGroup>
 26 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 27 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 28 |     <ConfigurationType>Application</ConfigurationType>
 29 |     <UseDebugLibraries>true</UseDebugLibraries>
 30 |     <CharacterSet>MultiByte</CharacterSet>
 31 |     <PlatformToolset>v120</PlatformToolset>
 32 |   </PropertyGroup>
 33 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 34 |     <ConfigurationType>Application</ConfigurationType>
 35 |     <UseDebugLibraries>true</UseDebugLibraries>
 36 |     <CharacterSet>MultiByte</CharacterSet>
 37 |     <PlatformToolset>v120</PlatformToolset>
 38 |   </PropertyGroup>
 39 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 40 |     <ConfigurationType>Application</ConfigurationType>
 41 |     <UseDebugLibraries>false</UseDebugLibraries>
 42 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 43 |     <CharacterSet>MultiByte</CharacterSet>
 44 |     <PlatformToolset>v120</PlatformToolset>
 45 |   </PropertyGroup>
 46 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 47 |     <ConfigurationType>Application</ConfigurationType>
 48 |     <UseDebugLibraries>false</UseDebugLibraries>
 49 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 50 |     <CharacterSet>MultiByte</CharacterSet>
 51 |     <PlatformToolset>v120</PlatformToolset>
 52 |   </PropertyGroup>
 53 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 54 |   <ImportGroup Label="ExtensionSettings">
 55 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
 56 |   </ImportGroup>
 57 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 58 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 59 |   </ImportGroup>
 60 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 61 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 62 |   </ImportGroup>
 63 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 64 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 65 |   </ImportGroup>
 66 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 67 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 68 |   </ImportGroup>
 69 |   <PropertyGroup Label="UserMacros" />
 70 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 71 |     <LinkIncremental>true</LinkIncremental>
 72 |   </PropertyGroup>
 73 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 74 |     <LinkIncremental>true</LinkIncremental>
 75 |   </PropertyGroup>
 76 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 77 |     <ClCompile>
 78 |       <WarningLevel>Level3</WarningLevel>
 79 |       <Optimization>Disabled</Optimization>
 80 |       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 81 |     </ClCompile>
 82 |     <Link>
 83 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 84 |       <SubSystem>Console</SubSystem>
 85 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
 86 |     </Link>
 87 |     <PostBuildEvent>
 88 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
 89 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
 90 |     </PostBuildEvent>
 91 |     <!-- The shared headers live in the repository's includes folder; the path is relative to the project directory so the build works from any checkout location. -->
 92 |     <CudaCompile>
 93 |       <Include>$(ProjectDir)..\..\..\includes;%(Include)</Include>
 94 |     </CudaCompile>
 95 |   </ItemDefinitionGroup>
 96 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 97 |     <ClCompile>
 98 |       <WarningLevel>Level3</WarningLevel>
 99 |       <Optimization>Disabled</Optimization>
100 |       <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
101 |     </ClCompile>
102 |     <Link>
103 |       <GenerateDebugInformation>true</GenerateDebugInformation>
104 |       <SubSystem>Console</SubSystem>
105 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
106 |     </Link>
107 |     <PostBuildEvent>
108 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
109 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
110 |     </PostBuildEvent>
111 |     <CudaCompile>
112 |       <TargetMachinePlatform>64</TargetMachinePlatform>
113 |       <Include>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(Include)</Include>
114 |     </CudaCompile>
115 |   </ItemDefinitionGroup>
116 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
117 |     <ClCompile>
118 |       <WarningLevel>Level3</WarningLevel>
119 |       <Optimization>MaxSpeed</Optimization>
120 |       <FunctionLevelLinking>true</FunctionLevelLinking>
121 |       <IntrinsicFunctions>true</IntrinsicFunctions>
122 |       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
123 |     </ClCompile>
124 |     <Link>
125 |       <GenerateDebugInformation>true</GenerateDebugInformation>
126 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
127 |       <OptimizeReferences>true</OptimizeReferences>
128 |       <SubSystem>Console</SubSystem>
129 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
130 |     </Link>
131 |     <PostBuildEvent>
132 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
133 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
134 |     </PostBuildEvent>
135 |     <CudaCompile />
136 |     <CudaCompile>
137 |       <Include>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(Include)</Include>
138 |     </CudaCompile>
139 |   </ItemDefinitionGroup>
140 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
141 |     <ClCompile>
142 |       <WarningLevel>Level3</WarningLevel>
143 |       <Optimization>MaxSpeed</Optimization>
144 |       <FunctionLevelLinking>true</FunctionLevelLinking>
145 |       <IntrinsicFunctions>true</IntrinsicFunctions>
146 |       <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
147 |     </ClCompile>
148 |     <Link>
149 |       <GenerateDebugInformation>true</GenerateDebugInformation>
150 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
151 |       <OptimizeReferences>true</OptimizeReferences>
152 |       <SubSystem>Console</SubSystem>
153 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
154 |     </Link>
155 |     <PostBuildEvent>
156 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
157 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
158 |     </PostBuildEvent>
159 |     <CudaCompile>
160 |       <TargetMachinePlatform>64</TargetMachinePlatform>
161 |       <Include>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(Include)</Include>
162 |     </CudaCompile>
163 |   </ItemDefinitionGroup>
164 |   <ItemGroup>
165 |     <CudaCompile Include="kernel.cu" />
166 |   </ItemGroup>
167 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
168 |   <ImportGroup Label="ExtensionTargets">
169 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
170 |   </ImportGroup>
171 | </Project>


--------------------------------------------------------------------------------
/vs/CUDA/Tutorial 1 - Hello, CUDA/kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "cuda_runtime.h"
  2 | #include "device_launch_parameters.h"
  3 | 
  4 | #include <stdio.h>
  5 | #include <iostream>
  6 | 
  7 | __global__ void simple_add(const int *A, const int *B, int *C)
  8 | {
  9 | 	C[threadIdx.x] = A[threadIdx.x] + B[threadIdx.x];
 10 | }
 11 | 
 12 | int main(int arg, char* args[])
 13 | {
 14 | 	const int size = 10;
 15 | 	int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
 16 | 	int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 };
 17 | 	int C[size];
 18 | 
 19 | 	int *buffer_A = 0;
 20 | 	int *buffer_B = 0;
 21 | 	int *buffer_C = 0;
 22 | 	cudaError_t cudaStatus;
 23 | 
 24 |     // Choose which GPU to run on, change this on a multi-GPU system.
 25 |     cudaStatus = cudaSetDevice(0);
 26 |     if (cudaStatus != cudaSuccess)
 27 | 	{
 28 | 		std::cout << "No CUDA devices found!" << std::endl;
 29 | 		exit(1);
 30 |     }
 31 | 
 32 | 	cudaDeviceProp prop;
 33 | 	cudaGetDeviceProperties(&prop, 0);
 34 | 
 35 | 	std::cout << "Using device: " << prop.name << std::endl;
 36 | 	
 37 |     // Allocate GPU buffers for three vectors (two input, one output).
 38 | 	cudaMalloc((void**)&buffer_A, size * sizeof(int));
 39 | 	cudaMalloc((void**)&buffer_B, size * sizeof(int));
 40 | 	cudaMalloc((void**)&buffer_C, size * sizeof(int));
 41 | 	
 42 |     // Copy input vectors from host memory to GPU buffers.
 43 |     cudaMemcpy(buffer_A, A, size * sizeof(int), cudaMemcpyHostToDevice);
 44 |     cudaMemcpy(buffer_B, B, size * sizeof(int), cudaMemcpyHostToDevice);
 45 | 
 46 | 	// Launch a kernel on the GPU with one thread for each element.
 47 |     simple_add<<<1, size>>>(buffer_A, buffer_B, buffer_C);
 48 | 
 49 | 	// Check for any errors launching the kernel
 50 |     cudaStatus = cudaGetLastError();
 51 |     if (cudaStatus != cudaSuccess)
 52 | 	{
 53 | 		std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl;
 54 | 		cudaFree(buffer_A);
 55 | 		cudaFree(buffer_B);
 56 | 		cudaFree(buffer_C);
 57 | 		exit(1);
 58 |     }
 59 | 	
 60 |     // cudaDeviceSynchronize waits for the kernel to finish, and returns
 61 |     // any errors encountered during the launch.
 62 |     cudaStatus = cudaDeviceSynchronize();
 63 |     if (cudaStatus != cudaSuccess)
 64 | 	{
 65 | 		std::cout << "Could not synchronize device!" << std::endl;
 66 | 		cudaFree(buffer_A);
 67 | 		cudaFree(buffer_B);
 68 | 		cudaFree(buffer_C);
 69 | 		exit(1);
 70 |     }
 71 | 	
 72 |     // Copy output vector from GPU buffer to host memory.
 73 |     cudaStatus = cudaMemcpy(C, buffer_C, size * sizeof(int), cudaMemcpyDeviceToHost);
 74 | 	cudaFree(buffer_A);
 75 | 	cudaFree(buffer_B);
 76 | 	cudaFree(buffer_C);
 77 | 
 78 | 	if(cudaStatus != cudaSuccess)
 79 | 	{
 80 | 		std::cout << "Could not copy buffer memory to host!" << std::endl;
 81 | 		exit(1);
 82 | 	}
 83 | 
 84 |     //Prints the array
 85 | 	std::cout << "Result:" << std::endl;
 86 | 	for (int i = 0; i < size; i++)
 87 | 	{
 88 | 		std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl;
 89 | 	}
 90 | 
 91 |     // cudaDeviceReset must be called before exiting in order for profiling and
 92 |     // tracing tools such as Nsight and Visual Profiler to show complete traces.
 93 |     cudaStatus = cudaDeviceReset();
 94 |     if (cudaStatus != cudaSuccess)
 95 | 	{
 96 | 		std::cout << "Device reset failed!" << std::endl;
 97 |         exit(1);
 98 |     }
 99 | 
100 |     return 0;
101 | }


--------------------------------------------------------------------------------
/vs/CUDA/Tutorial 2 - CUDA load image/Lenna.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/vs/CUDA/Tutorial 2 - CUDA load image/Lenna.png


--------------------------------------------------------------------------------
/vs/CUDA/Tutorial 2 - CUDA load image/Tutorial 2 - CUDA load image.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}</ProjectGuid>
 23 |     <RootNamespace>Tutorial_2___CUDA_load_image</RootNamespace>
 24 |   </PropertyGroup>
 25 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 26 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 27 |     <ConfigurationType>Application</ConfigurationType>
 28 |     <UseDebugLibraries>true</UseDebugLibraries>
 29 |     <CharacterSet>MultiByte</CharacterSet>
 30 |     <PlatformToolset>v120</PlatformToolset>
 31 |   </PropertyGroup>
 32 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 33 |     <ConfigurationType>Application</ConfigurationType>
 34 |     <UseDebugLibraries>true</UseDebugLibraries>
 35 |     <CharacterSet>MultiByte</CharacterSet>
 36 |     <PlatformToolset>v120</PlatformToolset>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 39 |     <ConfigurationType>Application</ConfigurationType>
 40 |     <UseDebugLibraries>false</UseDebugLibraries>
 41 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 42 |     <CharacterSet>MultiByte</CharacterSet>
 43 |     <PlatformToolset>v120</PlatformToolset>
 44 |   </PropertyGroup>
 45 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 46 |     <ConfigurationType>Application</ConfigurationType>
 47 |     <UseDebugLibraries>false</UseDebugLibraries>
 48 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 49 |     <CharacterSet>MultiByte</CharacterSet>
 50 |     <PlatformToolset>v120</PlatformToolset>
 51 |   </PropertyGroup>
 52 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 53 |   <ImportGroup Label="ExtensionSettings">
 54 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
 55 |   </ImportGroup>
 56 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 57 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 58 |   </ImportGroup>
 59 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 60 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 61 |   </ImportGroup>
 62 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 63 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 64 |   </ImportGroup>
 65 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 66 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 67 |   </ImportGroup>
 68 |   <PropertyGroup Label="UserMacros" />
 69 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 70 |     <LinkIncremental>true</LinkIncremental>
 71 |   </PropertyGroup>
 72 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 73 |     <LinkIncremental>true</LinkIncremental>
 74 |   </PropertyGroup>
 75 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 76 |     <ClCompile>
 77 |       <WarningLevel>Level3</WarningLevel>
 78 |       <Optimization>Disabled</Optimization>
 79 |       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 80 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 81 |     </ClCompile>
 82 |     <Link>
 83 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 84 |       <SubSystem>Console</SubSystem>
 85 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
 86 |     </Link>
 87 |     <PostBuildEvent>
 88 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
 89 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
 90 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
 91 |     </PostBuildEvent>
 92 |     <CudaCompile />
 93 |   </ItemDefinitionGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 95 |     <ClCompile>
 96 |       <WarningLevel>Level3</WarningLevel>
 97 |       <Optimization>Disabled</Optimization>
 98 |       <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 99 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
100 |     </ClCompile>
101 |     <Link>
102 |       <GenerateDebugInformation>true</GenerateDebugInformation>
103 |       <SubSystem>Console</SubSystem>
104 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
105 |     </Link>
106 |     <PostBuildEvent>
107 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
108 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
109 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
110 |     </PostBuildEvent>
111 |     <CudaCompile>
112 |       <TargetMachinePlatform>64</TargetMachinePlatform>
113 |     </CudaCompile>
114 |   </ItemDefinitionGroup>
115 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
116 |     <ClCompile>
117 |       <WarningLevel>Level3</WarningLevel>
118 |       <Optimization>MaxSpeed</Optimization>
119 |       <FunctionLevelLinking>true</FunctionLevelLinking>
120 |       <IntrinsicFunctions>true</IntrinsicFunctions>
121 |       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
122 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
123 |     </ClCompile>
124 |     <Link>
125 |       <GenerateDebugInformation>true</GenerateDebugInformation>
126 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
127 |       <OptimizeReferences>true</OptimizeReferences>
128 |       <SubSystem>Console</SubSystem>
129 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
130 |     </Link>
131 |     <PostBuildEvent>
132 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
133 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
134 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
135 |     </PostBuildEvent>
136 |     <CudaCompile />
137 |   </ItemDefinitionGroup>
138 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
139 |     <ClCompile>
140 |       <WarningLevel>Level3</WarningLevel>
141 |       <Optimization>MaxSpeed</Optimization>
142 |       <FunctionLevelLinking>true</FunctionLevelLinking>
143 |       <IntrinsicFunctions>true</IntrinsicFunctions>
144 |       <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
145 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
146 |     </ClCompile>
147 |     <Link>
148 |       <GenerateDebugInformation>true</GenerateDebugInformation>
149 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
150 |       <OptimizeReferences>true</OptimizeReferences>
151 |       <SubSystem>Console</SubSystem>
152 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
153 |     </Link>
154 |     <PostBuildEvent>
155 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
156 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
157 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
158 |     </PostBuildEvent>
159 |     <CudaCompile>
160 |       <TargetMachinePlatform>64</TargetMachinePlatform>
161 |     </CudaCompile>
162 |   </ItemDefinitionGroup>
163 |   <ItemGroup>
164 |     <CudaCompile Include="kernel.cu" />
165 |   </ItemGroup>
166 |   <ItemGroup>
167 |     <Image Include="Lenna.png" />
168 |   </ItemGroup>
169 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
170 |   <ImportGroup Label="ExtensionTargets">
171 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
172 |   </ImportGroup>
173 | </Project>


--------------------------------------------------------------------------------
/vs/CUDA/Tutorial 2 - CUDA load image/kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "cuda_runtime.h"
  2 | #include "device_launch_parameters.h"
  3 | 
  4 | #include <stdio.h>
  5 | #include <iostream>
  6 | 
  7 | #include "PNG.h"
  8 | 
  9 | __global__ void copy(const unsigned char* in, unsigned char* out)
 10 | {
 11 | 	int x = blockIdx.x;
 12 | 	int y = threadIdx.x;
 13 | 	int width = blockDim.x;
 14 | 	int index = (x + y * width) * 4;
 15 | 
 16 | 	//copy each color channel
 17 | 	out[index] = in[index];
 18 | 	out[index + 1] = in[index + 1];
 19 | 	out[index + 2] = in[index + 2];
 20 | 	out[index + 3] = in[index + 3];
 21 | }
 22 | 
 23 | int main(int arg, char* args[])
 24 | {
 25 | 	PNG inPng("Lenna.png");
 26 | 	PNG outPng;
 27 | 	outPng.Create(inPng.w, inPng.h);
 28 | 
 29 | 	//store width and height so we can use them for our output image later
 30 | 	const unsigned int w = inPng.w;
 31 | 	const unsigned int h = inPng.h;
 32 | 	//4 because there are 4 color channels R, G, B, and A
 33 | 	int size = w * h * 4;
 34 | 
 35 | 	unsigned char *in = 0;
 36 | 	unsigned char *out = 0;
 37 | 	cudaError_t cudaStatus;
 38 | 
 39 |     // Choose which GPU to run on, change this on a multi-GPU system.
 40 |     cudaStatus = cudaSetDevice(0);
 41 |     if (cudaStatus != cudaSuccess)
 42 | 	{
 43 | 		std::cout << "No CUDA devices found!" << std::endl;
 44 | 		exit(1);
 45 |     }
 46 | 
 47 | 	//prints the device the kernel will be running on
 48 | 	cudaDeviceProp prop;
 49 | 	cudaGetDeviceProperties(&prop, 0);
 50 | 	std::cout << "Using device: " << prop.name << std::endl;
 51 | 	
 52 |     // Allocate GPU buffers for the images
 53 | 	cudaMalloc((void**)&in, size * sizeof(unsigned char));
 54 | 	cudaMalloc((void**)&out, size * sizeof(unsigned char));
 55 | 	
 56 |     // Copy image data from host memory to GPU buffers.
 57 |     cudaMemcpy(in, &inPng.data[0], size * sizeof(unsigned char), cudaMemcpyHostToDevice);
 58 | 
 59 | 	//free the input image because we do not need it anymore
 60 | 	inPng.Free();
 61 | 
 62 | 	// Launch a kernel on the GPU with one thread for each element.
 63 | 	copy<<<w, h>>>(in, out);
 64 | 
 65 | 	// Check for any errors launching the kernel
 66 |     cudaStatus = cudaGetLastError();
 67 |     if (cudaStatus != cudaSuccess)
 68 | 	{
 69 | 		std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl;
 70 | 		cudaFree(in);
 71 | 		cudaFree(out);
 72 | 		exit(1);
 73 |     }
 74 | 	
 75 |     // cudaDeviceSynchronize waits for the kernel to finish, and returns
 76 |     // any errors encountered during the launch.
 77 |     cudaStatus = cudaDeviceSynchronize();
 78 |     if (cudaStatus != cudaSuccess)
 79 | 	{
 80 | 		std::cout << "Could not synchronize device!" << std::endl;
 81 | 		cudaFree(in);
 82 | 		cudaFree(out);
 83 | 		exit(1);
 84 |     }
 85 | 
 86 | 	//temporary array to store the result from opencl
 87 | 	auto tmp = new unsigned char[w * h * 4];
 88 |     // Copy output vector from GPU buffer to host memory.
 89 |     cudaStatus = cudaMemcpy(tmp, out, size * sizeof(unsigned char), cudaMemcpyDeviceToHost);
 90 | 	cudaFree(in);
 91 | 	cudaFree(out);
 92 | 
 93 | 	//copy the data from the temp array to the png
 94 | 	std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data));
 95 | 
 96 | 	//write the image to file
 97 | 	outPng.Save("cuda_tutorial_2.png");
 98 | 	//free the iamge's resources since we are done with it
 99 | 	outPng.Free();
100 | 
101 | 	//free the temp array
102 | 	delete[] tmp;
103 | 
104 | 	if(cudaStatus != cudaSuccess)
105 | 	{
106 | 		std::cout << "Could not copy buffer memory to host!" << std::endl;
107 | 		exit(1);
108 | 	}
109 | 
110 |     // cudaDeviceReset must be called before exiting in order for profiling and
111 |     // tracing tools such as Nsight and Visual Profiler to show complete traces.
112 |     cudaStatus = cudaDeviceReset();
113 |     if (cudaStatus != cudaSuccess)
114 | 	{
115 | 		std::cout << "Device reset failed!" << std::endl;
116 |         exit(1);
117 |     }
118 | 
119 |     return 0;
120 | }


--------------------------------------------------------------------------------
/vs/CUDA/Tutorial 3 - CUDA basic image filtering/Lenna.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/vs/CUDA/Tutorial 3 - CUDA basic image filtering/Lenna.png


--------------------------------------------------------------------------------
/vs/CUDA/Tutorial 3 - CUDA basic image filtering/Tutorial 3 - CUDA basic image filtering.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{20B93A00-1A20-46D6-8841-EB60A002EB08}</ProjectGuid>
 23 |     <RootNamespace>Tutorial_3___CUDA_basic_image_filtering</RootNamespace>
 24 |   </PropertyGroup>
 25 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 26 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 27 |     <ConfigurationType>Application</ConfigurationType>
 28 |     <UseDebugLibraries>true</UseDebugLibraries>
 29 |     <CharacterSet>MultiByte</CharacterSet>
 30 |     <PlatformToolset>v120</PlatformToolset>
 31 |   </PropertyGroup>
 32 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 33 |     <ConfigurationType>Application</ConfigurationType>
 34 |     <UseDebugLibraries>true</UseDebugLibraries>
 35 |     <CharacterSet>MultiByte</CharacterSet>
 36 |     <PlatformToolset>v120</PlatformToolset>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 39 |     <ConfigurationType>Application</ConfigurationType>
 40 |     <UseDebugLibraries>false</UseDebugLibraries>
 41 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 42 |     <CharacterSet>MultiByte</CharacterSet>
 43 |     <PlatformToolset>v120</PlatformToolset>
 44 |   </PropertyGroup>
 45 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 46 |     <ConfigurationType>Application</ConfigurationType>
 47 |     <UseDebugLibraries>false</UseDebugLibraries>
 48 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 49 |     <CharacterSet>MultiByte</CharacterSet>
 50 |     <PlatformToolset>v120</PlatformToolset>
 51 |   </PropertyGroup>
 52 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 53 |   <ImportGroup Label="ExtensionSettings">
 54 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
 55 |   </ImportGroup>
 56 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 57 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 58 |   </ImportGroup>
 59 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 60 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 61 |   </ImportGroup>
 62 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 63 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 64 |   </ImportGroup>
 65 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 66 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 67 |   </ImportGroup>
 68 |   <PropertyGroup Label="UserMacros" />
 69 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 70 |     <LinkIncremental>true</LinkIncremental>
 71 |   </PropertyGroup>
 72 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 73 |     <LinkIncremental>true</LinkIncremental>
 74 |   </PropertyGroup>
 75 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 76 |     <ClCompile>
 77 |       <WarningLevel>Level3</WarningLevel>
 78 |       <Optimization>Disabled</Optimization>
 79 |       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 80 |     </ClCompile>
 81 |     <Link>
 82 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 83 |       <SubSystem>Console</SubSystem>
 84 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
 85 |     </Link>
 86 |     <PostBuildEvent>
 87 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
 88 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
 89 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
 90 |     </PostBuildEvent>
 91 |     <CudaCompile>
 92 |       <Include>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes</Include>
 93 |     </CudaCompile>
 94 |   </ItemDefinitionGroup>
 95 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 96 |     <ClCompile>
 97 |       <WarningLevel>Level3</WarningLevel>
 98 |       <Optimization>Disabled</Optimization>
 99 |       <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
100 |     </ClCompile>
101 |     <Link>
102 |       <GenerateDebugInformation>true</GenerateDebugInformation>
103 |       <SubSystem>Console</SubSystem>
104 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
105 |     </Link>
106 |     <PostBuildEvent>
107 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
108 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
109 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
110 |     </PostBuildEvent>
111 |     <CudaCompile>
112 |       <TargetMachinePlatform>64</TargetMachinePlatform>
113 |       <Include>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes</Include>
114 |     </CudaCompile>
115 |   </ItemDefinitionGroup>
116 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
117 |     <ClCompile>
118 |       <WarningLevel>Level3</WarningLevel>
119 |       <Optimization>MaxSpeed</Optimization>
120 |       <FunctionLevelLinking>true</FunctionLevelLinking>
121 |       <IntrinsicFunctions>true</IntrinsicFunctions>
122 |       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
123 |     </ClCompile>
124 |     <Link>
125 |       <GenerateDebugInformation>true</GenerateDebugInformation>
126 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
127 |       <OptimizeReferences>true</OptimizeReferences>
128 |       <SubSystem>Console</SubSystem>
129 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
130 |     </Link>
131 |     <PostBuildEvent>
132 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
133 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
134 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
135 |     </PostBuildEvent>
136 |     <CudaCompile> <!-- include dir is relative to this project so the build works from any checkout location -->
137 |       <Include>$(ProjectDir)..\..\..\includes</Include>
138 |     </CudaCompile>
139 |   </ItemDefinitionGroup>
140 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
141 |     <ClCompile>
142 |       <WarningLevel>Level3</WarningLevel>
143 |       <Optimization>MaxSpeed</Optimization>
144 |       <FunctionLevelLinking>true</FunctionLevelLinking>
145 |       <IntrinsicFunctions>true</IntrinsicFunctions>
146 |       <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
147 |     </ClCompile>
148 |     <Link>
149 |       <GenerateDebugInformation>true</GenerateDebugInformation>
150 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
151 |       <OptimizeReferences>true</OptimizeReferences>
152 |       <SubSystem>Console</SubSystem>
153 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
154 |     </Link>
155 |     <PostBuildEvent>
156 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
157 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
158 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
159 |     </PostBuildEvent>
160 |     <CudaCompile> <!-- include dir is relative to this project so the build works from any checkout location -->
161 |       <TargetMachinePlatform>64</TargetMachinePlatform>
162 |       <Include>$(ProjectDir)..\..\..\includes</Include>
163 |     </CudaCompile>
164 |   </ItemDefinitionGroup>
165 |   <ItemGroup>
166 |     <CudaCompile Include="kernel.cu" />
167 |   </ItemGroup>
168 |   <ItemGroup>
169 |     <Image Include="Lenna.png" />
170 |   </ItemGroup>
171 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
172 |   <ImportGroup Label="ExtensionTargets">
173 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
174 |   </ImportGroup>
175 | </Project>


--------------------------------------------------------------------------------
/vs/CUDA/Tutorial 3 - CUDA basic image filtering/kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "cuda_runtime.h"
  2 | #include "device_launch_parameters.h"
  3 | 
  4 | #include <stdio.h>
  5 | #include <iostream>
  6 | 
  7 | #include "PNG.h"
  8 | 
  9 | // Box (mean) filter over an interleaved image with 4 one-byte channels per pixel.
 10 | //
 11 | // Each thread produces one output pixel: the per-channel average of every input
 12 | // pixel inside the (2 * halfBoxWidth + 1) x (2 * halfBoxHeight + 1) window centred
 13 | // on it. Window positions that fall outside the image are skipped, so border
 14 | // pixels are averaged over fewer samples.
 15 | //
 16 | //   in            - source pixels, imageWidth * imageHeight * 4 bytes
 17 | //   out           - destination pixels, same layout as `in`
 18 | //   imageWidth    - image width in pixels
 19 | //   imageHeight   - image height in pixels
 20 | //   halfBoxWidth  - how far the window extends left/right of the centre pixel
 21 | //   halfBoxHeight - how far the window extends above/below the centre pixel
 22 | //
 23 | // The pixel coordinate is built from the block AND thread indices, so the kernel
 24 | // gives the same result for any 2D launch shape that covers the image, including
 25 | // a launch with a single thread per block (blockDim == 1, threadIdx == 0).
 26 | __global__ void boxFilter(const unsigned char* in, unsigned char* out, const int imageWidth, const int imageHeight, const int halfBoxWidth, const int halfBoxHeight)
 27 | {
 28 | 	const int x = blockIdx.x * blockDim.x + threadIdx.x;
 29 | 	const int y = blockIdx.y * blockDim.y + threadIdx.y;
 30 | 
 31 | 	// threads that land outside the image (partially filled edge blocks) have nothing to write
 32 | 	if (x >= imageWidth || y >= imageHeight)
 33 | 	{
 34 | 		return;
 35 | 	}
 36 | 
 37 | 	// byte offset of this thread's pixel; 4 bytes per pixel
 38 | 	const int index = (x + y * imageWidth) * 4;
 39 | 
 40 | 	// number of window positions that were inside the image
 41 | 	int count = 0;
 42 | 	// per-channel running sums
 43 | 	unsigned int total[4] = { 0, 0, 0, 0 };
 44 | 
 45 | 	for (int i = -halfBoxWidth; i <= halfBoxWidth; i++)
 46 | 	{
 47 | 		for (int j = -halfBoxHeight; j <= halfBoxHeight; j++)
 48 | 		{
 49 | 			const int cx = x + i;
 50 | 			const int cy = y + j;
 51 | 			if (cx >= 0 && cy >= 0 && cx < imageWidth && cy < imageHeight)
 52 | 			{
 53 | 				const int adjIndex = (cx + cy * imageWidth) * 4;
 54 | 				for (int c = 0; c < 4; c++)
 55 | 				{
 56 | 					total[c] += static_cast<unsigned int>(in[adjIndex + c]);
 57 | 				}
 58 | 				count++;
 59 | 			}
 60 | 		}
 61 | 	}
 62 | 
 63 | 	for (int c = 0; c < 4; c++)
 64 | 	{
 65 | 		// count stays 0 only when a half-size is negative (the loops never run);
 66 | 		// pass the source pixel through instead of dividing by zero
 67 | 		out[index + c] = (count > 0) ? static_cast<unsigned char>(total[c] / count) : in[index + c];
 68 | 	}
 69 | }
 43 | 
 44 | // Blurs "Lenna.png" with the boxFilter kernel and saves the result as "cuda_tutorial_3.png".
 45 | //
 46 | // Usage: <program> [halfBoxWidth halfBoxHeight]
 47 | // Both values default to 10 and are only read when both are given on the command line.
 48 | // Returns 0 on success; on any failure a message is printed and the process exits with status 1.
 49 | int main(int arg, char* args[])
 50 | {
 51 | 	int filterWidth = 10;
 52 | 	int filterHeight = 10;
 53 | 	if (arg > 2)
 54 | 	{
 55 | 		filterWidth = std::atoi(args[1]);
 56 | 		filterHeight = std::atoi(args[2]);
 57 | 	}
 58 | 	//a negative half-size would give the kernel an empty window to average over
 59 | 	if (filterWidth < 0 || filterHeight < 0)
 60 | 	{
 61 | 		std::cout << "Filter half-width and half-height must not be negative!" << std::endl;
 62 | 		exit(1);
 63 | 	}
 64 | 
 65 | 	PNG inPng("Lenna.png");
 66 | 
 67 | 	//store width and height so we can use them for our output image later
 68 | 	const unsigned int w = inPng.w;
 69 | 	const unsigned int h = inPng.h;
 70 | 	//an image without pixels cannot be uploaded or used as a launch grid
 71 | 	if (w == 0 || h == 0)
 72 | 	{
 73 | 		std::cout << "Input image is empty!" << std::endl;
 74 | 		exit(1);
 75 | 	}
 76 | 	//4 because there are 4 color channels R, G, B, and A
 77 | 	//size_t keeps the byte count from overflowing an int on large images
 78 | 	const size_t size = static_cast<size_t>(w) * h * 4;
 79 | 
 80 | 	PNG outPng;
 81 | 	outPng.Create(inPng.w, inPng.h);
 82 | 
 83 | 	unsigned char *in = 0;
 84 | 	unsigned char *out = 0;
 85 | 	cudaError_t cudaStatus;
 86 | 
 87 | 	// Choose which GPU to run on, change this on a multi-GPU system.
 88 | 	cudaStatus = cudaSetDevice(0);
 89 | 	if (cudaStatus != cudaSuccess)
 90 | 	{
 91 | 		std::cout << "No CUDA devices found!" << std::endl;
 92 | 		exit(1);
 93 | 	}
 94 | 
 95 | 	//prints the device the kernel will be running on
 96 | 	cudaDeviceProp prop;
 97 | 	if (cudaGetDeviceProperties(&prop, 0) == cudaSuccess)
 98 | 	{
 99 | 		std::cout << "Using device: " << prop.name << std::endl;
100 | 	}
101 | 
102 | 	// Allocate GPU buffers for the images
103 | 	cudaStatus = cudaMalloc((void**)&in, size);
104 | 	if (cudaStatus == cudaSuccess)
105 | 	{
106 | 		cudaStatus = cudaMalloc((void**)&out, size);
107 | 	}
108 | 	if (cudaStatus != cudaSuccess)
109 | 	{
110 | 		std::cout << "Could not allocate device memory: " << cudaGetErrorString(cudaStatus) << std::endl;
111 | 		//cudaFree ignores null pointers, so this is safe whichever allocation failed
112 | 		cudaFree(in);
113 | 		cudaFree(out);
114 | 		exit(1);
115 | 	}
116 | 
117 | 	// Copy image data from host memory to GPU buffers.
118 | 	cudaStatus = cudaMemcpy(in, &inPng.data[0], size, cudaMemcpyHostToDevice);
119 | 
120 | 	//free the input image because we do not need it anymore
121 | 	inPng.Free();
122 | 
123 | 	if (cudaStatus != cudaSuccess)
124 | 	{
125 | 		std::cout << "Could not copy image to device: " << cudaGetErrorString(cudaStatus) << std::endl;
126 | 		cudaFree(in);
127 | 		cudaFree(out);
128 | 		exit(1);
129 | 	}
130 | 
131 | 	// Launch the kernel with one single-thread block per pixel (a w x h grid of blocks).
132 | 	dim3 grid_size(w, h);
133 | 	boxFilter<<<grid_size, 1>>>(in, out, w, h, filterWidth, filterHeight);
134 | 
135 | 	// Check for any errors launching the kernel
136 | 	cudaStatus = cudaGetLastError();
137 | 	if (cudaStatus != cudaSuccess)
138 | 	{
139 | 		std::cout << "Kernel launch failed: " << cudaGetErrorString(cudaStatus) << std::endl;
140 | 		cudaFree(in);
141 | 		cudaFree(out);
142 | 		exit(1);
143 | 	}
144 | 
145 | 	// cudaDeviceSynchronize waits for the kernel to finish, and returns
146 | 	// any errors encountered during the launch.
147 | 	cudaStatus = cudaDeviceSynchronize();
148 | 	if (cudaStatus != cudaSuccess)
149 | 	{
150 | 		std::cout << "Could not synchronize device!" << std::endl;
151 | 		cudaFree(in);
152 | 		cudaFree(out);
153 | 		exit(1);
154 | 	}
155 | 
156 | 	//temporary array to store the result from the GPU
157 | 	auto tmp = new unsigned char[size];
158 | 	// Copy output image from GPU buffer to host memory.
159 | 	cudaStatus = cudaMemcpy(tmp, out, size, cudaMemcpyDeviceToHost);
160 | 	cudaFree(in);
161 | 	cudaFree(out);
162 | 
163 | 	//stop here on a failed copy so that no garbage image is written to disk
164 | 	if (cudaStatus != cudaSuccess)
165 | 	{
166 | 		std::cout << "Could not copy buffer memory to host!" << std::endl;
167 | 		delete[] tmp;
168 | 		exit(1);
169 | 	}
170 | 
171 | 	//copy the data from the temp array to the png
172 | 	std::copy(tmp, tmp + size, std::back_inserter(outPng.data));
173 | 
174 | 	//write the image to file
175 | 	outPng.Save("cuda_tutorial_3.png");
176 | 	//free the image's resources since we are done with it
177 | 	outPng.Free();
178 | 
179 | 	//free the temp array
180 | 	delete[] tmp;
181 | 
182 | 	// cudaDeviceReset must be called before exiting in order for profiling and
183 | 	// tracing tools such as Nsight and Visual Profiler to show complete traces.
184 | 	cudaStatus = cudaDeviceReset();
185 | 	if (cudaStatus != cudaSuccess)
186 | 	{
187 | 		std::cout << "Device reset failed!" << std::endl;
188 | 		exit(1);
189 | 	}
190 | 
191 | 	return 0;
192 | }


--------------------------------------------------------------------------------
/vs/OpenCL/README.md:
--------------------------------------------------------------------------------
1 | VS Project files for OpenCL tutorials


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 1 - Hello, OpenCL/Tutorial 1 - Hello, OpenCL.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|Win32">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>Win32</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Debug|x64">
 13 |       <Configuration>Debug</Configuration>
 14 |       <Platform>x64</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}</ProjectGuid>
 23 |     <RootNamespace>Tutorial1HelloOpenCL</RootNamespace>
 24 |     <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
 25 |   </PropertyGroup>
 26 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 27 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 28 |     <ConfigurationType>Application</ConfigurationType>
 29 |     <UseDebugLibraries>true</UseDebugLibraries>
 30 |     <PlatformToolset>v140</PlatformToolset>
 31 |     <CharacterSet>MultiByte</CharacterSet>
 32 |   </PropertyGroup>
 33 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 34 |     <ConfigurationType>Application</ConfigurationType>
 35 |     <UseDebugLibraries>false</UseDebugLibraries>
 36 |     <PlatformToolset>v140</PlatformToolset>
 37 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 38 |     <CharacterSet>MultiByte</CharacterSet>
 39 |   </PropertyGroup>
 40 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 41 |     <ConfigurationType>Application</ConfigurationType>
 42 |     <UseDebugLibraries>true</UseDebugLibraries>
 43 |     <PlatformToolset>v140</PlatformToolset>
 44 |     <CharacterSet>MultiByte</CharacterSet>
 45 |   </PropertyGroup>
 46 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 47 |     <ConfigurationType>Application</ConfigurationType>
 48 |     <UseDebugLibraries>false</UseDebugLibraries>
 49 |     <PlatformToolset>v140</PlatformToolset>
 50 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 51 |     <CharacterSet>MultiByte</CharacterSet>
 52 |   </PropertyGroup>
 53 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 54 |   <ImportGroup Label="ExtensionSettings">
 55 |   </ImportGroup>
 56 |   <ImportGroup Label="Shared">
 57 |   </ImportGroup>
 58 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 59 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 60 |   </ImportGroup>
 61 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 65 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 66 |   </ImportGroup>
 67 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 68 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 69 |   </ImportGroup>
 70 |   <PropertyGroup Label="UserMacros" />
 71 |   <PropertyGroup />
 72 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- paths are project-relative or based on the CUDA_PATH environment variable (set by the CUDA toolkit installer) instead of one machine's absolute directories -->
 73 |     <ClCompile>
 74 |       <WarningLevel>Level3</WarningLevel>
 75 |       <Optimization>Disabled</Optimization>
 76 |       <SDLCheck>true</SDLCheck>
 77 |       <AdditionalIncludeDirectories>$(ProjectDir)..\..\..\includes;$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 78 |     </ClCompile>
 79 |     <Link>
 80 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
 81 |       <AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
 82 |       <SubSystem>Console</SubSystem>
 83 |     </Link>
 84 |     <PostBuildEvent>
 85 |       <Command>copy /y "$(ProjectDir)simple_add.cl" "$(OutDir)"</Command>
 86 |     </PostBuildEvent>
 87 |   </ItemDefinitionGroup>
 88 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- paths are project-relative or based on the CUDA_PATH environment variable (set by the CUDA toolkit installer) instead of one machine's absolute directories -->
 89 |     <ClCompile>
 90 |       <WarningLevel>Level3</WarningLevel>
 91 |       <Optimization>Disabled</Optimization>
 92 |       <SDLCheck>true</SDLCheck>
 93 |       <AdditionalIncludeDirectories>$(ProjectDir)..\..\..\includes;$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 94 |     </ClCompile>
 95 |     <Link>
 96 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
 97 |       <AdditionalLibraryDirectories>$(CUDA_PATH)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
 98 |       <SubSystem>Console</SubSystem>
 99 |     </Link>
100 |     <PostBuildEvent>
101 |       <Command>copy /y "$(ProjectDir)simple_add.cl" "$(OutDir)"</Command>
102 |     </PostBuildEvent>
103 |   </ItemDefinitionGroup>
104 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- paths are project-relative or based on the CUDA_PATH environment variable (set by the CUDA toolkit installer) instead of one machine's absolute directories -->
105 |     <ClCompile>
106 |       <WarningLevel>Level3</WarningLevel>
107 |       <Optimization>MaxSpeed</Optimization>
108 |       <FunctionLevelLinking>true</FunctionLevelLinking>
109 |       <IntrinsicFunctions>true</IntrinsicFunctions>
110 |       <SDLCheck>true</SDLCheck>
111 |       <AdditionalIncludeDirectories>$(ProjectDir)..\..\..\includes;$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
112 |     </ClCompile>
113 |     <Link>
114 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
115 |       <OptimizeReferences>true</OptimizeReferences>
116 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
117 |       <AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
118 |       <SubSystem>Console</SubSystem>
119 |     </Link>
120 |     <PostBuildEvent>
121 |       <Command>copy /y "$(ProjectDir)simple_add.cl" "$(OutDir)"</Command>
122 |     </PostBuildEvent>
123 |   </ItemDefinitionGroup>
124 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- paths are project-relative or based on the CUDA_PATH environment variable (set by the CUDA toolkit installer) instead of one machine's absolute directories -->
125 |     <ClCompile>
126 |       <WarningLevel>Level3</WarningLevel>
127 |       <Optimization>MaxSpeed</Optimization>
128 |       <FunctionLevelLinking>true</FunctionLevelLinking>
129 |       <IntrinsicFunctions>true</IntrinsicFunctions>
130 |       <SDLCheck>true</SDLCheck>
131 |       <AdditionalIncludeDirectories>$(ProjectDir)..\..\..\includes;$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
132 |     </ClCompile>
133 |     <Link>
134 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
135 |       <OptimizeReferences>true</OptimizeReferences>
136 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
137 |       <AdditionalLibraryDirectories>$(CUDA_PATH)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
138 |       <SubSystem>Console</SubSystem>
139 |     </Link>
140 |     <PostBuildEvent>
141 |       <Command>copy /y "$(ProjectDir)simple_add.cl" "$(OutDir)"</Command>
142 |     </PostBuildEvent>
143 |   </ItemDefinitionGroup>
144 |   <ItemGroup>
145 |     <ClCompile Include="main.cpp" />
146 |   </ItemGroup>
147 |   <ItemGroup>
148 |     <None Include="simple_add.cl">
149 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
150 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
151 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
152 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
153 |     </None>
154 |   </ItemGroup>
155 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
156 |   <ImportGroup Label="ExtensionTargets">
157 |   </ImportGroup>
158 | </Project>


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 1 - Hello, OpenCL/Tutorial 1 - Hello, OpenCL.vcxproj.filters:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8"?>
2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 |   <ItemGroup>
4 |     <ClCompile Include="main.cpp" />
5 |   </ItemGroup>
6 |   <ItemGroup>
7 |     <None Include="simple_add.cl" />
8 |   </ItemGroup>
9 | </Project>


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 1 - Hello, OpenCL/main.cpp:
--------------------------------------------------------------------------------
 1 | #define __CL_ENABLE_EXCEPTIONS
 2 | #include <CL/cl.hpp>
 3 | #include <fstream>
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <iterator>
 7 | 
 8 | //Returns the complete contents of the file named fileName as one string.
 9 | //Nothing is reported when the file cannot be opened; the stream then yields no characters.
10 | std::string readFile(std::string fileName)
11 | {
12 | 	std::ifstream input(fileName);
13 | 	//a default-constructed istreambuf_iterator is the end-of-stream marker
14 | 	std::istreambuf_iterator<char> first(input);
15 | 	std::istreambuf_iterator<char> last;
16 | 	return std::string(first, last);
17 | }
14 | 
15 | int main(int arg, char* args[])
16 | {
17 | 	const int size = 10;
18 | 	int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
19 | 	int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 };
20 | 	int C[size];
21 | 
22 | 	//stl vector to store all of the available platforms
23 | 	std::vector<cl::Platform> platforms;
24 | 	//get all available platforms
25 | 	cl::Platform::get(&platforms);
26 | 
27 | 	if (platforms.size() == 0)
28 | 	{
29 | 		std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system.
30 | 		exit(1);
31 | 	}
32 | 
33 | 	//Create a stl vector to store all of the availbe devices to use from the first platform.
34 | 	std::vector<cl::Device> devices;
35 | 	//Get the available devices from the platform. For me the platform for my 980ti is actually th e second in the platform list but for simplicity we will use the first one.
36 | 	platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
37 | 	//Set the device to the first device in the platform. You can have more than one device associated with a single platform, for instance if you had two of the same GPUs on your system in SLI or CrossFire.
38 | 	cl::Device device = devices[0];
39 | 
40 | 	//This is just helpful to see what device and platform you are using.
41 | 	std::cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
42 | 	std::cout << "Using platform: " << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
43 | 
44 | 	//Finally create the OpenCL context from the device you have chosen.
45 | 	cl::Context context(device);
46 | 
47 | 	cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * size);
48 | 	cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * size);
49 | 	cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * size);
50 | 
51 | 	//A source object for your program
52 | 	cl::Program::Sources sources;
53 | 	std::string kernel_code = readFile("simple_add.cl");
54 | 	//Add your program source
55 | 	sources.push_back({ kernel_code.c_str(),kernel_code.length() });
56 | 
57 | 	//Create your OpenCL program and build it.
58 | 	cl::Program program(context, sources);
59 | 	if (program.build({ device }) != CL_SUCCESS)
60 | 	{
61 | 		std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;//print the build log to find any issues with your source
62 | 		exit(1);//Quit if your program doesn't compile
63 | 	}
64 | 
65 | 	cl::CommandQueue queue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);
66 | 
67 | 	//Write our buffers that we are adding to our OpenCL device
68 | 	queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int) * size, A);
69 | 	queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int) * size, B);
70 | 
71 | 	//Create our Kernel (basically what is the starting point for our OpenCL program)
72 | 	cl::Kernel simple_add(program, "simple_add");
73 | 	//Set our arguements for the kernel
74 | 	simple_add.setArg(0, buffer_A);
75 | 	simple_add.setArg(1, buffer_B);
76 | 	simple_add.setArg(2, buffer_C);
77 | 
78 | 	//Make sure that our queue is done with all of its tasks before continuing
79 | 	queue.finish();
80 | 
81 | 	//Create an event that we can use to wait for our program to finish running
82 | 	cl::Event e;
83 | 	//This runs our program, the ranges here are the offset, global, local ranges that our code runs in.
84 | 	queue.enqueueNDRangeKernel(simple_add, cl::NullRange, cl::NDRange(size), cl::NullRange, 0, &e);
85 | 
86 | 	//Waits for our program to finish
87 | 	e.wait();
88 | 	//Reads the output written to our buffer into our final array
89 | 	queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int) * size, C);
90 | 
91 | 	//prints the array
92 | 	std::cout << "Result:" << std::endl;
93 | 	for (int i = 0; i < size; i++)
94 | 	{
95 | 		std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl;
96 | 	}
97 | 
98 | 	return 0;
99 | }


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 1 - Hello, OpenCL/simple_add.cl:
--------------------------------------------------------------------------------
1 | // Element-wise addition: every work-item writes C[i] = A[i] + B[i] for its own global id i.
2 | // There is no bounds check, so the launch must not use more work-items than the buffers have elements.
3 | __kernel void simple_add(__global const int* A, __global const int* B, __global int* C)
4 | {
5 | 	const size_t i = get_global_id(0);
6 | 	C[i] = A[i] + B[i];
7 | }


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 2 - OpenCL load image/Lenna.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/vs/OpenCL/Tutorial 2 - OpenCL load image/Lenna.png


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 2 - OpenCL load image/Tutorial 2 - OpenCL load image.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|Win32">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>Win32</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Debug|x64">
 13 |       <Configuration>Debug</Configuration>
 14 |       <Platform>x64</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <ItemGroup>
 22 |     <ClCompile Include="main.cpp" />
 23 |   </ItemGroup>
 24 |   <ItemGroup>
 25 |     <None Include="cl_tutorial_2_copy.cl">
 26 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
 27 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
 28 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
 29 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
 30 |     </None>
 31 |   </ItemGroup>
 32 |   <ItemGroup>
 33 |     <Image Include="Lenna.png">
 34 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
 35 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
 36 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
 37 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
 38 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
 39 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
 40 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
 41 |       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
 42 |     </Image>
 43 |   </ItemGroup>
 44 |   <PropertyGroup Label="Globals">
 45 |     <ProjectGuid>{D27A5344-59EA-4276-B828-B0768E8ECA82}</ProjectGuid>
 46 |     <RootNamespace>Tutorial2OpenCLloadimage</RootNamespace>
 47 |     <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
 48 |   </PropertyGroup>
 49 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 50 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 51 |     <ConfigurationType>Application</ConfigurationType>
 52 |     <UseDebugLibraries>true</UseDebugLibraries>
 53 |     <PlatformToolset>v140</PlatformToolset>
 54 |     <CharacterSet>MultiByte</CharacterSet>
 55 |   </PropertyGroup>
 56 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 57 |     <ConfigurationType>Application</ConfigurationType>
 58 |     <UseDebugLibraries>false</UseDebugLibraries>
 59 |     <PlatformToolset>v140</PlatformToolset>
 60 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 61 |     <CharacterSet>MultiByte</CharacterSet>
 62 |   </PropertyGroup>
 63 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 64 |     <ConfigurationType>Application</ConfigurationType>
 65 |     <UseDebugLibraries>true</UseDebugLibraries>
 66 |     <PlatformToolset>v140</PlatformToolset>
 67 |     <CharacterSet>MultiByte</CharacterSet>
 68 |   </PropertyGroup>
 69 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 70 |     <ConfigurationType>Application</ConfigurationType>
 71 |     <UseDebugLibraries>false</UseDebugLibraries>
 72 |     <PlatformToolset>v140</PlatformToolset>
 73 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 74 |     <CharacterSet>MultiByte</CharacterSet>
 75 |   </PropertyGroup>
 76 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 77 |   <ImportGroup Label="ExtensionSettings">
 78 |   </ImportGroup>
 79 |   <ImportGroup Label="Shared">
 80 |   </ImportGroup>
 81 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 82 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 83 |   </ImportGroup>
 84 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 85 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 86 |   </ImportGroup>
 87 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 88 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 89 |   </ImportGroup>
 90 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 91 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 92 |   </ImportGroup>
 93 |   <PropertyGroup Label="UserMacros" />
 94 |   <PropertyGroup />
 95 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- paths are project-relative or based on the CUDA_PATH environment variable (set by the CUDA toolkit installer) instead of one machine's absolute directories -->
 96 |     <ClCompile>
 97 |       <WarningLevel>Level3</WarningLevel>
 98 |       <Optimization>Disabled</Optimization>
 99 |       <SDLCheck>true</SDLCheck>
100 |       <AdditionalIncludeDirectories>$(ProjectDir)..\..\..\includes;$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
101 |     </ClCompile>
102 |     <Link>
103 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
104 |       <AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
105 |     </Link>
106 |     <PostBuildEvent>
107 |       <Command>copy /y "$(ProjectDir)cl_tutorial_2_copy.cl" "$(OutDir)"
108 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
109 |     </PostBuildEvent>
110 |   </ItemDefinitionGroup>
111 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- paths are project-relative or based on the CUDA_PATH environment variable (set by the CUDA toolkit installer) instead of one machine's absolute directories -->
112 |     <ClCompile>
113 |       <WarningLevel>Level3</WarningLevel>
114 |       <Optimization>Disabled</Optimization>
115 |       <SDLCheck>true</SDLCheck>
116 |       <AdditionalIncludeDirectories>$(ProjectDir)..\..\..\includes;$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
117 |     </ClCompile>
118 |     <Link>
119 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
120 |       <AdditionalLibraryDirectories>$(CUDA_PATH)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
121 |     </Link>
122 |     <PostBuildEvent>
123 |       <Command>copy /y "$(ProjectDir)cl_tutorial_2_copy.cl" "$(OutDir)"
124 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
125 |     </PostBuildEvent>
126 |   </ItemDefinitionGroup>
127 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
128 |     <ClCompile>
129 |       <WarningLevel>Level3</WarningLevel>
130 |       <Optimization>MaxSpeed</Optimization>
131 |       <FunctionLevelLinking>true</FunctionLevelLinking>
132 |       <IntrinsicFunctions>true</IntrinsicFunctions>
133 |       <SDLCheck>true</SDLCheck>
134 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
135 |     </ClCompile>
136 |     <Link>
137 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
138 |       <OptimizeReferences>true</OptimizeReferences>
139 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
140 |       <AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
141 |     </Link>
142 |     <PostBuildEvent>
143 |       <Command>copy /y "$(ProjectDir)cl_tutorial_2_copy.cl" "$(OutDir)"
144 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
145 |     </PostBuildEvent>
146 |   </ItemDefinitionGroup>
147 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
148 |     <ClCompile>
149 |       <WarningLevel>Level3</WarningLevel>
150 |       <Optimization>MaxSpeed</Optimization>
151 |       <FunctionLevelLinking>true</FunctionLevelLinking>
152 |       <IntrinsicFunctions>true</IntrinsicFunctions>
153 |       <SDLCheck>true</SDLCheck>
154 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
155 |     </ClCompile>
156 |     <Link>
157 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
158 |       <OptimizeReferences>true</OptimizeReferences>
159 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
160 |       <AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
161 |     </Link>
162 |     <PostBuildEvent>
163 |       <Command>copy /y "$(ProjectDir)cl_tutorial_2_copy.cl" "$(OutDir)"
164 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
165 |     </PostBuildEvent>
166 |   </ItemDefinitionGroup>
167 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
168 |   <ImportGroup Label="ExtensionTargets">
169 |   </ImportGroup>
170 | </Project>


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 2 - OpenCL load image/Tutorial 2 - OpenCL load image.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <ClCompile Include="main.cpp" />
 5 |   </ItemGroup>
 6 |   <ItemGroup>
 7 |     <None Include="cl_tutorial_2_copy.cl" />
 8 |   </ItemGroup>
 9 |   <ItemGroup>
10 |     <Image Include="Lenna.png" />
11 |   </ItemGroup>
12 | </Project>


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 2 - OpenCL load image/cl_tutorial_2_copy.cl:
--------------------------------------------------------------------------------
 1 | const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
 2 | 
 3 | void kernel copy(__read_only image2d_t in, __write_only image2d_t out)
 4 | {
 5 | 	int x = get_global_id(0);
 6 | 	int y = get_global_id(1);
 7 | 	int2 pos = (int2)(x, y);
 8 | 	uint4 pixel = read_imageui(in, smp, pos);
 9 | 	write_imageui(out, pos, pixel);
10 | }


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 2 - OpenCL load image/main.cpp:
--------------------------------------------------------------------------------
  1 | //#define __CL_ENABLE_EXCEPTIONS
  2 | #include <CL/cl.hpp>
  3 | 
  4 | #include <string>
  5 | #include <sstream>
  6 | #include <ostream>
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | #include <iostream>
 10 | #include <fstream>
 11 | 
 12 | #include "PNG.h"
 13 | 
 14 | std::string readFile(std::string fileName)
 15 | {
 16 | 	std::ifstream t(fileName);
 17 | 	std::string str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
 18 | 	return str;
 19 | }
 20 | 
 21 | int main(int arg, char* args[])
 22 | {
 23 | 	std::vector<cl::Platform> platforms;
 24 | 	cl::Platform::get(&platforms);
 25 | 	if (platforms.size() == 0)
 26 | 	{
 27 | 		std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system.
 28 | 		exit(1);
 29 | 	}
 30 | 	std::vector<cl::Device> devices;
 31 | 	platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
 32 | 	cl::Device device = devices[0];
 33 | 	std::cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
 34 | 	std::cout << "Using platform: " << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
 35 | 	cl::Context context(device);
 36 | 
 37 | 	//load our image
 38 | 	PNG inPng("Lenna.png");
 39 | 
 40 | 	//store width and height so we can use them for our output image later
 41 | 	const unsigned int w = inPng.w;
 42 | 	const unsigned int h = inPng.h;
 43 | 
 44 | 	//input image
 45 | 	const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8);
 46 | 	cl::Image2D in(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, format, w, h, 0, &inPng.data[0]);
 47 | 
 48 | 	//we are done with the image so free up its memory
 49 | 	inPng.Free();
 50 | 
 51 | 	//output image
 52 | 	cl::Image2D out(context, CL_MEM_WRITE_ONLY, format, w, h, 0, NULL);
 53 | 
 54 | 	cl::Program::Sources sources;
 55 | 	std::string kernel_code = readFile("cl_tutorial_2_copy.cl");
 56 | 	//Add your program source
 57 | 	sources.push_back({ kernel_code.c_str(),kernel_code.length() });
 58 | 
 59 | 	//Create your OpenCL program and build it.
 60 | 	cl::Program program(context, sources);
 61 | 	if (program.build({ device }) != CL_SUCCESS)
 62 | 	{
 63 | 		std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;//print the build log to find any issues with your source
 64 | 		exit(1);//Quit if your program doesn't compile
 65 | 	}
 66 | 
 67 | 	//set the kernel arguments
 68 | 	cl::Kernel kernelCopy(program, "copy");
 69 | 	kernelCopy.setArg(0, in);
 70 | 	kernelCopy.setArg(1, out);
 71 | 
 72 | 	//create command queue
 73 | 	cl::CommandQueue queue(context, device, 0, NULL);
 74 | 
 75 | 	//execute kernel
 76 | 	queue.enqueueNDRangeKernel(kernelCopy, cl::NullRange, cl::NDRange(w, h), cl::NullRange);
 77 | 
 78 | 	//wait for kernel to finish
 79 | 	queue.finish();
 80 | 
 81 | 	//start and end coordinates for reading our image (I really do not like how the c++ wrapper does this)
 82 | 	cl::size_t<3> origin;
 83 | 	cl::size_t<3> size;
 84 | 	origin[0] = 0;
 85 | 	origin[1] = 0;
 86 | 	origin[2] = 0;
 87 | 	size[0] = w;
 88 | 	size[1] = h;
 89 | 	size[2] = 1;
 90 | 
 91 | 	//output png
 92 | 	PNG outPng;
 93 | 	//create the image with the same width and height as original
 94 | 	outPng.Create(w, h);
 95 | 
 96 | 	//temporary array to store the result from opencl
 97 | 	auto tmp = new unsigned char[w * h * 4];
 98 | 	//CL_TRUE means that it waits for the entire image to be copied before continuing
 99 | 	queue.enqueueReadImage(out, CL_TRUE, origin, size, 0, 0, tmp);
100 | 
101 | 	//copy the data from the temp array to the png
102 | 	std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data));
103 | 
104 | 	//write the image to file
105 | 	outPng.Save("cl_tutorial_2.png");
106 | 	//free the iamge's resources since we are done with it
107 | 	outPng.Free();
108 | 
109 | 	//free the temp array
110 | 	delete[] tmp;
111 | 
112 | 	return 0;
113 | }


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/Lenna.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jamolnng/OpenCL-CUDA-Tutorials/bed78bb907a11c2944e2562658533e88bfb7c8c8/vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/Lenna.png


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/Tutorial 3 - OpenCL basic image filtering.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|Win32">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>Win32</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Debug|x64">
 13 |       <Configuration>Debug</Configuration>
 14 |       <Platform>x64</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}</ProjectGuid>
 23 |     <RootNamespace>Tutorial3OpenCLbasicimagefiltering</RootNamespace>
 24 |     <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
 25 |   </PropertyGroup>
 26 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 27 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 28 |     <ConfigurationType>Application</ConfigurationType>
 29 |     <UseDebugLibraries>true</UseDebugLibraries>
 30 |     <PlatformToolset>v140</PlatformToolset>
 31 |     <CharacterSet>MultiByte</CharacterSet>
 32 |   </PropertyGroup>
 33 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 34 |     <ConfigurationType>Application</ConfigurationType>
 35 |     <UseDebugLibraries>false</UseDebugLibraries>
 36 |     <PlatformToolset>v140</PlatformToolset>
 37 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 38 |     <CharacterSet>MultiByte</CharacterSet>
 39 |   </PropertyGroup>
 40 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 41 |     <ConfigurationType>Application</ConfigurationType>
 42 |     <UseDebugLibraries>true</UseDebugLibraries>
 43 |     <PlatformToolset>v140</PlatformToolset>
 44 |     <CharacterSet>MultiByte</CharacterSet>
 45 |   </PropertyGroup>
 46 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 47 |     <ConfigurationType>Application</ConfigurationType>
 48 |     <UseDebugLibraries>false</UseDebugLibraries>
 49 |     <PlatformToolset>v140</PlatformToolset>
 50 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 51 |     <CharacterSet>MultiByte</CharacterSet>
 52 |   </PropertyGroup>
 53 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 54 |   <ImportGroup Label="ExtensionSettings">
 55 |   </ImportGroup>
 56 |   <ImportGroup Label="Shared">
 57 |   </ImportGroup>
 58 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 59 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 60 |   </ImportGroup>
 61 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 62 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 63 |   </ImportGroup>
 64 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 65 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 66 |   </ImportGroup>
 67 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 68 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 69 |   </ImportGroup>
 70 |   <PropertyGroup Label="UserMacros" />
 71 |   <PropertyGroup />
 72 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 73 |     <ClCompile>
 74 |       <WarningLevel>Level3</WarningLevel>
 75 |       <Optimization>Disabled</Optimization>
 76 |       <SDLCheck>true</SDLCheck>
 77 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 78 |     </ClCompile>
 79 |     <Link>
 80 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
 81 |       <SubSystem>Console</SubSystem>
 82 |       <AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
 83 |     </Link>
 84 |     <PostBuildEvent>
 85 |       <Command>copy /y "$(ProjectDir)cl_tutorial_3_boxFilter.cl" "$(OutDir)"
 86 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
 87 |     </PostBuildEvent>
 88 |     <PreBuildEvent>
 89 |       <Command>
 90 |       </Command>
 91 |     </PreBuildEvent>
 92 |   </ItemDefinitionGroup>
 93 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 94 |     <ClCompile>
 95 |       <WarningLevel>Level3</WarningLevel>
 96 |       <Optimization>Disabled</Optimization>
 97 |       <SDLCheck>true</SDLCheck>
 98 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 99 |     </ClCompile>
100 |     <Link>
101 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
102 |       <SubSystem>Console</SubSystem>
103 |       <AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
104 |     </Link>
105 |     <PostBuildEvent>
106 |       <Command>copy /y "$(ProjectDir)cl_tutorial_3_boxFilter.cl" "$(OutDir)"
107 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
108 |     </PostBuildEvent>
109 |     <PreBuildEvent>
110 |       <Command>
111 |       </Command>
112 |     </PreBuildEvent>
113 |   </ItemDefinitionGroup>
114 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
115 |     <ClCompile>
116 |       <WarningLevel>Level3</WarningLevel>
117 |       <Optimization>MaxSpeed</Optimization>
118 |       <FunctionLevelLinking>true</FunctionLevelLinking>
119 |       <IntrinsicFunctions>true</IntrinsicFunctions>
120 |       <SDLCheck>true</SDLCheck>
121 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
122 |     </ClCompile>
123 |     <Link>
124 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
125 |       <OptimizeReferences>true</OptimizeReferences>
126 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
127 |       <SubSystem>Console</SubSystem>
128 |       <AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
129 |     </Link>
130 |     <PostBuildEvent>
131 |       <Command>copy /y "$(ProjectDir)cl_tutorial_3_boxFilter.cl" "$(OutDir)"
132 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
133 |     </PostBuildEvent>
134 |     <PreBuildEvent>
135 |       <Command>
136 |       </Command>
137 |     </PreBuildEvent>
138 |   </ItemDefinitionGroup>
139 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
140 |     <ClCompile>
141 |       <WarningLevel>Level3</WarningLevel>
142 |       <Optimization>MaxSpeed</Optimization>
143 |       <FunctionLevelLinking>true</FunctionLevelLinking>
144 |       <IntrinsicFunctions>true</IntrinsicFunctions>
145 |       <SDLCheck>true</SDLCheck>
146 |       <AdditionalIncludeDirectories>E:\Users\Jesse Laning\Documents\GitHub\OpenCL-CUDA-Tutorials\includes;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
147 |     </ClCompile>
148 |     <Link>
149 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
150 |       <OptimizeReferences>true</OptimizeReferences>
151 |       <AdditionalDependencies>OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies>
152 |       <SubSystem>Console</SubSystem>
153 |       <AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
154 |     </Link>
155 |     <PostBuildEvent>
156 |       <Command>copy /y "$(ProjectDir)cl_tutorial_3_boxFilter.cl" "$(OutDir)"
157 | copy /y "$(ProjectDir)Lenna.png" "$(OutDir)"</Command>
158 |     </PostBuildEvent>
159 |     <PreBuildEvent>
160 |       <Command>
161 |       </Command>
162 |     </PreBuildEvent>
163 |   </ItemDefinitionGroup>
164 |   <ItemGroup>
165 |     <Text Include="cl_tutorial_3_boxFilter.cl">
166 |       <FileType>Document</FileType>
167 |     </Text>
168 |   </ItemGroup>
169 |   <ItemGroup>
170 |     <ClCompile Include="main.cpp" />
171 |   </ItemGroup>
172 |   <ItemGroup>
173 |     <Image Include="Lenna.png" />
174 |   </ItemGroup>
175 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
176 |   <ImportGroup Label="ExtensionTargets">
177 |   </ImportGroup>
178 | </Project>


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/Tutorial 3 - OpenCL basic image filtering.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <ClCompile Include="main.cpp" />
 5 |   </ItemGroup>
 6 |   <ItemGroup>
 7 |     <Image Include="Lenna.png" />
 8 |   </ItemGroup>
 9 |   <ItemGroup>
10 |     <Text Include="cl_tutorial_3_boxFilter.cl" />
11 |   </ItemGroup>
12 | </Project>


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/cl_tutorial_3_boxFilter.cl:
--------------------------------------------------------------------------------
 1 | const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
 2 | 
 3 | void kernel boxFilter(__read_only image2d_t in, __write_only image2d_t out, const int imageWidth, const int imageHeight, const int halfBoxWidth, const int halfBoxHeight)
 4 | {
 5 | 	int x = get_global_id(0);
 6 | 	int y = get_global_id(1);
 7 | 	int2 pos = (int2)(x, y);
 8 | 
 9 | 	uint4 total = {0, 0, 0, 0};
10 | 
11 | 	int count = 0;
12 | 
13 | 	for(int i = -halfBoxWidth; i <= halfBoxWidth; i++)
14 | 	{
15 | 		for(int j = -halfBoxHeight; j <= halfBoxHeight; j++)
16 | 		{
17 | 			int2 coord = pos + (int2)(i, j);
18 | 			if(coord.x >= 0 && coord.y >= 0 && coord.x < imageWidth && coord.y < imageHeight)
19 | 			{
20 | 				total += read_imageui(in, smp, pos + (int2)(i, j));
21 | 				count++;
22 | 			}
23 | 		}
24 | 	}
25 | 	write_imageui(out, pos, total / count);
26 | }


--------------------------------------------------------------------------------
/vs/OpenCL/Tutorial 3 - OpenCL basic image filtering/main.cpp:
--------------------------------------------------------------------------------
  1 | //#define __CL_ENABLE_EXCEPTIONS
  2 | #include <CL/cl.hpp>
  3 | 
  4 | #include <string>
  5 | #include <sstream>
  6 | #include <ostream>
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | #include <iostream>
 10 | #include <fstream>
 11 | #include <iomanip>
 12 | 
 13 | #include "PNG.h"
 14 | 
 15 | std::string readFile(std::string fileName)
 16 | {
 17 | 	std::ifstream t(fileName);
 18 | 	std::string str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
 19 | 	return str;
 20 | }
 21 | 
 22 | int main(int arg, char* args[])
 23 | {
 24 | 	int filterWidth = 10;
 25 | 	int filterHeight = 10;
 26 | 	int platformId = 0;
 27 | 	if (arg > 1)
 28 | 	{
 29 | 		platformId = atoi(args[1]);
 30 | 	}
 31 | 	if (arg > 3)
 32 | 	{
 33 | 		filterWidth = std::atoi(args[2]);
 34 | 		filterHeight = std::atoi(args[3]);
 35 | 	}
 36 | 
 37 | 	std::vector<cl::Platform> platforms;
 38 | 	cl::Platform::get(&platforms);
 39 | 	if (platforms.size() == 0)
 40 | 	{
 41 | 		std::cout << "No OpenCL platforms found" << std::endl;//This means you do not have an OpenCL compatible platform on your system.
 42 | 		exit(1);
 43 | 	}
 44 | 	std::vector<cl::Device> devices;
 45 | 	platforms[platformId].getDevices(CL_DEVICE_TYPE_ALL, &devices);
 46 | 	cl::Device device = devices[0];
 47 | 	std::cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
 48 | 	std::cout << "Using platform: " << platforms[platformId].getInfo<CL_PLATFORM_NAME>() << std::endl;
 49 | 	cl::Context context(device);
 50 | 
 51 | 	//load our image
 52 | 	PNG inPng("Lenna.png");
 53 | 
 54 | 	//store width and height so we can use them for our output image later
 55 | 	const unsigned int w = inPng.w;
 56 | 	const unsigned int h = inPng.h;
 57 | 
 58 | 	//input image
 59 | 	const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8);
 60 | 	cl::Image2D in(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, format, w, h, 0, &inPng.data[0]);
 61 | 
 62 | 	//we are done with the image so free up its memory
 63 | 	inPng.Free();
 64 | 
 65 | 	//output image
 66 | 	cl::Image2D out(context, CL_MEM_WRITE_ONLY, format, w, h, 0, NULL);
 67 | 
 68 | 	cl::Program::Sources sources;
 69 | 	std::string kernel_code = readFile("cl_tutorial_3_boxFilter.cl");
 70 | 	//Add your program source
 71 | 	sources.push_back({ kernel_code.c_str(),kernel_code.length() });
 72 | 
 73 | 	//Create your OpenCL program and build it.
 74 | 	cl::Program program(context, sources);
 75 | 	if (program.build({ device }) != CL_SUCCESS)
 76 | 	{
 77 | 		std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;//print the build log to find any issues with your source
 78 | 		exit(1);//Quit if your program doesn't compile
 79 | 	}
 80 | 
 81 | 	//create command queue
 82 | 	cl::CommandQueue queue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);
 83 | 
 84 | 	//set the kernel arguments
 85 | 	cl::Kernel kernelboxFilter(program, "boxFilter");
 86 | 	kernelboxFilter.setArg(0, in);
 87 | 	kernelboxFilter.setArg(1, out);
 88 | 	kernelboxFilter.setArg(2, w);
 89 | 	kernelboxFilter.setArg(3, h);
 90 | 	kernelboxFilter.setArg(4, filterWidth);
 91 | 	kernelboxFilter.setArg(5, filterHeight);
 92 | 
 93 | 	cl::Event timer;
 94 | 
 95 | 	//execute kernel
 96 | 	queue.enqueueNDRangeKernel(kernelboxFilter, cl::NullRange, cl::NDRange(w, h), cl::NullRange, NULL, &timer);
 97 | 
 98 | 	//wait for kernel to finish
 99 | 	timer.wait();
100 | 
101 | 	cl_ulong time_start, time_end;
102 | 	double total_time;
103 | 
104 | 	time_start = timer.getProfilingInfo<CL_PROFILING_COMMAND_START>();
105 | 	time_end = timer.getProfilingInfo<CL_PROFILING_COMMAND_END>();
106 | 	total_time = time_end - time_start;
107 | 
108 | 	printf("\nExecution time in milliseconds = %0.3f ms\n", (total_time / 1000000.0));
109 | 
110 | 	//start and end coordinates for reading our image (I really do not like how the c++ wrapper does this)
111 | 	cl::size_t<3> origin;
112 | 	cl::size_t<3> size;
113 | 	origin[0] = 0;
114 | 	origin[1] = 0;
115 | 	origin[2] = 0;
116 | 	size[0] = w;
117 | 	size[1] = h;
118 | 	size[2] = 1;
119 | 
120 | 	//output png
121 | 	PNG outPng;
122 | 	//create the image with the same width and height as original
123 | 	outPng.Create(w, h);
124 | 
125 | 	//temporary array to store the result from opencl
126 | 	auto tmp = new unsigned char[w * h * 4];
127 | 	//CL_TRUE means that it waits for the entire image to be copied before continuing
128 | 	queue.enqueueReadImage(out, CL_TRUE, origin, size, 0, 0, tmp);
129 | 
130 | 	//copy the data from the temp array to the png
131 | 	std::copy(&tmp[0], &tmp[w * h * 4], std::back_inserter(outPng.data));
132 | 
133 | 	//write the image to file
134 | 	outPng.Save("cl_tutorial_3.png");
135 | 	//free the iamge's resources since we are done with it
136 | 	outPng.Free();
137 | 
138 | 	//free the temp array
139 | 	delete[] tmp;
140 | 
141 | 	return 0;
142 | }


--------------------------------------------------------------------------------
/vs/OpenCL_CUDA_Tutorials.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 14
 4 | VisualStudioVersion = 14.0.24720.0
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 1 - Hello, OpenCL", "OpenCL\Tutorial 1 - Hello, OpenCL\Tutorial 1 - Hello, OpenCL.vcxproj", "{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}"
 7 | EndProject
 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 1 - Hello, CUDA", "CUDA\Tutorial 1 - Hello, CUDA\Tutorial 1 - Hello, CUDA.vcxproj", "{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}"
 9 | EndProject
10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 2 - OpenCL load image", "OpenCL\Tutorial 2 - OpenCL load image\Tutorial 2 - OpenCL load image.vcxproj", "{D27A5344-59EA-4276-B828-B0768E8ECA82}"
11 | EndProject
12 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 2 - CUDA load image", "CUDA\Tutorial 2 - CUDA load image\Tutorial 2 - CUDA load image.vcxproj", "{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}"
13 | EndProject
14 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CUDA", "CUDA", "{6A139FCC-9678-4A85-A12C-8C8E74B80EA5}"
15 | EndProject
16 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "OpenCL", "OpenCL", "{6677E274-9760-4F0B-B5AD-6A9254FB02CA}"
17 | EndProject
18 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 3 - OpenCL basic image filtering", "OpenCL\Tutorial 3 - OpenCL basic image filtering\Tutorial 3 - OpenCL basic image filtering.vcxproj", "{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}"
19 | EndProject
20 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Tutorial 3 - CUDA basic image filtering", "CUDA\Tutorial 3 - CUDA basic image filtering\Tutorial 3 - CUDA basic image filtering.vcxproj", "{20B93A00-1A20-46D6-8841-EB60A002EB08}"
21 | EndProject
22 | Global
23 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
24 | 		Debug|x64 = Debug|x64
25 | 		Debug|x86 = Debug|x86
26 | 		Release|x64 = Release|x64
27 | 		Release|x86 = Release|x86
28 | 	EndGlobalSection
29 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
30 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Debug|x64.ActiveCfg = Debug|x64
31 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Debug|x64.Build.0 = Debug|x64
32 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Debug|x86.ActiveCfg = Debug|Win32
33 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Debug|x86.Build.0 = Debug|Win32
34 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Release|x64.ActiveCfg = Release|x64
35 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Release|x64.Build.0 = Release|x64
36 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Release|x86.ActiveCfg = Release|Win32
37 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B}.Release|x86.Build.0 = Release|Win32
38 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Debug|x64.ActiveCfg = Debug|x64
39 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Debug|x64.Build.0 = Debug|x64
40 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Debug|x86.ActiveCfg = Debug|Win32
41 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Debug|x86.Build.0 = Debug|Win32
42 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Release|x64.ActiveCfg = Release|x64
43 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Release|x64.Build.0 = Release|x64
44 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Release|x86.ActiveCfg = Release|Win32
45 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A}.Release|x86.Build.0 = Release|Win32
46 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82}.Debug|x64.ActiveCfg = Debug|x64
47 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82}.Debug|x64.Build.0 = Debug|x64
48 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82}.Debug|x86.ActiveCfg = Debug|Win32
49 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82}.Debug|x86.Build.0 = Debug|Win32
50 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82}.Release|x64.ActiveCfg = Release|x64
51 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82}.Release|x64.Build.0 = Release|x64
52 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82}.Release|x86.ActiveCfg = Release|Win32
53 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82}.Release|x86.Build.0 = Release|Win32
54 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Debug|x64.ActiveCfg = Debug|x64
55 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Debug|x64.Build.0 = Debug|x64
56 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Debug|x86.ActiveCfg = Debug|Win32
57 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Debug|x86.Build.0 = Debug|Win32
58 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Release|x64.ActiveCfg = Release|x64
59 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Release|x64.Build.0 = Release|x64
60 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Release|x86.ActiveCfg = Release|Win32
61 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F}.Release|x86.Build.0 = Release|Win32
62 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Debug|x64.ActiveCfg = Debug|x64
63 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Debug|x64.Build.0 = Debug|x64
64 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Debug|x86.ActiveCfg = Debug|Win32
65 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Debug|x86.Build.0 = Debug|Win32
66 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Release|x64.ActiveCfg = Release|x64
67 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Release|x64.Build.0 = Release|x64
68 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Release|x86.ActiveCfg = Release|Win32
69 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193}.Release|x86.Build.0 = Release|Win32
70 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08}.Debug|x64.ActiveCfg = Debug|x64
71 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08}.Debug|x64.Build.0 = Debug|x64
72 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08}.Debug|x86.ActiveCfg = Debug|Win32
73 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08}.Debug|x86.Build.0 = Debug|Win32
74 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08}.Release|x64.ActiveCfg = Release|x64
75 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08}.Release|x64.Build.0 = Release|x64
76 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08}.Release|x86.ActiveCfg = Release|Win32
77 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08}.Release|x86.Build.0 = Release|Win32
78 | 	EndGlobalSection
79 | 	GlobalSection(SolutionProperties) = preSolution
80 | 		HideSolutionNode = FALSE
81 | 	EndGlobalSection
82 | 	GlobalSection(NestedProjects) = preSolution
83 | 		{E68A3975-ACAB-4517-A5A4-3940BCF4CF9B} = {6677E274-9760-4F0B-B5AD-6A9254FB02CA}
84 | 		{35EF189C-6A3C-44A5-9EFE-40FC6C20AB8A} = {6A139FCC-9678-4A85-A12C-8C8E74B80EA5}
85 | 		{D27A5344-59EA-4276-B828-B0768E8ECA82} = {6677E274-9760-4F0B-B5AD-6A9254FB02CA}
86 | 		{365BEF91-40D2-4462-AE8C-8DBEFBB2DB3F} = {6A139FCC-9678-4A85-A12C-8C8E74B80EA5}
87 | 		{2B217AC4-8B92-4F2F-BF7A-658D56EFC193} = {6677E274-9760-4F0B-B5AD-6A9254FB02CA}
88 | 		{20B93A00-1A20-46D6-8841-EB60A002EB08} = {6A139FCC-9678-4A85-A12C-8C8E74B80EA5}
89 | 	EndGlobalSection
90 | EndGlobal
91 | 


--------------------------------------------------------------------------------
/vs/README.md:
--------------------------------------------------------------------------------
1 | Visual Studio 2015 Community project files.
2 | 
Please note that these projects will not compile automatically after you download them. You will have to change the additional include directories and additional library directories of each project to point to your installation of OpenCL and CUDA.


--------------------------------------------------------------------------------