├── BitonicMerge.sln
├── .gitattributes
├── BitonicMerge
├── BitonicMerge.vcxproj
└── kernel.cu
├── README.md
└── .gitignore
/BitonicMerge.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.6.33829.357
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BitonicMerge", "BitonicMerge\BitonicMerge.vcxproj", "{FF8B1809-EE3F-43C3-BBA6-0F77D2856584}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Release|x64 = Release|x64
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}.Debug|x64.ActiveCfg = Debug|x64
15 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}.Debug|x64.Build.0 = Debug|x64
16 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}.Release|x64.ActiveCfg = Release|x64
17 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}.Release|x64.Build.0 = Release|x64
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {F45265A6-E491-4FA6-89C3-91363CAD5369}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/BitonicMerge/BitonicMerge.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | x64
7 |
8 |
9 | Release
10 | x64
11 |
12 |
13 |
14 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}
15 | BitonicMerge
16 |
17 |
18 |
19 | Application
20 | true
21 | MultiByte
22 | v143
23 |
24 |
25 | Application
26 | false
27 | true
28 | MultiByte
29 | v143
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 | true
44 |
45 |
46 |
47 | Level3
48 | Disabled
49 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
50 |
51 |
52 | true
53 | Console
54 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
55 |
56 |
57 | 64
58 |
59 |
60 |
61 |
62 | Level3
63 | MaxSpeed
64 | true
65 | true
66 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
67 |
68 |
69 | true
70 | true
71 | true
72 | Console
73 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
74 |
75 |
76 | 64
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CUDA Merge and Bitonic Sort
2 |
3 | This project provides efficient implementations of Merge Sort and Bitonic Sort algorithms using CUDA, enabling fast sorting of large arrays through GPU parallel processing. The project includes both CPU and GPU versions of the algorithms, along with a performance comparison to showcase the benefits of using CUDA for sorting tasks.
4 |
5 | ## Table of Contents
6 |
7 | - [Introduction](#introduction)
8 | - [Merge Sort](#merge-sort)
9 | - [Bitonic Sort](#bitonic-sort)
10 | - [Implementation](#implementation)
11 | - [Performance Comparison](#performance-comparison)
12 | - [Requirements](#requirements)
13 | - [Usage](#usage)
14 | - [Contributing](#contributing)
15 | - [License](#license)
16 |
17 | ## Introduction
18 |
19 | Sorting large datasets efficiently is a common computational challenge. Merge Sort and Bitonic Sort are well-known sorting algorithms that can be implemented using parallel processing techniques, such as those provided by CUDA, to achieve significant speedup compared to traditional CPU-based sorting methods.
20 |
21 | This project aims to provide an easy-to-use CUDA-based implementation of Merge Sort and Bitonic Sort, enabling users to sort large arrays efficiently on compatible NVIDIA GPUs.
22 |
23 | ## Merge Sort
24 |
25 | Merge Sort is a popular divide-and-conquer sorting algorithm that efficiently sorts an array by recursively dividing it into two halves, sorting each half, and then merging the sorted halves to produce the final sorted array.
26 |
27 | ## Bitonic Sort
28 |
29 | Bitonic Sort is an efficient parallel sorting algorithm that requires the input size to be a power of 2. It is based on the concept of bitonic sequences, which are sequences that first monotonically increase and then monotonically decrease or vice versa. The algorithm recursively builds a bitonic sequence, and then repeatedly merges bitonic sequences to achieve sorting.
30 |
31 | ## Implementation
32 |
33 | The project contains the following implementations:
34 |
35 | - **CPU Merge Sort:** This is a traditional CPU-based implementation of the Merge Sort algorithm using a recursive approach.
36 |
37 | - **GPU Merge Sort:** The GPU version of Merge Sort that uses CUDA to achieve parallelism. It utilizes CUDA kernels to perform sorting operations on the GPU.
38 |
39 | - **CPU Bitonic Sort:** A CPU-based implementation of the Bitonic Sort algorithm. It requires the input size to be a power of 2.
40 |
41 | - **GPU Bitonic Sort:** The GPU version of Bitonic Sort that takes advantage of CUDA parallelism. Like the GPU Merge Sort, it uses CUDA kernels for sorting on the GPU.
42 |
43 | ## Performance Comparison
44 |
45 | The performance comparison section presents benchmark results of the CPU and GPU implementations for both Merge Sort and Bitonic Sort. It measures the execution time for each approach and demonstrates the potential speedup gained by using CUDA on compatible GPUs.
46 |
47 | ## Requirements
48 |
49 | To run this project, you need the following:
50 |
51 | - A compatible NVIDIA GPU with CUDA support.
52 | - NVIDIA CUDA Toolkit installed on your system.
53 | - C++ compiler with CUDA support (e.g., NVCC).
54 |
55 | ## Usage
56 |
57 | 1. Clone or download the project repository to your local machine.
58 | 2. Ensure you have met the requirements mentioned in the previous section.
59 | 3. Compile the source files using the appropriate C++ compiler with CUDA support.
60 | 4. Run the compiled executable to sort arrays using either Merge Sort or Bitonic Sort.
61 | 5. The program will provide sorted arrays and performance timings for both CPU and GPU implementations.
62 |
63 | ## Contributing
64 |
65 | Contributions to this project are welcome. If you find any issues or have improvements to suggest, feel free to open an issue or create a pull request.
66 |
67 | ## License
68 |
69 | This project is licensed under the [MIT License](LICENSE). You are free to use, modify, and distribute the code as per the terms of the license.
70 |
71 | ## Outputs
72 | 
73 | Testing the inputs and Merge Sort with a Small Array
74 | 
75 | Testing Bitonic Sort with a Small Array
76 | 
77 | Merge Sort CPU vs GPU performance for a large Array
78 | 
79 | Bitonic Sort CPU vs GPU performance for a large Array.
80 |
81 | Clearly, Bitonic Sort performs well in parallel computation, while Merge Sort performs well in sequential computation.
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Oo]ut/
33 | [Ll]og/
34 | [Ll]ogs/
35 |
36 | # Visual Studio 2015/2017 cache/options directory
37 | .vs/
38 | # Uncomment if you have tasks that create the project's static files in wwwroot
39 | #wwwroot/
40 |
41 | # Visual Studio 2017 auto generated files
42 | Generated\ Files/
43 |
44 | # MSTest test Results
45 | [Tt]est[Rr]esult*/
46 | [Bb]uild[Ll]og.*
47 |
48 | # NUnit
49 | *.VisualState.xml
50 | TestResult.xml
51 | nunit-*.xml
52 |
53 | # Build Results of an ATL Project
54 | [Dd]ebugPS/
55 | [Rr]eleasePS/
56 | dlldata.c
57 |
58 | # Benchmark Results
59 | BenchmarkDotNet.Artifacts/
60 |
61 | # .NET Core
62 | project.lock.json
63 | project.fragment.lock.json
64 | artifacts/
65 |
66 | # ASP.NET Scaffolding
67 | ScaffoldingReadMe.txt
68 |
69 | # StyleCop
70 | StyleCopReport.xml
71 |
72 | # Files built by Visual Studio
73 | *_i.c
74 | *_p.c
75 | *_h.h
76 | *.ilk
77 | *.meta
78 | *.obj
79 | *.iobj
80 | *.pch
81 | *.pdb
82 | *.ipdb
83 | *.pgc
84 | *.pgd
85 | *.rsp
86 | *.sbr
87 | *.tlb
88 | *.tli
89 | *.tlh
90 | *.tmp
91 | *.tmp_proj
92 | *_wpftmp.csproj
93 | *.log
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio LightSwitch build output
298 | **/*.HTMLClient/GeneratedArtifacts
299 | **/*.DesktopClient/GeneratedArtifacts
300 | **/*.DesktopClient/ModelManifest.xml
301 | **/*.Server/GeneratedArtifacts
302 | **/*.Server/ModelManifest.xml
303 | _Pvt_Extensions
304 |
305 | # Paket dependency manager
306 | .paket/paket.exe
307 | paket-files/
308 |
309 | # FAKE - F# Make
310 | .fake/
311 |
312 | # CodeRush personal settings
313 | .cr/personal
314 |
315 | # Python Tools for Visual Studio (PTVS)
316 | __pycache__/
317 | *.pyc
318 |
319 | # Cake - Uncomment if you are using it
320 | # tools/**
321 | # !tools/packages.config
322 |
323 | # Tabs Studio
324 | *.tss
325 |
326 | # Telerik's JustMock configuration file
327 | *.jmconfig
328 |
329 | # BizTalk build output
330 | *.btp.cs
331 | *.btm.cs
332 | *.odx.cs
333 | *.xsd.cs
334 |
335 | # OpenCover UI analysis results
336 | OpenCover/
337 |
338 | # Azure Stream Analytics local run output
339 | ASALocalRun/
340 |
341 | # MSBuild Binary and Structured Log
342 | *.binlog
343 |
344 | # NVidia Nsight GPU debugger configuration file
345 | *.nvuser
346 |
347 | # MFractors (Xamarin productivity tool) working folder
348 | .mfractor/
349 |
350 | # Local History for Visual Studio
351 | .localhistory/
352 |
353 | # BeatPulse healthcheck temp database
354 | healthchecksdb
355 |
356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
357 | MigrationBackup/
358 |
359 | # Ionide (cross platform F# VS Code tools) working folder
360 | .ionide/
361 |
362 | # Fody - auto-generated XML schema
363 | FodyWeavers.xsd
--------------------------------------------------------------------------------
/BitonicMerge/kernel.cu:
--------------------------------------------------------------------------------
1 | #include "cuda_runtime.h"
2 | #include "device_launch_parameters.h"
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | #define MAX_THREADS_PER_BLOCK 1024
10 |
// Bitonic sort on the CPU (in place).
//
// arr : array of n ints to reorder.
// n   : element count. The compare-exchange network only yields a fully
//       sorted (ascending) result when n is a power of two; for any other n
//       the output is a permutation of the input that is not guaranteed to
//       be sorted.
//
// Partner indices outside [0, n) are skipped, so the function never reads or
// writes past the end of arr, whatever n is.
void bitonicSortCPU(int* arr, int n)
{
    // k: length of the bitonic sequences merged in this stage.
    for (int k = 2; k <= n; k *= 2)
    {
        // j: distance between the two elements of each compared pair.
        for (int j = k / 2; j > 0; j /= 2)
        {
            for (int i = 0; i < n; i++)
            {
                const int partner = i ^ j;

                // Process each pair once (from its lower index) and ignore
                // partners beyond the array when n is not a power of two.
                if (partner <= i || partner >= n)
                    continue;

                // Bit k of i selects the direction of the enclosing run.
                const bool ascending = (i & k) == 0;
                const bool outOfOrder = ascending ? (arr[i] > arr[partner])
                                                  : (arr[i] < arr[partner]);
                if (outOfOrder)
                {
                    const int held = arr[i];
                    arr[i] = arr[partner];
                    arr[partner] = held;
                }
            }
        }
    }
}
47 |
// GPU kernel: one compare-exchange step of the bitonic sorting network.
//
// Each thread derives a flat index from its block and thread coordinates and
// pairs it with (index ^ j). The thread that owns the lower index of a pair
// performs the compare-and-swap; whether (index & k) is zero selects
// ascending or descending order for that pair.
//
// arr : array being reordered in place.
// j   : XOR distance to the partner element.
// k   : mask tested against the index to pick the sort direction.
//
// The kernel performs no bounds check: every launched thread's index and its
// partner index must be valid positions in arr.
__global__ void bitonicSortGPU(int* arr, int j, int k)
{
    const unsigned int self = threadIdx.x + blockDim.x * blockIdx.x;
    const unsigned int partner = self ^ j;

    // The higher-indexed thread of each pair has nothing to do.
    if (partner <= self)
        return;

    const int mine = arr[self];
    const int theirs = arr[partner];

    const bool ascending = (self & k) == 0;
    const bool swapNeeded = ascending ? (mine > theirs) : (mine < theirs);

    if (swapNeeded)
    {
        arr[self] = theirs;
        arr[partner] = mine;
    }
}
79 |
// Device helper: merges two adjacent sorted runs of arr into one sorted run.
//
// The runs are the half-open index ranges [left, middle) and [middle, right).
// The merged sequence is assembled in temp at the same indices and then
// copied back into arr. On ties the element from the left run is taken first.
__device__ void Merge(int* arr, int* temp, int left, int middle, int right)
{
    int a = left;       // read cursor in the left run
    int b = middle;     // read cursor in the right run
    int out = left;     // write cursor in temp

    // Take the smaller head element until one run is exhausted.
    while (a < middle && b < right)
    {
        if (arr[b] < arr[a])
        {
            temp[out] = arr[b];
            ++b;
        }
        else
        {
            temp[out] = arr[a];
            ++a;
        }
        ++out;
    }

    // Drain whatever remains of either run.
    for (; a < middle; ++a, ++out)
        temp[out] = arr[a];
    for (; b < right; ++b, ++out)
        temp[out] = arr[b];

    // Copy the merged range back into the source array.
    for (int idx = left; idx < right; ++idx)
        arr[idx] = temp[idx];
}
103 |
// GPU kernel: one bottom-up merge-sort pass.
//
// Thread t merges the two halves of the segment [t * width, t * width + width)
// of arr by calling Merge, which uses temp at the same indices as scratch.
//
// arr   : array being sorted in place.
// temp  : scratch array indexed like arr; Merge writes temp[left .. right).
// n     : number of elements in arr.
// width : length of the segment each thread produces in this pass.
//
// Threads whose segment starts at or beyond n, or whose second half would
// start at or beyond n, do nothing. A segment that runs past n is clamped to
// end at n.
__global__ void MergeSortGPU(int* arr, int* temp, int n, int width)
{
    // Index arithmetic is done in 64 bits: with one thread per element,
    // tid * width exceeds INT_MAX for large arrays (e.g. n = 65536 on the
    // final pass) and a wrapped, negative 'left' would slip past the guard.
    const long long tid = threadIdx.x + static_cast<long long>(blockDim.x) * blockIdx.x;
    const long long left = tid * width;
    const long long middle = left + width / 2;
    long long right = left + width;

    if (left >= n || middle >= n)
        return;

    // Never merge past the end of the array.
    if (right > n)
        right = n;

    Merge(arr, temp, static_cast<int>(left), static_cast<int>(middle), static_cast<int>(right));
}
117 |
// CPU merge step: combines two adjacent sorted runs of arr into one.
//
// The runs are the inclusive index ranges [left, mid] and [mid + 1, right].
// The merged sequence is built in temp at the same indices and then copied
// back into arr. On ties the element from the left run is taken first.
void merge(int* arr, int* temp, int left, int mid, int right)
{
    int a = left;        // read cursor in the left run
    int b = mid + 1;     // read cursor in the right run
    int out = left;      // write cursor in temp

    // Take the smaller head element until one run is exhausted.
    while (a <= mid && b <= right)
    {
        if (arr[b] < arr[a])
        {
            temp[out] = arr[b];
            ++b;
        }
        else
        {
            temp[out] = arr[a];
            ++a;
        }
        ++out;
    }

    // Drain whatever remains of either run.
    for (; a <= mid; ++a, ++out)
        temp[out] = arr[a];
    for (; b <= right; ++b, ++out)
        temp[out] = arr[b];

    // Copy the merged range back into the source array.
    for (int pos = left; pos <= right; ++pos)
        arr[pos] = temp[pos];
}
142 |
143 | //CPU Implementation of Merge Sort
144 | void mergeSortCPU(int* arr, int* temp, int left, int right)
145 | {
146 | if (left >= right)
147 | return;
148 |
149 | int mid = left + (right - left) / 2;
150 |
151 | mergeSortCPU(arr, temp, left, mid);
152 | mergeSortCPU(arr, temp, mid + 1, right);
153 |
154 | merge(arr, temp, left, mid, right);
155 | }
156 |
// Writes the first 'size' elements of arr to standard output, each followed
// by a single space, then ends the line.
void printArray(int* arr, int size)
{
    int idx = 0;
    while (idx < size)
    {
        std::cout << arr[idx] << " ";
        ++idx;
    }
    std::cout << std::endl;
}
164 |
// Returns true when the first 'size' elements of arr are in non-decreasing
// order; arrays with fewer than two elements count as sorted.
bool isSorted(int* arr, int size)
{
    int pos = 1;

    // Advance while each element is not smaller than its predecessor.
    while (pos < size && !(arr[pos] < arr[pos - 1]))
        ++pos;

    // Reaching the end means no out-of-order pair was found.
    return pos >= size;
}
175 |
// Returns true when num is a positive power of two (1, 2, 4, 8, ...).
bool isPowerOfTwo(int num)
{
    // Zero and negative values are never powers of two.
    if (num <= 0)
        return false;

    // A power of two has exactly one bit set, so clearing the lowest set bit
    // leaves zero.
    const int lowestBitCleared = num & (num - 1);
    return lowestBitCleared == 0;
}
181 |
182 |
//MAIN PROGRAM

// Terminates the program with a readable message when a CUDA runtime call
// did not return cudaSuccess. 'what' names the failed operation.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        std::cerr << "\nCUDA error during " << what << ": " << cudaGetErrorString(status) << std::endl;
        exit(EXIT_FAILURE);
    }
}

// Reads one integer from standard input into 'value'.
// Returns true on success. On malformed input the rest of the line is
// discarded, 'value' is set to 0 and false is returned so the caller can
// prompt again. If the stream has ended or is unusable the program exits,
// because no further prompt could ever be answered.
static bool readInt(int& value)
{
    if (std::cin >> value)
        return true;

    if (std::cin.eof() || std::cin.bad())
    {
        std::cerr << "\nInput ended before a valid value was entered." << std::endl;
        exit(EXIT_FAILURE);
    }

    // Recover from the failed extraction and drop the offending line;
    // otherwise every later read would fail immediately.
    std::cin.clear();
    char skipped;
    while (std::cin.get(skipped) && skipped != '\n')
    {
    }
    value = 0;
    return false;
}

// Interactive driver: asks for a sort type (merge or bitonic) and a
// power-of-two array size, fills an array with random values in [0, 100),
// sorts one copy on the GPU and one on the CPU, verifies both results with
// isSorted and prints the elapsed time of each run.
int main()
{
    std::cout << "-----------------------------------------------" << std::endl;
    std::cout << "CUDA MERGE AND BITONIC SORT IMPLEMENTATION" << std::endl;
    std::cout << "A Performance Comparison of These 2 Sorts in CPU vs GPU" << std::endl;
    std::cout << "-----------------------------------------------" << std::endl;

    int choice = 0;
    std::cout << "\nSelect the type of sort:";
    std::cout << "\n\t1. Merge Sort";
    std::cout << "\n\t2. Bitonic Sort";
    std::cout << "\nEnter your choice: ";
    readInt(choice);

    // Keep asking until one of the two valid options is entered. A failed
    // read leaves choice at 0, which is rejected here like any other value.
    while (choice != 1 && choice != 2)
    {
        std::cout << "\n!!!!! WRONG CHOICE. TRY AGAIN. YOU HAVE ONLY 2 DISTINCT OPTIONS-\n";
        readInt(choice);
    }

    if (choice == 1)
    {
        std::cout << "\n--------------------------------------------------------------\nMERGE SORT SELECTED\n--------------------------------------------------------------";
    }
    else
    {
        std::cout << "\n--------------------------------------------------------------\nBITONIC SORT SELECTED\n--------------------------------------------------------------";
    }

    int size = 0;
    std::cout << "\n\nEnter the size of the array. Must be a power of 2:\n ";
    readInt(size);

    while (!isPowerOfTwo(size))
    {
        std::cout << "\nWrong Size, must be power of 2. Try again:\n ";
        readInt(size);
    }

    std::cout << "\n--------------------------------------------------------------\nSELECTED SORT PROCESS UNDERWAY\n--------------------------------------------------------------";

    //Create CPU based Arrays
    int* arr = new int[size];   // input; later receives the GPU result
    int* carr = new int[size];  // copy sorted by the CPU
    int* temp = new int[size];  // scratch for the CPU merge sort

    //Create GPU based arrays
    int* gpuArrmerge = nullptr;
    int* gpuArrbiton = nullptr;
    int* gpuTemp = nullptr;

    // Initialize the array with random values
    srand(static_cast<unsigned int>(time(nullptr)));
    for (int i = 0; i < size; ++i)
    {
        arr[i] = rand() % 100;
        carr[i] = arr[i];
    }

    //Print unsorted array
    std::cout << "\n\nUnsorted array: ";
    if (size <= 100)
    {
        printArray(arr, size);
    }
    else
    {
        printf("\nToo Big to print. Check Variable. Automated isSorted Checker will be implemented\n");
    }

    const size_t bytes = static_cast<size_t>(size) * sizeof(int);

    // Allocate memory on GPU
    checkCuda(cudaMalloc((void**)&gpuArrmerge, bytes), "cudaMalloc(gpuArrmerge)");
    checkCuda(cudaMalloc((void**)&gpuTemp, bytes), "cudaMalloc(gpuTemp)");
    checkCuda(cudaMalloc((void**)&gpuArrbiton, bytes), "cudaMalloc(gpuArrbiton)");

    // Copy the input array to GPU memory
    checkCuda(cudaMemcpy(gpuArrmerge, arr, bytes, cudaMemcpyHostToDevice), "copy input to gpuArrmerge");
    checkCuda(cudaMemcpy(gpuArrbiton, arr, bytes, cudaMemcpyHostToDevice), "copy input to gpuArrbiton");

    // Events used to time the GPU run
    cudaEvent_t startGPU, stopGPU;
    checkCuda(cudaEventCreate(&startGPU), "cudaEventCreate(startGPU)");
    checkCuda(cudaEventCreate(&stopGPU), "cudaEventCreate(stopGPU)");
    float millisecondsGPU = 0;

    //Initialize CPU clock counters
    clock_t startCPU, endCPU;

    //Main If else block
    if (choice == 1)
    {
        //Call GPU Merge Kernel and time the run
        checkCuda(cudaEventRecord(startGPU), "cudaEventRecord(startGPU)");
        for (int wid = 1; wid < size; wid *= 2)
        {
            // Launch exactly one thread per merge of this pass. size and
            // width are powers of two, so the divisions are exact and no
            // thread index lies beyond the last segment.
            const int width = wid * 2;
            const int merges = size / width;
            const int threadsPerBlock = merges < MAX_THREADS_PER_BLOCK ? merges : MAX_THREADS_PER_BLOCK;
            const int blocksPerGrid = merges / threadsPerBlock;

            MergeSortGPU<<<blocksPerGrid, threadsPerBlock>>>(gpuArrmerge, gpuTemp, size, width);
            checkCuda(cudaGetLastError(), "MergeSortGPU launch");
        }
        checkCuda(cudaEventRecord(stopGPU), "cudaEventRecord(stopGPU)");

        //Transfer sorted array back to CPU
        checkCuda(cudaMemcpy(arr, gpuArrmerge, bytes, cudaMemcpyDeviceToHost), "copy merge result to host");

        //Calculate Elapsed GPU time
        checkCuda(cudaEventSynchronize(stopGPU), "cudaEventSynchronize(stopGPU)");
        checkCuda(cudaEventElapsedTime(&millisecondsGPU, startGPU, stopGPU), "cudaEventElapsedTime");

        //Time the CPU and call CPU Merge Sort
        startCPU = clock();
        mergeSortCPU(carr, temp, 0, size - 1);
        endCPU = clock();
    }
    else
    {
        // The bitonic kernel has no bounds check, so launch exactly 'size'
        // threads: a single partial block for small arrays, otherwise whole
        // blocks (size is a power of two, so the division is exact).
        const int threadsPerBlock = size < MAX_THREADS_PER_BLOCK ? size : MAX_THREADS_PER_BLOCK;
        const int blocksPerGrid = size / threadsPerBlock;

        //Time the run and call GPU Bitonic Kernel
        checkCuda(cudaEventRecord(startGPU), "cudaEventRecord(startGPU)");
        // k is kept in 64 bits so the final doubling cannot overflow int.
        for (long long k = 2; k <= size; k <<= 1)
        {
            for (long long j = k >> 1; j > 0; j >>= 1)
            {
                bitonicSortGPU<<<blocksPerGrid, threadsPerBlock>>>(gpuArrbiton, static_cast<int>(j), static_cast<int>(k));
                checkCuda(cudaGetLastError(), "bitonicSortGPU launch");
            }
        }
        checkCuda(cudaEventRecord(stopGPU), "cudaEventRecord(stopGPU)");

        //Transfer Sorted array back to CPU
        checkCuda(cudaMemcpy(arr, gpuArrbiton, bytes, cudaMemcpyDeviceToHost), "copy bitonic result to host");
        checkCuda(cudaEventSynchronize(stopGPU), "cudaEventSynchronize(stopGPU)");
        checkCuda(cudaEventElapsedTime(&millisecondsGPU, startGPU, stopGPU), "cudaEventElapsedTime");

        //Time the run and call CPU Bitonic Sort
        startCPU = clock();
        bitonicSortCPU(carr, size);
        endCPU = clock();
    }

    //Calculate Elapsed CPU time
    double millisecondsCPU = static_cast<double>(endCPU - startCPU) / (CLOCKS_PER_SEC / 1000.0);

    // Display sorted GPU array
    std::cout << "\n\nSorted GPU array: ";
    if (size <= 100)
    {
        printArray(arr, size);
    }
    else
    {
        printf("\nToo Big to print. Check Variable. Automated isSorted Checker will be implemented\n");
    }

    //Display sorted CPU array
    std::cout << "\nSorted CPU array: ";
    if (size <= 100)
    {
        printArray(carr, size);
    }
    else
    {
        printf("\nToo Big to print. Check Variable. Automated isSorted Checker will be implemented\n");
    }

    //Run the array with the automated isSorted checker
    if (isSorted(arr, size))
        std::cout << "\n\nSORT CHECKER RUNNING - SUCCESFULLY SORTED GPU ARRAY" << std::endl;
    else
        std::cout << "SORT CHECKER RUNNING - !!! FAIL !!!" << std::endl;

    if (isSorted(carr, size))
        std::cout << "SORT CHECKER RUNNING - SUCCESFULLY SORTED CPU ARRAY" << std::endl;
    else
        std::cout << "SORT CHECKER RUNNING - !!! FAIL !!!" << std::endl;

    //Print the time of the runs
    std::cout << "\n\nGPU Time: " << millisecondsGPU << " ms" << std::endl;
    std::cout << "CPU Time: " << millisecondsCPU << " ms" << std::endl;

    //Release host memory
    delete[] arr;
    delete[] carr;
    delete[] temp;

    //Release GPU resources (events included)
    checkCuda(cudaEventDestroy(startGPU), "cudaEventDestroy(startGPU)");
    checkCuda(cudaEventDestroy(stopGPU), "cudaEventDestroy(stopGPU)");
    checkCuda(cudaFree(gpuArrmerge), "cudaFree(gpuArrmerge)");
    checkCuda(cudaFree(gpuArrbiton), "cudaFree(gpuArrbiton)");
    checkCuda(cudaFree(gpuTemp), "cudaFree(gpuTemp)");

    std::cout << "\n------------------------------------------------------------------------------------\n||||| END. YOU MAY RUN THIS AGAIN |||||\n------------------------------------------------------------------------------------";
    return 0;
}
--------------------------------------------------------------------------------