├── BitonicMerge.sln ├── .gitattributes ├── BitonicMerge ├── BitonicMerge.vcxproj └── kernel.cu ├── README.md └── .gitignore /BitonicMerge.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.6.33829.357 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BitonicMerge", "BitonicMerge\BitonicMerge.vcxproj", "{FF8B1809-EE3F-43C3-BBA6-0F77D2856584}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}.Debug|x64.ActiveCfg = Debug|x64 15 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}.Debug|x64.Build.0 = Debug|x64 16 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}.Release|x64.ActiveCfg = Release|x64 17 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {F45265A6-E491-4FA6-89C3-91363CAD5369} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 
8 | # 9 | # This is needed for earlier builds of msysgit that do not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /BitonicMerge/BitonicMerge.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | {FF8B1809-EE3F-43C3-BBA6-0F77D2856584} 15 | BitonicMerge 16 | 17 | 18 | 19 | Application 20 | true 21 | MultiByte 22 | v143 23 | 24 | 25 | Application 26 | false 27 | true 28 | MultiByte 29 | v143 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | true 44 | 45 | 46 | 47 | Level3 48 | Disabled 49 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 50 | 51 | 52 | true 53 | Console 54 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 55 | 56 | 57 | 64 58 | 59 | 60 | 61 | 62 | Level3 63 | MaxSpeed 64 | true 65 | true 66 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 67 | 68 | 69 | true 70 | true 71 | true 72 | Console 73 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 74 | 75 | 76 | 64 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CUDA Merge and Bitonic Sort 2 | 3 | This project provides efficient implementations of Merge Sort and 
Bitonic Sort algorithms using CUDA, enabling fast sorting of large arrays through GPU parallel processing. The project includes both CPU and GPU versions of the algorithms, along with a performance comparison to showcase the benefits of using CUDA for sorting tasks. 4 | 5 | ## Table of Contents 6 | 7 | - [Introduction](#introduction) 8 | - [Merge Sort](#merge-sort) 9 | - [Bitonic Sort](#bitonic-sort) 10 | - [Implementation](#implementation) 11 | - [Performance Comparison](#performance-comparison) 12 | - [Requirements](#requirements) 13 | - [Usage](#usage) 14 | - [Contributing](#contributing) 15 | - [License](#license) 16 | 17 | ## Introduction 18 | 19 | Sorting large datasets efficiently is a common computational challenge. Merge Sort and Bitonic Sort are well-known sorting algorithms that can be implemented using parallel processing techniques, such as those provided by CUDA, to achieve significant speedup compared to traditional CPU-based sorting methods. 20 | 21 | This project aims to provide an easy-to-use CUDA-based implementation of Merge Sort and Bitonic Sort, enabling users to sort large arrays efficiently on compatible NVIDIA GPUs. 22 | 23 | ## Merge Sort 24 | 25 | Merge Sort is a popular divide-and-conquer sorting algorithm that efficiently sorts an array by recursively dividing it into two halves, sorting each half, and then merging the sorted halves to produce the final sorted array. 26 | 27 | ## Bitonic Sort 28 | 29 | Bitonic Sort is an efficient parallel sorting algorithm that requires the input size to be a power of 2. It is based on the concept of bitonic sequences, which are sequences that first monotonically increase and then monotonically decrease or vice versa. The algorithm recursively builds a bitonic sequence, and then repeatedly merges bitonic sequences to achieve sorting. 
30 | 31 | ## Implementation 32 | 33 | The project contains the following implementations: 34 | 35 | - **CPU Merge Sort:** This is a traditional CPU-based implementation of the Merge Sort algorithm using a recursive approach. 36 | 37 | - **GPU Merge Sort:** The GPU version of Merge Sort that uses CUDA to achieve parallelism. It utilizes CUDA kernels to perform sorting operations on the GPU. 38 | 39 | - **CPU Bitonic Sort:** A CPU-based implementation of the Bitonic Sort algorithm. It requires the input size to be a power of 2. 40 | 41 | - **GPU Bitonic Sort:** The GPU version of Bitonic Sort that takes advantage of CUDA parallelism. Like the GPU Merge Sort, it uses CUDA kernels for sorting on the GPU. 42 | 43 | ## Performance Comparison 44 | 45 | The performance comparison section presents benchmark results of the CPU and GPU implementations for both Merge Sort and Bitonic Sort. It measures the execution time for each approach and demonstrates the potential speedup gained by using CUDA on compatible GPUs. 46 | 47 | ## Requirements 48 | 49 | To run this project, you need the following: 50 | 51 | - A compatible NVIDIA GPU with CUDA support. 52 | - NVIDIA CUDA Toolkit installed on your system. 53 | - C++ compiler with CUDA support (e.g., NVCC). 54 | 55 | ## Usage 56 | 57 | 1. Clone or download the project repository to your local machine. 58 | 2. Ensure you have met the requirements mentioned in the previous section. 59 | 3. Compile the source files using the appropriate C++ compiler with CUDA support. 60 | 4. Run the compiled executable to sort arrays using either Merge Sort or Bitonic Sort. 61 | 5. The program will provide sorted arrays and performance timings for both CPU and GPU implementations. 62 | 63 | ## Contributing 64 | 65 | Contributions to this project are welcome. If you find any issues or have improvements to suggest, feel free to open an issue or create a pull request. 
66 | 67 | ## License 68 | 69 | This project is licensed under the [MIT License](LICENSE). You are free to use, modify, and distribute the code as per the terms of the license. 70 | 71 | ## Outputs 72 | ![image](https://github.com/rbga/CUDA-Merge-and-Bitonic-Sort/assets/75168756/510c4afb-1aa7-4add-abba-41e2fb0db8bb) 73 | Testing the inputs and Merge Sort with a Small Array 74 | ![image](https://github.com/rbga/CUDA-Merge-and-Bitonic-Sort/assets/75168756/49b813d1-f997-49a8-bae9-216cc149de6e) 75 | Testing Bitonic Sort with a Small Array 76 | ![image](https://github.com/rbga/CUDA-Merge-and-Bitonic-Sort/assets/75168756/2348af41-6314-4964-a95b-7282e8d42dde) 77 | Merge Sort CPU vs GPU performance for a large Array 78 | ![image](https://github.com/rbga/CUDA-Merge-and-Bitonic-Sort/assets/75168756/0e2b662b-165c-43ab-9d30-51dfc6ebd132) 79 | Bitonic Sort CPU vs GPU performance for a large Array. 80 | 81 | Clearly, Bitonic Sort performs well in parallel computation, while Merge Sort performs well in sequential (linear) computation. 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Oo]ut/ 33 | [Ll]og/ 34 | [Ll]ogs/ 35 | 36 | # Visual Studio 2015/2017 cache/options directory 37 | .vs/ 38 | # Uncomment if you have tasks that create the project's static files in wwwroot 39 | #wwwroot/ 40 | 41 | # Visual Studio 2017 auto generated files 42 | Generated\ Files/ 43 | 44 | # MSTest test Results 45 | [Tt]est[Rr]esult*/ 46 | [Bb]uild[Ll]og.* 47 | 48 | # NUnit 49 | *.VisualState.xml 50 | TestResult.xml 51 | nunit-*.xml 52 | 53 | # Build Results of an ATL Project 54 | [Dd]ebugPS/ 55 | [Rr]eleasePS/ 56 | dlldata.c 57 | 58 | # Benchmark Results 59 | BenchmarkDotNet.Artifacts/ 60 | 61 | # .NET Core 62 | project.lock.json 63 | project.fragment.lock.json 64 | artifacts/ 65 | 66 | # ASP.NET Scaffolding 67 | ScaffoldingReadMe.txt 68 | 69 | # StyleCop 70 | StyleCopReport.xml 71 | 72 | # Files built by Visual Studio 73 | *_i.c 74 | *_p.c 75 | *_h.h 76 | *.ilk 77 | *.meta 78 | *.obj 79 | *.iobj 80 | *.pch 81 | *.pdb 82 | *.ipdb 83 | *.pgc 84 | *.pgd 85 | *.rsp 86 | *.sbr 87 | *.tlb 88 | *.tli 89 | *.tlh 90 | *.tmp 91 | *.tmp_proj 92 | *_wpftmp.csproj 93 | *.log 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 
118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. 
Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio LightSwitch build output 298 | **/*.HTMLClient/GeneratedArtifacts 299 | **/*.DesktopClient/GeneratedArtifacts 300 | **/*.DesktopClient/ModelManifest.xml 301 | **/*.Server/GeneratedArtifacts 302 | **/*.Server/ModelManifest.xml 303 | _Pvt_Extensions 304 | 305 | # Paket dependency manager 306 | .paket/paket.exe 307 | paket-files/ 308 | 309 | # FAKE - F# Make 310 | .fake/ 311 | 312 | # CodeRush personal settings 313 | .cr/personal 314 | 315 | # Python Tools for Visual Studio (PTVS) 316 | __pycache__/ 317 | *.pyc 318 | 319 | # Cake - Uncomment if you are using it 320 | # tools/** 321 | # !tools/packages.config 322 | 323 | # Tabs Studio 324 | *.tss 325 | 326 | # Telerik's JustMock configuration file 327 | *.jmconfig 328 | 329 | # BizTalk build output 330 | *.btp.cs 331 | *.btm.cs 332 | *.odx.cs 333 | *.xsd.cs 334 | 335 | # OpenCover UI analysis results 336 | OpenCover/ 337 | 338 | # Azure Stream Analytics local run output 339 | ASALocalRun/ 340 | 341 | # MSBuild Binary and Structured Log 342 | *.binlog 343 | 344 | # NVidia Nsight GPU debugger 
configuration file 345 | *.nvuser 346 | 347 | # MFractors (Xamarin productivity tool) working folder 348 | .mfractor/ 349 | 350 | # Local History for Visual Studio 351 | .localhistory/ 352 | 353 | # BeatPulse healthcheck temp database 354 | healthchecksdb 355 | 356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 357 | MigrationBackup/ 358 | 359 | # Ionide (cross platform F# VS Code tools) working folder 360 | .ionide/ 361 | 362 | # Fody - auto-generated XML schema 363 | FodyWeavers.xsd -------------------------------------------------------------------------------- /BitonicMerge/kernel.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define MAX_THREADS_PER_BLOCK 1024 10 | 11 | // Bitonic Sort for CPU 12 | void bitonicSortCPU(int* arr, int n) 13 | { 14 | for (int k = 2; k <= n; k *= 2) 15 | { 16 | for (int j = k / 2; j > 0; j /= 2) 17 | { 18 | for (int i = 0; i < n; i++) 19 | { 20 | int ij = i ^ j; 21 | 22 | if (ij > i) 23 | { 24 | if ((i & k) == 0) 25 | { 26 | if (arr[i] > arr[ij]) 27 | { 28 | int temp = arr[i]; 29 | arr[i] = arr[ij]; 30 | arr[ij] = temp; 31 | } 32 | } 33 | else 34 | { 35 | if (arr[i] < arr[ij]) 36 | { 37 | int temp = arr[i]; 38 | arr[i] = arr[ij]; 39 | arr[ij] = temp; 40 | } 41 | } 42 | } 43 | } 44 | } 45 | } 46 | } 47 | 48 | //GPU Kernel Implementation of Bitonic Sort 49 | __global__ void bitonicSortGPU(int* arr, int j, int k) 50 | { 51 | unsigned int i, ij; 52 | 53 | i = threadIdx.x + blockDim.x * blockIdx.x; 54 | 55 | ij = i ^ j; 56 | 57 | if (ij > i) 58 | { 59 | if ((i & k) == 0) 60 | { 61 | if (arr[i] > arr[ij]) 62 | { 63 | int temp = arr[i]; 64 | arr[i] = arr[ij]; 65 | arr[ij] = temp; 66 | } 67 | } 68 | else 69 | { 70 | if (arr[i] < arr[ij]) 71 | { 72 | int temp = arr[i]; 73 | arr[i] = arr[ij]; 74 | arr[ij] = temp; 75 | } 76 | } 77 | } 
78 | } 79 | 80 | //Device function for recursive Merge 81 | __device__ void Merge(int* arr, int* temp, int left, int middle, int right) 82 | { 83 | int i = left; 84 | int j = middle; 85 | int k = left; 86 | 87 | while (i < middle && j < right) 88 | { 89 | if (arr[i] <= arr[j]) 90 | temp[k++] = arr[i++]; 91 | else 92 | temp[k++] = arr[j++]; 93 | } 94 | 95 | while (i < middle) 96 | temp[k++] = arr[i++]; 97 | while (j < right) 98 | temp[k++] = arr[j++]; 99 | 100 | for (int x = left; x < right; x++) 101 | arr[x] = temp[x]; 102 | } 103 | 104 | //GPU Kernel for Merge Sort 105 | __global__ void MergeSortGPU(int* arr, int* temp, int n, int width) 106 | { 107 | int tid = threadIdx.x + blockDim.x * blockIdx.x; 108 | int left = tid * width; 109 | int middle = left + width / 2; 110 | int right = left + width; 111 | 112 | if (left < n && middle < n) 113 | { 114 | Merge(arr, temp, left, middle, right); 115 | } 116 | } 117 | 118 | //CPU Merge Recursive Call function 119 | void merge(int* arr, int* temp, int left, int mid, int right) 120 | { 121 | int i = left; 122 | int j = mid + 1; 123 | int k = left; 124 | 125 | while (i <= mid && j <= right) 126 | { 127 | if (arr[i] <= arr[j]) 128 | temp[k++] = arr[i++]; 129 | else 130 | temp[k++] = arr[j++]; 131 | } 132 | 133 | while (i <= mid) 134 | temp[k++] = arr[i++]; 135 | 136 | while (j <= right) 137 | temp[k++] = arr[j++]; 138 | 139 | for (int idx = left; idx <= right; ++idx) 140 | arr[idx] = temp[idx]; 141 | } 142 | 143 | //CPU Implementation of Merge Sort 144 | void mergeSortCPU(int* arr, int* temp, int left, int right) 145 | { 146 | if (left >= right) 147 | return; 148 | 149 | int mid = left + (right - left) / 2; 150 | 151 | mergeSortCPU(arr, temp, left, mid); 152 | mergeSortCPU(arr, temp, mid + 1, right); 153 | 154 | merge(arr, temp, left, mid, right); 155 | } 156 | 157 | //Function to print array 158 | void printArray(int* arr, int size) 159 | { 160 | for (int i = 0; i < size; ++i) 161 | std::cout << arr[i] << " "; 162 | std::cout 
<< std::endl; 163 | } 164 | 165 | //Automated function to check if array is sorted 166 | bool isSorted(int* arr, int size) 167 | { 168 | for (int i = 1; i < size; ++i) 169 | { 170 | if (arr[i] < arr[i - 1]) 171 | return false; 172 | } 173 | return true; 174 | } 175 | 176 | //Function to check if given number is a power of 2 177 | bool isPowerOfTwo(int num) 178 | { 179 | return num > 0 && (num & (num - 1)) == 0; 180 | } 181 | 182 | 183 | //MAIN PROGRAM 184 | int main() 185 | { 186 | std::cout << "-----------------------------------------------" << std::endl; 187 | std::cout << "CUDA MERGE AND BITONIC SORT IMPLEMENTATION" << std::endl; 188 | std::cout << "A Performance Comparison of These 2 Sorts in CPU vs GPU" << std::endl; 189 | std::cout << "-----------------------------------------------" << std::endl; 190 | int choice; 191 | std::cout << "\nSelect the type of sort:"; 192 | std::cout << "\n\t1. Merge Sort"; 193 | std::cout << "\n\t2. Bitonic Sort"; 194 | std::cout << "\nEnter your choice: "; 195 | std::cin >> choice; 196 | 197 | 198 | if (choice < 1 || choice > 2) 199 | { 200 | while (choice != 1 || choice != 2) 201 | { 202 | std::cout << "\n!!!!! WRONG CHOICE. TRY AGAIN. YOU HAVE ONLY 2 DISTINCT OPTIONS-\n"; 203 | std::cin >> choice; 204 | 205 | 206 | if (choice == 1 || choice == 2) 207 | break; 208 | } 209 | } 210 | 211 | if (choice == 1) 212 | { 213 | std::cout << "\n--------------------------------------------------------------\nMERGE SORT SELECTED\n--------------------------------------------------------------"; 214 | } 215 | else 216 | { 217 | std::cout << "\n--------------------------------------------------------------\nBITONIC SORT SELECTED\n--------------------------------------------------------------"; 218 | } 219 | 220 | int size; 221 | std::cout << "\n\nEnter the size of the array. 
Must be a power of 2:\n "; 222 | std::cin>>size; 223 | 224 | while (!isPowerOfTwo(size)) 225 | { 226 | if (!isPowerOfTwo(size)) 227 | { 228 | std::cout << "\nWrong Size, must be power of 2. Try again:\n "; 229 | std::cin>>size; 230 | } 231 | else 232 | break; 233 | } 234 | 235 | std::cout << "\n--------------------------------------------------------------\nSELECTED SORT PROCESS UNDERWAY\n--------------------------------------------------------------"; 236 | 237 | //Create CPU based Arrays 238 | int* arr = new int[size]; 239 | int* carr = new int[size]; 240 | int* temp = new int[size]; 241 | 242 | //Create GPU based arrays 243 | int* gpuArrmerge; 244 | int* gpuArrbiton; 245 | int* gpuTemp; 246 | 247 | // Initialize the array with random values 248 | srand(static_cast(time(nullptr))); 249 | for (int i = 0; i < size; ++i) 250 | { 251 | arr[i] = rand() % 100; 252 | carr[i] = arr[i]; 253 | } 254 | 255 | //Print unsorted array 256 | std::cout << "\n\nUnsorted array: "; 257 | if (size <= 100) 258 | { 259 | printArray(arr, size); 260 | } 261 | else 262 | { 263 | printf("\nToo Big to print. Check Variable. 
Automated isSorted Checker will be implemented\n"); 264 | } 265 | 266 | // Allocate memory on GPU 267 | cudaMalloc((void**)&gpuArrmerge, size * sizeof(int)); 268 | cudaMalloc((void**)&gpuTemp, size * sizeof(int)); 269 | cudaMalloc((void**)&gpuArrbiton, size * sizeof(int)); 270 | 271 | // Copy the input array to GPU memory 272 | cudaMemcpy(gpuArrmerge, arr, size * sizeof(int), cudaMemcpyHostToDevice); 273 | cudaMemcpy(gpuArrbiton, arr, size * sizeof(int), cudaMemcpyHostToDevice); 274 | 275 | // Perform GPU merge sort and measure time 276 | cudaEvent_t startGPU, stopGPU; 277 | cudaEventCreate(&startGPU); 278 | cudaEventCreate(&stopGPU); 279 | float millisecondsGPU = 0; 280 | 281 | //Initialize CPU clock counters 282 | clock_t startCPU, endCPU; 283 | 284 | //Set number of threads and blocks for kernel calls 285 | int threadsPerBlock = MAX_THREADS_PER_BLOCK; 286 | int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock; 287 | 288 | //Main If else block 289 | if (choice == 1) 290 | { 291 | //Call GPU Merge Kernel and time the run 292 | cudaEventRecord(startGPU); 293 | for (int wid = 1; wid < size; wid *= 2) 294 | { 295 | MergeSortGPU << > > (gpuArrmerge, gpuTemp, size, wid * 2); 296 | } 297 | cudaEventRecord(stopGPU); 298 | 299 | //Transfer sorted array back to CPU 300 | cudaMemcpy(arr, gpuArrmerge, size * sizeof(int), cudaMemcpyDeviceToHost); 301 | 302 | //Calculate Elapsed GPU time 303 | cudaEventSynchronize(stopGPU); 304 | cudaEventElapsedTime(&millisecondsGPU, startGPU, stopGPU); 305 | 306 | //Time the CPU and call CPU Merge Sort 307 | startCPU = clock(); 308 | mergeSortCPU(carr, temp, 0, size - 1); 309 | endCPU = clock(); 310 | } 311 | 312 | else 313 | { 314 | int j, k; 315 | 316 | //Time the run and call GPU Bitonic Kernel 317 | cudaEventRecord(startGPU); 318 | for (k = 2; k <= size; k <<= 1) 319 | { 320 | for (j = k >> 1; j > 0; j = j >> 1) 321 | { 322 | bitonicSortGPU << > > (gpuArrbiton, j, k); 323 | } 324 | } 325 | cudaEventRecord(stopGPU); 326 | 
327 | //Transfer Sorted array back to CPU 328 | cudaMemcpy(arr, gpuArrbiton, size * sizeof(int), cudaMemcpyDeviceToHost); 329 | cudaEventSynchronize(stopGPU); 330 | cudaEventElapsedTime(&millisecondsGPU, startGPU, stopGPU); 331 | 332 | //Time the run and call CPU Bitonic Sort 333 | startCPU = clock(); 334 | bitonicSortCPU(carr, size); 335 | endCPU = clock(); 336 | } 337 | 338 | //Calculate Elapsed CPU time 339 | double millisecondsCPU = static_cast(endCPU - startCPU) / (CLOCKS_PER_SEC / 1000.0); 340 | 341 | // Display sorted GPU array 342 | std::cout << "\n\nSorted GPU array: "; 343 | if (size <= 100) 344 | { 345 | printArray(arr, size); 346 | } 347 | else { 348 | printf("\nToo Big to print. Check Variable. Automated isSorted Checker will be implemented\n"); 349 | } 350 | 351 | //Display sorted CPU array 352 | std::cout << "\nSorted CPU array: "; 353 | if (size <= 100) 354 | { 355 | printArray(carr, size); 356 | } 357 | else { 358 | printf("\nToo Big to print. Check Variable. Automated isSorted Checker will be implemented\n"); 359 | } 360 | 361 | //Run the array with the automated isSorted checker 362 | if (isSorted(arr, size)) 363 | std::cout << "\n\nSORT CHECKER RUNNING - SUCCESFULLY SORTED GPU ARRAY" << std::endl; 364 | else 365 | std::cout << "SORT CHECKER RUNNING - !!! FAIL !!!" << std::endl; 366 | 367 | if (isSorted(carr, size)) 368 | std::cout << "SORT CHECKER RUNNING - SUCCESFULLY SORTED CPU ARRAY" << std::endl; 369 | else 370 | std::cout << "SORT CHECKER RUNNING - !!! FAIL !!!" 
<< std::endl; 371 | 372 | //Print the time of the runs 373 | std::cout << "\n\nGPU Time: " << millisecondsGPU << " ms" << std::endl; 374 | std::cout << "CPU Time: " << millisecondsCPU << " ms" << std::endl; 375 | 376 | //Destroy all variables 377 | delete[] arr; 378 | delete[] carr; 379 | delete[] temp; 380 | 381 | //End 382 | cudaFree(gpuArrmerge); 383 | cudaFree(gpuArrbiton); 384 | cudaFree(gpuTemp); 385 | 386 | std::cout << "\n------------------------------------------------------------------------------------\n||||| END. YOU MAY RUN THIS AGAIN |||||\n------------------------------------------------------------------------------------"; 387 | return 0; 388 | } --------------------------------------------------------------------------------