├── .github └── FUNDING.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── VkFFT_TestSuite.cpp ├── benchmark_plot ├── fp64_cuda_a100.png └── fp64_hip_mi250.png ├── benchmark_scripts ├── cuFFT_scripts │ ├── include │ │ ├── precision_cuFFT_double.h │ │ ├── precision_cuFFT_half.h │ │ ├── precision_cuFFT_r2c.h │ │ ├── precision_cuFFT_single.h │ │ ├── sample_0_benchmark_cuFFT_single.h │ │ ├── sample_1000_benchmark_cuFFT_single_2_4096.h │ │ ├── sample_1001_benchmark_cuFFT_double_2_4096.h │ │ ├── sample_1003_benchmark_cuFFT_single_3d_2_512.h │ │ ├── sample_1_benchmark_cuFFT_double.h │ │ ├── sample_2_benchmark_cuFFT_half.h │ │ ├── sample_3_benchmark_cuFFT_single_3d.h │ │ ├── sample_6_benchmark_cuFFT_single_r2c.h │ │ ├── sample_7_benchmark_cuFFT_single_Bluestein.h │ │ ├── sample_8_benchmark_cuFFT_double_Bluestein.h │ │ └── user_benchmark_cuFFT.h │ └── src │ │ ├── precision_cuFFT_double.cu │ │ ├── precision_cuFFT_half.cu │ │ ├── precision_cuFFT_r2c.cu │ │ ├── precision_cuFFT_single.cu │ │ ├── sample_0_benchmark_cuFFT_single.cu │ │ ├── sample_1000_benchmark_cuFFT_single_2_4096.cu │ │ ├── sample_1001_benchmark_cuFFT_double_2_4096.cu │ │ ├── sample_1003_benchmark_cuFFT_single_3d_2_512.cu │ │ ├── sample_1_benchmark_cuFFT_double.cu │ │ ├── sample_2_benchmark_cuFFT_half.cu │ │ ├── sample_3_benchmark_cuFFT_single_3d.cu │ │ ├── sample_6_benchmark_cuFFT_single_r2c.cu │ │ ├── sample_7_benchmark_cuFFT_single_Bluestein.cu │ │ ├── sample_8_benchmark_cuFFT_double_Bluestein.cu │ │ └── user_benchmark_cuFFT.cu ├── rocFFT_scripts │ ├── include │ │ ├── precision_rocFFT_double.h │ │ ├── precision_rocFFT_r2c.h │ │ ├── precision_rocFFT_single.h │ │ ├── sample_0_benchmark_rocFFT_single.h │ │ ├── sample_1000_benchmark_rocFFT_single_2_4096.h │ │ ├── sample_1001_benchmark_rocFFT_double_2_4096.h │ │ ├── sample_1003_benchmark_rocFFT_single_3d_2_512.h │ │ ├── sample_1_benchmark_rocFFT_double.h │ │ ├── sample_3_benchmark_rocFFT_single_3d.h │ │ ├── sample_6_benchmark_rocFFT_single_r2c.h │ │ ├── 
sample_7_benchmark_rocFFT_single_Bluestein.h │ │ ├── sample_8_benchmark_rocFFT_double_Bluestein.h │ │ └── user_benchmark_rocFFT.h │ └── src │ │ ├── precision_rocFFT_double.cpp │ │ ├── precision_rocFFT_r2c.cpp │ │ ├── precision_rocFFT_single.cpp │ │ ├── sample_0_benchmark_rocFFT_single.cpp │ │ ├── sample_1000_benchmark_rocFFT_single_2_4096.cpp │ │ ├── sample_1001_benchmark_rocFFT_double_2_4096.cpp │ │ ├── sample_1003_benchmark_rocFFT_single_3d_2_512.cpp │ │ ├── sample_1_benchmark_rocFFT_double.cpp │ │ ├── sample_3_benchmark_rocFFT_single_3d.cpp │ │ ├── sample_6_benchmark_rocFFT_single_r2c.cpp │ │ ├── sample_7_benchmark_rocFFT_single_Bluestein.cpp │ │ ├── sample_8_benchmark_rocFFT_double_Bluestein.cpp │ │ └── user_benchmark_rocFFT.cpp └── vkFFT_scripts │ ├── include │ ├── sample_0_benchmark_VkFFT_single.h │ ├── sample_1000_benchmark_VkFFT_single_2_4096.h │ ├── sample_1001_benchmark_VkFFT_double_2_4096.h │ ├── sample_1002_benchmark_VkFFT_half_2_4096.h │ ├── sample_1003_benchmark_VkFFT_single_3d_2_512.h │ ├── sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.h │ ├── sample_100_benchmark_VkFFT_single_nd_dct.h │ ├── sample_101_benchmark_VkFFT_double_nd_dct.h │ ├── sample_10_benchmark_VkFFT_single_multipleBuffers.h │ ├── sample_11_precision_VkFFT_single.h │ ├── sample_12_precision_VkFFT_double.h │ ├── sample_13_precision_VkFFT_half.h │ ├── sample_14_precision_VkFFT_single_nonPow2.h │ ├── sample_15_precision_VkFFT_single_r2c.h │ ├── sample_16_precision_VkFFT_single_dct.h │ ├── sample_17_precision_VkFFT_double_dct.h │ ├── sample_18_precision_VkFFT_double_nonPow2.h │ ├── sample_19_precision_VkFFT_quadDoubleDouble_nonPow2.h │ ├── sample_1_benchmark_VkFFT_double.h │ ├── sample_2_benchmark_VkFFT_half.h │ ├── sample_3_benchmark_VkFFT_single_3d.h │ ├── sample_4_benchmark_VkFFT_single_3d_zeropadding.h │ ├── sample_50_convolution_VkFFT_single_1d_matrix.h │ ├── sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.h │ ├── sample_52_convolution_VkFFT_single_2d_batched_r2c.h 
│ ├── sample_5_benchmark_VkFFT_single_disableReorderFourStep.h │ ├── sample_6_benchmark_VkFFT_single_r2c.h │ ├── sample_7_benchmark_VkFFT_single_Bluestein.h │ ├── sample_8_benchmark_VkFFT_double_Bluestein.h │ ├── sample_9_benchmark_VkFFT_quadDoubleDouble.h │ ├── user_benchmark_VkFFT.h │ └── utils_VkFFT.h │ └── src │ ├── sample_0_benchmark_VkFFT_single.cpp │ ├── sample_1000_benchmark_VkFFT_single_2_4096.cpp │ ├── sample_1001_benchmark_VkFFT_double_2_4096.cpp │ ├── sample_1002_benchmark_VkFFT_half_2_4096.cpp │ ├── sample_1003_benchmark_VkFFT_single_3d_2_512.cpp │ ├── sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.cpp │ ├── sample_100_benchmark_VkFFT_single_nd_dct.cpp │ ├── sample_101_benchmark_VkFFT_double_nd_dct.cpp │ ├── sample_10_benchmark_VkFFT_single_multipleBuffers.cpp │ ├── sample_11_precision_VkFFT_single.cpp │ ├── sample_12_precision_VkFFT_double.cpp │ ├── sample_13_precision_VkFFT_half.cpp │ ├── sample_14_precision_VkFFT_single_nonPow2.cpp │ ├── sample_15_precision_VkFFT_single_r2c.cpp │ ├── sample_16_precision_VkFFT_single_dct.cpp │ ├── sample_17_precision_VkFFT_double_dct.cpp │ ├── sample_18_precision_VkFFT_double_nonPow2.cpp │ ├── sample_19_precision_VkFFT_quadDoubleDouble_nonPow2.cpp │ ├── sample_1_benchmark_VkFFT_double.cpp │ ├── sample_2_benchmark_VkFFT_half.cpp │ ├── sample_3_benchmark_VkFFT_single_3d.cpp │ ├── sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp │ ├── sample_50_convolution_VkFFT_single_1d_matrix.cpp │ ├── sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp │ ├── sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp │ ├── sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp │ ├── sample_6_benchmark_VkFFT_single_r2c.cpp │ ├── sample_7_benchmark_VkFFT_single_Bluestein.cpp │ ├── sample_8_benchmark_VkFFT_double_Bluestein.cpp │ ├── sample_9_benchmark_VkFFT_quadDoubleDouble.cpp │ ├── user_benchmark_VkFFT.cpp │ └── utils_VkFFT.cpp ├── documentation ├── VkFFT_API_guide.lyx ├── VkFFT_API_guide.pdf └── VkFFT_API_guide.tex 
├── half_lib └── half.hpp ├── metal-cpp ├── Foundation │ ├── Foundation.hpp │ ├── NSArray.hpp │ ├── NSAutoreleasePool.hpp │ ├── NSBundle.hpp │ ├── NSData.hpp │ ├── NSDate.hpp │ ├── NSDefines.hpp │ ├── NSDictionary.hpp │ ├── NSEnumerator.hpp │ ├── NSError.hpp │ ├── NSLock.hpp │ ├── NSNotification.hpp │ ├── NSNumber.hpp │ ├── NSObjCRuntime.hpp │ ├── NSObject.hpp │ ├── NSPrivate.hpp │ ├── NSProcessInfo.hpp │ ├── NSRange.hpp │ ├── NSString.hpp │ ├── NSTypes.hpp │ └── NSURL.hpp ├── LICENSE.txt ├── Metal │ ├── MTLAccelerationStructure.hpp │ ├── MTLAccelerationStructureCommandEncoder.hpp │ ├── MTLAccelerationStructureTypes.hpp │ ├── MTLArgument.hpp │ ├── MTLArgumentEncoder.hpp │ ├── MTLBinaryArchive.hpp │ ├── MTLBlitCommandEncoder.hpp │ ├── MTLBlitPass.hpp │ ├── MTLBuffer.hpp │ ├── MTLCaptureManager.hpp │ ├── MTLCaptureScope.hpp │ ├── MTLCommandBuffer.hpp │ ├── MTLCommandEncoder.hpp │ ├── MTLCommandQueue.hpp │ ├── MTLComputeCommandEncoder.hpp │ ├── MTLComputePass.hpp │ ├── MTLComputePipeline.hpp │ ├── MTLCounters.hpp │ ├── MTLDefines.hpp │ ├── MTLDepthStencil.hpp │ ├── MTLDevice.hpp │ ├── MTLDrawable.hpp │ ├── MTLDynamicLibrary.hpp │ ├── MTLEvent.hpp │ ├── MTLFence.hpp │ ├── MTLFunctionConstantValues.hpp │ ├── MTLFunctionDescriptor.hpp │ ├── MTLFunctionHandle.hpp │ ├── MTLFunctionLog.hpp │ ├── MTLFunctionStitching.hpp │ ├── MTLHeaderBridge.hpp │ ├── MTLHeap.hpp │ ├── MTLIndirectCommandBuffer.hpp │ ├── MTLIndirectCommandEncoder.hpp │ ├── MTLIntersectionFunctionTable.hpp │ ├── MTLLibrary.hpp │ ├── MTLLinkedFunctions.hpp │ ├── MTLParallelRenderCommandEncoder.hpp │ ├── MTLPipeline.hpp │ ├── MTLPixelFormat.hpp │ ├── MTLPrivate.hpp │ ├── MTLRasterizationRate.hpp │ ├── MTLRenderCommandEncoder.hpp │ ├── MTLRenderPass.hpp │ ├── MTLRenderPipeline.hpp │ ├── MTLResource.hpp │ ├── MTLResourceStateCommandEncoder.hpp │ ├── MTLResourceStatePass.hpp │ ├── MTLSampler.hpp │ ├── MTLStageInputOutputDescriptor.hpp │ ├── MTLTexture.hpp │ ├── MTLTypes.hpp │ ├── MTLVertexDescriptor.hpp │ ├── 
MTLVisibleFunctionTable.hpp │ └── Metal.hpp ├── QuartzCore │ ├── CADefines.hpp │ ├── CAMetalDrawable.hpp │ ├── CAPrivate.hpp │ └── QuartzCore.hpp ├── README.md └── SingleHeader │ └── MakeSingleHeader.py ├── precision_results ├── FP32_precision.png └── FP64_precision.png └── vkFFT ├── vkFFT.h └── vkFFT ├── vkFFT_AppManagement ├── vkFFT_DeleteApp.h ├── vkFFT_InitializeApp.h └── vkFFT_RunApp.h ├── vkFFT_CodeGen ├── vkFFT_KernelsLevel0 │ ├── vkFFT_KernelStartEnd.h │ ├── vkFFT_KernelUtils.h │ ├── vkFFT_MemoryManagement │ │ ├── vkFFT_MemoryInitialization │ │ │ ├── vkFFT_Constants.h │ │ │ ├── vkFFT_InputOutput.h │ │ │ ├── vkFFT_InputOutputLayout.h │ │ │ ├── vkFFT_PushConstants.h │ │ │ ├── vkFFT_Registers.h │ │ │ └── vkFFT_SharedMemory.h │ │ └── vkFFT_MemoryTransfers │ │ │ └── vkFFT_Transfers.h │ └── vkFFT_Zeropad.h ├── vkFFT_KernelsLevel1 │ ├── PrePostProcessing │ │ ├── vkFFT_4step.h │ │ ├── vkFFT_Bluestein.h │ │ ├── vkFFT_Convolution.h │ │ ├── vkFFT_R2C.h │ │ └── vkFFT_R2R.h │ ├── vkFFT_RaderKernels.h │ ├── vkFFT_RadixKernels.h │ ├── vkFFT_RadixShuffle.h │ ├── vkFFT_RadixStage.h │ ├── vkFFT_ReadWrite.h │ └── vkFFT_RegisterBoost.h ├── vkFFT_KernelsLevel2 │ ├── vkFFT_FFT.h │ └── vkFFT_R2C_even_decomposition.h ├── vkFFT_MathUtils │ └── vkFFT_MathUtils.h └── vkFFT_StringManagement │ └── vkFFT_StringManager.h ├── vkFFT_PlanManagement ├── vkFFT_API_handles │ ├── vkFFT_CompileKernel.h │ ├── vkFFT_DeletePlan.h │ ├── vkFFT_DispatchPlan.h │ ├── vkFFT_InitAPIParameters.h │ ├── vkFFT_ManageMemory.h │ └── vkFFT_UpdateBuffers.h ├── vkFFT_HostFunctions │ ├── vkFFT_AxisBlockSplitter.h │ ├── vkFFT_ManageLUT.h │ ├── vkFFT_RecursiveFFTGenerators.h │ └── vkFFT_Scheduler.h └── vkFFT_Plans │ ├── vkFFT_Plan_FFT.h │ └── vkFFT_Plan_R2C.h └── vkFFT_Structs └── vkFFT_Structs.h /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: ["https://paypal.me/DTolm"] 2 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 - present Dmitrii Tolmachev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /benchmark_plot/fp64_cuda_a100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DTolm/VkFFT/066a17c17068c0f11c9298d848c2976c71fad1c1/benchmark_plot/fp64_cuda_a100.png -------------------------------------------------------------------------------- /benchmark_plot/fp64_hip_mi250.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DTolm/VkFFT/066a17c17068c0f11c9298d848c2976c71fad1c1/benchmark_plot/fp64_hip_mi250.png -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/precision_cuFFT_double.h: -------------------------------------------------------------------------------- 1 | void launch_precision_cuFFT_double(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims); -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/precision_cuFFT_half.h: -------------------------------------------------------------------------------- 1 | void launch_precision_cuFFT_half(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims); -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/precision_cuFFT_r2c.h: -------------------------------------------------------------------------------- 1 | void launch_precision_cuFFT_r2c(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims); -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/precision_cuFFT_single.h: -------------------------------------------------------------------------------- 1 | void launch_precision_cuFFT_single(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims); 
-------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/sample_0_benchmark_cuFFT_single.h: -------------------------------------------------------------------------------- 1 | void sample_0_benchmark_cuFFT_single(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/sample_1000_benchmark_cuFFT_single_2_4096.h: -------------------------------------------------------------------------------- 1 | void sample_1000_benchmark_cuFFT_single_2_4096(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/sample_1001_benchmark_cuFFT_double_2_4096.h: -------------------------------------------------------------------------------- 1 | void sample_1001_benchmark_cuFFT_double_2_4096(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/sample_1003_benchmark_cuFFT_single_3d_2_512.h: -------------------------------------------------------------------------------- 1 | void sample_1003_benchmark_cuFFT_single_3d_2_512(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/sample_1_benchmark_cuFFT_double.h: -------------------------------------------------------------------------------- 1 | void sample_1_benchmark_cuFFT_double(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/include/sample_2_benchmark_cuFFT_half.h: -------------------------------------------------------------------------------- 1 | void sample_2_benchmark_cuFFT_half(bool file_output, FILE* 
// Example parameter bundle used to hand a user-defined system to the cuFFT benchmark.
// NOTE(review): the meaning of the individual fields is not visible in this header;
// confirm against user_benchmark_cuFFT.cu before relying on the hints below.
struct cuFFTUserSystemParameters {
	uint64_t X;   // presumably the size along the first axis - TODO confirm
	uint64_t Y;   // presumably the size along the second axis - TODO confirm
	uint64_t Z;   // presumably the size along the third axis - TODO confirm
	uint64_t P;
	uint64_t B;
	uint64_t N;
	uint64_t R2C; // presumably a flag selecting a real-to-complex transform - TODO confirm
};

// Runs the user-configurable cuFFT benchmark described by userParams on CUDA device device_id.
// When file_output is true the report is expected to be written to output as well
// (NOTE(review): implementation not visible from this header - confirm).
void user_benchmark_cuFFT(bool file_output, FILE* output, cuFFTUserSystemParameters* userParams, int device_id);
-------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/src/precision_cuFFT_double.cu: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //CUDA parts 15 | #include "cuda_runtime.h" 16 | #include "device_launch_parameters.h" 17 | #include 18 | 19 | void launch_precision_cuFFT_double(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims) 20 | { 21 | cudaSetDevice(device_id); 22 | cufftHandle planZ2Z; 23 | cufftDoubleComplex* dataC; 24 | cudaMalloc((void**)&dataC, sizeof(cufftDoubleComplex) * dims[0] * dims[1] * dims[2]); 25 | cudaMemcpy(dataC, inputC, sizeof(cufftDoubleComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyHostToDevice); 26 | if (cudaGetLastError() != cudaSuccess) { 27 | fprintf(stderr, "Cuda error: Failed to allocate\n"); 28 | return; 29 | } 30 | switch (dims[4]) { 31 | case 1: 32 | cufftPlan1d(&planZ2Z, dims[0], CUFFT_Z2Z, 1); 33 | break; 34 | case 2: 35 | cufftPlan2d(&planZ2Z, dims[1], dims[0], CUFFT_Z2Z); 36 | break; 37 | case 3: 38 | cufftPlan3d(&planZ2Z, dims[2], dims[1], dims[0], CUFFT_Z2Z); 39 | break; 40 | } 41 | for (int i = 0; i < 1; i++) { 42 | cufftExecZ2Z(planZ2Z, dataC, dataC, -1); 43 | } 44 | cudaDeviceSynchronize(); 45 | cudaMemcpy(output_cuFFT, dataC, sizeof(cufftDoubleComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyDeviceToHost); 46 | cudaDeviceSynchronize(); 47 | cufftDestroy(planZ2Z); 48 | cudaFree(dataC); 49 | } 50 | -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/src/precision_cuFFT_half.cu: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 
#include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //CUDA parts 15 | #include "cuda_runtime.h" 16 | #include "device_launch_parameters.h" 17 | #include 18 | #include 19 | #include 20 | 21 | void launch_precision_cuFFT_half(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims) 22 | { 23 | cudaSetDevice(device_id); 24 | cufftHandle planHalf; 25 | half2* dataC; 26 | cudaMalloc((void**)&dataC, sizeof(half2) * dims[0] * dims[1] * dims[2]); 27 | cudaMemcpy(dataC, inputC, sizeof(half2) * dims[0] * dims[1] * dims[2], cudaMemcpyHostToDevice); 28 | if (cudaGetLastError() != cudaSuccess) { 29 | fprintf(stderr, "Cuda error: Failed to allocate\n"); 30 | return; 31 | } 32 | uint64_t sizeCUDA; 33 | cufftResult res = cufftCreate(&planHalf); 34 | size_t ws = 0; 35 | long long local_dims[3]; 36 | switch (dims[4]) { 37 | case 1: 38 | local_dims[0] = (long long)dims[0]; 39 | local_dims[1] = (long long)dims[1]; 40 | local_dims[2] = (long long)dims[2]; 41 | break; 42 | case 2: 43 | local_dims[0] = (long long)dims[1]; 44 | local_dims[1] = (long long)dims[0]; 45 | local_dims[2] = (long long)dims[2]; 46 | break; 47 | case 3: 48 | local_dims[0] = (long long)dims[2]; 49 | local_dims[1] = (long long)dims[1]; 50 | local_dims[2] = (long long)dims[0]; 51 | break; 52 | } 53 | res = cufftXtMakePlanMany( 54 | planHalf, dims[4], local_dims, NULL, 1, 1, CUDA_C_16F, 55 | NULL, 1, 1, CUDA_C_16F, 1, &ws, CUDA_C_16F); 56 | 57 | for (int i = 0; i < 1; i++) { 58 | res = cufftXtExec(planHalf, dataC, dataC, -1); 59 | } 60 | cudaDeviceSynchronize(); 61 | cudaMemcpy(output_cuFFT, dataC, sizeof(half2) * dims[0] * dims[1] * dims[2], cudaMemcpyDeviceToHost); 62 | cudaDeviceSynchronize(); 63 | cufftDestroy(planHalf); 64 | cudaFree(dataC); 65 | } 66 | -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/src/precision_cuFFT_r2c.cu: 
-------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //CUDA parts 15 | #include "cuda_runtime.h" 16 | #include "device_launch_parameters.h" 17 | #include 18 | 19 | void launch_precision_cuFFT_r2c(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims) 20 | { 21 | cudaSetDevice(device_id); 22 | cufftHandle planR2C; 23 | cufftReal* dataR; 24 | cufftComplex* dataC; 25 | cudaMalloc((void**)&dataR, sizeof(cufftComplex) * (dims[0]/2+1) * dims[1] * dims[2]); 26 | cudaMalloc((void**)&dataC, sizeof(cufftComplex) * (dims[0] / 2 + 1) * dims[1] * dims[2]); 27 | cudaMemcpy(dataR, inputC, sizeof(cufftReal) * dims[0] * dims[1] * dims[2], cudaMemcpyHostToDevice); 28 | if (cudaGetLastError() != cudaSuccess) { 29 | fprintf(stderr, "Cuda error: Failed to allocate\n"); 30 | return; 31 | } 32 | switch (dims[4]) { 33 | case 1: 34 | cufftPlan1d(&planR2C, dims[0], CUFFT_R2C, 1); 35 | break; 36 | case 2: 37 | cufftPlan2d(&planR2C, dims[1], dims[0], CUFFT_R2C); 38 | break; 39 | case 3: 40 | cufftPlan3d(&planR2C, dims[2], dims[1], dims[0], CUFFT_R2C); 41 | break; 42 | } 43 | for (int i = 0; i < 1; i++) { 44 | cufftExecR2C(planR2C, dataR, dataC); 45 | } 46 | cudaDeviceSynchronize(); 47 | cufftDestroy(planR2C); 48 | switch (dims[4]) { 49 | case 1: 50 | cufftPlan1d(&planR2C, dims[0], CUFFT_C2R, 1); 51 | break; 52 | case 2: 53 | cufftPlan2d(&planR2C, dims[1], dims[0], CUFFT_C2R); 54 | break; 55 | case 3: 56 | cufftPlan3d(&planR2C, dims[2], dims[1], dims[0], CUFFT_C2R); 57 | break; 58 | } 59 | for (int i = 0; i < 1; i++) { 60 | cufftExecC2R(planR2C, dataC, dataR); 61 | } 62 | cudaDeviceSynchronize(); 63 | cudaMemcpy(output_cuFFT, dataR, sizeof(float) * (dims[0] ) * dims[1] * dims[2], cudaMemcpyDeviceToHost); 64 | cudaDeviceSynchronize(); 65 | 
cufftDestroy(planR2C); 66 | cudaFree(dataR); 67 | cudaFree(dataC); 68 | } 69 | -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/src/precision_cuFFT_single.cu: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //CUDA parts 15 | #include "cuda_runtime.h" 16 | #include "device_launch_parameters.h" 17 | #include 18 | 19 | void launch_precision_cuFFT_single(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims) 20 | { 21 | cudaSetDevice(device_id); 22 | cufftHandle planC2C; 23 | cufftComplex* dataC; 24 | cudaMalloc((void**)&dataC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2]); 25 | cudaMemcpy(dataC, inputC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyHostToDevice); 26 | if (cudaGetLastError() != cudaSuccess) { 27 | fprintf(stderr, "Cuda error: Failed to allocate\n"); 28 | return; 29 | } 30 | switch (dims[4]) { 31 | case 1: 32 | cufftPlan1d(&planC2C, dims[0], CUFFT_C2C, 1); 33 | break; 34 | case 2: 35 | cufftPlan2d(&planC2C, dims[1], dims[0], CUFFT_C2C); 36 | break; 37 | case 3: 38 | cufftPlan3d(&planC2C, dims[2], dims[1], dims[0], CUFFT_C2C); 39 | break; 40 | } 41 | for (int i = 0; i < 1; i++) { 42 | cufftExecC2C(planC2C, dataC, dataC, -1); 43 | } 44 | cudaDeviceSynchronize(); 45 | cudaMemcpy(output_cuFFT, dataC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyDeviceToHost); 46 | cudaDeviceSynchronize(); 47 | cufftDestroy(planC2C); 48 | cudaFree(dataC); 49 | } 50 | -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/src/sample_0_benchmark_cuFFT_single.cu: -------------------------------------------------------------------------------- 1 | //general parts 2 | 
#include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //CUDA parts 15 | #include "cuda_runtime.h" 16 | #include "device_launch_parameters.h" 17 | #include 18 | 19 | #define GROUP 1 20 | 21 | 22 | void sample_0_benchmark_cuFFT_single(bool file_output, FILE* output, int device_id) 23 | { 24 | 25 | const int num_runs = 3; 26 | if (file_output) 27 | fprintf(output, "0 - cuFFT FFT + iFFT C2C benchmark 1D batched in single precision\n"); 28 | printf("0 - cuFFT FFT + iFFT C2C benchmark 1D batched in single precision\n"); 29 | cudaSetDevice(device_id); 30 | double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples 31 | cufftComplex* inputC = (cufftComplex*)malloc((uint64_t)sizeof(cufftComplex)*pow(2, 27)); 32 | for (uint64_t i = 0; i < pow(2, 27); i++) { 33 | inputC[i].x = 2 * ((float)rand()) / RAND_MAX - 1.0; 34 | inputC[i].y = 2 * ((float)rand()) / RAND_MAX - 1.0; 35 | } 36 | for (int n = 0; n < 26; n++) { 37 | double run_time[num_runs][2]; 38 | for (int r = 0; r < num_runs; r++) { 39 | cufftHandle planC2C; 40 | cufftComplex* dataC; 41 | 42 | uint64_t dims[3]; 43 | dims[0] = 4 * pow(2, n); //Multidimensional FFT dimensions sizes (default 1). For best performance (and stability), order dimensions in descendant size order as: x>y>z. 44 | if (n == 0) dims[0] = 4096; 45 | dims[1] = 64* 32 * pow(2, 16)/dims[0]; 46 | //dims[1] = (dims[1] > 32768) ? 
32768 : dims[1]; 47 | if (dims[1] == 0) dims[1] = 1; 48 | dims[2] = 1; 49 | 50 | cudaMalloc((void**)&dataC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2]); 51 | 52 | cudaMemcpy(dataC, inputC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyHostToDevice); 53 | if (cudaGetLastError() != cudaSuccess) { 54 | fprintf(stderr, "Cuda error: Failed to allocate\n"); 55 | return; 56 | } 57 | uint64_t sizeCUDA; 58 | switch (1) { 59 | case 1: 60 | cufftPlan1d(&planC2C, dims[0], CUFFT_C2C, dims[1]); 61 | cufftGetSize1d(planC2C, dims[0], CUFFT_C2C, dims[1], (size_t*)&sizeCUDA); 62 | break; 63 | case 2: 64 | cufftPlan2d(&planC2C, dims[1], dims[0], CUFFT_C2C); 65 | break; 66 | case 3: 67 | cufftPlan3d(&planC2C, dims[2], dims[1], dims[0], CUFFT_C2C); 68 | break; 69 | } 70 | 71 | float totTime = 0; 72 | uint64_t cuBufferSize = sizeof(float) * 2 * dims[0] * dims[1] * dims[2]; 73 | uint64_t num_iter = ((3*4096 * 1024.0 * 1024.0) / cuBufferSize > 1000) ? 1000 : (3*4096 * 1024.0 * 1024.0) / cuBufferSize; 74 | if (num_iter == 0) num_iter = 1; 75 | 76 | std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now(); 77 | for (int i = 0; i < num_iter; i++) { 78 | 79 | cufftExecC2C(planC2C, dataC, dataC, -1); 80 | cufftExecC2C(planC2C, dataC, dataC, 1); 81 | } 82 | cudaDeviceSynchronize(); 83 | std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now(); 84 | totTime = (std::chrono::duration_cast(timeEnd - timeSubmit).count() * 0.001) / num_iter; 85 | run_time[r][0] = totTime; 86 | if (n > 0) { 87 | if (r == num_runs - 1) { 88 | double std_error = 0; 89 | double avg_time = 0; 90 | for (uint64_t t = 0; t < num_runs; t++) { 91 | avg_time += run_time[t][0]; 92 | } 93 | avg_time /= num_runs; 94 | for (uint64_t t = 0; t < num_runs; t++) { 95 | std_error += (run_time[t][0] - avg_time) * (run_time[t][0] - avg_time); 96 | } 97 | std_error = sqrt(std_error / num_runs); 98 | if (file_output) 99 | fprintf(output, "cuFFT System: %" PRIu64 " %" 
PRIu64 "x%" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 "\n", (uint64_t)log2(dims[0]), dims[0], dims[1], cuBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)cuBufferSize / 1024) / avg_time)); 100 | 101 | printf("cuFFT System: %" PRIu64 " %" PRIu64 "x%" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 "\n", (uint64_t)log2(dims[0]), dims[0], dims[1], cuBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)cuBufferSize / 1024) / avg_time)); 102 | benchmark_result[0] += ((double)cuBufferSize / 1024) / avg_time; 103 | } 104 | 105 | } 106 | cufftDestroy(planC2C); 107 | cudaFree(dataC); 108 | cudaDeviceSynchronize(); 109 | //cufftComplex* output_cuFFT = (cufftComplex*)(malloc(sizeof(cufftComplex) * dims[0] * dims[1] * dims[2])); 110 | //cudaMemcpy(output_cuFFT, dataC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyDeviceToHost); 111 | //cudaDeviceSynchronize(); 112 | 113 | 114 | } 115 | } 116 | free(inputC); 117 | benchmark_result[0] /= (26 - 1); 118 | if (file_output) 119 | fprintf(output, "Benchmark score cuFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 120 | printf("Benchmark score cuFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 121 | 122 | } 123 | -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/src/sample_1000_benchmark_cuFFT_single_2_4096.cu: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //CUDA parts 15 | #include "cuda_runtime.h" 16 | #include "device_launch_parameters.h" 17 | #include 18 | 19 | #define GROUP 1 20 | 21 | 22 | void 
sample_1000_benchmark_cuFFT_single_2_4096(bool file_output, FILE* output, int device_id) 23 | { 24 | 25 | const int num_runs = 3; 26 | if (file_output) 27 | fprintf(output, "1000 - cuFFT FFT + iFFT C2C benchmark 1D batched in single precision: all supported systems from 2 to 4096\n"); 28 | printf("1000 - FFT + iFFT C2C benchmark 1D batched in single precision: all supported systems from 2 to 4096\n"); 29 | cudaSetDevice(device_id); 30 | double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples 31 | cufftComplex* inputC = (cufftComplex*)malloc((uint64_t)sizeof(cufftComplex) * pow(2, 27)); 32 | for (uint64_t i = 0; i < pow(2, 27); i++) { 33 | inputC[i].x = 2 * ((float)rand()) / RAND_MAX - 1.0; 34 | inputC[i].y = 2 * ((float)rand()) / RAND_MAX - 1.0; 35 | } 36 | int num_systems = 0; 37 | for (int n = 1; n < 4097; n++) { 38 | double run_time[num_runs][2]; 39 | for (int r = 0; r < num_runs; r++) { 40 | cufftHandle planC2C; 41 | cufftComplex* dataC; 42 | 43 | uint64_t dims[3]; 44 | 45 | dims[0] = n; 46 | if (n == 1) dims[0] = 4096; 47 | uint64_t temp = dims[0]; 48 | 49 | /*for (uint64_t j = 2; j < 14; j++) 50 | { 51 | if (temp % j == 0) { 52 | temp /= j; 53 | j = 1; 54 | } 55 | } 56 | if (temp != 1) break;*/ 57 | dims[1] = pow(2, (uint64_t)log2(64 * 32 * pow(2, 16) / dims[0])); 58 | if (dims[1] < 1) dims[1] = 1; 59 | dims[2] = 1; 60 | 61 | cudaMalloc((void**)&dataC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2]); 62 | 63 | cudaMemcpy(dataC, inputC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyHostToDevice); 64 | if (cudaGetLastError() != cudaSuccess) { 65 | fprintf(stderr, "Cuda error: Failed to allocate\n"); 66 | return; 67 | } 68 | uint64_t sizeCUDA; 69 | switch (1) { 70 | case 1: 71 | cufftPlan1d(&planC2C, dims[0], CUFFT_C2C, dims[1]); 72 | cufftGetSize1d(planC2C, dims[0], CUFFT_C2C, dims[1], (size_t*)&sizeCUDA); 73 | break; 74 | case 2: 75 | cufftPlan2d(&planC2C, dims[1], dims[0], CUFFT_C2C); 
76 | break; 77 | case 3: 78 | cufftPlan3d(&planC2C, dims[2], dims[1], dims[0], CUFFT_C2C); 79 | break; 80 | } 81 | 82 | float totTime = 0; 83 | uint64_t cuBufferSize = sizeof(float) * 2 * dims[0] * dims[1] * dims[2]; 84 | uint64_t num_iter = ((3 * 4096 * 1024.0 * 1024.0) / cuBufferSize > 1000) ? 1000 : (3 * 4096 * 1024.0 * 1024.0) / cuBufferSize; 85 | if (num_iter == 0) num_iter = 1; 86 | 87 | std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now(); 88 | for (int i = 0; i < num_iter; i++) { 89 | 90 | cufftExecC2C(planC2C, dataC, dataC, -1); 91 | cufftExecC2C(planC2C, dataC, dataC, 1); 92 | } 93 | cudaDeviceSynchronize(); 94 | std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now(); 95 | totTime = (std::chrono::duration_cast(timeEnd - timeSubmit).count() * 0.001) / num_iter; 96 | run_time[r][0] = totTime; 97 | if (n > 1) { 98 | if (r == num_runs - 1) { 99 | num_systems++; 100 | double std_error = 0; 101 | double avg_time = 0; 102 | for (uint64_t t = 0; t < num_runs; t++) { 103 | avg_time += run_time[t][0]; 104 | } 105 | avg_time /= num_runs; 106 | for (uint64_t t = 0; t < num_runs; t++) { 107 | std_error += (run_time[t][0] - avg_time) * (run_time[t][0] - avg_time); 108 | } 109 | std_error = sqrt(std_error / num_runs); 110 | if (file_output) 111 | fprintf(output, "cuFFT System: %" PRIu64 " %" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 " bandwidth: %0.1f\n", dims[0], dims[1], cuBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)cuBufferSize / 1024) / avg_time), cuBufferSize / 1024.0 / 1024.0 / 1.024 * 4 / avg_time); 112 | 113 | printf("cuFFT System: %" PRIu64 " %" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 " bandwidth: %0.1f\n", dims[0], dims[1], cuBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)cuBufferSize / 
1024) / avg_time), cuBufferSize / 1024.0 / 1024.0 / 1.024 * 4 / avg_time); 114 | benchmark_result[0] += ((double)cuBufferSize / 1024) / avg_time; 115 | } 116 | 117 | } 118 | cufftDestroy(planC2C); 119 | cudaFree(dataC); 120 | cudaDeviceSynchronize(); 121 | //cufftComplex* output_cuFFT = (cufftComplex*)(malloc(sizeof(cufftComplex) * dims[0] * dims[1] * dims[2])); 122 | //cudaMemcpy(output_cuFFT, dataC, sizeof(cufftComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyDeviceToHost); 123 | //cudaDeviceSynchronize(); 124 | 125 | 126 | } 127 | } 128 | free(inputC); 129 | benchmark_result[0] /= (num_systems); 130 | if (file_output) 131 | fprintf(output, "Benchmark score cuFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 132 | printf("Benchmark score cuFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 133 | 134 | } 135 | -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/src/sample_1003_benchmark_cuFFT_single_3d_2_512.cu: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //CUDA parts 15 | #include "cuda_runtime.h" 16 | #include "device_launch_parameters.h" 17 | #include 18 | 19 | #define GROUP 1 20 | 21 | 22 | 
// 1003 - cuFFT forward + inverse C2C benchmark over cubic 3D systems in single precision.
//
// For every edge length n in [2, 512] an n*n*n complex buffer is uploaded, a 3D C2C plan
// is created and forward + inverse transform pairs are timed. Each size is measured
// num_runs times; the mean and deviation of those runs are reported. The loop starts at
// n == 1, which is remapped to a 512^3 cube and never reported - presumably a warm-up pass.
//
// file_output - when true, every report line is also written to `output`.
// output      - stream used only when file_output is true.
// device_id   - device ordinal handed to cudaSetDevice.
//
// On a host/device allocation, upload or plan-creation failure a message is written to
// stderr, all buffers owned by this function are released and the function returns early
// without printing a score.
void sample_1003_benchmark_cuFFT_single_3d_2_512(bool file_output, FILE* output, int device_id)
{
	static const char report_format[] = "cuFFT System: %" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 " bandwidth: %0.1f\n";

	if (file_output)
		fprintf(output, "1003 - cuFFT FFT + iFFT C2C multidimensional benchmark in single precision: all supported cubes from 2 to 512\n");
	printf("1003 - cuFFT FFT + iFFT C2C multidimensional benchmark in single precision: all supported cubes from 2 to 512\n");
	cudaSetDevice(device_id);

	const int num_runs = 3;
	// 2^27 elements == 512^3, i.e. exactly the largest cube uploaded below.
	const uint64_t num_input_elements = (uint64_t)1 << 27;

	// Averaged result = sum(system_size / iteration_time) / num_benchmark_samples.
	double benchmark_result = 0;

	cufftComplex* inputC = (cufftComplex*)malloc(sizeof(cufftComplex) * num_input_elements);
	if (inputC == NULL) {
		// The buffer is 1 GiB; dereferencing a failed allocation would crash the whole suite.
		fprintf(stderr, "Host error: Failed to allocate\n");
		return;
	}
	for (uint64_t i = 0; i < num_input_elements; i++) {
		inputC[i].x = 2 * ((float)rand()) / RAND_MAX - 1.0;
		inputC[i].y = 2 * ((float)rand()) / RAND_MAX - 1.0;
	}

	int num_systems = 0;
	for (int n = 1; n < 513; n++) {
		double run_time[num_runs];
		for (int r = 0; r < num_runs; r++) {
			// n == 1 runs the largest cube; its timings are never reported (see n > 1 below).
			const uint64_t edge = (n == 1) ? 512 : (uint64_t)n;
			const uint64_t dims[3] = { edge, edge, edge };
			const uint64_t cuBufferSize = sizeof(float) * 2 * dims[0] * dims[1] * dims[2];

			cufftComplex* dataC = NULL;
			cudaError_t status = cudaMalloc((void**)&dataC, cuBufferSize);
			if (status == cudaSuccess)
				status = cudaMemcpy(dataC, inputC, cuBufferSize, cudaMemcpyHostToDevice);
			if (status != cudaSuccess) {
				fprintf(stderr, "Cuda error: Failed to allocate\n");
				// Release both buffers: the early return used to leak the host input.
				if (dataC != NULL) cudaFree(dataC);
				free(inputC);
				return;
			}

			cufftHandle planC2C;
			if (cufftPlan3d(&planC2C, dims[2], dims[1], dims[0], CUFFT_C2C) != CUFFT_SUCCESS) {
				// Timing an invalid plan would only measure how fast cuFFT rejects it.
				fprintf(stderr, "Cuda error: Failed to create cuFFT plan\n");
				cudaFree(dataC);
				free(inputC);
				return;
			}

			// Iteration count: 4096 MiB divided by the buffer size, clamped to [1, 1000].
			uint64_t num_iter = ((4096 * 1024.0 * 1024.0) / cuBufferSize > 1000) ? 1000 : (4096 * 1024.0 * 1024.0) / cuBufferSize;
			if (num_iter == 0) num_iter = 1;

			const std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
			for (uint64_t i = 0; i < num_iter; i++) {
				cufftExecC2C(planC2C, dataC, dataC, CUFFT_FORWARD);
				cufftExecC2C(planC2C, dataC, dataC, CUFFT_INVERSE);
			}
			cudaDeviceSynchronize();
			const std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();

			// Microseconds * 0.001 -> milliseconds per forward+inverse pair. Kept in single
			// precision so the stored timings match the previous implementation.
			const float totTime = (std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001) / num_iter;
			run_time[r] = totTime;

			// Report once per system, after its last run.
			if (n > 1 && r == num_runs - 1) {
				num_systems++;

				double avg_time = 0;
				for (int t = 0; t < num_runs; t++)
					avg_time += run_time[t];
				avg_time /= num_runs;

				double std_error = 0;
				for (int t = 0; t < num_runs; t++)
					std_error += (run_time[t] - avg_time) * (run_time[t] - avg_time);
				std_error = sqrt(std_error / num_runs);

				const double score = ((double)cuBufferSize / 1024) / avg_time;
				const double bandwidth = 3 * cuBufferSize / 1024.0 / 1024.0 / 1.024 * 4 / avg_time;

				if (file_output)
					fprintf(output, report_format, dims[0], cuBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)score, bandwidth);
				printf(report_format, dims[0], cuBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)score, bandwidth);

				benchmark_result += score;
			}

			cufftDestroy(planC2C);
			cudaFree(dataC);
			cudaDeviceSynchronize();
		}
	}
	free(inputC);

	benchmark_result /= num_systems;
	if (file_output)
		fprintf(output, "Benchmark score cuFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result));
	printf("Benchmark score cuFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result));
}
132 | -------------------------------------------------------------------------------- /benchmark_scripts/cuFFT_scripts/src/sample_1_benchmark_cuFFT_double.cu: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //CUDA parts 15 | #include "cuda_runtime.h" 16 | #include "device_launch_parameters.h" 17 | #include 18 | 19 | #define GROUP 1 20 | 21 | 22 | void sample_1_benchmark_cuFFT_double(bool file_output, FILE* output, int device_id) 23 | { 24 | 25 | const int num_runs = 3; 26 | if (file_output) 27 | fprintf(output, "1 - cuFFT FFT + iFFT C2C benchmark 1D batched in double precision\n"); 28 | printf("1 - cuFFT FFT + iFFT C2C benchmark 1D batched in double precision\n"); 29 | cudaSetDevice(device_id); 30 | double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples 31 | cufftDoubleComplex* inputC = (cufftDoubleComplex*)malloc((uint64_t)sizeof(cufftDoubleComplex) *pow(2, 27)); 32 | for (uint64_t i = 0; i y>z. 44 | if (n == 0) dims[0] = 2048; 45 | dims[1] = 64 * 32 * pow(2, 15) / dims[0]; 46 | //dims[1] = (dims[1] > 32768) ? 
32768 : dims[1]; 47 | if (dims[1] == 0) dims[1] = 1; 48 | dims[2] = 1; 49 | cudaMalloc((void**)&dataC, sizeof(cufftDoubleComplex) * dims[0] * dims[1] * dims[2]); 50 | 51 | cudaMemcpy(dataC, inputC, sizeof(cufftDoubleComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyHostToDevice); 52 | if (cudaGetLastError() != cudaSuccess) { 53 | fprintf(stderr, "Cuda error: Failed to allocate\n"); 54 | return; 55 | } 56 | uint64_t sizeCUDA; 57 | switch (1) { 58 | case 1: 59 | cufftPlan1d(&planZ2Z, dims[0], CUFFT_Z2Z, dims[1]); 60 | cufftEstimate1d(dims[0], CUFFT_Z2Z, 1, (size_t*)&sizeCUDA); 61 | break; 62 | case 2: 63 | cufftPlan2d(&planZ2Z, dims[1], dims[0], CUFFT_Z2Z); 64 | cufftEstimate2d(dims[1], dims[0], CUFFT_Z2Z, (size_t*)&sizeCUDA); 65 | break; 66 | case 3: 67 | cufftPlan3d(&planZ2Z, dims[2], dims[1], dims[0], CUFFT_Z2Z); 68 | cufftEstimate3d(dims[2], dims[1], dims[0], CUFFT_Z2Z, (size_t*)&sizeCUDA); 69 | break; 70 | } 71 | 72 | double totTime = 0; 73 | uint64_t cuBufferSize = sizeof(double) * 2 * dims[0] * dims[1] * dims[2]; 74 | uint64_t num_iter = ((4096 * 1024.0 * 1024.0) / cuBufferSize > 1000) ? 
1000 : (4096 * 1024.0 * 1024.0) / cuBufferSize ; 75 | if (num_iter == 0) num_iter = 1; 76 | std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now(); 77 | for (int i = 0; i < num_iter; i++) { 78 | 79 | cufftExecZ2Z(planZ2Z, dataC, dataC, -1); 80 | cufftExecZ2Z(planZ2Z, dataC, dataC, 1); 81 | } 82 | cudaDeviceSynchronize(); 83 | std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now(); 84 | totTime = (std::chrono::duration_cast(timeEnd - timeSubmit).count() * 0.001) / num_iter; 85 | run_time[r][0] = totTime; 86 | if (n > 0) { 87 | if (r == num_runs - 1) { 88 | double std_error = 0; 89 | double avg_time = 0; 90 | for (uint64_t t = 0; t < num_runs; t++) { 91 | avg_time += run_time[t][0]; 92 | } 93 | avg_time /= num_runs; 94 | for (uint64_t t = 0; t < num_runs; t++) { 95 | std_error += (run_time[t][0] - avg_time) * (run_time[t][0] - avg_time); 96 | } 97 | std_error = sqrt(std_error / num_runs); 98 | if (file_output) 99 | fprintf(output, "cuFFT System: %" PRIu64 " %" PRIu64 "x%" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 "\n", (uint64_t)log2(dims[0]), dims[0], dims[1], cuBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)cuBufferSize * sizeof(float) / sizeof(double) / 1024) / avg_time)); 100 | 101 | printf("cuFFT System: %" PRIu64 " %" PRIu64 "x%" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 "\n", (uint64_t)log2(dims[0]), dims[0], dims[1], cuBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)cuBufferSize * sizeof(float) / sizeof(double) / 1024) / avg_time)); 102 | benchmark_result[0] += ((double)cuBufferSize * sizeof(float)/sizeof(double)/ 1024) / avg_time; 103 | } 104 | 105 | } 106 | cufftDestroy(planZ2Z); 107 | cudaFree(dataC); 108 | cudaDeviceSynchronize(); 109 | //cufftDoubleComplex* output_cuFFT = 
(cufftDoubleComplex*)(malloc(sizeof(cufftDoubleComplex) * dims[0] * dims[1] * dims[2])); 110 | //cudaMemcpy(output_cuFFT, dataC, sizeof(cufftDoubleComplex) * dims[0] * dims[1] * dims[2], cudaMemcpyDeviceToHost); 111 | //cudaDeviceSynchronize(); 112 | 113 | 114 | } 115 | } 116 | free(inputC); 117 | benchmark_result[0] /= (24 - 1); 118 | if (file_output) 119 | fprintf(output, "Benchmark score cuFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 120 | printf("Benchmark score cuFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 121 | 122 | } 123 | -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/precision_rocFFT_double.h: -------------------------------------------------------------------------------- 1 | void launch_precision_rocFFT_double(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/precision_rocFFT_r2c.h: -------------------------------------------------------------------------------- 1 | void launch_precision_rocFFT_r2c(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/precision_rocFFT_single.h: -------------------------------------------------------------------------------- 1 | void launch_precision_rocFFT_single(void* inputC, void* output_cuFFT, int device_id, uint64_t* dims); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/sample_0_benchmark_rocFFT_single.h: -------------------------------------------------------------------------------- 1 | void sample_0_benchmark_rocFFT_single(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- 
/benchmark_scripts/rocFFT_scripts/include/sample_1000_benchmark_rocFFT_single_2_4096.h: -------------------------------------------------------------------------------- 1 | /* Entry point of rocFFT benchmark 1000 (its banner reads "FFT + iFFT C2C benchmark 1D batched in single precision: all supported systems from 2 to 4096"). file_output gates whether report lines are also written to `output`; device_id is passed to hipSetDevice. */ void sample_1000_benchmark_rocFFT_single_2_4096(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/sample_1001_benchmark_rocFFT_double_2_4096.h: -------------------------------------------------------------------------------- 1 | /* Entry point of rocFFT benchmark 1001. The definition is not part of this excerpt; presumably the parameters mean the same as for the sample_1000 variant - TODO confirm. */ void sample_1001_benchmark_rocFFT_double_2_4096(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/sample_1003_benchmark_rocFFT_single_3d_2_512.h: -------------------------------------------------------------------------------- 1 | /* Entry point of rocFFT benchmark 1003. The definition is not part of this excerpt; presumably the parameters mean the same as for the sample_1000 variant - TODO confirm. */ void sample_1003_benchmark_rocFFT_single_3d_2_512(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/sample_1_benchmark_rocFFT_double.h: -------------------------------------------------------------------------------- 1 | /* Entry point of rocFFT benchmark 1. The definition is not part of this excerpt; presumably the parameters mean the same as for the sample_1000 variant - TODO confirm. */ void sample_1_benchmark_rocFFT_double(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/sample_3_benchmark_rocFFT_single_3d.h: -------------------------------------------------------------------------------- 1 | /* Entry point of rocFFT benchmark 3. The definition is not part of this excerpt; presumably the parameters mean the same as for the sample_1000 variant - TODO confirm. */ void sample_3_benchmark_rocFFT_single_3d(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/sample_6_benchmark_rocFFT_single_r2c.h: -------------------------------------------------------------------------------- 1 | /* Entry point of rocFFT benchmark 6. The definition is not part of this excerpt; presumably the parameters mean the same as for the sample_1000 variant - TODO confirm. */ void sample_6_benchmark_rocFFT_single_r2c(bool file_output, FILE* output, int device_id); 
-------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/sample_7_benchmark_rocFFT_single_Bluestein.h: -------------------------------------------------------------------------------- 1 | void sample_7_benchmark_rocFFT_single_Bluestein(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/sample_8_benchmark_rocFFT_double_Bluestein.h: -------------------------------------------------------------------------------- 1 | void sample_8_benchmark_rocFFT_double_Bluestein(bool file_output, FILE* output, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/include/user_benchmark_rocFFT.h: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | uint64_t X; 3 | uint64_t Y; 4 | uint64_t Z; 5 | uint64_t P; 6 | uint64_t B; 7 | uint64_t N; 8 | uint64_t R2C; 9 | } rocFFTUserSystemParameters;//an example structure used to pass user-defined system for benchmarking 10 | 11 | void user_benchmark_rocFFT(bool file_output, FILE* output, rocFFTUserSystemParameters* userParams, int device_id); -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/src/precision_rocFFT_double.cpp: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //ROCM parts 15 | #include "hip/hip_runtime.h" 16 | #include 17 | 18 | 
// Runs one forward, in-place, double-precision C2C transform through hipFFT and copies
// the device result back to the host.
//
// inputC        - host buffer; dims[0]*dims[1]*dims[2] hipfftDoubleComplex values are read.
// output_rocFFT - host buffer receiving the same number of values.
// device_id     - device ordinal handed to hipSetDevice.
// dims          - dims[0..2] are the x/y/z sizes; dims[4] selects the plan rank (1, 2 or 3),
//                 so the array must hold at least five entries. dims[3] is not read here.
//
// On an allocation, upload or plan-creation failure a message is written to stderr, the
// device buffer is released and output_rocFFT is left untouched.
void launch_precision_rocFFT_double(void* inputC, void* output_rocFFT, int device_id, uint64_t* dims)
{
	hipSetDevice(device_id);

	const size_t bufferSize = sizeof(hipfftDoubleComplex) * dims[0] * dims[1] * dims[2];
	hipfftDoubleComplex* dataC = NULL;
	hipError_t status = hipMalloc((void**)&dataC, bufferSize);
	if (status == hipSuccess)
		status = hipMemcpy(dataC, inputC, bufferSize, hipMemcpyHostToDevice);
	if (status != hipSuccess) {
		fprintf(stderr, "ROCM error: Failed to allocate\n");
		// The allocation may have succeeded even though the upload failed.
		if (dataC != NULL) hipFree(dataC);
		return;
	}

	hipfftHandle planZ2Z;
	hipfftResult planStatus;
	switch (dims[4]) {
	case 1:
		planStatus = hipfftPlan1d(&planZ2Z, dims[0], HIPFFT_Z2Z, 1);
		break;
	case 2:
		planStatus = hipfftPlan2d(&planZ2Z, dims[1], dims[0], HIPFFT_Z2Z);
		break;
	case 3:
		planStatus = hipfftPlan3d(&planZ2Z, dims[2], dims[1], dims[0], HIPFFT_Z2Z);
		break;
	default:
		// Any other rank used to fall through with an uninitialised plan handle.
		planStatus = HIPFFT_INVALID_VALUE;
		break;
	}
	if (planStatus != HIPFFT_SUCCESS) {
		fprintf(stderr, "ROCM error: Failed to create hipFFT plan\n");
		hipFree(dataC);
		return;
	}

	hipfftExecZ2Z(planZ2Z, dataC, dataC, HIPFFT_FORWARD);
	hipDeviceSynchronize();
	hipMemcpy(output_rocFFT, dataC, bufferSize, hipMemcpyDeviceToHost);
	hipDeviceSynchronize();
	hipfftDestroy(planZ2Z);
	hipFree(dataC);
}
49 | -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/src/precision_rocFFT_r2c.cpp: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //ROCM parts 15 | #include "hip/hip_runtime.h" 16 | #include 17 | 18 | 
// Creates a plan of the rank stored in dims[4] (1, 2 or 3) for the given transform type;
// any other rank is rejected instead of leaving *plan uninitialised.
static hipfftResult create_precision_r2c_plan(hipfftHandle* plan, const uint64_t* dims, hipfftType type)
{
	switch (dims[4]) {
	case 1:
		return hipfftPlan1d(plan, dims[0], type, 1);
	case 2:
		return hipfftPlan2d(plan, dims[1], dims[0], type);
	case 3:
		return hipfftPlan3d(plan, dims[2], dims[1], dims[0], type);
	default:
		return HIPFFT_INVALID_VALUE;
	}
}

// Releases whichever of the two device buffers were actually allocated.
static void free_precision_r2c_buffers(hipfftReal* dataR, hipfftComplex* dataC)
{
	if (dataR != NULL) hipFree(dataR);
	if (dataC != NULL) hipFree(dataC);
}

// Runs an out-of-place single-precision R2C transform followed by a C2R transform through
// hipFFT and copies the resulting real data back to the host.
//
// inputC        - host buffer; dims[0]*dims[1]*dims[2] hipfftReal values are read.
// output_rocFFT - host buffer receiving dims[0]*dims[1]*dims[2] float values.
// device_id     - device ordinal handed to hipSetDevice.
// dims          - dims[0..2] are the x/y/z sizes; dims[4] selects the plan rank (1, 2 or 3),
//                 so the array must hold at least five entries. dims[3] is not read here.
//
// On an allocation, upload or plan-creation failure a message is written to stderr, both
// device buffers are released and output_rocFFT is left untouched.
void launch_precision_rocFFT_r2c(void* inputC, void* output_rocFFT, int device_id, uint64_t* dims)
{
	hipSetDevice(device_id);

	const size_t realBytes = sizeof(hipfftReal) * dims[0] * dims[1] * dims[2];
	// Both device buffers are sized for (dims[0]/2 + 1) complex values per row.
	const size_t complexBytes = sizeof(hipfftComplex) * (dims[0] / 2 + 1) * dims[1] * dims[2];

	hipfftReal* dataR = NULL;
	hipfftComplex* dataC = NULL;
	hipError_t status = hipMalloc((void**)&dataR, complexBytes);
	if (status == hipSuccess)
		status = hipMalloc((void**)&dataC, complexBytes);
	if (status == hipSuccess)
		status = hipMemcpy(dataR, inputC, realBytes, hipMemcpyHostToDevice);
	if (status != hipSuccess) {
		fprintf(stderr, "ROCM error: Failed to allocate\n");
		free_precision_r2c_buffers(dataR, dataC);
		return;
	}

	hipfftHandle planR2C;
	if (create_precision_r2c_plan(&planR2C, dims, HIPFFT_R2C) != HIPFFT_SUCCESS) {
		fprintf(stderr, "ROCM error: Failed to create hipFFT plan\n");
		free_precision_r2c_buffers(dataR, dataC);
		return;
	}
	hipfftExecR2C(planR2C, dataR, dataC);
	hipDeviceSynchronize();
	hipfftDestroy(planR2C);

	// The same handle variable is reused for the inverse (C2R) plan.
	if (create_precision_r2c_plan(&planR2C, dims, HIPFFT_C2R) != HIPFFT_SUCCESS) {
		fprintf(stderr, "ROCM error: Failed to create hipFFT plan\n");
		free_precision_r2c_buffers(dataR, dataC);
		return;
	}
	hipfftExecC2R(planR2C, dataC, dataR);
	hipDeviceSynchronize();
	hipMemcpy(output_rocFFT, dataR, sizeof(float) * (dims[0]) * dims[1] * dims[2], hipMemcpyDeviceToHost);
	hipDeviceSynchronize();
	hipfftDestroy(planR2C);
	free_precision_r2c_buffers(dataR, dataC);
}
68 | -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/src/precision_rocFFT_single.cpp: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //ROCM parts 15 | #include "hip/hip_runtime.h" 16 | #include 17 | 18 | 
// Runs one forward, in-place, single-precision C2C transform through hipFFT and copies
// the device result back to the host.
//
// inputC        - host buffer; dims[0]*dims[1]*dims[2] hipfftComplex values are read.
// output_rocFFT - host buffer receiving the same number of values.
// device_id     - device ordinal handed to hipSetDevice.
// dims          - dims[0..2] are the x/y/z sizes; dims[4] selects the plan rank (1, 2 or 3),
//                 so the array must hold at least five entries. dims[3] is not read here.
//
// On an allocation, upload or plan-creation failure a message is written to stderr, the
// device buffer is released and output_rocFFT is left untouched.
void launch_precision_rocFFT_single(void* inputC, void* output_rocFFT, int device_id, uint64_t* dims)
{
	hipSetDevice(device_id);

	const size_t bufferSize = sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2];
	hipfftComplex* dataC = NULL;
	hipError_t status = hipMalloc((void**)&dataC, bufferSize);
	if (status == hipSuccess)
		status = hipMemcpy(dataC, inputC, bufferSize, hipMemcpyHostToDevice);
	if (status != hipSuccess) {
		fprintf(stderr, "ROCM error: Failed to allocate\n");
		// The allocation may have succeeded even though the upload failed.
		if (dataC != NULL) hipFree(dataC);
		return;
	}

	hipfftHandle planC2C;
	hipfftResult planStatus;
	switch (dims[4]) {
	case 1:
		planStatus = hipfftPlan1d(&planC2C, dims[0], HIPFFT_C2C, 1);
		break;
	case 2:
		planStatus = hipfftPlan2d(&planC2C, dims[1], dims[0], HIPFFT_C2C);
		break;
	case 3:
		planStatus = hipfftPlan3d(&planC2C, dims[2], dims[1], dims[0], HIPFFT_C2C);
		break;
	default:
		// Any other rank used to fall through with an uninitialised plan handle.
		planStatus = HIPFFT_INVALID_VALUE;
		break;
	}
	if (planStatus != HIPFFT_SUCCESS) {
		fprintf(stderr, "ROCM error: Failed to create hipFFT plan\n");
		hipFree(dataC);
		return;
	}

	hipfftExecC2C(planC2C, dataC, dataC, HIPFFT_FORWARD);
	hipDeviceSynchronize();
	hipMemcpy(output_rocFFT, dataC, bufferSize, hipMemcpyDeviceToHost);
	hipDeviceSynchronize();
	hipfftDestroy(planC2C);
	hipFree(dataC);
}
49 | -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/src/sample_0_benchmark_rocFFT_single.cpp: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //ROCM parts 15 | #include "hip/hip_runtime.h" 16 | #include 17 | 18 | #define GROUP 1 19 | 20 | 21 | void sample_0_benchmark_rocFFT_single(bool file_output, FILE* output, int device_id) 22 | { 23 | 24 | const int num_runs = 5; 25 | if (file_output) 26 | fprintf(output, "0 - rocFFT FFT + iFFT C2C benchmark 1D batched in single precision\n"); 27 | printf("0 - rocFFT FFT + iFFT C2C benchmark 1D batched in single precision\n"); 28 | hipSetDevice(device_id); 29 | double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples 30 | hipfftComplex* inputC = (hipfftComplex*)malloc((uint64_t)sizeof(hipfftComplex)*pow(2, 27)); 31 | for (uint64_t i = 0; i < pow(2, 27); i++) { 32 | inputC[i].x = 
2 * ((float)rand()) / RAND_MAX - 1.0; 33 | inputC[i].y = 2 * ((float)rand()) / RAND_MAX - 1.0; 34 | } 35 | for (int n = 0; n < 26; n++) { 36 | double run_time[num_runs][2]; 37 | for (int r = 0; r < num_runs; r++) { 38 | hipfftHandle planC2C; 39 | hipfftComplex* dataC; 40 | 41 | uint64_t dims[3]; 42 | dims[0] = 4 * pow(2, n); //Multidimensional FFT dimensions sizes (default 1). For best performance (and stability), order dimensions in descendant size order as: x>y>z. 43 | if (n == 0) dims[0] = 4096; 44 | dims[1] = 64* 32 * pow(2, 16)/dims[0]; 45 | //dims[1] = (dims[1] > 32768) ? 32768 : dims[1]; 46 | if (dims[1] == 0) dims[1] = 1; 47 | dims[2] = 1; 48 | 49 | hipMalloc((void**)&dataC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2]); 50 | 51 | hipMemcpy(dataC, inputC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2], hipMemcpyHostToDevice); 52 | if (hipGetLastError() != hipSuccess) { 53 | fprintf(stderr, "ROCM error: Failed to allocate\n"); 54 | return; 55 | } 56 | uint64_t sizeROCM; 57 | switch (1) { 58 | case 1: 59 | hipfftPlan1d(&planC2C, dims[0], HIPFFT_C2C, dims[1]); 60 | hipfftGetSize1d(planC2C, dims[0], HIPFFT_C2C, dims[1], (size_t*)&sizeROCM); 61 | break; 62 | case 2: 63 | hipfftPlan2d(&planC2C, dims[1], dims[0], HIPFFT_C2C); 64 | break; 65 | case 3: 66 | hipfftPlan3d(&planC2C, dims[2], dims[1], dims[0], HIPFFT_C2C); 67 | break; 68 | } 69 | 70 | float totTime = 0; 71 | uint64_t rocBufferSize = sizeof(float) * 2 * dims[0] * dims[1] * dims[2]; 72 | uint64_t num_iter = ((3*4096 * 1024.0 * 1024.0) / rocBufferSize > 1000) ? 
1000 : (3*4096 * 1024.0 * 1024.0) / rocBufferSize; 73 | if (num_iter == 0) num_iter = 1; 74 | 75 | std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now(); 76 | for (int i = 0; i < num_iter; i++) { 77 | 78 | hipfftExecC2C(planC2C, dataC, dataC, -1); 79 | hipfftExecC2C(planC2C, dataC, dataC, 1); 80 | } 81 | hipDeviceSynchronize(); 82 | std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now(); 83 | totTime = (std::chrono::duration_cast(timeEnd - timeSubmit).count() * 0.001) / num_iter; 84 | run_time[r][0] = totTime; 85 | if (n > 0) { 86 | if (r == num_runs - 1) { 87 | double std_error = 0; 88 | double avg_time = 0; 89 | for (uint64_t t = 2; t < num_runs; t++) { 90 | avg_time += run_time[t][0]; 91 | } 92 | avg_time /= num_runs-2; 93 | for (uint64_t t = 2; t < num_runs; t++) { 94 | std_error += (run_time[t][0] - avg_time) * (run_time[t][0] - avg_time); 95 | } 96 | std_error = sqrt(std_error / (num_runs-2)); 97 | if (file_output) 98 | fprintf(output, "rocFFT System: %" PRIu64 " %" PRIu64 "x%" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 "\n", (uint64_t)log2(dims[0]), dims[0], dims[1], rocBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)rocBufferSize / 1024) / avg_time)); 99 | 100 | printf("rocFFT System: %" PRIu64 " %" PRIu64 "x%" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 "\n", (uint64_t)log2(dims[0]), dims[0], dims[1], rocBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)rocBufferSize / 1024) / avg_time)); 101 | benchmark_result[0] += ((double)rocBufferSize / 1024) / avg_time; 102 | } 103 | 104 | } 105 | hipfftDestroy(planC2C); 106 | hipFree(dataC); 107 | hipDeviceSynchronize(); 108 | //hipfftComplex* output_rocFFT = (hipfftComplex*)(malloc(sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2])); 109 | 
//hipMemcpy(output_rocFFT, dataC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2], hipMemcpyDeviceToHost); 110 | //hipDeviceSynchronize(); 111 | 112 | 113 | } 114 | } 115 | free(inputC); 116 | benchmark_result[0] /= (26 - 1); 117 | if (file_output) 118 | fprintf(output, "Benchmark score rocFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 119 | printf("Benchmark score rocFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 120 | 121 | } 122 | -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/src/sample_1000_benchmark_rocFFT_single_2_4096.cpp: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //ROCM parts 15 | #include "hip/hip_runtime.h" 16 | #include 17 | 18 | #define GROUP 1 19 | 20 | 21 | void sample_1000_benchmark_rocFFT_single_2_4096(bool file_output, FILE* output, int device_id) 22 | { 23 | 24 | const int num_runs = 5; 25 | if (file_output) 26 | fprintf(output, "1000 - rocFFT FFT + iFFT C2C benchmark 1D batched in single precision: all supported systems from 2 to 4096\n"); 27 | printf("1000 - FFT + iFFT C2C benchmark 1D batched in single precision: all supported systems from 2 to 4096\n"); 28 | hipSetDevice(device_id); 29 | double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples 30 | hipfftComplex* inputC = (hipfftComplex*)malloc((uint64_t)sizeof(hipfftComplex) * pow(2, 27)); 31 | for (uint64_t i = 0; i < pow(2, 27); i++) { 32 | inputC[i].x = 2 * ((float)rand()) / RAND_MAX - 1.0; 33 | inputC[i].y = 2 * ((float)rand()) / RAND_MAX - 1.0; 34 | } 35 | int num_systems = 0; 36 | for (int n = 1; n < 4097; n++) { 37 | double run_time[num_runs][2]; 38 | for (int r = 0; r < num_runs; r++) { 39 | 
hipfftHandle planC2C; 40 | hipfftComplex* dataC; 41 | 42 | uint64_t dims[3]; 43 | 44 | dims[0] = n; 45 | if (n == 1) dims[0] = 4096; 46 | uint64_t temp = dims[0]; 47 | 48 | /*for (uint64_t j = 2; j < 14; j++) 49 | { 50 | if (temp % j == 0) { 51 | temp /= j; 52 | j = 1; 53 | } 54 | } 55 | if (temp != 1) break;*/ 56 | dims[1] = pow(2, (uint64_t)log2(64 * 32 * pow(2, 16) / dims[0])); 57 | if (dims[1] < 1) dims[1] = 1; 58 | dims[2] = 1; 59 | 60 | hipMalloc((void**)&dataC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2]); 61 | 62 | hipMemcpy(dataC, inputC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2], hipMemcpyHostToDevice); 63 | if (hipGetLastError() != hipSuccess) { 64 | fprintf(stderr, "ROCM error: Failed to allocate\n"); 65 | return; 66 | } 67 | uint64_t sizeROCM; 68 | switch (1) { 69 | case 1: 70 | hipfftPlan1d(&planC2C, dims[0], HIPFFT_C2C, dims[1]); 71 | hipfftGetSize1d(planC2C, dims[0], HIPFFT_C2C, dims[1], (size_t*)&sizeROCM); 72 | break; 73 | case 2: 74 | hipfftPlan2d(&planC2C, dims[1], dims[0], HIPFFT_C2C); 75 | break; 76 | case 3: 77 | hipfftPlan3d(&planC2C, dims[2], dims[1], dims[0], HIPFFT_C2C); 78 | break; 79 | } 80 | 81 | float totTime = 0; 82 | uint64_t rocBufferSize = sizeof(float) * 2 * dims[0] * dims[1] * dims[2]; 83 | uint64_t num_iter = ((3 * 4096 * 1024.0 * 1024.0) / rocBufferSize > 1000) ? 
1000 : (3 * 4096 * 1024.0 * 1024.0) / rocBufferSize; 84 | if (num_iter == 0) num_iter = 1; 85 | 86 | std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now(); 87 | for (int i = 0; i < num_iter; i++) { 88 | 89 | hipfftExecC2C(planC2C, dataC, dataC, -1); 90 | hipfftExecC2C(planC2C, dataC, dataC, 1); 91 | } 92 | hipDeviceSynchronize(); 93 | std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now(); 94 | totTime = (std::chrono::duration_cast(timeEnd - timeSubmit).count() * 0.001) / num_iter; 95 | run_time[r][0] = totTime; 96 | if (n > 1) { 97 | if (r == num_runs - 1) { 98 | num_systems++; 99 | double std_error = 0; 100 | double avg_time = 0; 101 | for (uint64_t t = 2; t < num_runs; t++) { 102 | avg_time += run_time[t][0]; 103 | } 104 | avg_time /= (num_runs-2); 105 | for (uint64_t t = 2; t < num_runs; t++) { 106 | std_error += (run_time[t][0] - avg_time) * (run_time[t][0] - avg_time); 107 | } 108 | std_error = sqrt(std_error / (num_runs-2)); 109 | if (file_output) 110 | fprintf(output, "rocFFT System: %" PRIu64 " %" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 " bandwidth: %0.1f\n", dims[0], dims[1], rocBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)rocBufferSize / 1024) / avg_time), rocBufferSize / 1024.0 / 1024.0 / 1.024 * 4 / avg_time); 111 | 112 | printf("rocFFT System: %" PRIu64 " %" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 " bandwidth: %0.1f\n", dims[0], dims[1], rocBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)rocBufferSize / 1024) / avg_time), rocBufferSize / 1024.0 / 1024.0 / 1.024 * 4 / avg_time); 113 | benchmark_result[0] += ((double)rocBufferSize / 1024) / avg_time; 114 | } 115 | 116 | } 117 | hipfftDestroy(planC2C); 118 | hipFree(dataC); 119 | hipDeviceSynchronize(); 120 | //hipfftComplex* 
output_rocFFT = (hipfftComplex*)(malloc(sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2])); 121 | //hipMemcpy(output_rocFFT, dataC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2], hipMemcpyDeviceToHost); 122 | //hipDeviceSynchronize(); 123 | 124 | 125 | } 126 | } 127 | free(inputC); 128 | benchmark_result[0] /= (num_systems); 129 | if (file_output) 130 | fprintf(output, "Benchmark score rocFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 131 | printf("Benchmark score rocFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 132 | 133 | } 134 | -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/src/sample_1003_benchmark_rocFFT_single_3d_2_512.cpp: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //ROCM parts 15 | #include "hip/hip_runtime.h" 16 | #include 17 | 18 | #define GROUP 1 19 | 20 | 21 | void sample_1003_benchmark_rocFFT_single_3d_2_512(bool file_output, FILE* output, int device_id) 22 | { 23 | if (file_output) 24 | fprintf(output, "1003 - rocFFT FFT + iFFT C2C multidimensional benchmark in single precision: all supported cubes from 2 to 512\n"); 25 | printf("1003 - rocFFT FFT + iFFT C2C multidimensional benchmark in single precision: all supported cubes from 2 to 512\n"); 26 | hipSetDevice(device_id); 27 | const int num_runs = 5; 28 | 29 | double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples 30 | hipfftComplex* inputC = (hipfftComplex*)malloc((uint64_t)sizeof(hipfftComplex)*pow(2, 27)); 31 | for (uint64_t i = 0; i < pow(2, 27); i++) { 32 | inputC[i].x = 2 * ((float)rand()) / RAND_MAX - 1.0; 33 | inputC[i].y = 2 * ((float)rand()) / RAND_MAX - 1.0; 34 | } 35 | int num_systems = 0; 36 
| for (int n = 1; n < 513; n++) { 37 | double run_time[num_runs][2]; 38 | for (int r = 0; r < num_runs; r++) { 39 | hipfftHandle planC2C; 40 | hipfftComplex* dataC; 41 | 42 | uint64_t dims[3]; 43 | 44 | dims[0] = n; 45 | if (n == 1) dims[0] = 512; 46 | uint64_t temp = dims[0]; 47 | 48 | /*for (uint64_t j = 2; j < 14; j++) 49 | { 50 | if (temp % j == 0) { 51 | temp /= j; 52 | j = 1; 53 | } 54 | } 55 | if (temp != 1) break;*/ 56 | dims[1] = dims[0]; 57 | dims[2] = dims[0]; 58 | 59 | hipMalloc((void**)&dataC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2]); 60 | 61 | hipMemcpy(dataC, inputC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2], hipMemcpyHostToDevice); 62 | if (hipGetLastError() != hipSuccess) { 63 | fprintf(stderr, "ROCM error: Failed to allocate\n"); 64 | return; 65 | } 66 | switch (3) { 67 | case 1: 68 | hipfftPlan1d(&planC2C, dims[0], HIPFFT_C2C, 1); 69 | break; 70 | case 2: 71 | hipfftPlan2d(&planC2C, dims[1], dims[0], HIPFFT_C2C); 72 | break; 73 | case 3: 74 | hipfftPlan3d(&planC2C, dims[2], dims[1], dims[0], HIPFFT_C2C); 75 | break; 76 | } 77 | 78 | float totTime = 0; 79 | uint64_t rocBufferSize = sizeof(float) * 2 * dims[0] * dims[1] * dims[2]; 80 | uint64_t num_iter = ((4096 * 1024.0 * 1024.0) / rocBufferSize > 1000) ? 
1000 : (4096 * 1024.0 * 1024.0) / rocBufferSize; 81 | if (num_iter == 0) num_iter = 1; 82 | std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now(); 83 | for (int i = 0; i < num_iter; i++) { 84 | 85 | hipfftExecC2C(planC2C, dataC, dataC, -1); 86 | hipfftExecC2C(planC2C, dataC, dataC, 1); 87 | } 88 | hipDeviceSynchronize(); 89 | std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now(); 90 | totTime = (std::chrono::duration_cast(timeEnd - timeSubmit).count() * 0.001) / num_iter; 91 | run_time[r][0] = totTime; 92 | if (n > 1) { 93 | if (r == num_runs - 1) { 94 | num_systems++; 95 | double std_error = 0; 96 | double avg_time = 0; 97 | for (uint64_t t = 2; t < num_runs; t++) { 98 | avg_time += run_time[t][0]; 99 | } 100 | avg_time /= (num_runs-2); 101 | for (uint64_t t = 2; t < num_runs; t++) { 102 | std_error += (run_time[t][0] - avg_time) * (run_time[t][0] - avg_time); 103 | } 104 | std_error = sqrt(std_error / (num_runs-2)); 105 | 106 | if (file_output) 107 | fprintf(output, "rocFFT System: %" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 " bandwidth: %0.1f\n", dims[0], rocBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)rocBufferSize / 1024) / avg_time), 3*rocBufferSize / 1024.0 / 1024.0 / 1.024 * 4 / avg_time); 108 | 109 | printf("rocFFT System: %" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 " bandwidth: %0.1f\n", dims[0], rocBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)rocBufferSize / 1024) / avg_time), 3*rocBufferSize / 1024.0 / 1024.0 / 1.024 * 4 / avg_time); 110 | benchmark_result[0] += ((double)rocBufferSize / 1024) / avg_time; 111 | } 112 | 113 | } 114 | hipfftDestroy(planC2C); 115 | hipFree(dataC); 116 | hipDeviceSynchronize(); 117 | //hipfftComplex* output_rocFFT = 
(hipfftComplex*)(malloc(sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2])); 118 | //hipMemcpy(output_rocFFT, dataC, sizeof(hipfftComplex) * dims[0] * dims[1] * dims[2], hipMemcpyDeviceToHost); 119 | //hipDeviceSynchronize(); 120 | 121 | 122 | } 123 | } 124 | free(inputC); 125 | benchmark_result[0] /= (num_systems); 126 | if (file_output) 127 | fprintf(output, "Benchmark score rocFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 128 | printf("Benchmark score rocFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 129 | 130 | } 131 | -------------------------------------------------------------------------------- /benchmark_scripts/rocFFT_scripts/src/sample_1_benchmark_rocFFT_double.cpp: -------------------------------------------------------------------------------- 1 | //general parts 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #ifndef __STDC_FORMAT_MACROS 10 | #define __STDC_FORMAT_MACROS 11 | #endif 12 | #include 13 | 14 | //ROCM parts 15 | #include "hip/hip_runtime.h" 16 | #include 17 | 18 | #define GROUP 1 19 | 20 | 21 | void sample_1_benchmark_rocFFT_double(bool file_output, FILE* output, int device_id) 22 | { 23 | 24 | const int num_runs = 5; 25 | if (file_output) 26 | fprintf(output, "1 - rocFFT FFT + iFFT C2C benchmark 1D batched in double precision\n"); 27 | printf("1 - rocFFT FFT + iFFT C2C benchmark 1D batched in double precision\n"); 28 | hipSetDevice(device_id); 29 | double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples 30 | hipfftDoubleComplex* inputC = (hipfftDoubleComplex*)malloc((uint64_t)sizeof(hipfftDoubleComplex) *pow(2, 27)); 31 | for (uint64_t i = 0; i y>z. 43 | if (n == 0) dims[0] = 2048; 44 | dims[1] = 64 * 32 * pow(2, 15) / dims[0]; 45 | //dims[1] = (dims[1] > 32768) ? 
32768 : dims[1]; 46 | if (dims[1] == 0) dims[1] = 1; 47 | dims[2] = 1; 48 | hipMalloc((void**)&dataC, sizeof(hipfftDoubleComplex) * dims[0] * dims[1] * dims[2]); 49 | 50 | hipMemcpy(dataC, inputC, sizeof(hipfftDoubleComplex) * dims[0] * dims[1] * dims[2], hipMemcpyHostToDevice); 51 | if (hipGetLastError() != hipSuccess) { 52 | fprintf(stderr, "ROCM error: Failed to allocate\n"); 53 | return; 54 | } 55 | uint64_t sizeROCm; 56 | switch (1) { 57 | case 1: 58 | hipfftPlan1d(&planZ2Z, dims[0], HIPFFT_Z2Z, dims[1]); 59 | hipfftEstimate1d(dims[0], HIPFFT_Z2Z, 1, (size_t*)&sizeROCm); 60 | break; 61 | case 2: 62 | hipfftPlan2d(&planZ2Z, dims[1], dims[0], HIPFFT_Z2Z); 63 | hipfftEstimate2d(dims[1], dims[0], HIPFFT_Z2Z, (size_t*)&sizeROCm); 64 | break; 65 | case 3: 66 | hipfftPlan3d(&planZ2Z, dims[2], dims[1], dims[0], HIPFFT_Z2Z); 67 | hipfftEstimate3d(dims[2], dims[1], dims[0], HIPFFT_Z2Z, (size_t*)&sizeROCm); 68 | break; 69 | } 70 | 71 | double totTime = 0; 72 | uint64_t rocBufferSize = sizeof(double) * 2 * dims[0] * dims[1] * dims[2]; 73 | uint64_t num_iter = ((4096 * 1024.0 * 1024.0) / rocBufferSize > 1000) ? 
1000 : (4096 * 1024.0 * 1024.0) / rocBufferSize ; 74 | if (num_iter == 0) num_iter = 1; 75 | std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now(); 76 | for (int i = 0; i < num_iter; i++) { 77 | 78 | hipfftExecZ2Z(planZ2Z, dataC, dataC, -1); 79 | hipfftExecZ2Z(planZ2Z, dataC, dataC, 1); 80 | } 81 | hipDeviceSynchronize(); 82 | std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now(); 83 | totTime = (std::chrono::duration_cast(timeEnd - timeSubmit).count() * 0.001) / num_iter; 84 | run_time[r][0] = totTime; 85 | if (n > 0) { 86 | if (r == num_runs - 1) { 87 | double std_error = 0; 88 | double avg_time = 0; 89 | for (uint64_t t = 2; t < num_runs; t++) { 90 | avg_time += run_time[t][0]; 91 | } 92 | avg_time /= num_runs-2; 93 | for (uint64_t t = 2; t < num_runs; t++) { 94 | std_error += (run_time[t][0] - avg_time) * (run_time[t][0] - avg_time); 95 | } 96 | std_error = sqrt(std_error / (num_runs-2)); 97 | if (file_output) 98 | fprintf(output, "rocFFT System: %" PRIu64 " %" PRIu64 "x%" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 "\n", (uint64_t)log2(dims[0]), dims[0], dims[1], rocBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)rocBufferSize * sizeof(float) / sizeof(double) / 1024) / avg_time)); 99 | 100 | printf("rocFFT System: %" PRIu64 " %" PRIu64 "x%" PRIu64 " Buffer: %" PRIu64 " MB avg_time_per_step: %0.3f ms std_error: %0.3f num_iter: %" PRIu64 " benchmark: %" PRIu64 "\n", (uint64_t)log2(dims[0]), dims[0], dims[1], rocBufferSize / 1024 / 1024, avg_time, std_error, num_iter, (uint64_t)(((double)rocBufferSize * sizeof(float) / sizeof(double) / 1024) / avg_time)); 101 | benchmark_result[0] += ((double)rocBufferSize * sizeof(float)/sizeof(double)/ 1024) / avg_time; 102 | } 103 | 104 | } 105 | hipfftDestroy(planZ2Z); 106 | hipFree(dataC); 107 | hipDeviceSynchronize(); 108 | //hipfftDoubleComplex* output_rocFFT = 
(hipfftDoubleComplex*)(malloc(sizeof(hipfftDoubleComplex) * dims[0] * dims[1] * dims[2])); 109 | //hipMemcpy(output_rocFFT, dataC, sizeof(hipfftDoubleComplex) * dims[0] * dims[1] * dims[2], hipMemcpyDeviceToHost); 110 | //hipDeviceSynchronize(); 111 | 112 | 113 | } 114 | } 115 | free(inputC); 116 | benchmark_result[0] /= (24 - 1); 117 | if (file_output) 118 | fprintf(output, "Benchmark score rocFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 119 | printf("Benchmark score rocFFT: %" PRIu64 "\n", (uint64_t)(benchmark_result[0])); 120 | 121 | } 122 | -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_0_benchmark_VkFFT_single.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_0_benchmark_VkFFT_single(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_1000_benchmark_VkFFT_single_2_4096.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_1000_benchmark_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); 5 | -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_1001_benchmark_VkFFT_double_2_4096.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_1001_benchmark_VkFFT_double_2_4096(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- 
/benchmark_scripts/vkFFT_scripts/include/sample_1002_benchmark_VkFFT_half_2_4096.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_1002_benchmark_VkFFT_half_2_4096(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); 5 | -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_1003_benchmark_VkFFT_single_3d_2_512.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_1003_benchmark_VkFFT_single_3d_2_512(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); 5 | -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_100_benchmark_VkFFT_single_nd_dct.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized, uint64_t dct_type); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_101_benchmark_VkFFT_double_nd_dct.h: -------------------------------------------------------------------------------- 1 | 
#include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized, uint64_t dct_type); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_10_benchmark_VkFFT_single_multipleBuffers.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_10_benchmark_VkFFT_single_multipleBuffers(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_11_precision_VkFFT_single.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_12_precision_VkFFT_double.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_13_precision_VkFFT_half.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); 
-------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_14_precision_VkFFT_single_nonPow2.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_15_precision_VkFFT_single_r2c.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_16_precision_VkFFT_single_dct.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_17_precision_VkFFT_double_dct.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_18_precision_VkFFT_double_nonPow2.h: -------------------------------------------------------------------------------- 1 | 
#include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_19_precision_VkFFT_quadDoubleDouble_nonPow2.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_19_precision_VkFFT_quadDoubleDouble_nonPow2(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_1_benchmark_VkFFT_double.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_1_benchmark_VkFFT_double(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_2_benchmark_VkFFT_half.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_3_benchmark_VkFFT_single_3d.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_3_benchmark_VkFFT_single_3d(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); 
-------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_4_benchmark_VkFFT_single_3d_zeropadding.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_4_benchmark_VkFFT_single_3d_zeropadding(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_50_convolution_VkFFT_single_1d_matrix.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_52_convolution_VkFFT_single_2d_batched_r2c.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- 
/benchmark_scripts/vkFFT_scripts/include/sample_5_benchmark_VkFFT_single_disableReorderFourStep.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_5_benchmark_VkFFT_single_disableReorderFourStep(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_6_benchmark_VkFFT_single_r2c.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_6_benchmark_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_7_benchmark_VkFFT_single_Bluestein.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_7_benchmark_VkFFT_single_Bluestein(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_8_benchmark_VkFFT_double_Bluestein.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult sample_8_benchmark_VkFFT_double_Bluestein(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/sample_9_benchmark_VkFFT_quadDoubleDouble.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include 
"utils_VkFFT.h" 3 | 4 | VkFFTResult sample_9_benchmark_VkFFT_quadDoubleDouble(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized); 5 | -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/user_benchmark_VkFFT.h: -------------------------------------------------------------------------------- 1 | #include "vkFFT.h" 2 | #include "utils_VkFFT.h" 3 | 4 | VkFFTResult user_benchmark_VkFFT(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized, VkFFTUserSystemParameters* userParams); -------------------------------------------------------------------------------- /benchmark_scripts/vkFFT_scripts/include/utils_VkFFT.h: -------------------------------------------------------------------------------- 1 | #ifndef VKFFT_UTILS_H 2 | #define VKFFT_UTILS_H 3 | #include "vkFFT.h" 4 | #include 5 | typedef struct { 6 | #if(VKFFT_BACKEND==0) 7 | VkInstance instance;//a connection between the application and the Vulkan library 8 | VkPhysicalDevice physicalDevice;//a handle for the graphics card used in the application 9 | VkPhysicalDeviceProperties physicalDeviceProperties;//bastic device properties 10 | VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties;//bastic memory properties of the device 11 | VkDevice device;//a logical device, interacting with physical device 12 | VkDebugUtilsMessengerEXT debugMessenger;//extension for debugging 13 | uint64_t queueFamilyIndex;//if multiple queues are available, specify the used one 14 | VkQueue queue;//a place, where all operations are submitted 15 | VkCommandPool commandPool;//an opaque objects that command buffer memory is allocated from 16 | VkFence fence;//a vkGPU->fence used to synchronize dispatches 17 | std::vector enabledDeviceExtensions; 18 | uint64_t enableValidationLayers; 19 | 20 | VkBuffer* stagingBuffer;//optional pointer to the user defined staging buffer 21 | VkDeviceMemory* 
stagingBufferMemory;//optional pointer to the user defined staging buffer memory, associated with the stagingBuffer 22 | #elif(VKFFT_BACKEND==1) 23 | CUdevice device; 24 | CUcontext context; 25 | #elif(VKFFT_BACKEND==2) 26 | hipDevice_t device; 27 | hipCtx_t context; 28 | #elif(VKFFT_BACKEND==3) 29 | cl_platform_id platform; 30 | cl_device_id device; 31 | cl_context context; 32 | cl_command_queue commandQueue; 33 | #elif(VKFFT_BACKEND==4) 34 | ze_driver_handle_t driver; 35 | ze_device_handle_t device; 36 | ze_context_handle_t context; 37 | ze_command_queue_handle_t commandQueue; 38 | uint32_t commandQueueID; 39 | #elif(VKFFT_BACKEND==5) 40 | MTL::Device* device; 41 | MTL::CommandQueue* queue; 42 | #endif 43 | uint64_t device_id;//an id of a device, reported by Vulkan device list 44 | } VkGPU;//an example structure containing Vulkan primitives 45 | 46 | typedef struct { 47 | uint64_t X; 48 | uint64_t Y; 49 | uint64_t Z; 50 | uint64_t P; 51 | uint64_t B; 52 | uint64_t N; 53 | uint64_t R2C; 54 | uint64_t DCT; 55 | uint64_t saveApplicationToString; 56 | uint64_t loadApplicationFromString; 57 | } VkFFTUserSystemParameters;//an example structure used to pass user-defined system for benchmarking 58 | 59 | #if(VKFFT_BACKEND==0) 60 | VkResult CreateDebugUtilsMessengerEXT(VkGPU* vkGPU, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger); 61 | void DestroyDebugUtilsMessengerEXT(VkGPU* vkGPU, const VkAllocationCallbacks* pAllocator); 62 | static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData); 63 | VkResult setupDebugMessenger(VkGPU* vkGPU); 64 | VkResult checkValidationLayerSupport(); 65 | std::vector getRequiredExtensions(VkGPU* vkGPU, uint64_t sample_id); 66 | VkResult createInstance(VkGPU* vkGPU, uint64_t sample_id); 
67 | VkResult findPhysicalDevice(VkGPU* vkGPU); 68 | VkResult getComputeQueueFamilyIndex(VkGPU* vkGPU); 69 | VkResult createDevice(VkGPU* vkGPU, uint64_t sample_id); 70 | VkResult createFence(VkGPU* vkGPU); 71 | VkResult createCommandPool(VkGPU* vkGPU); 72 | VkFFTResult findMemoryType(VkGPU* vkGPU, uint64_t memoryTypeBits, uint64_t memorySize, VkMemoryPropertyFlags properties, uint32_t* memoryTypeIndex); 73 | VkFFTResult allocateBuffer(VkGPU* vkGPU, VkBuffer* buffer, VkDeviceMemory* deviceMemory, VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags propertyFlags, uint64_t size); 74 | #endif 75 | VkFFTResult transferDataToCPU(VkGPU* vkGPU, void* cpu_arr, void* output_buffer, uint64_t bufferSize); 76 | VkFFTResult transferDataFromCPU(VkGPU* vkGPU, void* cpu_arr, void* input_buffer, uint64_t bufferSize); 77 | VkFFTResult devices_list(); 78 | VkFFTResult performVulkanFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchParams* launchParams, int inverse, uint64_t num_iter); 79 | VkFFTResult performVulkanFFTiFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchParams* launchParams, uint64_t num_iter, double* time_result); 80 | #endif 81 | -------------------------------------------------------------------------------- /documentation/VkFFT_API_guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DTolm/VkFFT/066a17c17068c0f11c9298d848c2976c71fad1c1/documentation/VkFFT_API_guide.pdf -------------------------------------------------------------------------------- /metal-cpp/Foundation/Foundation.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/Foundation.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 
6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSArray.hpp" 26 | #include "NSAutoreleasePool.hpp" 27 | #include "NSBundle.hpp" 28 | #include "NSData.hpp" 29 | #include "NSDate.hpp" 30 | #include "NSDefines.hpp" 31 | #include "NSDictionary.hpp" 32 | #include "NSEnumerator.hpp" 33 | #include "NSError.hpp" 34 | #include "NSLock.hpp" 35 | #include "NSNotification.hpp" 36 | #include "NSNumber.hpp" 37 | #include "NSObject.hpp" 38 | #include "NSPrivate.hpp" 39 | #include "NSProcessInfo.hpp" 40 | #include "NSRange.hpp" 41 | #include "NSString.hpp" 42 | #include "NSTypes.hpp" 43 | #include "NSURL.hpp" 44 | 45 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 46 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSArray.hpp: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSArray.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSObject.hpp" 26 | #include "NSTypes.hpp" 27 | 28 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 29 | 30 | namespace NS 31 | { 32 | class Array : public Copying<Array> // wrapper whose members are implemented by messaging the NSArray class / instance 33 | { 34 | public: 35 | static Array* array(); // class-level factories 36 | static Array* array(const Object* pObject); 37 | static Array* array(const Object* const* pObjects, UInteger count); 38 | 39 | static Array* alloc(); 40 | 41 | Array* init(); 42 | Array* init(const Object* const* pObjects, UInteger count); 43 | Array* init(const class Coder* pCoder); 44 | 45 | template <class _Object = Object> // caller picks the pointer type returned; defaults to Object 46 | _Object* object(UInteger index) const; 47 | UInteger count() const; 48 | }; 49 | } 50 | 51 |
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 52 | 53 | _NS_INLINE NS::Array* NS::Array::array() // messages the NSArray class with the 'array' selector 54 | { 55 | return Object::sendMessage<Array*>(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(array)); 56 | } 57 | 58 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 59 | 60 | _NS_INLINE NS::Array* NS::Array::array(const Object* pObject) // class message 'arrayWithObject_' with pObject 61 | { 62 | return Object::sendMessage<Array*>(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(arrayWithObject_), pObject); 63 | } 64 | 65 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 66 | 67 | _NS_INLINE NS::Array* NS::Array::array(const Object* const* pObjects, UInteger count) // class message 'arrayWithObjects_count_' 68 | { 69 | return Object::sendMessage<Array*>(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(arrayWithObjects_count_), pObjects, count); 70 | } 71 | 72 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 73 | 74 | _NS_INLINE NS::Array* NS::Array::alloc() // delegates to Object::alloc for the NSArray class 75 | { 76 | return NS::Object::alloc<Array>(_NS_PRIVATE_CLS(NSArray)); 77 | } 78 | 79 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 80 | 81 | _NS_INLINE NS::Array* NS::Array::init() // delegates to Object::init, typed as Array 82 | { 83 | return NS::Object::init<Array>(); 84 | } 85 | 86 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 87 | 88 | _NS_INLINE NS::Array* NS::Array::init(const Object* const* pObjects, UInteger count) // instance message 'initWithObjects_count_' 89 | { 90 | return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(initWithObjects_count_), pObjects, count); 91 | }
92 | 93 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 94 | 95 | _NS_INLINE NS::Array* NS::Array::init(const class Coder* pCoder) // instance message 'initWithCoder_' with pCoder 96 | { 97 | return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder); 98 | } 99 | 100 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 101 | 102 | _NS_INLINE NS::UInteger NS::Array::count() const // instance message 'count', result typed as UInteger 103 | { 104 | return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(count)); 105 | } 106 | 107 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 108 | 109 | template <class _Object> 110 | _NS_INLINE _Object* NS::Array::object(UInteger index) const // instance message 'objectAtIndex_'; result is typed as _Object* with no check visible here 111 | { 112 | return Object::sendMessage<_Object*>(this, _NS_PRIVATE_SEL(objectAtIndex_), index); 113 | } 114 | 115 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 116 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSAutoreleasePool.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSAutoreleasePool.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSDefines.hpp" 26 | #include "NSObject.hpp" 27 | #include "NSPrivate.hpp" 28 | #include "NSTypes.hpp" 29 | 30 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 31 | 32 | namespace NS 33 | { 34 | class AutoreleasePool : public Object // wrapper whose members message the NSAutoreleasePool class / instance 35 | { 36 | public: 37 | static AutoreleasePool* alloc(); 38 | AutoreleasePool* init(); 39 | 40 | void drain(); 41 | 42 | void addObject(Object* pObject); 43 | 44 | static void showPools(); 45 | }; 46 | } 47 | 48 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 49 | 50 | _NS_INLINE NS::AutoreleasePool* NS::AutoreleasePool::alloc() // delegates to Object::alloc for the NSAutoreleasePool class 51 | { 52 | return NS::Object::alloc<AutoreleasePool>(_NS_PRIVATE_CLS(NSAutoreleasePool)); 53 | } 54 | 55 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 56 | 57 | _NS_INLINE NS::AutoreleasePool* NS::AutoreleasePool::init() // delegates to Object::init, typed as AutoreleasePool 58 | { 59 | return NS::Object::init<AutoreleasePool>(); 60
| } 61 | 62 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 63 | 64 | _NS_INLINE void NS::AutoreleasePool::drain() // instance message 'drain', no result 65 | { 66 | Object::sendMessage<void>(this, _NS_PRIVATE_SEL(drain)); 67 | } 68 | 69 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 70 | 71 | _NS_INLINE void NS::AutoreleasePool::addObject(Object* pObject) // instance message 'addObject_' with pObject 72 | { 73 | Object::sendMessage<void>(this, _NS_PRIVATE_SEL(addObject_), pObject); 74 | } 75 | 76 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 77 | 78 | _NS_INLINE void NS::AutoreleasePool::showPools() // class message 'showPools' 79 | { 80 | Object::sendMessage<void>(_NS_PRIVATE_CLS(NSAutoreleasePool), _NS_PRIVATE_SEL(showPools)); 81 | } 82 | 83 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 84 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSData.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSData.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSObject.hpp" 26 | #include "NSTypes.hpp" 27 | 28 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 29 | 30 | namespace NS 31 | { 32 | class Data : public Copying<Data> // wrapper exposing two accessors, each implemented as an instance message 33 | { 34 | public: 35 | void* mutableBytes() const; 36 | UInteger length() const; 37 | }; 38 | } 39 | 40 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 41 | 42 | _NS_INLINE void* NS::Data::mutableBytes() const // instance message 'mutableBytes', result typed as void* 43 | { 44 | return Object::sendMessage<void*>(this, _NS_PRIVATE_SEL(mutableBytes)); 45 | } 46 | 47 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 48 | 49 | _NS_INLINE NS::UInteger NS::Data::length() const // instance message 'length', result typed as UInteger 50 | { 51 | return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(length)); 52 | } 53 | 54 |
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 55 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSDate.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSDate.hpp 4 | // 5 | // See LICENSE.txt for this project licensing information. 6 | // 7 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 8 | 9 | #pragma once 10 | 11 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 12 | 13 | #include "NSDefines.hpp" 14 | #include "NSObject.hpp" 15 | #include "NSPrivate.hpp" 16 | #include "NSTypes.hpp" 17 | 18 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 19 | 20 | namespace NS 21 | { 22 | 23 | using TimeInterval = double; // same alias is also declared in NSTypes.hpp; a repeated identical alias is legal C++ 24 | 25 | class Date : public Copying<Date> // wrapper with a single class-level factory 26 | { 27 | public: 28 | static Date* dateWithTimeIntervalSinceNow(TimeInterval secs); 29 | }; 30 | 31 | } // NS 32 | 33 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 34 | 35 | _NS_INLINE NS::Date* NS::Date::dateWithTimeIntervalSinceNow(NS::TimeInterval secs) // class message 'dateWithTimeIntervalSinceNow_' to NSDate with secs 36 | { 37 | return NS::Object::sendMessage<NS::Date*>(_NS_PRIVATE_CLS(NSDate), _NS_PRIVATE_SEL(dateWithTimeIntervalSinceNow_), secs); 38 | } 39 | 40 |
//------------------------------------------------------------------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSDefines.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSDefines.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #define _NS_WEAK_IMPORT __attribute__((weak_import)) 26 | #define _NS_EXPORT __attribute__((visibility("default"))) 27 | #define _NS_EXTERN extern "C" _NS_EXPORT 28 | #define _NS_INLINE inline __attribute__((always_inline)) 29 | #define _NS_PACKED __attribute__((packed)) 30 | 31 | #define _NS_CONST(type, name) _NS_EXTERN type const name; 32 | #define _NS_ENUM(type, name) enum name : type 33 | #define _NS_OPTIONS(type, name) \ 34 | using name = type; \ 35 | enum : name 36 | 37 | #define _NS_CAST_TO_UINT(value) static_cast<NS::UInteger>(value) // used by _NS_VALIDATE_ENUM so both sides compare as NS::UInteger 38 | #define _NS_VALIDATE_SIZE(ns, name) static_assert(sizeof(ns::name) == sizeof(ns##name), "size mismatch " #ns "::" #name) 39 | #define _NS_VALIDATE_ENUM(ns, name) static_assert(_NS_CAST_TO_UINT(ns::name) == _NS_CAST_TO_UINT(ns##name), "value mismatch " #ns "::" #name) 40 | 41 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 42 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSEnumerator.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSEnumerator.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSObject.hpp" 26 | #include "NSTypes.hpp" 27 | 28 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 29 | 30 | namespace NS 31 | { 32 | struct FastEnumerationState // state record handed to countByEnumerating; declared packed 33 | { 34 | unsigned long state; 35 | Object** itemsPtr; 36 | unsigned long* mutationsPtr; 37 | unsigned long extra[5]; 38 | } _NS_PACKED; 39 | 40 | class FastEnumeration : public Referencing<FastEnumeration> 41 | { 42 | public: 43 | NS::UInteger countByEnumerating(FastEnumerationState* pState, Object** pBuffer, NS::UInteger len); 44 | }; 45 | 46 | template <class _ObjectType> 47 | class Enumerator : public Referencing<Enumerator<_ObjectType>, FastEnumeration> // typed enumerator layered on FastEnumeration 48 | { 49 | public: 50 | _ObjectType* nextObject(); 51 | class Array* allObjects(); 52 | }; 53 | } 54 | 55 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 56 | 57 | _NS_INLINE NS::UInteger NS::FastEnumeration::countByEnumerating(FastEnumerationState* pState, Object** pBuffer, NS::UInteger len) // instance message 'countByEnumeratingWithState_objects_count_' 58 | { 59 | return Object::sendMessage<NS::UInteger>(this,
_NS_PRIVATE_SEL(countByEnumeratingWithState_objects_count_), pState, pBuffer, len); 60 | } 61 | 62 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 63 | 64 | template <class _ObjectType> 65 | _NS_INLINE _ObjectType* NS::Enumerator<_ObjectType>::nextObject() // instance message 'nextObject', result typed as _ObjectType* 66 | { 67 | return Object::sendMessage<_ObjectType*>(this, _NS_PRIVATE_SEL(nextObject)); 68 | } 69 | 70 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 71 | 72 | template <class _ObjectType> 73 | _NS_INLINE NS::Array* NS::Enumerator<_ObjectType>::allObjects() // instance message 'allObjects', result typed as Array* 74 | { 75 | return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(allObjects)); 76 | } 77 | 78 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 79 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSLock.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSLock.hpp 4 | // 5 | // See LICENSE.txt for this project licensing information.
6 | // 7 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 8 | 9 | #pragma once 10 | 11 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 12 | 13 | #include "NSDefines.hpp" 14 | #include "NSObject.hpp" 15 | #include "NSPrivate.hpp" 16 | #include "NSTypes.hpp" 17 | #include "NSDate.hpp" 18 | 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | namespace NS 22 | { 23 | 24 | template <class _Class, class _Base = class Object> 25 | class Locking : public _Base // mixin adding lock()/unlock() on top of _Base 26 | { 27 | public: 28 | void lock(); 29 | void unlock(); 30 | }; 31 | 32 | class Condition : public Locking<Condition> // wrapper whose members message the NSCondition class / instance 33 | { 34 | public: 35 | static Condition* alloc(); 36 | 37 | Condition* init(); 38 | 39 | void wait(); 40 | bool waitUntilDate(Date* pLimit); 41 | void signal(); 42 | void broadcast(); 43 | }; 44 | 45 | } // NS 46 | 47 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 48 | 49 | template <class _Class, class _Base> 50 | _NS_INLINE void NS::Locking<_Class, _Base>::lock() // instance message 'lock', no result 51 | { 52 | NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(lock)); 53 | } 54 | 55 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 56 | 57 | template <class _Class, class _Base> 58 | _NS_INLINE void NS::Locking<_Class, _Base>::unlock() // instance message 'unlock', no result 59 | { 60 | NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(unlock)); 61 | } 62 | 63 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 64 | 65 | _NS_INLINE NS::Condition* NS::Condition::alloc() // delegates to Object::alloc for the NSCondition class 66 | { 67 | return
NS::Object::alloc<NS::Condition>(_NS_PRIVATE_CLS(NSCondition)); 68 | } 69 | 70 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 71 | 72 | _NS_INLINE NS::Condition* NS::Condition::init() // delegates to Object::init, typed as Condition 73 | { 74 | return NS::Object::init<NS::Condition>(); 75 | } 76 | 77 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 78 | 79 | _NS_INLINE void NS::Condition::wait() // instance message 'wait', no result 80 | { 81 | NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(wait)); 82 | } 83 | 84 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 85 | 86 | _NS_INLINE bool NS::Condition::waitUntilDate(NS::Date* pLimit) // instance message 'waitUntilDate_' with pLimit; the bool result is passed through unchanged 87 | { 88 | return NS::Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(waitUntilDate_), pLimit); 89 | } 90 | 91 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 92 | 93 | _NS_INLINE void NS::Condition::signal() // instance message 'signal', no result 94 | { 95 | NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(signal)); 96 | } 97 | 98 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 99 | 100 | _NS_INLINE void NS::Condition::broadcast() // instance message 'broadcast', no result 101 | { 102 | NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(broadcast)); 103 | } 104 | 105 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSNotification.hpp: -------------------------------------------------------------------------------- 1 |
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSNotification.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSDefines.hpp" 26 | #include "NSDictionary.hpp" 27 | #include "NSObject.hpp" 28 | #include "NSString.hpp" 29 | #include "NSTypes.hpp" 30 | 31 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 32 | 33 | namespace NS 34 | { 35 | using NotificationName = class String*; 36 | 37 | class Notification : public NS::Referencing<Notification> // read-only wrapper: three accessors, each implemented as an instance message 38 | { 39 | public: 40 | NS::String* name() const; 41 | NS::Object* object() const; 42 | NS::Dictionary* userInfo() const; 43 | }; 44 | } 45 | 46 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 47 | 48 |
_NS_INLINE NS::String* NS::Notification::name() const 49 | { 50 | return Object::sendMessage<NS::String*>(this, _NS_PRIVATE_SEL(name)); // instance message 'name' 51 | } 52 | 53 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 54 | 55 | _NS_INLINE NS::Object* NS::Notification::object() const // instance message 'object', result typed as Object* 56 | { 57 | return Object::sendMessage<NS::Object*>(this, _NS_PRIVATE_SEL(object)); 58 | } 59 | 60 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 61 | 62 | _NS_INLINE NS::Dictionary* NS::Notification::userInfo() const // instance message 'userInfo', result typed as Dictionary* 63 | { 64 | return Object::sendMessage<NS::Dictionary*>(this, _NS_PRIVATE_SEL(userInfo)); 65 | } 66 | 67 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 68 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSObjCRuntime.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSObjCRuntime.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSDefines.hpp" 26 | #include "NSTypes.hpp" 27 | 28 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 29 | 30 | namespace NS 31 | { 32 | 33 | _NS_ENUM(Integer, ComparisonResult) { // expands to 'enum ComparisonResult : Integer'; enumerators are -1, 0 and 1 34 | OrderedAscending = -1, 35 | OrderedSame = 0, 36 | OrderedDescending = 1, 37 | }; 38 | 39 | const Integer NotFound = IntegerMax; // same value as NS::IntegerMax (declared in NSTypes.hpp) 40 | 41 | } 42 | 43 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 44 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSRange.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSRange.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSDefines.hpp" 26 | #include "NSTypes.hpp" 27 | 28 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 29 | 30 | namespace NS 31 | { 32 | struct Range // packed pair of UInteger fields: location followed by length 33 | { 34 | static Range Make(UInteger loc, UInteger len); // factory equivalent to the constructor 35 | 36 | Range(UInteger loc, UInteger len); 37 | 38 | bool Equal(const Range& range) const; 39 | bool LocationInRange(UInteger loc) const; 40 | UInteger Max() const; 41 | 42 | UInteger location; // first member; declaration order fixes the packed layout 43 | UInteger length; 44 | } _NS_PACKED; 45 | } 46 | 47 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 48 | 49 | _NS_INLINE NS::Range::Range(UInteger loc, UInteger len) // stores loc and len unchanged, no validation 50 | : location(loc) 51 | , length(len) 52 | { 53 | } 54 | 55 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 56 | 57 | _NS_INLINE NS::Range NS::Range::Make(UInteger loc, UInteger len) // returns Range(loc, len) by value 58 | { 59 | return Range(loc, len); 60 | } 61 | 62 |
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 63 | 64 | _NS_INLINE bool NS::Range::Equal(const Range& range) const 65 | { 66 | return (location == range.location) && (length == range.length); 67 | } 68 | 69 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 70 | 71 | _NS_INLINE bool NS::Range::LocationInRange(UInteger loc) const 72 | { 73 | return (!(loc < location)) && ((loc - location) < length); 74 | } 75 | 76 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 77 | 78 | _NS_INLINE NS::UInteger NS::Range::Max() const 79 | { 80 | return location + length; 81 | } 82 | 83 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 84 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSTypes.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSTypes.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSDefines.hpp" 26 | 27 | #include 28 | #include 29 | 30 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 31 | 32 | namespace NS 33 | { 34 | using TimeInterval = double; 35 | 36 | using Integer = std::intptr_t; 37 | using UInteger = std::uintptr_t; 38 | 39 | const Integer IntegerMax = INTPTR_MAX; 40 | const Integer IntegerMin = INTPTR_MIN; 41 | const UInteger UIntegerMax = UINTPTR_MAX; 42 | 43 | struct OperatingSystemVersion 44 | { 45 | Integer majorVersion; 46 | Integer minorVersion; 47 | Integer patchVersion; 48 | } _NS_PACKED; 49 | } 50 | 51 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 52 | -------------------------------------------------------------------------------- /metal-cpp/Foundation/NSURL.hpp: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Foundation/NSURL.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "NSDefines.hpp" 26 | #include "NSObject.hpp" 27 | #include "NSPrivate.hpp" 28 | #include "NSTypes.hpp" 29 | 30 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 31 | 32 | namespace NS 33 | { 34 | class URL : public Copying 35 | { 36 | public: 37 | static URL* fileURLWithPath(const class String* pPath); 38 | 39 | static URL* alloc(); 40 | URL* init(); 41 | URL* init(const class String* pString); 42 | URL* initFileURLWithPath(const class String* pPath); 43 | 44 | const char* fileSystemRepresentation() const; 45 | }; 46 | } 47 | 48 | 
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 49 | 50 | _NS_INLINE NS::URL* NS::URL::fileURLWithPath(const String* pPath) 51 | { 52 | return Object::sendMessage(_NS_PRIVATE_CLS(NSURL), _NS_PRIVATE_SEL(fileURLWithPath_), pPath); 53 | } 54 | 55 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 56 | 57 | _NS_INLINE NS::URL* NS::URL::alloc() 58 | { 59 | return Object::alloc(_NS_PRIVATE_CLS(NSURL)); 60 | } 61 | 62 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 63 | 64 | _NS_INLINE NS::URL* NS::URL::init() 65 | { 66 | return Object::init(); 67 | } 68 | 69 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 70 | 71 | _NS_INLINE NS::URL* NS::URL::init(const String* pString) 72 | { 73 | return Object::sendMessage(this, _NS_PRIVATE_SEL(initWithString_), pString); 74 | } 75 | 76 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 77 | 78 | _NS_INLINE NS::URL* NS::URL::initFileURLWithPath(const String* pPath) 79 | { 80 | return Object::sendMessage(this, _NS_PRIVATE_SEL(initFileURLWithPath_), pPath); 81 | } 82 | 83 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 84 | 85 | _NS_INLINE const char* NS::URL::fileSystemRepresentation() const 86 | { 87 | return Object::sendMessage(this, _NS_PRIVATE_SEL(fileSystemRepresentation)); 88 | } 89 | 90 | 
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 91 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLBinaryArchive.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLBinaryArchive.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | namespace MTL 30 | { 31 | _MTL_ENUM(NS::UInteger, BinaryArchiveError) { 32 | BinaryArchiveErrorNone = 0, 33 | BinaryArchiveErrorInvalidFile = 1, 34 | BinaryArchiveErrorUnexpectedElement = 2, 35 | BinaryArchiveErrorCompilationFailure = 3, 36 | }; 37 | 38 | class BinaryArchiveDescriptor : public NS::Copying 39 | { 40 | public: 41 | static class BinaryArchiveDescriptor* alloc(); 42 | 43 | class BinaryArchiveDescriptor* init(); 44 | 45 | NS::URL* url() const; 46 | void setUrl(const NS::URL* url); 47 | }; 48 | 49 | class BinaryArchive : public NS::Referencing 50 | { 51 | public: 52 | NS::String* label() const; 53 | void setLabel(const NS::String* label); 54 | 55 | class Device* device() const; 56 | 57 | bool addComputePipelineFunctions(const class ComputePipelineDescriptor* descriptor, NS::Error** error); 58 | 59 | bool addRenderPipelineFunctions(const class RenderPipelineDescriptor* descriptor, NS::Error** error); 60 | 61 | bool addTileRenderPipelineFunctions(const class TileRenderPipelineDescriptor* descriptor, NS::Error** error); 62 | 63 | bool serializeToURL(const NS::URL* url, NS::Error** error); 64 | 65 | bool addFunction(const class FunctionDescriptor* descriptor, const class Library* library, NS::Error** error); 66 | }; 67 | 68 | } 69 | 70 | // static method: alloc 71 | _MTL_INLINE MTL::BinaryArchiveDescriptor* MTL::BinaryArchiveDescriptor::alloc() 72 | { 73 | return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLBinaryArchiveDescriptor)); 74 | } 75 | 76 | // method: init 77 | _MTL_INLINE MTL::BinaryArchiveDescriptor* MTL::BinaryArchiveDescriptor::init() 78 | { 79 | return NS::Object::init(); 80 | } 81 | 82 | // property: url 83 | _MTL_INLINE 
NS::URL* MTL::BinaryArchiveDescriptor::url() const 84 | { 85 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(url)); 86 | } 87 | 88 | _MTL_INLINE void MTL::BinaryArchiveDescriptor::setUrl(const NS::URL* url) 89 | { 90 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setUrl_), url); 91 | } 92 | 93 | // property: label 94 | _MTL_INLINE NS::String* MTL::BinaryArchive::label() const 95 | { 96 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); 97 | } 98 | 99 | _MTL_INLINE void MTL::BinaryArchive::setLabel(const NS::String* label) 100 | { 101 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); 102 | } 103 | 104 | // property: device 105 | _MTL_INLINE MTL::Device* MTL::BinaryArchive::device() const 106 | { 107 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); 108 | } 109 | 110 | // method: addComputePipelineFunctionsWithDescriptor:error: 111 | _MTL_INLINE bool MTL::BinaryArchive::addComputePipelineFunctions(const MTL::ComputePipelineDescriptor* descriptor, NS::Error** error) 112 | { 113 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(addComputePipelineFunctionsWithDescriptor_error_), descriptor, error); 114 | } 115 | 116 | // method: addRenderPipelineFunctionsWithDescriptor:error: 117 | _MTL_INLINE bool MTL::BinaryArchive::addRenderPipelineFunctions(const MTL::RenderPipelineDescriptor* descriptor, NS::Error** error) 118 | { 119 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(addRenderPipelineFunctionsWithDescriptor_error_), descriptor, error); 120 | } 121 | 122 | // method: addTileRenderPipelineFunctionsWithDescriptor:error: 123 | _MTL_INLINE bool MTL::BinaryArchive::addTileRenderPipelineFunctions(const MTL::TileRenderPipelineDescriptor* descriptor, NS::Error** error) 124 | { 125 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(addTileRenderPipelineFunctionsWithDescriptor_error_), descriptor, error); 126 | } 127 | 128 | // method: serializeToURL:error: 129 | _MTL_INLINE bool MTL::BinaryArchive::serializeToURL(const NS::URL* 
url, NS::Error** error) 130 | { 131 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(serializeToURL_error_), url, error); 132 | } 133 | 134 | // method: addFunctionWithDescriptor:library:error: 135 | _MTL_INLINE bool MTL::BinaryArchive::addFunction(const MTL::FunctionDescriptor* descriptor, const MTL::Library* library, NS::Error** error) 136 | { 137 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(addFunctionWithDescriptor_library_error_), descriptor, library, error); 138 | } 139 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLBuffer.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLBuffer.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLResource.hpp" 30 | 31 | namespace MTL 32 | { 33 | class Buffer : public NS::Referencing 34 | { 35 | public: 36 | NS::UInteger length() const; 37 | 38 | void* contents(); 39 | 40 | void didModifyRange(NS::Range range); 41 | 42 | class Texture* newTexture(const class TextureDescriptor* descriptor, NS::UInteger offset, NS::UInteger bytesPerRow); 43 | 44 | void addDebugMarker(const NS::String* marker, NS::Range range); 45 | 46 | void removeAllDebugMarkers(); 47 | 48 | class Buffer* remoteStorageBuffer() const; 49 | 50 | class Buffer* newRemoteBufferViewForDevice(const class Device* device); 51 | }; 52 | 53 | } 54 | 55 | // property: length 56 | _MTL_INLINE NS::UInteger MTL::Buffer::length() const 57 | { 58 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(length)); 59 | } 60 | 61 | // method: contents 62 | _MTL_INLINE void* MTL::Buffer::contents() 63 | { 64 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(contents)); 65 | } 66 | 67 | // method: didModifyRange: 68 | _MTL_INLINE void MTL::Buffer::didModifyRange(NS::Range range) 69 | { 70 | Object::sendMessage(this, _MTL_PRIVATE_SEL(didModifyRange_), range); 71 | } 72 | 73 | // method: newTextureWithDescriptor:offset:bytesPerRow: 74 | _MTL_INLINE MTL::Texture* MTL::Buffer::newTexture(const MTL::TextureDescriptor* descriptor, NS::UInteger offset, NS::UInteger bytesPerRow) 75 | { 76 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_offset_bytesPerRow_), descriptor, offset, bytesPerRow); 77 | } 78 | 79 | // method: addDebugMarker:range: 80 | _MTL_INLINE void MTL::Buffer::addDebugMarker(const NS::String* marker, NS::Range range) 81 | { 82 | 
Object::sendMessage(this, _MTL_PRIVATE_SEL(addDebugMarker_range_), marker, range); 83 | } 84 | 85 | // method: removeAllDebugMarkers 86 | _MTL_INLINE void MTL::Buffer::removeAllDebugMarkers() 87 | { 88 | Object::sendMessage(this, _MTL_PRIVATE_SEL(removeAllDebugMarkers)); 89 | } 90 | 91 | // property: remoteStorageBuffer 92 | _MTL_INLINE MTL::Buffer* MTL::Buffer::remoteStorageBuffer() const 93 | { 94 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(remoteStorageBuffer)); 95 | } 96 | 97 | // method: newRemoteBufferViewForDevice: 98 | _MTL_INLINE MTL::Buffer* MTL::Buffer::newRemoteBufferViewForDevice(const MTL::Device* device) 99 | { 100 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(newRemoteBufferViewForDevice_), device); 101 | } 102 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLCaptureScope.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLCaptureScope.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "MTLDefines.hpp" 26 | #include "MTLPrivate.hpp" 27 | 28 | #include "../Foundation/NSObject.hpp" 29 | #include "../Foundation/NSString.hpp" 30 | 31 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 32 | 33 | namespace MTL 34 | { 35 | class CaptureScope : public NS::Referencing 36 | { 37 | public: 38 | class Device* device() const; 39 | 40 | NS::String* label() const; 41 | void setLabel(const NS::String* pLabel); 42 | 43 | class CommandQueue* commandQueue() const; 44 | 45 | void beginScope(); 46 | void endScope(); 47 | }; 48 | } 49 | 50 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 51 | 52 | _MTL_INLINE MTL::Device* MTL::CaptureScope::device() const 53 | { 54 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); 55 | } 56 | 57 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 58 | 59 | _MTL_INLINE NS::String* MTL::CaptureScope::label() const 60 | { 61 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); 62 | } 63 | 64 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 65 | 66 | _MTL_INLINE void MTL::CaptureScope::setLabel(const NS::String* pLabel) 67 | { 68 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), pLabel); 69 | } 70 
| 71 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 72 | 73 | _MTL_INLINE MTL::CommandQueue* MTL::CaptureScope::commandQueue() const 74 | { 75 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandQueue)); 76 | } 77 | 78 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 79 | 80 | _MTL_INLINE void MTL::CaptureScope::beginScope() 81 | { 82 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(beginScope)); 83 | } 84 | 85 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 86 | 87 | _MTL_INLINE void MTL::CaptureScope::endScope() 88 | { 89 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(endScope)); 90 | } 91 | 92 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 93 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLCommandEncoder.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLCommandEncoder.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | namespace MTL 30 | { 31 | _MTL_OPTIONS(NS::UInteger, ResourceUsage) { 32 | ResourceUsageRead = 1, 33 | ResourceUsageWrite = 2, 34 | ResourceUsageSample = 4, 35 | }; 36 | 37 | _MTL_OPTIONS(NS::UInteger, BarrierScope) { 38 | BarrierScopeBuffers = 1, 39 | BarrierScopeTextures = 2, 40 | BarrierScopeRenderTargets = 4, 41 | }; 42 | 43 | class CommandEncoder : public NS::Referencing 44 | { 45 | public: 46 | class Device* device() const; 47 | 48 | NS::String* label() const; 49 | void setLabel(const NS::String* label); 50 | 51 | void endEncoding(); 52 | 53 | void insertDebugSignpost(const NS::String* string); 54 | 55 | void pushDebugGroup(const NS::String* string); 56 | 57 | void popDebugGroup(); 58 | }; 59 | 60 | } 61 | 62 | // property: device 63 | _MTL_INLINE MTL::Device* MTL::CommandEncoder::device() const 64 | { 65 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); 66 | } 67 | 68 | // property: label 69 | _MTL_INLINE NS::String* MTL::CommandEncoder::label() const 70 | { 71 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); 72 | } 73 | 74 | _MTL_INLINE void MTL::CommandEncoder::setLabel(const NS::String* label) 75 | { 76 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), 
label); 77 | } 78 | 79 | // method: endEncoding 80 | _MTL_INLINE void MTL::CommandEncoder::endEncoding() 81 | { 82 | Object::sendMessage(this, _MTL_PRIVATE_SEL(endEncoding)); 83 | } 84 | 85 | // method: insertDebugSignpost: 86 | _MTL_INLINE void MTL::CommandEncoder::insertDebugSignpost(const NS::String* string) 87 | { 88 | Object::sendMessage(this, _MTL_PRIVATE_SEL(insertDebugSignpost_), string); 89 | } 90 | 91 | // method: pushDebugGroup: 92 | _MTL_INLINE void MTL::CommandEncoder::pushDebugGroup(const NS::String* string) 93 | { 94 | Object::sendMessage(this, _MTL_PRIVATE_SEL(pushDebugGroup_), string); 95 | } 96 | 97 | // method: popDebugGroup 98 | _MTL_INLINE void MTL::CommandEncoder::popDebugGroup() 99 | { 100 | Object::sendMessage(this, _MTL_PRIVATE_SEL(popDebugGroup)); 101 | } 102 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLCommandQueue.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLCommandQueue.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | namespace MTL 30 | { 31 | class CommandQueue : public NS::Referencing 32 | { 33 | public: 34 | NS::String* label() const; 35 | void setLabel(const NS::String* label); 36 | 37 | class Device* device() const; 38 | 39 | class CommandBuffer* commandBuffer(); 40 | 41 | class CommandBuffer* commandBuffer(const class CommandBufferDescriptor* descriptor); 42 | 43 | class CommandBuffer* commandBufferWithUnretainedReferences(); 44 | 45 | void insertDebugCaptureBoundary(); 46 | }; 47 | 48 | } 49 | 50 | // property: label 51 | _MTL_INLINE NS::String* MTL::CommandQueue::label() const 52 | { 53 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); 54 | } 55 | 56 | _MTL_INLINE void MTL::CommandQueue::setLabel(const NS::String* label) 57 | { 58 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); 59 | } 60 | 61 | // property: device 62 | _MTL_INLINE MTL::Device* MTL::CommandQueue::device() const 63 | { 64 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); 65 | } 66 | 67 | // method: commandBuffer 68 | _MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBuffer() 69 | { 70 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandBuffer)); 71 | } 72 | 73 | // method: commandBufferWithDescriptor: 74 | _MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBuffer(const MTL::CommandBufferDescriptor* descriptor) 75 | { 76 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(commandBufferWithDescriptor_), descriptor); 77 | } 78 | 79 | // method: commandBufferWithUnretainedReferences 80 | _MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBufferWithUnretainedReferences() 81 | { 82 | return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(commandBufferWithUnretainedReferences)); 83 | } 84 | 85 | // method: insertDebugCaptureBoundary 86 | _MTL_INLINE void MTL::CommandQueue::insertDebugCaptureBoundary() 87 | { 88 | Object::sendMessage(this, _MTL_PRIVATE_SEL(insertDebugCaptureBoundary)); 89 | } 90 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLDefines.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLDefines.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
//
//-------------------------------------------------------------------------------------------------------------------------------------------------------------

#pragma once

//-------------------------------------------------------------------------------------------------------------------------------------------------------------

#include "../Foundation/NSDefines.hpp"

//-------------------------------------------------------------------------------------------------------------------------------------------------------------

// Every _MTL_* macro below is a direct alias of the _NS_* macro with the same
// suffix from NSDefines.hpp.

// Linkage / attribute aliases.
#define _MTL_EXPORT _NS_EXPORT
#define _MTL_EXTERN _NS_EXTERN
#define _MTL_INLINE _NS_INLINE
#define _MTL_PACKED _NS_PACKED

// Declaration helpers for constants, enums and option sets.
#define _MTL_CONST(type, name) _NS_CONST(type, name)
#define _MTL_ENUM(type, name) _NS_ENUM(type, name)
#define _MTL_OPTIONS(type, name) _NS_OPTIONS(type, name)

// Validation helpers; arguments are forwarded unchanged.
#define _MTL_VALIDATE_SIZE(ns, name) _NS_VALIDATE_SIZE(ns, name)
#define _MTL_VALIDATE_ENUM(ns, name) _NS_VALIDATE_ENUM(ns, name)

//-------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/metal-cpp/Metal/MTLDrawable.hpp:
--------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------------------------------------------------------------------
//
// Metal/MTLDrawable.hpp
//
// Copyright 2020-2021 Apple Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include 30 | #include 31 | 32 | namespace MTL 33 | { 34 | using DrawablePresentedHandler = void (^)(class Drawable*); 35 | // std::function-based handler type accepted by the first addPresentedHandler overload below. 36 | using DrawablePresentedHandlerFunction = std::function; 37 | // Wrapper class: every method defined below forwards to Object::sendMessage with the matching _MTL_PRIVATE_SEL selector. 38 | class Drawable : public NS::Referencing 39 | { 40 | public: 41 | void addPresentedHandler(const MTL::DrawablePresentedHandlerFunction& function); 42 | 43 | void present(); 44 | 45 | void presentAtTime(CFTimeInterval presentationTime); 46 | 47 | void presentAfterMinimumDuration(CFTimeInterval duration); 48 | 49 | void addPresentedHandler(const MTL::DrawablePresentedHandler block); 50 | 51 | CFTimeInterval presentedTime() const; 52 | 53 | NS::UInteger drawableID() const; 54 | }; 55 | 56 | } 57 | // Convenience overload: copies `function` into a __block variable and registers a block that calls it with the drawable. 58 | _MTL_INLINE void MTL::Drawable::addPresentedHandler(const MTL::DrawablePresentedHandlerFunction& function) 59 | { 60 | __block DrawablePresentedHandlerFunction blockFunction = function; 61 | 62 | addPresentedHandler(^(Drawable* pDrawable) { blockFunction(pDrawable); }); 63 | } 64 | 65 | // method: present 66 | _MTL_INLINE void MTL::Drawable::present() 67 | { 68 | Object::sendMessage(this, _MTL_PRIVATE_SEL(present)); 69 | } 70 | 71 | // method: presentAtTime: 72 | _MTL_INLINE void
MTL::Drawable::presentAtTime(CFTimeInterval presentationTime) 73 | { 74 | Object::sendMessage(this, _MTL_PRIVATE_SEL(presentAtTime_), presentationTime); 75 | } 76 | 77 | // method: presentAfterMinimumDuration: 78 | _MTL_INLINE void MTL::Drawable::presentAfterMinimumDuration(CFTimeInterval duration) 79 | { 80 | Object::sendMessage(this, _MTL_PRIVATE_SEL(presentAfterMinimumDuration_), duration); 81 | } 82 | 83 | // method: addPresentedHandler: (block overload; hands `block` directly to sendMessage) 84 | _MTL_INLINE void MTL::Drawable::addPresentedHandler(const MTL::DrawablePresentedHandler block) 85 | { 86 | Object::sendMessage(this, _MTL_PRIVATE_SEL(addPresentedHandler_), block); 87 | } 88 | 89 | // property: presentedTime 90 | _MTL_INLINE CFTimeInterval MTL::Drawable::presentedTime() const 91 | { 92 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(presentedTime)); 93 | } 94 | 95 | // property: drawableID 96 | _MTL_INLINE NS::UInteger MTL::Drawable::drawableID() const 97 | { 98 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(drawableID)); 99 | } 100 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLDynamicLibrary.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLDynamicLibrary.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | namespace MTL 30 | { 31 | _MTL_ENUM(NS::UInteger, DynamicLibraryError) { 32 | DynamicLibraryErrorNone = 0, 33 | DynamicLibraryErrorInvalidFile = 1, 34 | DynamicLibraryErrorCompilationFailure = 2, 35 | DynamicLibraryErrorUnresolvedInstallName = 3, 36 | DynamicLibraryErrorDependencyLoadFailure = 4, 37 | DynamicLibraryErrorUnsupported = 5, 38 | }; 39 | // Wrapper class: each method defined below forwards to Object::sendMessage with the matching _MTL_PRIVATE_SEL selector. 40 | class DynamicLibrary : public NS::Referencing 41 | { 42 | public: 43 | NS::String* label() const; 44 | void setLabel(const NS::String* label); 45 | 46 | class Device* device() const; 47 | 48 | NS::String* installName() const; 49 | 50 | bool serializeToURL(const NS::URL* url, NS::Error** error); 51 | }; 52 | 53 | } 54 | 55 | // property: label 56 | _MTL_INLINE NS::String* MTL::DynamicLibrary::label() const 57 | { 58 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); 59 | } 60 | // label setter: forwards `label` with the setLabel_ selector. 61 | _MTL_INLINE void MTL::DynamicLibrary::setLabel(const NS::String* label) 62 | { 63 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); 64 | } 65 | 66 | // property: device 67 | _MTL_INLINE MTL::Device* MTL::DynamicLibrary::device() const 68 | { 69 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); 70 | } 71 | 72 | // property: installName 73 | _MTL_INLINE NS::String* MTL::DynamicLibrary::installName() const 74 | { 75 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(installName)); 76 | } 77 | 78 | // method: serializeToURL:error: (returns the bool produced by sendMessage; `error` is passed through untouched) 79 | _MTL_INLINE bool MTL::DynamicLibrary::serializeToURL(const NS::URL* url, NS::Error** error) 80 | { 81 | return Object::sendMessage(this,
_MTL_PRIVATE_SEL(serializeToURL_error_), url, error); 82 | } 83 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLFence.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLFence.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License.
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | // Declares MTL::Fence, a wrapper whose accessors forward to Object::sendMessage with the matching _MTL_PRIVATE_SEL selector. 29 | namespace MTL 30 | { 31 | class Fence : public NS::Referencing 32 | { 33 | public: 34 | class Device* device() const; 35 | 36 | NS::String* label() const; 37 | void setLabel(const NS::String* label); 38 | }; 39 | 40 | } 41 | 42 | // property: device 43 | _MTL_INLINE MTL::Device* MTL::Fence::device() const 44 | { 45 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); 46 | } 47 | 48 | // property: label 49 | _MTL_INLINE NS::String* MTL::Fence::label() const 50 | { 51 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(label)); 52 | } 53 | // label setter: forwards `label` with the setLabel_ selector. 54 | _MTL_INLINE void MTL::Fence::setLabel(const NS::String* label) 55 | { 56 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setLabel_), label); 57 | } 58 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLFunctionConstantValues.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLFunctionConstantValues.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLArgument.hpp" 30 | // Declares MTL::FunctionConstantValues: alloc() uses _MTL_PRIVATE_CLS(MTLFunctionConstantValues); the setters and reset() forward to Object::sendMessage. 31 | namespace MTL 32 | { 33 | class FunctionConstantValues : public NS::Copying 34 | { 35 | public: 36 | static class FunctionConstantValues* alloc(); 37 | 38 | class FunctionConstantValues* init(); 39 | 40 | void setConstantValue(const void* value, MTL::DataType type, NS::UInteger index); 41 | 42 | void setConstantValues(const void* values, MTL::DataType type, NS::Range range); 43 | 44 | void setConstantValue(const void* value, MTL::DataType type, const NS::String* name); 45 | 46 | void reset(); 47 | }; 48 | 49 | } 50 | 51 | // static method: alloc 52 | _MTL_INLINE MTL::FunctionConstantValues* MTL::FunctionConstantValues::alloc() 53 | { 54 | return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLFunctionConstantValues)); 55 | } 56 | 57 | // method: init 58 | _MTL_INLINE MTL::FunctionConstantValues* MTL::FunctionConstantValues::init() 59 | { 60 | return NS::Object::init(); 61 | } 62 | 63 | // method: setConstantValue:type:atIndex: 64 | _MTL_INLINE void MTL::FunctionConstantValues::setConstantValue(const void* value, MTL::DataType type, NS::UInteger index) 65 | { 66 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantValue_type_atIndex_), value, type, index); 67 | } 68 | 69 | // method: setConstantValues:type:withRange: 70 | _MTL_INLINE void MTL::FunctionConstantValues::setConstantValues(const void* values, MTL::DataType type, NS::Range range) 71 | { 72 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantValues_type_withRange_), values, type, range); 73 | } 74 | 75 | // method:
setConstantValue:type:withName: 76 | _MTL_INLINE void MTL::FunctionConstantValues::setConstantValue(const void* value, MTL::DataType type, const NS::String* name) 77 | { 78 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setConstantValue_type_withName_), value, type, name); 79 | } 80 | 81 | // method: reset 82 | _MTL_INLINE void MTL::FunctionConstantValues::reset() 83 | { 84 | Object::sendMessage(this, _MTL_PRIVATE_SEL(reset)); 85 | } 86 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLFunctionHandle.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLFunctionHandle.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License.
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLLibrary.hpp" 30 | // Declares MTL::FunctionHandle: three read-only accessors, each forwarding to Object::sendMessage with the matching _MTL_PRIVATE_SEL selector. 31 | namespace MTL 32 | { 33 | class FunctionHandle : public NS::Referencing 34 | { 35 | public: 36 | MTL::FunctionType functionType() const; 37 | 38 | NS::String* name() const; 39 | 40 | class Device* device() const; 41 | }; 42 | 43 | } 44 | 45 | // property: functionType 46 | _MTL_INLINE MTL::FunctionType MTL::FunctionHandle::functionType() const 47 | { 48 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionType)); 49 | } 50 | 51 | // property: name 52 | _MTL_INLINE NS::String* MTL::FunctionHandle::name() const 53 | { 54 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(name)); 55 | } 56 | 57 | // property: device 58 | _MTL_INLINE MTL::Device* MTL::FunctionHandle::device() const 59 | { 60 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(device)); 61 | } 62 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLFunctionLog.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLFunctionLog.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLFunctionLog.hpp" 30 | // NOTE(review): the include above names this header itself; the `#pragma once` at the top makes it a no-op. 31 | namespace MTL 32 | { 33 | _MTL_ENUM(NS::UInteger, FunctionLogType) { 34 | FunctionLogTypeValidation = 0, 35 | }; 36 | // LogContainer declares no members of its own in this header. 37 | class LogContainer : public NS::Referencing 38 | { 39 | public: 40 | }; 41 | // Read-only accessors (functionName, URL, line, column); each forwards the matching selector through Object::sendMessage. 42 | class FunctionLogDebugLocation : public NS::Referencing 43 | { 44 | public: 45 | NS::String* functionName() const; 46 | 47 | NS::URL* URL() const; 48 | 49 | NS::UInteger line() const; 50 | 51 | NS::UInteger column() const; 52 | }; 53 | // Read-only accessors (type, encoderLabel, function, debugLocation); each forwards the matching selector through Object::sendMessage. 54 | class FunctionLog : public NS::Referencing 55 | { 56 | public: 57 | MTL::FunctionLogType type() const; 58 | 59 | NS::String* encoderLabel() const; 60 | 61 | class Function* function() const; 62 | 63 | class FunctionLogDebugLocation* debugLocation() const; 64 | }; 65 | 66 | } 67 | 68 | // property: functionName 69 | _MTL_INLINE NS::String* MTL::FunctionLogDebugLocation::functionName() const 70 | { 71 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(functionName)); 72 | } 73 | 74 | // property: URL 75 | _MTL_INLINE NS::URL* MTL::FunctionLogDebugLocation::URL() const 76 | { 77 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(URL)); 78 | } 79 | 80 | // property: line 81 | _MTL_INLINE NS::UInteger
MTL::FunctionLogDebugLocation::line() const 82 | { 83 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(line)); 84 | } 85 | 86 | // property: column 87 | _MTL_INLINE NS::UInteger MTL::FunctionLogDebugLocation::column() const 88 | { 89 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(column)); 90 | } 91 | 92 | // property: type 93 | _MTL_INLINE MTL::FunctionLogType MTL::FunctionLog::type() const 94 | { 95 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(type)); 96 | } 97 | 98 | // property: encoderLabel 99 | _MTL_INLINE NS::String* MTL::FunctionLog::encoderLabel() const 100 | { 101 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(encoderLabel)); 102 | } 103 | 104 | // property: function 105 | _MTL_INLINE MTL::Function* MTL::FunctionLog::function() const 106 | { 107 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(function)); 108 | } 109 | 110 | // property: debugLocation 111 | _MTL_INLINE MTL::FunctionLogDebugLocation* MTL::FunctionLog::debugLocation() const 112 | { 113 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(debugLocation)); 114 | } 115 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLLinkedFunctions.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLLinkedFunctions.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | // Declares MTL::LinkedFunctions: alloc()/linkedFunctions() use _MTL_PRIVATE_CLS(MTLLinkedFunctions); the four array properties each have a getter and a setter that forward to Object::sendMessage. 29 | namespace MTL 30 | { 31 | class LinkedFunctions : public NS::Copying 32 | { 33 | public: 34 | static class LinkedFunctions* alloc(); 35 | 36 | class LinkedFunctions* init(); 37 | 38 | static class LinkedFunctions* linkedFunctions(); 39 | 40 | NS::Array* functions() const; 41 | void setFunctions(const NS::Array* functions); 42 | 43 | NS::Array* binaryFunctions() const; 44 | void setBinaryFunctions(const NS::Array* binaryFunctions); 45 | 46 | NS::Array* groups() const; 47 | void setGroups(const NS::Array* groups); 48 | 49 | NS::Array* privateFunctions() const; 50 | void setPrivateFunctions(const NS::Array* privateFunctions); 51 | }; 52 | 53 | } 54 | 55 | // static method: alloc 56 | _MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::alloc() 57 | { 58 | return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLLinkedFunctions)); 59 | } 60 | 61 | // method: init 62 | _MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::init() 63 | { 64 | return NS::Object::init(); 65 | } 66 | 67 | // static method: linkedFunctions (the message is sent to the class object, not to `this`) 68 | _MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::linkedFunctions() 69 | { 70 | return Object::sendMessage(_MTL_PRIVATE_CLS(MTLLinkedFunctions),
_MTL_PRIVATE_SEL(linkedFunctions)); 71 | } 72 | 73 | // property: functions 74 | _MTL_INLINE NS::Array* MTL::LinkedFunctions::functions() const 75 | { 76 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(functions)); 77 | } 78 | // functions setter: forwards the array with the setFunctions_ selector. 79 | _MTL_INLINE void MTL::LinkedFunctions::setFunctions(const NS::Array* functions) 80 | { 81 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctions_), functions); 82 | } 83 | 84 | // property: binaryFunctions 85 | _MTL_INLINE NS::Array* MTL::LinkedFunctions::binaryFunctions() const 86 | { 87 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(binaryFunctions)); 88 | } 89 | // binaryFunctions setter: forwards the array with the setBinaryFunctions_ selector. 90 | _MTL_INLINE void MTL::LinkedFunctions::setBinaryFunctions(const NS::Array* binaryFunctions) 91 | { 92 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setBinaryFunctions_), binaryFunctions); 93 | } 94 | 95 | // property: groups 96 | _MTL_INLINE NS::Array* MTL::LinkedFunctions::groups() const 97 | { 98 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(groups)); 99 | } 100 | // groups setter: forwards the array with the setGroups_ selector. 101 | _MTL_INLINE void MTL::LinkedFunctions::setGroups(const NS::Array* groups) 102 | { 103 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setGroups_), groups); 104 | } 105 | 106 | // property: privateFunctions 107 | _MTL_INLINE NS::Array* MTL::LinkedFunctions::privateFunctions() const 108 | { 109 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(privateFunctions)); 110 | } 111 | // privateFunctions setter: forwards the array with the setPrivateFunctions_ selector. 112 | _MTL_INLINE void MTL::LinkedFunctions::setPrivateFunctions(const NS::Array* privateFunctions) 113 | { 114 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setPrivateFunctions_), privateFunctions); 115 | } 116 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | //
Metal/MTLParallelRenderCommandEncoder.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLCommandEncoder.hpp" 30 | #include "MTLRenderPass.hpp" 31 | // Declares MTL::ParallelRenderCommandEncoder: one factory-style accessor plus six store-action setters, all forwarding to Object::sendMessage with the matching _MTL_PRIVATE_SEL selector. 32 | namespace MTL 33 | { 34 | class ParallelRenderCommandEncoder : public NS::Referencing 35 | { 36 | public: 37 | class RenderCommandEncoder* renderCommandEncoder(); 38 | 39 | void setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex); 40 | 41 | void setDepthStoreAction(MTL::StoreAction storeAction); 42 | 43 | void setStencilStoreAction(MTL::StoreAction storeAction); 44 | 45 | void setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex); 46 | 47 | void setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions); 48 | 49 | void setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions); 50 | }; 51 | 52 | } 53 | 54 | // method: renderCommandEncoder 55 | _MTL_INLINE MTL::RenderCommandEncoder* MTL::ParallelRenderCommandEncoder::renderCommandEncoder() 56 | { 57 | return
Object::sendMessage(this, _MTL_PRIVATE_SEL(renderCommandEncoder)); 58 | } 59 | 60 | // method: setColorStoreAction:atIndex: 61 | _MTL_INLINE void MTL::ParallelRenderCommandEncoder::setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex) 62 | { 63 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setColorStoreAction_atIndex_), storeAction, colorAttachmentIndex); 64 | } 65 | 66 | // method: setDepthStoreAction: 67 | _MTL_INLINE void MTL::ParallelRenderCommandEncoder::setDepthStoreAction(MTL::StoreAction storeAction) 68 | { 69 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepthStoreAction_), storeAction); 70 | } 71 | 72 | // method: setStencilStoreAction: 73 | _MTL_INLINE void MTL::ParallelRenderCommandEncoder::setStencilStoreAction(MTL::StoreAction storeAction) 74 | { 75 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setStencilStoreAction_), storeAction); 76 | } 77 | 78 | // method: setColorStoreActionOptions:atIndex: 79 | _MTL_INLINE void MTL::ParallelRenderCommandEncoder::setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex) 80 | { 81 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setColorStoreActionOptions_atIndex_), storeActionOptions, colorAttachmentIndex); 82 | } 83 | 84 | // method: setDepthStoreActionOptions: 85 | _MTL_INLINE void MTL::ParallelRenderCommandEncoder::setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions) 86 | { 87 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setDepthStoreActionOptions_), storeActionOptions); 88 | } 89 | 90 | // method: setStencilStoreActionOptions: 91 | _MTL_INLINE void MTL::ParallelRenderCommandEncoder::setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions) 92 | { 93 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setStencilStoreActionOptions_), storeActionOptions); 94 | } 95 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLPipeline.hpp:
-------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLPipeline.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLPipeline.hpp" 30 | // NOTE(review): the include above names this header itself; the `#pragma once` at the top makes it a no-op. 31 | namespace MTL 32 | { 33 | _MTL_ENUM(NS::UInteger, Mutability) { 34 | MutabilityDefault = 0, 35 | MutabilityMutable = 1, 36 | MutabilityImmutable = 2, 37 | }; 38 | // Descriptor exposing a single `mutability` property (getter and setter forward to Object::sendMessage). 39 | class PipelineBufferDescriptor : public NS::Copying 40 | { 41 | public: 42 | static class PipelineBufferDescriptor* alloc(); 43 | 44 | class PipelineBufferDescriptor* init(); 45 | 46 | MTL::Mutability mutability() const; 47 | void setMutability(MTL::Mutability mutability); 48 | }; 49 | // Indexed collection of PipelineBufferDescriptor; object()/setObject() forward the indexed-subscript selectors. 50 | class PipelineBufferDescriptorArray : public NS::Referencing 51 | { 52 | public: 53 | static class PipelineBufferDescriptorArray* alloc(); 54 | 55 | class PipelineBufferDescriptorArray* init(); 56 | 57 | class PipelineBufferDescriptor*
object(NS::UInteger bufferIndex); 58 | 59 | void setObject(const class PipelineBufferDescriptor* buffer, NS::UInteger bufferIndex); 60 | }; 61 | 62 | } 63 | 64 | // static method: alloc 65 | _MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptor::alloc() 66 | { 67 | return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLPipelineBufferDescriptor)); 68 | } 69 | 70 | // method: init 71 | _MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptor::init() 72 | { 73 | return NS::Object::init(); 74 | } 75 | 76 | // property: mutability 77 | _MTL_INLINE MTL::Mutability MTL::PipelineBufferDescriptor::mutability() const 78 | { 79 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(mutability)); 80 | } 81 | // mutability setter: forwards the value with the setMutability_ selector. 82 | _MTL_INLINE void MTL::PipelineBufferDescriptor::setMutability(MTL::Mutability mutability) 83 | { 84 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setMutability_), mutability); 85 | } 86 | 87 | // static method: alloc 88 | _MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::PipelineBufferDescriptorArray::alloc() 89 | { 90 | return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLPipelineBufferDescriptorArray)); 91 | } 92 | 93 | // method: init 94 | _MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::PipelineBufferDescriptorArray::init() 95 | { 96 | return NS::Object::init(); 97 | } 98 | 99 | // method: objectAtIndexedSubscript: 100 | _MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptorArray::object(NS::UInteger bufferIndex) 101 | { 102 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), bufferIndex); 103 | } 104 | 105 | // method: setObject:atIndexedSubscript: 106 | _MTL_INLINE void MTL::PipelineBufferDescriptorArray::setObject(const MTL::PipelineBufferDescriptor* buffer, NS::UInteger bufferIndex) 107 | { 108 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), buffer, bufferIndex); 109 | } 110 | --------------------------------------------------------------------------------
/metal-cpp/Metal/MTLPrivate.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLPrivate.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "MTLDefines.hpp" 26 | 27 | #include 28 | 29 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 30 | // Accessors for the s_k-prefixed globals in Private::Class and Private::Selector. 31 | #define _MTL_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol) 32 | #define _MTL_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor) 33 | 34 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 35 | // With MTL_PRIVATE_IMPLEMENTATION defined, the _MTL_PRIVATE_DEF_* macros define and initialise the globals; otherwise (see the #else branch) they only declare them extern. 36 | #if defined(MTL_PRIVATE_IMPLEMENTATION) 37 | 38 | #define _MTL_PRIVATE_VISIBILITY
__attribute__((visibility("default"))) 39 | #define _MTL_PRIVATE_IMPORT __attribute__((weak_import)) 40 | 41 | #if __OBJC__ 42 | #define _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol)) 43 | #else 44 | #define _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol) 45 | #endif // __OBJC__ 46 | 47 | #define _MTL_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _MTL_PRIVATE_VISIBILITY = _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol); 48 | #define _MTL_PRIVATE_DEF_PRO(symbol) 49 | #define _MTL_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _MTL_PRIVATE_VISIBILITY = sel_registerName(symbol); 50 | // When any of these SDK version macros is defined, string constants are bound through weak_import symbols (null-checked by address); otherwise they are looked up at run time with dlsym. 51 | #if defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0) 52 | 53 | #define _MTL_PRIVATE_DEF_STR(type, symbol) \ 54 | _MTL_EXTERN type const MTL##symbol _MTL_PRIVATE_IMPORT; \ 55 | type const MTL::symbol = (nullptr != &MTL##symbol) ? MTL##symbol : nullptr; 56 | 57 | #else 58 | 59 | #include 60 | 61 | namespace MTL 62 | { 63 | namespace Private 64 | { 65 | // Looks up pSymbol with dlsym(RTLD_DEFAULT, ...) and returns the value stored at that address, or nullptr when the lookup fails. 66 | template 67 | inline _Type const LoadSymbol(const char* pSymbol) 68 | { 69 | const _Type* pAddress = static_cast<_Type*>(dlsym(RTLD_DEFAULT, pSymbol)); 70 | 71 | return pAddress ?
*pAddress : nullptr; 72 | } 73 | 74 | } // Private 75 | } // MTL 76 | 77 | #define _MTL_PRIVATE_DEF_STR(type, symbol) \ 78 | _MTL_EXTERN type const MTL##symbol; \ 79 | type const MTL::symbol = Private::LoadSymbol("MTL" #symbol); 80 | 81 | #endif // defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0) 82 | 83 | #else 84 | 85 | #define _MTL_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol; 86 | #define _MTL_PRIVATE_DEF_PRO(symbol) 87 | #define _MTL_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor; 88 | #define _MTL_PRIVATE_DEF_STR(type, symbol) 89 | 90 | #endif // MTL_PRIVATE_IMPLEMENTATION 91 | 92 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 93 | 94 | namespace MTL 95 | { 96 | namespace Private 97 | { 98 | namespace Class 99 | { 100 | 101 | } // Class 102 | } // Private 103 | } // MTL 104 | 105 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 106 | 107 | namespace MTL 108 | { 109 | namespace Private 110 | { 111 | namespace Protocol 112 | { 113 | 114 | } // Protocol 115 | } // Private 116 | } // MTL 117 | 118 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 119 | 120 | namespace MTL 121 | { 122 | namespace Private 123 | { 124 | namespace Selector 125 | { 126 | 127 | _MTL_PRIVATE_DEF_SEL(beginScope, 128 | "beginScope"); 129 | _MTL_PRIVATE_DEF_SEL(endScope, 130 | "endScope"); 131 | } // Selector 132 | } // Private 133 | } // MTL 134 | 135 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 136 |
-------------------------------------------------------------------------------- /metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLResourceStateCommandEncoder.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLCommandEncoder.hpp" 30 | 31 | namespace MTL 32 | { 33 | _MTL_ENUM(NS::UInteger, SparseTextureMappingMode) { 34 | SparseTextureMappingModeMap = 0, 35 | SparseTextureMappingModeUnmap = 1, 36 | }; 37 | 38 | struct MapIndirectArguments 39 | { 40 | uint32_t regionOriginX; 41 | uint32_t regionOriginY; 42 | uint32_t regionOriginZ; 43 | uint32_t regionSizeWidth; 44 | uint32_t regionSizeHeight; 45 | uint32_t regionSizeDepth; 46 | uint32_t mipMapLevel; 47 | uint32_t sliceId; 48 | } _MTL_PACKED; 49 | 50 | class ResourceStateCommandEncoder : public NS::Referencing 51 | { 52 | public: 53 | void updateTextureMappings(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region* regions, const NS::UInteger* mipLevels, const NS::UInteger* slices, NS::UInteger numRegions); 54 | 55 | void updateTextureMapping(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region region, const NS::UInteger mipLevel, const NS::UInteger slice); 56 | 57 | void updateTextureMapping(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset); 58 | 59 | void updateFence(const class Fence* fence); 60 | 61 | void waitForFence(const class Fence* fence); 62 | }; 63 | 64 | } 65 | 66 | // method: updateTextureMappings:mode:regions:mipLevels:slices:numRegions: 67 | _MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMappings(const MTL::Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region* regions, const NS::UInteger* mipLevels, const NS::UInteger* slices, NS::UInteger numRegions) 68 | { 69 | 
Object::sendMessage(this, _MTL_PRIVATE_SEL(updateTextureMappings_mode_regions_mipLevels_slices_numRegions_), texture, mode, regions, mipLevels, slices, numRegions); 70 | } 71 | 72 | // method: updateTextureMapping:mode:region:mipLevel:slice: 73 | _MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMapping(const MTL::Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region region, const NS::UInteger mipLevel, const NS::UInteger slice) 74 | { 75 | Object::sendMessage(this, _MTL_PRIVATE_SEL(updateTextureMapping_mode_region_mipLevel_slice_), texture, mode, region, mipLevel, slice); 76 | } 77 | 78 | // method: updateTextureMapping:mode:indirectBuffer:indirectBufferOffset: 79 | _MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMapping(const MTL::Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset) 80 | { 81 | Object::sendMessage(this, _MTL_PRIVATE_SEL(updateTextureMapping_mode_indirectBuffer_indirectBufferOffset_), texture, mode, indirectBuffer, indirectBufferOffset); 82 | } 83 | 84 | // method: updateFence: 85 | _MTL_INLINE void MTL::ResourceStateCommandEncoder::updateFence(const MTL::Fence* fence) 86 | { 87 | Object::sendMessage(this, _MTL_PRIVATE_SEL(updateFence_), fence); 88 | } 89 | 90 | // method: waitForFence: 91 | _MTL_INLINE void MTL::ResourceStateCommandEncoder::waitForFence(const MTL::Fence* fence) 92 | { 93 | Object::sendMessage(this, _MTL_PRIVATE_SEL(waitForFence_), fence); 94 | } 95 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLTypes.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLTypes.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 
6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLTypes.hpp" 30 | 31 | namespace MTL 32 | { 33 | struct Origin 34 | { 35 | Origin() = default; 36 | 37 | Origin(NS::UInteger x, NS::UInteger y, NS::UInteger z); 38 | 39 | static Origin Make(NS::UInteger x, NS::UInteger y, NS::UInteger z); 40 | 41 | NS::UInteger x; 42 | NS::UInteger y; 43 | NS::UInteger z; 44 | } _MTL_PACKED; 45 | 46 | struct Size 47 | { 48 | Size() = default; 49 | 50 | Size(NS::UInteger width, NS::UInteger height, NS::UInteger depth); 51 | 52 | static Size Make(NS::UInteger width, NS::UInteger height, NS::UInteger depth); 53 | 54 | NS::UInteger width; 55 | NS::UInteger height; 56 | NS::UInteger depth; 57 | } _MTL_PACKED; 58 | 59 | struct Region 60 | { 61 | Region() = default; 62 | 63 | Region(NS::UInteger x, NS::UInteger width); 64 | 65 | Region(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height); 66 | 67 | Region(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth); 68 | 69 | static Region Make1D(NS::UInteger x, NS::UInteger width); 70 | 71 
| static Region Make2D(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height); 72 | 73 | static Region Make3D(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth); 74 | 75 | MTL::Origin origin; 76 | MTL::Size size; 77 | } _MTL_PACKED; 78 | 79 | struct SamplePosition; 80 | 81 | using Coordinate2D = SamplePosition; 82 | 83 | struct SamplePosition 84 | { 85 | SamplePosition() = default; 86 | 87 | SamplePosition(float _x, float _y); 88 | 89 | static SamplePosition Make(float x, float y); 90 | 91 | float x; 92 | float y; 93 | } _MTL_PACKED; 94 | 95 | } 96 | 97 | _MTL_INLINE MTL::Origin::Origin(NS::UInteger _x, NS::UInteger _y, NS::UInteger _z) 98 | : x(_x) 99 | , y(_y) 100 | , z(_z) 101 | { 102 | } 103 | 104 | _MTL_INLINE MTL::Origin MTL::Origin::Make(NS::UInteger x, NS::UInteger y, NS::UInteger z) 105 | { 106 | return Origin(x, y, z); 107 | } 108 | 109 | _MTL_INLINE MTL::Size::Size(NS::UInteger _width, NS::UInteger _height, NS::UInteger _depth) 110 | : width(_width) 111 | , height(_height) 112 | , depth(_depth) 113 | { 114 | } 115 | 116 | _MTL_INLINE MTL::Size MTL::Size::Make(NS::UInteger width, NS::UInteger height, NS::UInteger depth) 117 | { 118 | return Size(width, height, depth); 119 | } 120 | 121 | _MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger width) 122 | : origin(x, 0, 0) 123 | , size(width, 1, 1) 124 | { 125 | } 126 | 127 | _MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height) 128 | : origin(x, y, 0) 129 | , size(width, height, 1) 130 | { 131 | } 132 | 133 | _MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth) 134 | : origin(x, y, z) 135 | , size(width, height, depth) 136 | { 137 | } 138 | 139 | _MTL_INLINE MTL::Region MTL::Region::Make1D(NS::UInteger x, NS::UInteger width) 140 | { 141 | return Region(x, width); 142 | } 143 | 144 | 
_MTL_INLINE MTL::Region MTL::Region::Make2D(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height) 145 | { 146 | return Region(x, y, width, height); 147 | } 148 | 149 | _MTL_INLINE MTL::Region MTL::Region::Make3D(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth) 150 | { 151 | return Region(x, y, z, width, height, depth); 152 | } 153 | 154 | _MTL_INLINE MTL::SamplePosition::SamplePosition(float _x, float _y) 155 | : x(_x) 156 | , y(_y) 157 | { 158 | } 159 | 160 | _MTL_INLINE MTL::SamplePosition MTL::SamplePosition::Make(float x, float y) 161 | { 162 | return SamplePosition(x, y); 163 | } 164 | -------------------------------------------------------------------------------- /metal-cpp/Metal/MTLVisibleFunctionTable.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/MTLVisibleFunctionTable.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | #include "MTLDefines.hpp" 24 | #include "MTLHeaderBridge.hpp" 25 | #include "MTLPrivate.hpp" 26 | 27 | #include 28 | 29 | #include "MTLFunctionHandle.hpp" 30 | #include "MTLResource.hpp" 31 | 32 | namespace MTL 33 | { 34 | class VisibleFunctionTableDescriptor : public NS::Copying 35 | { 36 | public: 37 | static class VisibleFunctionTableDescriptor* alloc(); 38 | 39 | class VisibleFunctionTableDescriptor* init(); 40 | 41 | static class VisibleFunctionTableDescriptor* visibleFunctionTableDescriptor(); 42 | 43 | NS::UInteger functionCount() const; 44 | void setFunctionCount(NS::UInteger functionCount); 45 | }; 46 | 47 | class VisibleFunctionTable : public NS::Referencing 48 | { 49 | public: 50 | void setFunction(const class FunctionHandle* function, NS::UInteger index); 51 | 52 | void setFunctions(const class FunctionHandle* functions[], NS::Range range); 53 | }; 54 | 55 | } 56 | 57 | // static method: alloc 58 | _MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::alloc() 59 | { 60 | return NS::Object::alloc(_MTL_PRIVATE_CLS(MTLVisibleFunctionTableDescriptor)); 61 | } 62 | 63 | // method: init 64 | _MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::init() 65 | { 66 | return NS::Object::init(); 67 | } 68 | 69 | // static method: visibleFunctionTableDescriptor 70 | _MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::visibleFunctionTableDescriptor() 71 | { 72 | return Object::sendMessage(_MTL_PRIVATE_CLS(MTLVisibleFunctionTableDescriptor), _MTL_PRIVATE_SEL(visibleFunctionTableDescriptor)); 73 | } 74 | 75 | // property: functionCount 76 | _MTL_INLINE NS::UInteger MTL::VisibleFunctionTableDescriptor::functionCount() const 77 | { 78 | return Object::sendMessage(this, 
_MTL_PRIVATE_SEL(functionCount)); 79 | } 80 | 81 | _MTL_INLINE void MTL::VisibleFunctionTableDescriptor::setFunctionCount(NS::UInteger functionCount) 82 | { 83 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctionCount_), functionCount); 84 | } 85 | 86 | // method: setFunction:atIndex: 87 | _MTL_INLINE void MTL::VisibleFunctionTable::setFunction(const MTL::FunctionHandle* function, NS::UInteger index) 88 | { 89 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunction_atIndex_), function, index); 90 | } 91 | 92 | // method: setFunctions:withRange: 93 | _MTL_INLINE void MTL::VisibleFunctionTable::setFunctions(const MTL::FunctionHandle* functions[], NS::Range range) 94 | { 95 | Object::sendMessage(this, _MTL_PRIVATE_SEL(setFunctions_withRange_), functions, range); 96 | } 97 | -------------------------------------------------------------------------------- /metal-cpp/Metal/Metal.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Metal/Metal.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "MTLAccelerationStructure.hpp" 26 | #include "MTLAccelerationStructureCommandEncoder.hpp" 27 | #include "MTLAccelerationStructureTypes.hpp" 28 | #include "MTLArgument.hpp" 29 | #include "MTLArgumentEncoder.hpp" 30 | #include "MTLBinaryArchive.hpp" 31 | #include "MTLBlitCommandEncoder.hpp" 32 | #include "MTLBlitPass.hpp" 33 | #include "MTLBuffer.hpp" 34 | #include "MTLCaptureManager.hpp" 35 | #include "MTLCaptureScope.hpp" 36 | #include "MTLCommandBuffer.hpp" 37 | #include "MTLCommandEncoder.hpp" 38 | #include "MTLCommandQueue.hpp" 39 | #include "MTLComputeCommandEncoder.hpp" 40 | #include "MTLComputePass.hpp" 41 | #include "MTLComputePipeline.hpp" 42 | #include "MTLCounters.hpp" 43 | #include "MTLDefines.hpp" 44 | #include "MTLDepthStencil.hpp" 45 | #include "MTLDevice.hpp" 46 | #include "MTLDrawable.hpp" 47 | #include "MTLDynamicLibrary.hpp" 48 | #include "MTLEvent.hpp" 49 | #include "MTLFence.hpp" 50 | #include "MTLFunctionConstantValues.hpp" 51 | #include "MTLFunctionDescriptor.hpp" 52 | #include "MTLFunctionHandle.hpp" 53 | #include "MTLFunctionLog.hpp" 54 | #include "MTLFunctionStitching.hpp" 55 | #include "MTLHeaderBridge.hpp" 56 | #include "MTLHeap.hpp" 57 | #include "MTLIndirectCommandBuffer.hpp" 58 | #include "MTLIndirectCommandEncoder.hpp" 59 | #include "MTLIntersectionFunctionTable.hpp" 60 | #include "MTLLibrary.hpp" 61 | #include "MTLLinkedFunctions.hpp" 62 | #include "MTLParallelRenderCommandEncoder.hpp" 63 | #include "MTLPipeline.hpp" 64 | #include "MTLPixelFormat.hpp" 65 | #include "MTLPrivate.hpp" 66 | #include "MTLRasterizationRate.hpp" 67 | #include 
"MTLRenderCommandEncoder.hpp" 68 | #include "MTLRenderPass.hpp" 69 | #include "MTLRenderPipeline.hpp" 70 | #include "MTLResource.hpp" 71 | #include "MTLResourceStateCommandEncoder.hpp" 72 | #include "MTLResourceStatePass.hpp" 73 | #include "MTLSampler.hpp" 74 | #include "MTLStageInputOutputDescriptor.hpp" 75 | #include "MTLTexture.hpp" 76 | #include "MTLTypes.hpp" 77 | #include "MTLVertexDescriptor.hpp" 78 | #include "MTLVisibleFunctionTable.hpp" 79 | 80 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 81 | -------------------------------------------------------------------------------- /metal-cpp/QuartzCore/CADefines.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // QuartzCore/CADefines.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "../Foundation/NSDefines.hpp" 26 | 27 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 28 | 29 | #define _CA_EXPORT _NS_EXPORT 30 | #define _CA_EXTERN _NS_EXTERN 31 | #define _CA_INLINE _NS_INLINE 32 | #define _CA_PACKED _NS_PACKED 33 | 34 | #define _CA_CONST(type, name) _NS_CONST(type, name) 35 | #define _CA_ENUM(type, name) _NS_ENUM(type, name) 36 | #define _CA_OPTIONS(type, name) _NS_OPTIONS(type, name) 37 | 38 | #define _CA_VALIDATE_SIZE(ns, name) _NS_VALIDATE_SIZE(ns, name) 39 | #define _CA_VALIDATE_ENUM(ns, name) _NS_VALIDATE_ENUM(ns, name) 40 | 41 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 42 | -------------------------------------------------------------------------------- /metal-cpp/QuartzCore/CAMetalDrawable.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // QuartzCore/CAMetalDrawable.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "../Metal/MTLDrawable.hpp" 26 | #include "../Metal/MTLTexture.hpp" 27 | 28 | #include "CADefines.hpp" 29 | #include "CAPrivate.hpp" 30 | 31 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 32 | 33 | namespace CA 34 | { 35 | class MetalDrawable : public NS::Referencing 36 | { 37 | public: 38 | class MetalLayer* layer() const; 39 | MTL::Texture* texture() const; 40 | }; 41 | } 42 | 43 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 44 | 45 | _CA_INLINE CA::MetalLayer* CA::MetalDrawable::layer() const 46 | { 47 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(layer)); 48 | } 49 | 50 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 51 | 52 | _CA_INLINE MTL::Texture* CA::MetalDrawable::texture() const 53 | { 54 | return Object::sendMessage(this, _MTL_PRIVATE_SEL(texture)); 55 | } 56 | 57 | 
//------------------------------------------------------------------------------------------------------------------------------------------------------------- 58 | -------------------------------------------------------------------------------- /metal-cpp/QuartzCore/CAPrivate.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // QuartzCore/CAPrivate.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 
18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "CADefines.hpp" 26 | 27 | #include 28 | 29 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 30 | 31 | #define _CA_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol) 32 | #define _CA_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor) 33 | 34 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 35 | 36 | #if defined(CA_PRIVATE_IMPLEMENTATION) 37 | 38 | #define _CA_PRIVATE_VISIBILITY __attribute__((visibility("default"))) 39 | #define _CA_PRIVATE_IMPORT __attribute__((weak_import)) 40 | 41 | #if __OBJC__ 42 | #define _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol)) 43 | #else 44 | #define _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol) 45 | #endif // __OBJC__ 46 | 47 | #define _CA_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _CA_PRIVATE_VISIBILITY = _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol); 48 | #define _CA_PRIVATE_DEF_PRO(symbol) 49 | #define _CA_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _CA_PRIVATE_VISIBILITY = sel_registerName(symbol); 50 | #define _CA_PRIVATE_DEF_STR(type, symbol) \ 51 | _CA_EXTERN type const CA##symbol _CA_PRIVATE_IMPORT; \ 52 | type const CA::symbol = (nullptr != &CA##symbol) ? 
CA##symbol : nullptr; 53 | 54 | #else 55 | 56 | #define _CA_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol; 57 | #define _CA_PRIVATE_DEF_PRO(symbol) 58 | #define _CA_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor; 59 | #define _CA_PRIVATE_DEF_STR(type, symbol) 60 | 61 | #endif // CA_PRIVATE_IMPLEMENTATION 62 | 63 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 64 | 65 | namespace CA 66 | { 67 | namespace Private 68 | { 69 | namespace Class 70 | { 71 | 72 | } // Class 73 | } // Private 74 | } // CA 75 | 76 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 77 | 78 | namespace CA 79 | { 80 | namespace Private 81 | { 82 | namespace Protocol 83 | { 84 | 85 | _CA_PRIVATE_DEF_PRO(CAMetalDrawable); 86 | 87 | } // Protocol 88 | } // Private 89 | } // CA 90 | 91 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 92 | 93 | namespace CA 94 | { 95 | namespace Private 96 | { 97 | namespace Selector 98 | { 99 | 100 | _CA_PRIVATE_DEF_SEL(layer, 101 | "layer"); 102 | _CA_PRIVATE_DEF_SEL(texture, 103 | "texture"); 104 | 105 | } // Class 106 | } // Private 107 | } // CA 108 | 109 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 110 | -------------------------------------------------------------------------------- /metal-cpp/QuartzCore/QuartzCore.hpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // 
QuartzCore/QuartzCore.hpp 4 | // 5 | // Copyright 2020-2021 Apple Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | // See the License for the specific language governing permissions and 17 | // limitations under the License. 18 | // 19 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 20 | 21 | #pragma once 22 | 23 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 24 | 25 | #include "CAMetalDrawable.hpp" 26 | 27 | //------------------------------------------------------------------------------------------------------------------------------------------------------------- 28 | -------------------------------------------------------------------------------- /precision_results/FP32_precision.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DTolm/VkFFT/066a17c17068c0f11c9298d848c2976c71fad1c1/precision_results/FP32_precision.png -------------------------------------------------------------------------------- /precision_results/FP64_precision.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DTolm/VkFFT/066a17c17068c0f11c9298d848c2976c71fad1c1/precision_results/FP64_precision.png -------------------------------------------------------------------------------- 
/vkFFT/vkFFT.h: -------------------------------------------------------------------------------- 1 | // This file is part of VkFFT, a Vulkan Fast Fourier Transform library 2 | // 3 | // Copyright (C) 2020 - present Dmitrii Tolmachev 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | // THE SOFTWARE. 
22 | 23 | #ifndef VKFFT_H 24 | #define VKFFT_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #ifndef __STDC_FORMAT_MACROS 33 | #define __STDC_FORMAT_MACROS 34 | #endif 35 | #include 36 | #if(VKFFT_BACKEND==0) 37 | #include "vulkan/vulkan.h" 38 | #include "glslang_c_interface.h" 39 | #elif(VKFFT_BACKEND==1) 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #ifndef CUDA_TOOLKIT_ROOT_DIR 46 | #define CUDA_TOOLKIT_ROOT_DIR "" 47 | #endif 48 | #elif(VKFFT_BACKEND==2) 49 | #include 50 | #include 51 | #include 52 | #include 53 | #elif(VKFFT_BACKEND==3) 54 | #ifndef CL_USE_DEPRECATED_OPENCL_1_2_APIS 55 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS 56 | #endif 57 | #ifdef __APPLE__ 58 | #include 59 | #else 60 | #include 61 | #endif 62 | #endif 63 | 64 | #ifdef __cplusplus 65 | #define VKFFT_ZERO_INIT {} 66 | #else 67 | #define VKFFT_ZERO_INIT {0} 68 | #endif 69 | 70 | #ifndef VKFFT_MAX_FFT_DIMENSIONS 71 | #define VKFFT_MAX_FFT_DIMENSIONS 4 72 | #endif 73 | 74 | #ifdef VKFFT_USE_DOUBLEDOUBLE_FP128 75 | 76 | #define VKFFT_USE_QUADMATH_FP128 // for now the only implementation, but defining these functions as mpfr should also be possible 77 | #include 78 | //#define pfQ __float128 79 | #define pfLD __float128 80 | #define pfUINT uint64_t 81 | #define pfINT int64_t 82 | #define pfsin sinq 83 | #define pfcos cosq 84 | #define pfceil ceilq 85 | #define pffloor floorq 86 | #define pfsqrt sqrtq 87 | 88 | #define pfFPinit(x) strtoflt128(x, 0) 89 | 90 | #else 91 | #define pfLD long double 92 | #define pfUINT uint64_t 93 | #define pfINT int64_t 94 | #define pfsin sin 95 | #define pfcos cos 96 | #define pfceil ceil 97 | #define pffloor floor 98 | #define pfsqrt sqrt 99 | 100 | #define pfFPinit(x) strtold(x, 0) 101 | 102 | #endif 103 | 104 | #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" 105 | #include "vkFFT/vkFFT_AppManagement/vkFFT_RunApp.h" 106 | #include "vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h" 107 | 
#include "vkFFT/vkFFT_AppManagement/vkFFT_DeleteApp.h"

// Returns the VkFFT version packed into one integer in X.XX.XX format:
// the value 10304 reads as 1.03.04.
static inline int VkFFTGetVersion() {
	const int major = 1;
	const int minor = 3;
	const int patch = 4;
	// Two decimal digits are reserved for each of the minor and patch fields.
	return major * 10000 + minor * 100 + patch;
}
#endif

--------------------------------------------------------------------------------
/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h:
--------------------------------------------------------------------------------
// This file is part of VkFFT
//
// Copyright (C) 2021 - present Dmitrii Tolmachev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
22 | #ifndef VKFFT_PUSHCONSTANTS_H 23 | #define VKFFT_PUSHCONSTANTS_H 24 | #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" 25 | #include "vkFFT/vkFFT_CodeGen/vkFFT_StringManagement/vkFFT_StringManager.h" 26 | #include "vkFFT/vkFFT_CodeGen/vkFFT_MathUtils/vkFFT_MathUtils.h" 27 | 28 | static inline void appendPushConstant(VkFFTSpecializationConstantsLayout* sc, PfContainer* var) { 29 | if (sc->res != VKFFT_SUCCESS) return; 30 | if (var->type > 100) { 31 | PfContainer* varType = VKFFT_ZERO_INIT; 32 | PfGetTypeFromCode(sc, var->type, &varType); 33 | sc->tempLen = sprintf(sc->tempStr, " %s %s;\n", varType->name, var->name); 34 | PfAppendLine(sc); 35 | } 36 | else { 37 | sc->res = VKFFT_ERROR_MATH_FAILED; 38 | } 39 | return; 40 | } 41 | static inline void appendPushConstants(VkFFTSpecializationConstantsLayout* sc) { 42 | if (sc->res != VKFFT_SUCCESS) return; 43 | if (sc->pushConstantsStructSize == 0) 44 | return; 45 | #if(VKFFT_BACKEND==0) 46 | sc->tempLen = sprintf(sc->tempStr, "layout(push_constant) uniform PushConsts\n{\n"); 47 | PfAppendLine(sc); 48 | 49 | #elif(VKFFT_BACKEND==1) 50 | sc->tempLen = sprintf(sc->tempStr, " typedef struct {\n"); 51 | PfAppendLine(sc); 52 | 53 | #elif(VKFFT_BACKEND==2) 54 | sc->tempLen = sprintf(sc->tempStr, " typedef struct {\n"); 55 | PfAppendLine(sc); 56 | 57 | #elif(VKFFT_BACKEND==3) 58 | sc->tempLen = sprintf(sc->tempStr, " typedef struct {\n"); 59 | PfAppendLine(sc); 60 | 61 | #endif 62 | char tempCopyStr[60]; 63 | if (sc->performWorkGroupShift[0]) { 64 | appendPushConstant(sc, &sc->workGroupShiftX); 65 | sprintf(tempCopyStr, "consts.%s", sc->workGroupShiftX.name); 66 | sprintf(sc->workGroupShiftX.name, "%s", tempCopyStr); 67 | } 68 | if (sc->performWorkGroupShift[1]) { 69 | appendPushConstant(sc, &sc->workGroupShiftY); 70 | sprintf(tempCopyStr, "consts.%s", sc->workGroupShiftY.name); 71 | sprintf(sc->workGroupShiftY.name, "%s", tempCopyStr); 72 | } 73 | if (sc->performWorkGroupShift[2]) { 74 | appendPushConstant(sc, 
&sc->workGroupShiftZ); 75 | sprintf(tempCopyStr, "consts.%s", sc->workGroupShiftZ.name); 76 | sprintf(sc->workGroupShiftZ.name, "%s", tempCopyStr); 77 | } 78 | if (sc->performPostCompilationInputOffset) { 79 | appendPushConstant(sc, &sc->inputOffset); 80 | sprintf(tempCopyStr, "consts.%s", sc->inputOffset.name); 81 | sprintf(sc->inputOffset.name, "%s", tempCopyStr); 82 | } 83 | if (sc->performPostCompilationOutputOffset) { 84 | appendPushConstant(sc, &sc->outputOffset); 85 | sprintf(tempCopyStr, "consts.%s", sc->outputOffset.name); 86 | sprintf(sc->outputOffset.name, "%s", tempCopyStr); 87 | } 88 | if (sc->performPostCompilationKernelOffset) { 89 | appendPushConstant(sc, &sc->kernelOffset); 90 | sprintf(tempCopyStr, "consts.%s", sc->kernelOffset.name); 91 | sprintf(sc->kernelOffset.name, "%s", tempCopyStr); 92 | } 93 | #if(VKFFT_BACKEND==0) 94 | sc->tempLen = sprintf(sc->tempStr, "} consts;\n\n"); 95 | PfAppendLine(sc); 96 | 97 | #elif(VKFFT_BACKEND==1) 98 | sc->tempLen = sprintf(sc->tempStr, " }PushConsts;\n"); 99 | PfAppendLine(sc); 100 | //sc->tempLen = sprintf(sc->tempStr, " __constant__ PushConsts consts;\n"); 101 | //PfAppendLine(sc); 102 | #elif(VKFFT_BACKEND==2) 103 | sc->tempLen = sprintf(sc->tempStr, " }PushConsts;\n"); 104 | PfAppendLine(sc); 105 | 106 | //sc->tempLen = sprintf(sc->tempStr, " __constant__ PushConsts consts;\n"); 107 | //PfAppendLine(sc); 108 | 109 | #elif(VKFFT_BACKEND==3) 110 | sc->tempLen = sprintf(sc->tempStr, " }PushConsts;\n"); 111 | PfAppendLine(sc); 112 | 113 | #endif 114 | return; 115 | } 116 | 117 | #endif 118 | -------------------------------------------------------------------------------- /vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_StringManagement/vkFFT_StringManager.h: -------------------------------------------------------------------------------- 1 | // This file is part of VkFFT 2 | // 3 | // Copyright (C) 2021 - present Dmitrii Tolmachev 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | 
// of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | // THE SOFTWARE. 
22 | #ifndef VKFFT_STRINGMANAGER_H 23 | #define VKFFT_STRINGMANAGER_H 24 | #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" 25 | static inline void PfAppendLine(VkFFTSpecializationConstantsLayout* sc) { 26 | if (sc->res != VKFFT_SUCCESS) return; 27 | //appends code line stored in tempStr to generated code 28 | if (sc->tempLen < 0) sc->res = VKFFT_ERROR_INSUFFICIENT_TEMP_BUFFER; 29 | if (sc->currentLen + sc->tempLen > sc->maxCodeLength) sc->res = VKFFT_ERROR_INSUFFICIENT_CODE_BUFFER; 30 | sc->currentLen += sprintf(sc->code0 + sc->currentLen, "%s", sc->tempStr); 31 | return; 32 | }; 33 | #endif -------------------------------------------------------------------------------- /vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DeletePlan.h: -------------------------------------------------------------------------------- 1 | // This file is part of VkFFT 2 | // 3 | // Copyright (C) 2021 - present Dmitrii Tolmachev 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | // THE SOFTWARE. 22 | #ifndef VKFFT_DELETEPLAN_H 23 | #define VKFFT_DELETEPLAN_H 24 | #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" 25 | 26 | static inline void deleteAxis(VkFFTApplication* app, VkFFTAxis* axis, int isInverseBluesteinAxes) { 27 | if (axis->specializationConstants.numRaderPrimes && (!isInverseBluesteinAxes)) { 28 | free(axis->specializationConstants.raderContainer); 29 | axis->specializationConstants.raderContainer = 0; 30 | axis->specializationConstants.numRaderPrimes = 0; 31 | } 32 | #if(VKFFT_BACKEND==0) 33 | if ((app->configuration.useLUT == 1) && (!axis->referenceLUT)) { 34 | if (axis->bufferLUT != 0) { 35 | vkDestroyBuffer(app->configuration.device[0], axis->bufferLUT, 0); 36 | axis->bufferLUT = 0; 37 | } 38 | if (axis->bufferLUTDeviceMemory != 0) { 39 | vkFreeMemory(app->configuration.device[0], axis->bufferLUTDeviceMemory, 0); 40 | axis->bufferLUTDeviceMemory = 0; 41 | } 42 | } 43 | if (axis->descriptorPool != 0) { 44 | vkDestroyDescriptorPool(app->configuration.device[0], axis->descriptorPool, 0); 45 | axis->descriptorPool = 0; 46 | } 47 | if (axis->descriptorSetLayout != 0) { 48 | vkDestroyDescriptorSetLayout(app->configuration.device[0], axis->descriptorSetLayout, 0); 49 | axis->descriptorSetLayout = 0; 50 | } 51 | if (axis->pipelineLayout != 0) { 52 | vkDestroyPipelineLayout(app->configuration.device[0], axis->pipelineLayout, 0); 53 | axis->pipelineLayout = 0; 54 | } 55 | if (axis->pipeline != 0) { 56 | vkDestroyPipeline(app->configuration.device[0], axis->pipeline, 0); 57 | axis->pipeline = 0; 58 | } 59 | #elif(VKFFT_BACKEND==1) 60 | CUresult res = CUDA_SUCCESS; 61 | cudaError_t res_t = cudaSuccess; 62 | if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) 
&& (axis->bufferLUT != 0)) { 63 | res_t = cudaFree(axis->bufferLUT); 64 | if (res_t == cudaSuccess) axis->bufferLUT = 0; 65 | } 66 | if (axis->VkFFTModule != 0) { 67 | res = cuModuleUnload(axis->VkFFTModule); 68 | if (res == CUDA_SUCCESS) axis->VkFFTModule = 0; 69 | } 70 | #elif(VKFFT_BACKEND==2) 71 | hipError_t res = hipSuccess; 72 | if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) { 73 | res = hipFree(axis->bufferLUT); 74 | if (res == hipSuccess) axis->bufferLUT = 0; 75 | } 76 | if (axis->VkFFTModule != 0) { 77 | res = hipModuleUnload(axis->VkFFTModule); 78 | if (res == hipSuccess) axis->VkFFTModule = 0; 79 | } 80 | #elif(VKFFT_BACKEND==3) 81 | cl_int res = 0; 82 | if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) { 83 | res = clReleaseMemObject(axis->bufferLUT); 84 | if (res == 0) axis->bufferLUT = 0; 85 | } 86 | if (axis->program != 0) { 87 | res = clReleaseProgram(axis->program); 88 | if (res == 0) axis->program = 0; 89 | } 90 | if (axis->kernel != 0) { 91 | res = clReleaseKernel(axis->kernel); 92 | if (res == 0) axis->kernel = 0; 93 | } 94 | #elif(VKFFT_BACKEND==4) 95 | ze_result_t res = ZE_RESULT_SUCCESS; 96 | if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) { 97 | res = zeMemFree(app->configuration.context[0], axis->bufferLUT); 98 | if (res == ZE_RESULT_SUCCESS) axis->bufferLUT = 0; 99 | } 100 | if (axis->VkFFTKernel != 0) { 101 | res = zeKernelDestroy(axis->VkFFTKernel); 102 | if (res == ZE_RESULT_SUCCESS)axis->VkFFTKernel = 0; 103 | } 104 | if (axis->VkFFTModule != 0) { 105 | res = zeModuleDestroy(axis->VkFFTModule); 106 | if (res == ZE_RESULT_SUCCESS)axis->VkFFTModule = 0; 107 | } 108 | #elif(VKFFT_BACKEND==5) 109 | if (axis->pushConstants.dataUintBuffer) { 110 | axis->pushConstants.dataUintBuffer->release(); 111 | axis->pushConstants.dataUintBuffer = 0; 112 | } 113 | if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && 
(axis->bufferLUT != 0)) { 114 | ((MTL::Buffer*)axis->bufferLUT)->release(); 115 | //free(axis->bufferLUT); 116 | axis->bufferLUT = 0; 117 | } 118 | if (axis->pipeline != 0) { 119 | axis->pipeline->release(); 120 | //free(axis->pipeline); 121 | axis->pipeline = 0; 122 | } 123 | if (axis->library != 0) { 124 | axis->library->release(); 125 | //free(axis->library); 126 | axis->library = 0; 127 | } 128 | #endif 129 | if (app->configuration.saveApplicationToString) { 130 | if (axis->binary != 0) { 131 | free(axis->binary); 132 | axis->binary = 0; 133 | } 134 | } 135 | } 136 | 137 | #endif 138 | --------------------------------------------------------------------------------