├── CHANGELOG ├── src ├── library │ ├── plan.cpp │ ├── clFFT.pc.in │ ├── action.transpose.h │ ├── generator.transpose.gcn.h │ ├── stdafx.cpp │ ├── dllmain.cpp │ ├── generator.h │ ├── md5sum.h │ ├── ReadMe.txt │ ├── generator.transpose.h │ ├── lifetime.cpp │ ├── CMakeLists.txt │ └── lock.h ├── scripts │ └── perf │ │ ├── manual.pdf │ │ ├── CMakeLists.txt │ │ ├── errorHandler.py │ │ └── performanceUtility.py ├── clFFTConfig.cmake.in ├── tests │ ├── c-compliance.c │ ├── typedefs.h │ ├── buffer_memory.cpp │ ├── accuracy_test_common.cpp │ ├── test_constants.cpp │ ├── copyTestDependencies.cmake.in │ ├── CMakeLists.txt │ └── buffer_memory.h ├── include │ ├── clFFT.version.h.in │ ├── convenienceFunctions.h │ ├── targetver.h │ ├── clAmdFft.version.h │ ├── stdafx.h │ ├── unicode.compatibility.h │ └── sharedLibrary.h ├── statTimer │ ├── stdafx.cpp │ ├── targetver.h │ ├── dllmain.cpp │ ├── statisticalTimer.extern.cpp │ ├── stdafx.h │ ├── ReadMe.txt │ ├── statisticalTimer.extern.h │ ├── CMakeLists.txt │ ├── statisticalTimer.h │ └── statisticalTimer.CPU.h ├── client │ ├── stdafx.cpp │ ├── CMakeLists.txt │ ├── client.h │ └── openCL.misc.h ├── callback-client │ ├── stdafx.cpp │ ├── CMakeLists.txt │ ├── openCL.misc.h │ └── client.h ├── clFFTConfigVersion.cmake.in ├── cuFFT-client │ └── CMakeLists.txt ├── examples │ ├── CMakeLists.txt │ ├── fft1d.c │ ├── fft2d.c │ └── fft3d.c ├── FindclFFT.cmake ├── gtest.cmake ├── FindFFTW.cmake └── FindOpenCL.cmake ├── docs ├── realfft_1dlen.jpg ├── realfft_ex_n7.jpg ├── realfft_ex_n8.jpg ├── realfft_fwdinv.jpg ├── realfft_expl_01.jpg ├── realfft_expl_02.jpg ├── realfft_expl_03.jpg ├── realfft_expl_04.jpg ├── realfft_expl_05.jpg ├── realfft_expl_06.jpg ├── realfft_expl_07.jpg ├── realfft_expl_08.jpg └── performance │ ├── cuFFT_7.0 │ └── Tesla_K40 │ │ ├── R2C_3D_double.csv │ │ ├── R2C_3D_single.csv │ │ ├── C2C_3D_double.csv │ │ ├── C2C_3D_single.csv │ │ ├── R2C_2D_double.csv │ │ ├── R2C_2D_single.csv │ │ ├── C2C_2D_double.csv │ │ ├── 
C2C_2D_single.csv │ │ ├── R2C_1D_double.csv │ │ ├── R2C_1D_single.csv │ │ ├── C2C_1D_single.csv │ │ └── C2C_1D_double.csv │ └── clFFT_2.6.0 │ └── FirePro_W9100 │ ├── C2C_3D_double.csv │ ├── C2C_3D_single.csv │ ├── R2C_3D_single.csv │ ├── R2C_3D_double.csv │ ├── C2C_2D_double.csv │ ├── C2C_2D_single.csv │ ├── R2C_2D_double.csv │ ├── R2C_2D_single.csv │ ├── R2C_1D_double.csv │ ├── C2C_1D_single.csv │ ├── C2C_1D_double.csv │ └── R2C_1D_single.csv ├── NOTICE ├── .gitignore ├── .gitattributes ├── appveyor.yml ├── ReleaseNotes.txt ├── CONTRIBUTING.md ├── .travis.yml └── README.md /CHANGELOG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/CHANGELOG -------------------------------------------------------------------------------- /src/library/plan.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/src/library/plan.cpp -------------------------------------------------------------------------------- /docs/realfft_1dlen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_1dlen.jpg -------------------------------------------------------------------------------- /docs/realfft_ex_n7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_ex_n7.jpg -------------------------------------------------------------------------------- /docs/realfft_ex_n8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_ex_n8.jpg -------------------------------------------------------------------------------- /docs/realfft_fwdinv.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_fwdinv.jpg -------------------------------------------------------------------------------- /docs/realfft_expl_01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_01.jpg -------------------------------------------------------------------------------- /docs/realfft_expl_02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_02.jpg -------------------------------------------------------------------------------- /docs/realfft_expl_03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_03.jpg -------------------------------------------------------------------------------- /docs/realfft_expl_04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_04.jpg -------------------------------------------------------------------------------- /docs/realfft_expl_05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_05.jpg -------------------------------------------------------------------------------- /docs/realfft_expl_06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_06.jpg -------------------------------------------------------------------------------- /docs/realfft_expl_07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_07.jpg 
-------------------------------------------------------------------------------- /docs/realfft_expl_08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_08.jpg -------------------------------------------------------------------------------- /src/scripts/perf/manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/src/scripts/perf/manual.pdf -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | AMD clFFT 2 | Copyright 2013 Advanced Micro Devices, Inc. 3 | 4 | This product includes software developed at 5 | Advanced Micro Devices, Inc. (http://www.amd.com). 6 | -------------------------------------------------------------------------------- /src/clFFTConfig.cmake.in: -------------------------------------------------------------------------------- 1 | include(${CMAKE_CURRENT_LIST_DIR}/clFFTTargets.cmake) 2 | get_filename_component(CLFFT_INCLUDE_DIRS ${CMAKE_CURRENT_LIST_DIR}/@reldir@/include ABSOLUTE) 3 | set(CLFFT_LIBRARIES clFFT) 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | 6 | # Compiled Dynamic libraries 7 | *.so 8 | *.dylib 9 | 10 | # Compiled Static libraries 11 | *.lai 12 | *.la 13 | *.a 14 | 15 | # ignore build directory if name is 'build' 16 | build/ 17 | 18 | # ignore tilde files 19 | *~ 20 | -------------------------------------------------------------------------------- /src/library/clFFT.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | 
exec_prefix=${prefix}/bin@CLFFT_SUFFIX_BIN@ 3 | includedir=${prefix}/include 4 | libdir=${prefix}/lib@CLFFT_SUFFIX_LIB@ 5 | 6 | Name: clFFT 7 | Description: Open source OpenCL FFT library 8 | Version: @clFFT_VERSION@ 9 | URL: https://github.com/clMathLibraries/clFFT 10 | 11 | Cflags: -I${includedir} 12 | Libs: -L${libdir} -lclFFT 13 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/R2C_3D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,2,2097152,g,5,3,in,double,cuFFT3D,1.25686 3 | 4,4,4,262144,g,5,3,in,double,cuFFT3D,6.68634 4 | 8,8,8,32768,g,5,3,in,double,cuFFT3D,44.328471 5 | 16,16,16,4096,g,5,3,in,double,cuFFT3D,64.499246 6 | 32,32,32,512,g,5,3,in,double,cuFFT3D,83.623983 7 | 64,64,64,64,g,5,3,in,double,cuFFT3D,98.194147 8 | 128,128,128,8,g,5,3,in,double,cuFFT3D,114.979824 9 | 256,256,256,1,g,5,3,in,double,cuFFT3D,128.095675 10 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/R2C_3D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,2,2097152,g,5,3,in,single,cuFFT3D,1.948795 3 | 4,4,4,262144,g,5,3,in,single,cuFFT3D,10.53232 4 | 8,8,8,32768,g,5,3,in,single,cuFFT3D,88.707531 5 | 16,16,16,4096,g,5,3,in,single,cuFFT3D,125.457423 6 | 32,32,32,512,g,5,3,in,single,cuFFT3D,162.2312 7 | 64,64,64,64,g,5,3,in,single,cuFFT3D,186.305497 8 | 128,128,128,8,g,5,3,in,single,cuFFT3D,218.05084 9 | 256,256,256,1,g,5,3,in,single,cuFFT3D,255.37672 10 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/C2C_3D_double.csv: -------------------------------------------------------------------------------- 1 | 
lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,2,4194304,g,1,1,in,double,cuFFT3D,28.162064 3 | 4,4,4,524288,g,1,1,in,double,cuFFT3D,57.518869 4 | 8,8,8,65536,g,1,1,in,double,cuFFT3D,87.648677 5 | 16,16,16,8192,g,1,1,in,double,cuFFT3D,118.044529 6 | 32,32,32,1024,g,1,1,in,double,cuFFT3D,146.907326 7 | 64,64,64,128,g,1,1,in,double,cuFFT3D,172.036879 8 | 128,128,128,16,g,1,1,in,double,cuFFT3D,199.233594 9 | 256,256,256,2,g,1,1,in,double,cuFFT3D,220.430123 10 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/C2C_3D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,2,4194304,g,1,1,in,single,cuFFT3D,58.42527 3 | 4,4,4,524288,g,1,1,in,single,cuFFT3D,112.28049 4 | 8,8,8,65536,g,1,1,in,single,cuFFT3D,169.252406 5 | 16,16,16,8192,g,1,1,in,single,cuFFT3D,228.567003 6 | 32,32,32,1024,g,1,1,in,single,cuFFT3D,289.213888 7 | 64,64,64,128,g,1,1,in,single,cuFFT3D,343.15019 8 | 128,128,128,16,g,1,1,in,single,cuFFT3D,392.626589 9 | 256,256,256,2,g,1,1,in,single,cuFFT3D,438.342723 10 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_3D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,2,4194304,g,1,1,in,double,clFFT3D_2.6,40.0694 3 | 4,4,4,524288,g,1,1,in,double,clFFT3D_2.6,81.8712 4 | 8,8,8,65536,g,1,1,in,double,clFFT3D_2.6,123.015 5 | 16,16,16,8192,g,1,1,in,double,clFFT3D_2.6,146.233 6 | 32,32,32,1024,g,1,1,in,double,clFFT3D_2.6,174.584 7 | 64,64,64,128,g,1,1,in,double,clFFT3D_2.6,158.831 8 | 128,128,128,16,g,1,1,in,double,clFFT3D_2.6,120.786 9 | 256,256,256,2,g,1,1,in,double,clFFT3D_2.6,17.0093 10 | 
-------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_3D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,2,4194304,g,1,1,in,single,clFFT3D_2.6,75.6796 3 | 4,4,4,524288,g,1,1,in,single,clFFT3D_2.6,154.346 4 | 8,8,8,65536,g,1,1,in,single,clFFT3D_2.6,227.783 5 | 16,16,16,8192,g,1,1,in,single,clFFT3D_2.6,280.944 6 | 32,32,32,1024,g,1,1,in,single,clFFT3D_2.6,356.416 7 | 64,64,64,128,g,1,1,in,single,clFFT3D_2.6,232.128 8 | 128,128,128,16,g,1,1,in,single,clFFT3D_2.6,164.313 9 | 256,256,256,2,g,1,1,in,single,clFFT3D_2.6,21.5838 10 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_3D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,2,2097152,g,5,3,in,single,clFFT3D_2.6,0.04324405 3 | 4,4,4,262144,g,5,3,in,single,clFFT3D_2.6,0.40443 4 | 8,8,8,32768,g,5,3,in,single,clFFT3D_2.6,2.791705 5 | 16,16,16,4096,g,5,3,in,single,clFFT3D_2.6,12.79715 6 | 32,32,32,512,g,5,3,in,single,clFFT3D_2.6,49.1747 7 | 64,64,64,64,g,5,3,in,single,clFFT3D_2.6,160.058 8 | 128,128,128,8,g,5,3,in,single,clFFT3D_2.6,180.044 9 | 256,256,256,1,g,5,3,in,single,clFFT3D_2.6,245.51 10 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_3D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,2,2097152,g,5,3,in,double,clFFT3D_2.6,0.04589015 3 | 4,4,4,262144,g,5,3,in,double,clFFT3D_2.6,0.3984265 4 | 8,8,8,32768,g,5,3,in,double,clFFT3D_2.6,2.61428 5 | 
16,16,16,4096,g,5,3,in,double,clFFT3D_2.6,12.5894 6 | 32,32,32,512,g,5,3,in,double,clFFT3D_2.6,41.4404 7 | 64,64,64,64,g,5,3,in,double,clFFT3D_2.6,82.7885 8 | 128,128,128,8,g,5,3,in,double,clFFT3D_2.6,92.5995 9 | 256,256,256,1,g,5,3,in,double,clFFT3D_2.6,125.071 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/R2C_2D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,1,4194304,g,5,3,in,double,cuFFT2D,1.112218 3 | 4,4,1,1048576,g,5,3,in,double,cuFFT2D,5.131769 4 | 8,8,1,262144,g,5,3,in,double,cuFFT2D,31.630199 5 | 16,16,1,65536,g,5,3,in,double,cuFFT2D,47.836545 6 | 32,32,1,16384,g,5,3,in,double,cuFFT2D,68.144852 7 | 64,64,1,4096,g,5,3,in,double,cuFFT2D,84.450258 8 | 128,128,1,1024,g,5,3,in,double,cuFFT2D,99.624748 9 | 256,256,1,256,g,5,3,in,double,cuFFT2D,112.566884 10 | 512,512,1,64,g,5,3,in,double,cuFFT2D,100.980881 11 | 1024,1024,1,16,g,5,3,in,double,cuFFT2D,110.616129 12 | 2048,2048,1,4,g,5,3,in,double,cuFFT2D,120.502623 13 | 4096,4096,1,1,g,5,3,in,double,cuFFT2D,125.728096 14 | 
-------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/R2C_2D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,1,4194304,g,5,3,in,single,cuFFT2D,1.791532 3 | 4,4,1,1048576,g,5,3,in,single,cuFFT2D,8.29206 4 | 8,8,1,262144,g,5,3,in,single,cuFFT2D,71.79269 5 | 16,16,1,65536,g,5,3,in,single,cuFFT2D,102.621816 6 | 32,32,1,16384,g,5,3,in,single,cuFFT2D,133.768603 7 | 64,64,1,4096,g,5,3,in,single,cuFFT2D,164.967666 8 | 128,128,1,1024,g,5,3,in,single,cuFFT2D,196.094691 9 | 256,256,1,256,g,5,3,in,single,cuFFT2D,226.455545 10 | 512,512,1,64,g,5,3,in,single,cuFFT2D,198.34244 11 | 1024,1024,1,16,g,5,3,in,single,cuFFT2D,217.726856 12 | 2048,2048,1,4,g,5,3,in,single,cuFFT2D,237.499682 13 | 4096,4096,1,1,g,5,3,in,single,cuFFT2D,254.357062 14 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/C2C_2D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,1,8388608,g,1,1,in,double,cuFFT2D,27.974814 3 | 4,4,1,2097152,g,1,1,in,double,cuFFT2D,57.028381 4 | 8,8,1,524288,g,1,1,in,double,cuFFT2D,87.651529 5 | 16,16,1,131072,g,1,1,in,double,cuFFT2D,117.355353 6 | 32,32,1,32768,g,1,1,in,double,cuFFT2D,145.958473 7 | 64,64,1,8192,g,1,1,in,double,cuFFT2D,176.854846 8 | 128,128,1,2048,g,1,1,in,double,cuFFT2D,204.919105 9 | 256,256,1,512,g,1,1,in,double,cuFFT2D,232.14229 10 | 512,512,1,128,g,1,1,in,double,cuFFT2D,170.83545 11 | 1024,1024,1,32,g,1,1,in,double,cuFFT2D,187.166338 12 | 2048,2048,1,8,g,1,1,in,double,cuFFT2D,192.282189 13 | 4096,4096,1,2,g,1,1,in,double,cuFFT2D,164.26726 14 | -------------------------------------------------------------------------------- 
/docs/performance/cuFFT_7.0/Tesla_K40/C2C_2D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,1,8388608,g,1,1,in,single,cuFFT2D,58.660739 3 | 4,4,1,2097152,g,1,1,in,single,cuFFT2D,110.939092 4 | 8,8,1,524288,g,1,1,in,single,cuFFT2D,167.463965 5 | 16,16,1,131072,g,1,1,in,single,cuFFT2D,226.538742 6 | 32,32,1,32768,g,1,1,in,single,cuFFT2D,285.72516 7 | 64,64,1,8192,g,1,1,in,single,cuFFT2D,346.266161 8 | 128,128,1,2048,g,1,1,in,single,cuFFT2D,400.480557 9 | 256,256,1,512,g,1,1,in,single,cuFFT2D,455.021399 10 | 512,512,1,128,g,1,1,in,single,cuFFT2D,336.292353 11 | 1024,1024,1,32,g,1,1,in,single,cuFFT2D,368.983107 12 | 2048,2048,1,8,g,1,1,in,single,cuFFT2D,402.543841 13 | 4096,4096,1,2,g,1,1,in,single,cuFFT2D,436.652575 14 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_2D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,1,8388608,g,1,1,in,double,clFFT2D_2.6,40.4909 3 | 4,4,1,2097152,g,1,1,in,double,clFFT2D_2.6,81.961 4 | 8,8,1,524288,g,1,1,in,double,clFFT2D_2.6,123.012 5 | 16,16,1,131072,g,1,1,in,double,clFFT2D_2.6,162.167 6 | 32,32,1,32768,g,1,1,in,double,clFFT2D_2.6,155.011 7 | 64,64,1,8192,g,1,1,in,double,clFFT2D_2.6,211.648 8 | 128,128,1,2048,g,1,1,in,double,clFFT2D_2.6,210.676 9 | 256,256,1,512,g,1,1,in,double,clFFT2D_2.6,112.635 10 | 512,512,1,128,g,1,1,in,double,clFFT2D_2.6,123.928 11 | 1024,1024,1,32,g,1,1,in,double,clFFT2D_2.6,144.293 12 | 2048,2048,1,8,g,1,1,in,double,clFFT2D_2.6,152.674 13 | 4096,4096,1,2,g,1,1,in,double,clFFT2D_2.6,119.07 14 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_2D_single.csv: 
-------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,1,8388608,g,1,1,in,single,clFFT2D_2.6,73.8863 3 | 4,4,1,2097152,g,1,1,in,single,clFFT2D_2.6,154.754 4 | 8,8,1,524288,g,1,1,in,single,clFFT2D_2.6,243.805 5 | 16,16,1,131072,g,1,1,in,single,clFFT2D_2.6,328.784 6 | 32,32,1,32768,g,1,1,in,single,clFFT2D_2.6,379.601 7 | 64,64,1,8192,g,1,1,in,single,clFFT2D_2.6,314.629 8 | 128,128,1,2048,g,1,1,in,single,clFFT2D_2.6,360.231 9 | 256,256,1,512,g,1,1,in,single,clFFT2D_2.6,166.78 10 | 512,512,1,128,g,1,1,in,single,clFFT2D_2.6,297.307 11 | 1024,1024,1,32,g,1,1,in,single,clFFT2D_2.6,304.312 12 | 2048,2048,1,8,g,1,1,in,single,clFFT2D_2.6,318.457 13 | 4096,4096,1,2,g,1,1,in,single,clFFT2D_2.6,308.812 14 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_2D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,1,4194304,g,5,3,in,double,clFFT2D_2.6,0.0441201 3 | 4,4,1,1048576,g,5,3,in,double,clFFT2D_2.6,0.33538 4 | 8,8,1,262144,g,5,3,in,double,clFFT2D_2.6,2.028745 5 | 16,16,1,65536,g,5,3,in,double,clFFT2D_2.6,10.6405 6 | 32,32,1,16384,g,5,3,in,double,clFFT2D_2.6,40.02365 7 | 64,64,1,4096,g,5,3,in,double,clFFT2D_2.6,85.764 8 | 128,128,1,1024,g,5,3,in,double,clFFT2D_2.6,99.798 9 | 256,256,1,256,g,5,3,in,double,clFFT2D_2.6,142.3255 10 | 512,512,1,64,g,5,3,in,double,clFFT2D_2.6,162.6435 11 | 1024,1024,1,16,g,5,3,in,double,clFFT2D_2.6,163.1355 12 | 2048,2048,1,4,g,5,3,in,double,clFFT2D_2.6,158.7915 13 | 4096,4096,1,1,g,5,3,in,double,clFFT2D_2.6,91.1925 14 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_2D_single.csv: 
-------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,2,1,4194304,g,5,3,in,single,clFFT2D_2.6,0.04122405 3 | 4,4,1,1048576,g,5,3,in,single,clFFT2D_2.6,0.314967 4 | 8,8,1,262144,g,5,3,in,single,clFFT2D_2.6,1.886985 5 | 16,16,1,65536,g,5,3,in,single,clFFT2D_2.6,9.93125 6 | 32,32,1,16384,g,5,3,in,single,clFFT2D_2.6,43.75745 7 | 64,64,1,4096,g,5,3,in,single,clFFT2D_2.6,158.5295 8 | 128,128,1,1024,g,5,3,in,single,clFFT2D_2.6,179.4235 9 | 256,256,1,256,g,5,3,in,single,clFFT2D_2.6,266.48 10 | 512,512,1,64,g,5,3,in,single,clFFT2D_2.6,320.154 11 | 1024,1024,1,16,g,5,3,in,single,clFFT2D_2.6,360.564 12 | 2048,2048,1,4,g,5,3,in,single,clFFT2D_2.6,388.305 13 | 4096,4096,1,1,g,5,3,in,single,clFFT2D_2.6,318.994 14 | -------------------------------------------------------------------------------- /src/tests/c-compliance.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #include "clFFT.h" 19 | -------------------------------------------------------------------------------- /src/library/action.transpose.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #pragma once 18 | #if !defined( AMD_CLFFT_action_transpose_H ) 19 | #define AMD_CLFFT_action_transpose_H 20 | #include "private.h" 21 | #include "repo.h" 22 | #include "plan.h" 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/library/generator.transpose.gcn.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #pragma once 18 | #if !defined( AMD_CLFFT_generator_transpose_H ) 19 | #define AMD_CLFFT_generator_transpose_H 20 | #include "private.h" 21 | #include "repo.h" 22 | #include "plan.h" 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/include/clFFT.version.h.in: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | /* the configured version and settings for clFFT 19 | */ 20 | #define clfftVersionMajor @clFFT_VERSION_MAJOR@ 21 | #define clfftVersionMinor @clFFT_VERSION_MINOR@ 22 | #define clfftVersionPatch @clFFT_VERSION_PATCH@ 23 | 24 | #cmakedefine CLFFT_STATIC 25 | -------------------------------------------------------------------------------- /src/library/stdafx.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | // stdafx.cpp : source file that includes just the standard includes 19 | // clfft.pch will be the pre-compiled header 20 | // stdafx.obj will contain the pre-compiled type information 21 | 22 | #include "stdafx.h" 23 | 24 | // Reference any additional headers you need in STDAFX.H and not in this file 25 | -------------------------------------------------------------------------------- /src/statTimer/stdafx.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | // stdafx.cpp : source file that includes just the standard includes 19 | // clfft.pch will be the pre-compiled header 20 | // stdafx.obj will contain the pre-compiled type information 21 | 22 | #include "stdafx.h" 23 | 24 | // Reference any additional headers you need in STDAFX.H and not in this file 25 | -------------------------------------------------------------------------------- /src/client/stdafx.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | // stdafx.cpp : source file that includes just the standard includes 19 | // clFFT.pch will be the pre-compiled header 20 | // stdafx.obj will contain the pre-compiled type information 21 | 22 | #include "stdafx.h" 23 | 24 | // TODO: reference any additional headers you need in STDAFX.H 25 | // and not in this file 26 | -------------------------------------------------------------------------------- /src/callback-client/stdafx.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | // stdafx.cpp : source file that includes just the standard includes 19 | // clFFT.pch will be the pre-compiled header 20 | // stdafx.obj will contain the pre-compiled type information 21 | 22 | #include "stdafx.h" 23 | 24 | // TODO: reference any additional headers you need in STDAFX.H 25 | // and not in this file 26 | -------------------------------------------------------------------------------- /src/include/convenienceFunctions.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | /*****************************************************/ 19 | template< typename T > 20 | unsigned int float_as_hex( T a ) { 21 | return *(unsigned int*)&a; 22 | } 23 | 24 | /*****************************************************/ 25 | template< typename T > 26 | T hex_as_float( unsigned int a ) { 27 | return *(T*)&a; 28 | } -------------------------------------------------------------------------------- /src/statTimer/targetver.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #pragma once 18 | 19 | // Including SDKDDKVer.h defines the highest available Windows platform. 20 | 21 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and 22 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 
23 | 24 | #include 25 | -------------------------------------------------------------------------------- /src/include/targetver.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | 20 | // Including SDKDDKVer.h defines the highest available Windows platform. 21 | 22 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and 23 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 24 | 25 | #include 26 | -------------------------------------------------------------------------------- /src/scripts/perf/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | set(GRAPHING_SCRIPTS measurePerformance.py 18 | plotPerformance.py 19 | fftPerformanceTesting.py 20 | errorHandler.py 21 | performanceUtility.py 22 | ) 23 | # if( WIN32 ) 24 | # install( FILES ${GRAPHING_SCRIPTS} DESTINATION bin${CLFFT_SUFFIX_BIN} ) 25 | # else ( ) 26 | # install( FILES ${GRAPHING_SCRIPTS} DESTINATION share/clFFT ) 27 | # endif( ) 28 | 29 | -------------------------------------------------------------------------------- /src/library/dllmain.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | // dllmain.cpp : Defines the entry point for the DLL application. 
19 | #include "stdafx.h" 20 | 21 | BOOL APIENTRY DllMain( HMODULE hModule, 22 | DWORD ul_reason_for_call, 23 | LPVOID lpReserved 24 | ) 25 | { 26 | switch (ul_reason_for_call) 27 | { 28 | case DLL_PROCESS_ATTACH: 29 | case DLL_THREAD_ATTACH: 30 | case DLL_THREAD_DETACH: 31 | case DLL_PROCESS_DETACH: 32 | break; 33 | } 34 | return TRUE; 35 | } 36 | 37 | -------------------------------------------------------------------------------- /src/statTimer/dllmain.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | // dllmain.cpp : Defines the entry point for the DLL application. 
18 | #include "stdafx.h" 19 | 20 | BOOL APIENTRY DllMain( HMODULE hModule, 21 | DWORD ul_reason_for_call, 22 | LPVOID lpReserved 23 | ) 24 | { 25 | switch (ul_reason_for_call) 26 | { 27 | case DLL_PROCESS_ATTACH: 28 | case DLL_THREAD_ATTACH: 29 | case DLL_THREAD_DETACH: 30 | case DLL_PROCESS_DETACH: 31 | break; 32 | } 33 | return TRUE; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /src/tests/typedefs.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | #if !defined( CLFFT_TYPEDEFS_H ) 20 | #define CLFFT_TYPEDEFS_H 21 | 22 | #include "test_constants.h" 23 | #include "fftw_transform.h" 24 | #include "cl_transform.h" 25 | 26 | typedef clfft clfft_single; 27 | typedef clfft clfft_double; 28 | typedef buffer buffer_single; 29 | typedef buffer buffer_double; 30 | typedef fftw fftw_single; 31 | typedef fftw fftw_double; 32 | 33 | #endif -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_1D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,1,1,8388608,g,5,3,in,double,clFFT1D_2.6,14.0024 3 | 4,1,1,4194304,g,5,3,in,double,clFFT1D_2.6,35.5083 4 | 8,1,1,2097152,g,5,3,in,double,clFFT1D_2.6,82.012 5 | 16,1,1,1048576,g,5,3,in,double,clFFT1D_2.6,107.0085 6 | 32,1,1,524288,g,5,3,in,double,clFFT1D_2.6,128.5305 7 | 64,1,1,262144,g,5,3,in,double,clFFT1D_2.6,224.9175 8 | 128,1,1,131072,g,5,3,in,double,clFFT1D_2.6,217.2845 9 | 256,1,1,65536,g,5,3,in,double,clFFT1D_2.6,318.731 10 | 512,1,1,32768,g,5,3,in,double,clFFT1D_2.6,302.7885 11 | 1024,1,1,16384,g,5,3,in,double,clFFT1D_2.6,270.694 12 | 2048,1,1,8192,g,5,3,in,double,clFFT1D_2.6,244.8945 13 | 4096,1,1,4096,g,5,3,in,double,clFFT1D_2.6,96.934 14 | 8192,1,1,2048,g,5,3,in,double,clFFT1D_2.6,51.7095 15 | 16384,1,1,1024,g,5,3,in,double,clFFT1D_2.6,65.2255 16 | 32768,1,1,512,g,5,3,in,double,clFFT1D_2.6,76.0235 17 | 65536,1,1,256,g,5,3,in,double,clFFT1D_2.6,93.041 18 | 131072,1,1,128,g,5,3,in,double,clFFT1D_2.6,97.4785 19 | 262144,1,1,64,g,5,3,in,double,clFFT1D_2.6,98.3995 20 | 524288,1,1,32,g,5,3,in,double,clFFT1D_2.6,100.0525 21 | 1048576,1,1,16,g,5,3,in,double,clFFT1D_2.6,94.2235 22 | 2097152,1,1,8,g,5,3,in,double,clFFT1D_2.6,92.7895 23 | 
4194304,1,1,4,g,5,3,in,double,clFFT1D_2.6,89.985 24 | -------------------------------------------------------------------------------- /src/library/generator.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | #if !defined( AMD_CLFFT_generator_H ) 20 | #define AMD_CLFFT_generator_H 21 | 22 | // Enum to help provide descriptive names to array indices, when indexing into our various vectors 23 | enum clfftGenerators 24 | { 25 | Stockham, // Using the Stockham autosort frameworks 26 | Transpose_GCN, 27 | Transpose_SQUARE, 28 | Transpose_NONSQUARE, 29 | Copy, 30 | ENDGENERATORS ///< This value will always be last, and marks the length of clfftGenerators 31 | }; 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/R2C_1D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,1,1,8388608,g,5,3,in,double,cuFFT1D,0.906726 3 | 4,1,1,4194304,g,5,3,in,double,cuFFT1D,3.146765 4 | 
8,1,1,2097152,g,5,3,in,double,cuFFT1D,19.294895 5 | 16,1,1,1048576,g,5,3,in,double,cuFFT1D,32.758462 6 | 32,1,1,524288,g,5,3,in,double,cuFFT1D,45.237741 7 | 64,1,1,262144,g,5,3,in,double,cuFFT1D,55.76315 8 | 128,1,1,131072,g,5,3,in,double,cuFFT1D,66.019701 9 | 256,1,1,65536,g,5,3,in,double,cuFFT1D,75.670117 10 | 512,1,1,32768,g,5,3,in,double,cuFFT1D,86.578216 11 | 1024,1,1,16384,g,5,3,in,double,cuFFT1D,95.679203 12 | 2048,1,1,8192,g,5,3,in,double,cuFFT1D,104.06715 13 | 4096,1,1,4096,g,5,3,in,double,cuFFT1D,107.265346 14 | 8192,1,1,2048,g,5,3,in,double,cuFFT1D,88.545477 15 | 16384,1,1,1024,g,5,3,in,double,cuFFT1D,89.413243 16 | 32768,1,1,512,g,5,3,in,double,cuFFT1D,94.809626 17 | 65536,1,1,256,g,5,3,in,double,cuFFT1D,96.783663 18 | 131072,1,1,128,g,5,3,in,double,cuFFT1D,92.781627 19 | 262144,1,1,64,g,5,3,in,double,cuFFT1D,97.037165 20 | 524288,1,1,32,g,5,3,in,double,cuFFT1D,101.732314 21 | 1048576,1,1,16,g,5,3,in,double,cuFFT1D,102.10937 22 | 2097152,1,1,8,g,5,3,in,double,cuFFT1D,102.590091 23 | 4194304,1,1,4,g,5,3,in,double,cuFFT1D,106.90631 24 | 8388608,1,1,2,g,5,3,in,double,cuFFT1D,112.769815 25 | 16777216,1,1,1,g,5,3,in,double,cuFFT1D,110.213005 26 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/R2C_1D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,1,1,8388608,g,5,3,in,single,cuFFT1D,1.732317 3 | 4,1,1,4194304,g,5,3,in,single,cuFFT1D,6.739837 4 | 8,1,1,2097152,g,5,3,in,single,cuFFT1D,46.082513 5 | 16,1,1,1048576,g,5,3,in,single,cuFFT1D,67.164278 6 | 32,1,1,524288,g,5,3,in,single,cuFFT1D,88.070397 7 | 64,1,1,262144,g,5,3,in,single,cuFFT1D,108.72499 8 | 128,1,1,131072,g,5,3,in,single,cuFFT1D,129.659424 9 | 256,1,1,65536,g,5,3,in,single,cuFFT1D,151.242882 10 | 512,1,1,32768,g,5,3,in,single,cuFFT1D,168.221379 11 | 
1024,1,1,16384,g,5,3,in,single,cuFFT1D,187.566483 12 | 2048,1,1,8192,g,5,3,in,single,cuFFT1D,204.435883 13 | 4096,1,1,4096,g,5,3,in,single,cuFFT1D,222.020044 14 | 8192,1,1,2048,g,5,3,in,single,cuFFT1D,241.883832 15 | 16384,1,1,1024,g,5,3,in,single,cuFFT1D,214.359681 16 | 32768,1,1,512,g,5,3,in,single,cuFFT1D,204.094289 17 | 65536,1,1,256,g,5,3,in,single,cuFFT1D,216.740851 18 | 131072,1,1,128,g,5,3,in,single,cuFFT1D,219.115177 19 | 262144,1,1,64,g,5,3,in,single,cuFFT1D,222.507641 20 | 524288,1,1,32,g,5,3,in,single,cuFFT1D,206.73413 21 | 1048576,1,1,16,g,5,3,in,single,cuFFT1D,217.324265 22 | 2097152,1,1,8,g,5,3,in,single,cuFFT1D,215.676848 23 | 4194304,1,1,4,g,5,3,in,single,cuFFT1D,228.664256 24 | 8388608,1,1,2,g,5,3,in,single,cuFFT1D,239.209548 25 | 16777216,1,1,1,g,5,3,in,single,cuFFT1D,257.114237 26 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/C2C_1D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,1,1,16777216,g,1,1,in,single,cuFFT1D,57.417066 3 | 4,1,1,8388608,g,1,1,in,single,cuFFT1D,112.284839 4 | 8,1,1,4194304,g,1,1,in,single,cuFFT1D,167.42753 5 | 16,1,1,2097152,g,1,1,in,single,cuFFT1D,223.01212 6 | 32,1,1,1048576,g,1,1,in,single,cuFFT1D,280.25624 7 | 64,1,1,524288,g,1,1,in,single,cuFFT1D,340.974729 8 | 128,1,1,262144,g,1,1,in,single,cuFFT1D,393.559297 9 | 256,1,1,131072,g,1,1,in,single,cuFFT1D,443.052168 10 | 512,1,1,65536,g,1,1,in,single,cuFFT1D,508.837809 11 | 1024,1,1,32768,g,1,1,in,single,cuFFT1D,565.195013 12 | 2048,1,1,16384,g,1,1,in,single,cuFFT1D,615.589675 13 | 4096,1,1,8192,g,1,1,in,single,cuFFT1D,686.664408 14 | 8192,1,1,4096,g,1,1,in,single,cuFFT1D,453.598643 15 | 16384,1,1,2048,g,1,1,in,single,cuFFT1D,400.493769 16 | 32768,1,1,1024,g,1,1,in,single,cuFFT1D,422.59182 17 | 65536,1,1,512,g,1,1,in,single,cuFFT1D,420.857829 18 | 
131072,1,1,256,g,1,1,in,single,cuFFT1D,397.358856 19 | 262144,1,1,128,g,1,1,in,single,cuFFT1D,334.527752 20 | 524288,1,1,64,g,1,1,in,single,cuFFT1D,351.140673 21 | 1048576,1,1,32,g,1,1,in,single,cuFFT1D,359.615475 22 | 2097152,1,1,16,g,1,1,in,single,cuFFT1D,376.128268 23 | 4194304,1,1,8,g,1,1,in,single,cuFFT1D,393.90095 24 | 8388608,1,1,4,g,1,1,in,single,cuFFT1D,403.230912 25 | 16777216,1,1,2,g,1,1,in,single,cuFFT1D,422.502973 26 | -------------------------------------------------------------------------------- /docs/performance/cuFFT_7.0/Tesla_K40/C2C_1D_double.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,1,1,16777216,g,1,1,in,double,cuFFT1D,27.724106 3 | 4,1,1,8388608,g,1,1,in,double,cuFFT1D,57.747175 4 | 8,1,1,4194304,g,1,1,in,double,cuFFT1D,88.106149 5 | 16,1,1,2097152,g,1,1,in,double,cuFFT1D,117.536091 6 | 32,1,1,1048576,g,1,1,in,double,cuFFT1D,146.175977 7 | 64,1,1,524288,g,1,1,in,double,cuFFT1D,174.841499 8 | 128,1,1,262144,g,1,1,in,double,cuFFT1D,204.371887 9 | 256,1,1,131072,g,1,1,in,double,cuFFT1D,237.097699 10 | 512,1,1,65536,g,1,1,in,double,cuFFT1D,266.799358 11 | 1024,1,1,32768,g,1,1,in,double,cuFFT1D,289.812625 12 | 2048,1,1,16384,g,1,1,in,double,cuFFT1D,268.214622 13 | 4096,1,1,8192,g,1,1,in,double,cuFFT1D,168.595754 14 | 8192,1,1,4096,g,1,1,in,double,cuFFT1D,165.250054 15 | 16384,1,1,2048,g,1,1,in,double,cuFFT1D,174.020168 16 | 32768,1,1,1024,g,1,1,in,double,cuFFT1D,174.773738 17 | 65536,1,1,512,g,1,1,in,double,cuFFT1D,155.236793 18 | 131072,1,1,256,g,1,1,in,double,cuFFT1D,161.97944 19 | 262144,1,1,128,g,1,1,in,double,cuFFT1D,165.983672 20 | 524288,1,1,64,g,1,1,in,double,cuFFT1D,173.099901 21 | 1048576,1,1,32,g,1,1,in,double,cuFFT1D,170.025358 22 | 2097152,1,1,16,g,1,1,in,double,cuFFT1D,179.184631 23 | 4194304,1,1,8,g,1,1,in,double,cuFFT1D,181.914462 24 | 8388608,1,1,4,g,1,1,in,double,cuFFT1D,156.583882 25 | 
16777216,1,1,2,g,1,1,in,double,cuFFT1D,163.086671 26 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_1D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,1,1,16777216,g,1,1,in,single,clFFT1D_2.6,76.9949 3 | 4,1,1,8388608,g,1,1,in,single,clFFT1D_2.6,152.554 4 | 8,1,1,4194304,g,1,1,in,single,clFFT1D_2.6,200.656 5 | 16,1,1,2097152,g,1,1,in,single,clFFT1D_2.6,270.405 6 | 32,1,1,1048576,g,1,1,in,single,clFFT1D_2.6,314.084 7 | 64,1,1,524288,g,1,1,in,single,clFFT1D_2.6,490.901 8 | 128,1,1,262144,g,1,1,in,single,clFFT1D_2.6,599.706 9 | 256,1,1,131072,g,1,1,in,single,clFFT1D_2.6,671.885 10 | 512,1,1,65536,g,1,1,in,single,clFFT1D_2.6,787.181 11 | 1024,1,1,32768,g,1,1,in,single,clFFT1D_2.6,840.822 12 | 2048,1,1,16384,g,1,1,in,single,clFFT1D_2.6,869.563 13 | 4096,1,1,8192,g,1,1,in,single,clFFT1D_2.6,580.775 14 | 8192,1,1,4096,g,1,1,in,single,clFFT1D_2.6,398.025 15 | 16384,1,1,2048,g,1,1,in,single,clFFT1D_2.6,451.457 16 | 32768,1,1,1024,g,1,1,in,single,clFFT1D_2.6,459.492 17 | 65536,1,1,512,g,1,1,in,single,clFFT1D_2.6,417.588 18 | 131072,1,1,256,g,1,1,in,single,clFFT1D_2.6,401.075 19 | 262144,1,1,128,g,1,1,in,single,clFFT1D_2.6,345.829 20 | 524288,1,1,64,g,1,1,in,single,clFFT1D_2.6,372.671 21 | 1048576,1,1,32,g,1,1,in,single,clFFT1D_2.6,361.599 22 | 2097152,1,1,16,g,1,1,in,single,clFFT1D_2.6,335.494 23 | 4194304,1,1,8,g,1,1,in,single,clFFT1D_2.6,347.831 24 | 8388608,1,1,4,g,1,1,in,single,clFFT1D_2.6,324.39 25 | 16777216,1,1,2,g,1,1,in,single,clFFT1D_2.6,310.083 26 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_1D_double.csv: -------------------------------------------------------------------------------- 1 | 
lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,1,1,16777216,g,1,1,in,double,clFFT1D_2.6,40.0275 3 | 4,1,1,8388608,g,1,1,in,double,clFFT1D_2.6,81.2218 4 | 8,1,1,4194304,g,1,1,in,double,clFFT1D_2.6,122.268 5 | 16,1,1,2097152,g,1,1,in,double,clFFT1D_2.6,146.554 6 | 32,1,1,1048576,g,1,1,in,double,clFFT1D_2.6,142.564 7 | 64,1,1,524288,g,1,1,in,double,clFFT1D_2.6,255.158 8 | 128,1,1,262144,g,1,1,in,double,clFFT1D_2.6,226.868 9 | 256,1,1,131072,g,1,1,in,double,clFFT1D_2.6,339.785 10 | 512,1,1,65536,g,1,1,in,double,clFFT1D_2.6,380.501 11 | 1024,1,1,32768,g,1,1,in,double,clFFT1D_2.6,353.783 12 | 2048,1,1,16384,g,1,1,in,double,clFFT1D_2.6,355.406 13 | 4096,1,1,8192,g,1,1,in,double,clFFT1D_2.6,220.831 14 | 8192,1,1,4096,g,1,1,in,double,clFFT1D_2.6,221.184 15 | 16384,1,1,2048,g,1,1,in,double,clFFT1D_2.6,250.442 16 | 32768,1,1,1024,g,1,1,in,double,clFFT1D_2.6,215.115 17 | 65536,1,1,512,g,1,1,in,double,clFFT1D_2.6,196.085 18 | 131072,1,1,256,g,1,1,in,double,clFFT1D_2.6,177.969 19 | 262144,1,1,128,g,1,1,in,double,clFFT1D_2.6,183.749 20 | 524288,1,1,64,g,1,1,in,double,clFFT1D_2.6,175.629 21 | 1048576,1,1,32,g,1,1,in,double,clFFT1D_2.6,146.653 22 | 2097152,1,1,16,g,1,1,in,double,clFFT1D_2.6,144.641 23 | 4194304,1,1,8,g,1,1,in,double,clFFT1D_2.6,147.301 24 | 8388608,1,1,4,g,1,1,in,double,clFFT1D_2.6,115.146 25 | 16777216,1,1,2,g,1,1,in,double,clFFT1D_2.6,110.471 26 | -------------------------------------------------------------------------------- /docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_1D_single.csv: -------------------------------------------------------------------------------- 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS 2 | 2,1,1,8388608,g,5,3,in,single,clFFT1D_2.6,34.1305 3 | 4,1,1,4194304,g,5,3,in,single,clFFT1D_2.6,90.9585 4 | 8,1,1,2097152,g,5,3,in,single,clFFT1D_2.6,125.277 5 | 16,1,1,1048576,g,5,3,in,single,clFFT1D_2.6,219.2965 6 | 32,1,1,524288,g,5,3,in,single,clFFT1D_2.6,194.6665 7 | 
64,1,1,262144,g,5,3,in,single,clFFT1D_2.6,431.73 8 | 128,1,1,131072,g,5,3,in,single,clFFT1D_2.6,445.3705 9 | 256,1,1,65536,g,5,3,in,single,clFFT1D_2.6,638.46 10 | 512,1,1,32768,g,5,3,in,single,clFFT1D_2.6,668.6 11 | 1024,1,1,16384,g,5,3,in,single,clFFT1D_2.6,738.26 12 | 2048,1,1,8192,g,5,3,in,single,clFFT1D_2.6,783.31 13 | 4096,1,1,4096,g,5,3,in,single,clFFT1D_2.6,487.8415 14 | 8192,1,1,2048,g,5,3,in,single,clFFT1D_2.6,112.0415 15 | 16384,1,1,1024,g,5,3,in,single,clFFT1D_2.6,134.027 16 | 32768,1,1,512,g,5,3,in,single,clFFT1D_2.6,163.1195 17 | 65536,1,1,256,g,5,3,in,single,clFFT1D_2.6,195.164 18 | 131072,1,1,128,g,5,3,in,single,clFFT1D_2.6,210.9695 19 | 262144,1,1,64,g,5,3,in,single,clFFT1D_2.6,215.927 20 | 524288,1,1,32,g,5,3,in,single,clFFT1D_2.6,225.0265 21 | 1048576,1,1,16,g,5,3,in,single,clFFT1D_2.6,187.0255 22 | 2097152,1,1,8,g,5,3,in,single,clFFT1D_2.6,194.0675 23 | 4194304,1,1,4,g,5,3,in,single,clFFT1D_2.6,193.9135 24 | 8388608,1,1,2,g,5,3,in,single,clFFT1D_2.6,182.1115 25 | 16777216,1,1,1,g,5,3,in,single,clFFT1D_2.6,170.7285 26 | -------------------------------------------------------------------------------- /src/include/clAmdFft.version.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | /*! @file clAmdFft.version.h 18 | * @note clAmdFft.version.h is a deprecated header file. 19 | * This header is provided to help projects that were written with the older clAmdFft codebase, to help them 20 | * port to the new API at their own schedule. It will not be maintained or updated, and will be removed after 21 | * a reasonable amount of time has passed. All new code should be written against clFFT.h. 22 | * Older projects should migrate to the new header at their earliest convenience. 23 | */ 24 | 25 | /* the configured version and settings for clFFT 26 | */ 27 | #define clAmdFftVersionMajor 2 28 | #define clAmdFftVersionMinor 0 29 | #define clAmdFftVersionPatch 0 30 | -------------------------------------------------------------------------------- /src/statTimer/statisticalTimer.extern.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | // StatTimer.cpp : Defines the exported functions for the DLL application. 
19 | // 20 | 21 | #include "stdafx.h" 22 | #include "statisticalTimer.extern.h" 23 | #include "statisticalTimer.CPU.h" 24 | #include "statisticalTimer.GPU.h" 25 | 26 | // Even though the individual getInstance functions of the timer classes return references, 27 | // we convert those to pointers before returning from here so that the clients can initialize 28 | // their local variables to NULL, which references do not allow. 29 | baseStatTimer* getStatTimer( const clfftTimerType type ) 30 | { 31 | if( type == CLFFT_CPU ) 32 | return &CpuStatTimer::getInstance( ); 33 | 34 | return &GpuStatTimer::getInstance( ); 35 | } 36 | -------------------------------------------------------------------------------- /src/tests/buffer_memory.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | 20 | /*****************************************************/ 21 | /*****************************************************/ 22 | uint32_t float_as_hex( float a ) { 23 | return *(uint32_t*)&a; 24 | } 25 | 26 | /*****************************************************/ 27 | /*****************************************************/ 28 | uint64_t float_as_hex( double a ) { 29 | return *(uint64_t*)&a; 30 | } 31 | 32 | /*****************************************************/ 33 | /*****************************************************/ 34 | uint32_t nan_as_hex( float a ) { 35 | a; 36 | return ~0x0; 37 | } 38 | 39 | /*****************************************************/ 40 | /*****************************************************/ 41 | uint64_t nan_as_hex( double a ) { 42 | a; 43 | return ~0x0ull; 44 | } -------------------------------------------------------------------------------- /src/clFFTConfigVersion.cmake.in: -------------------------------------------------------------------------------- 1 | # This is a basic version file for the Config-mode of find_package(). 2 | # It is used by write_basic_package_version_file() as input file for configure_file() 3 | # to create a version-file which can be installed along a config.cmake file. 4 | # 5 | # The created file sets PACKAGE_VERSION_EXACT if the current version string and 6 | # the requested version string are exactly the same and it sets 7 | # PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version, 8 | # but only if the requested major version is the same as the current one. 
9 | 10 | 11 | set(PACKAGE_VERSION "@clFFT_VERSION@") 12 | 13 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}" ) 14 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 15 | else() 16 | if("${PACKAGE_FIND_VERSION_MAJOR}" STREQUAL "${clFFT_VERSION_MAJOR}") 17 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 18 | else() 19 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 20 | endif() 21 | 22 | if( "${PACKAGE_FIND_VERSION}" STREQUAL "${PACKAGE_VERSION}") 23 | set(PACKAGE_VERSION_EXACT TRUE) 24 | endif() 25 | endif() 26 | 27 | # if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: 28 | if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "@CMAKE_SIZEOF_VOID_P@" STREQUAL "") 29 | return() 30 | endif() 31 | 32 | # check that the installed version has the same 32/64bit-ness as the one which is currently searching: 33 | if(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "@CMAKE_SIZEOF_VOID_P@") 34 | math(EXPR installedBits "@CMAKE_SIZEOF_VOID_P@ * 8") 35 | set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") 36 | set(PACKAGE_VERSION_UNSUITABLE TRUE) 37 | endif() 38 | -------------------------------------------------------------------------------- /src/statTimer/stdafx.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | // stdafx.h : include file for standard system include files, 19 | // or project specific include files that are used frequently, but 20 | // are changed infrequently 21 | // 22 | 23 | #pragma once 24 | 25 | #define _CRT_SECURE_NO_WARNINGS 26 | 27 | //#include 28 | //#include 29 | //#include 30 | //#include 31 | //#include 32 | //#include 33 | #include 34 | //#include 35 | //#include 36 | #include 37 | //#include 38 | 39 | // _WIN32 is defined for both 32 & 64 bit environments 40 | #if defined( _WIN32 ) 41 | // #include 42 | #include "targetver.h" 43 | 44 | #if !defined( NOMINMAX ) 45 | #define NOMINMAX 46 | #endif 47 | 48 | #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers 49 | // Windows Header Files: 50 | #include 51 | #endif 52 | -------------------------------------------------------------------------------- /src/include/stdafx.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | // stdafx.h : include file for standard system include files, 19 | // or project specific include files that are used frequently, but 20 | // are changed infrequently 21 | // 22 | 23 | #pragma once 24 | 25 | #define _CRT_SECURE_NO_WARNINGS 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | // _WIN32 is defined for both 32 & 64 bit environments 41 | #if defined( _WIN32 ) 42 | #include 43 | #include "targetver.h" 44 | 45 | #if !defined( NOMINMAX ) 46 | #define NOMINMAX 47 | #endif 48 | 49 | #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers 50 | // Windows Header Files: 51 | #include 52 | #endif 53 | -------------------------------------------------------------------------------- /src/library/md5sum.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. 3 | * MD5 Message-Digest Algorithm (RFC 1321). 4 | * 5 | * Homepage: 6 | * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 7 | * 8 | * Author: 9 | * Alexander Peslyak, better known as Solar Designer 10 | * 11 | * This software was written by Alexander Peslyak in 2001. No copyright is 12 | * claimed, and the software is hereby placed in the public domain. 13 | * In case this attempt to disclaim copyright and place the software in the 14 | * public domain is deemed null and void, then the software is 15 | * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the 16 | * general public under the following terms: 17 | * 18 | * Redistribution and use in source and binary forms, with or without 19 | * modification, are permitted. 20 | * 21 | * There's ABSOLUTELY NO WARRANTY, express or implied. 
22 | * 23 | * See md5.c for more information. 24 | */ 25 | 26 | #ifndef _MD5_SUM_H 27 | #define _MD5_SUM_H 28 | 29 | #ifdef HAVE_OPENSSL 30 | #include 31 | #else 32 | 33 | /* Any 32-bit or wider unsigned integer data type will do */ 34 | typedef unsigned int MD5_u32plus; 35 | 36 | typedef struct { 37 | MD5_u32plus lo, hi; 38 | MD5_u32plus a, b, c, d; 39 | unsigned char buffer[64]; 40 | MD5_u32plus block[16]; 41 | } MD5_CTX; 42 | 43 | extern void MD5_Init(MD5_CTX *ctx); 44 | extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size); 45 | extern void MD5_Final(unsigned char *result, MD5_CTX *ctx); 46 | 47 | #endif // HAVE_OPENSSL 48 | 49 | void md5sum (const void * data, unsigned long size, char * md5sum); 50 | 51 | #endif // _MD5_SUM_H 52 | -------------------------------------------------------------------------------- /src/cuFFT-client/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2015 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
# ########################################################################

# Builds the cuFFT-client executable with the FindCUDA module.
cmake_minimum_required(VERSION 2.8)
find_package(CUDA REQUIRED)

# client
set( cuFFT-client.Source cuFFT-client.cpp )

set( cuFFT-client.Files ${cuFFT-client.Source} )

# Pass options to NVCC
# One -gencode entry per targeted GPU architecture (sm_20 through sm_52).
# NOTE(review): the oldest architectures listed may be rejected by recent CUDA toolkits - confirm.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -gencode arch=compute_20,code=sm_20; -gencode arch=compute_30,code=sm_30; -gencode arch=compute_35,code=sm_35; -gencode arch=compute_37,code=sm_37; -gencode arch=compute_50,code=sm_50; -gencode arch=compute_52,code=sm_52)

include_directories( ./ ../common/inc/ )

CUDA_ADD_EXECUTABLE( cuFFT-client ${cuFFT-client.Files} )

# Links the cuFFT library into the target.
CUDA_ADD_CUFFT_TO_TARGET( cuFFT-client )

target_link_libraries( cuFFT-client ${CUDA_LIBRARIES})

# Set output directory to bin
# NOTE(review): CUDA_GENERATED_OUTPUT_DIR is assigned after CUDA_ADD_EXECUTABLE; verify that
# it still takes effect for this target. BITNESS is expected to be defined by a parent scope.
if( MSVC )
	set(CUDA_GENERATED_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BITNESS})
else()
	set(CUDA_GENERATED_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BITNESS}/${CMAKE_BUILD_TYPE})
endif()
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | #if !defined( amd_unicode_h ) 20 | #define amd_unicode_h 21 | 22 | // Typedefs to support unicode and ansii compilation 23 | #if defined( _UNICODE ) 24 | typedef std::wstring tstring; 25 | typedef std::wstringstream tstringstream; 26 | typedef std::wifstream tifstream; 27 | typedef std::wofstream tofstream; 28 | typedef std::wfstream tfstream; 29 | static std::wostream& tout = std::wcout; 30 | static std::wostream& terr = std::wcerr; 31 | #else 32 | typedef std::string tstring; 33 | typedef std::stringstream tstringstream; 34 | typedef std::ifstream tifstream; 35 | typedef std::ofstream tofstream; 36 | typedef std::fstream tfstream; 37 | static std::ostream& tout = std::cout; 38 | static std::ostream& terr = std::cerr; 39 | #endif 40 | 41 | // These macros help linux cope with the conventions of windows tchar.h file 42 | #if defined( _WIN32 ) 43 | #include 44 | #include 45 | #else 46 | #if defined( __GNUC__ ) 47 | typedef char TCHAR; 48 | typedef char _TCHAR; 49 | #define _tmain main 50 | 51 | #if defined( UNICODE ) 52 | #define _T(x) L ## x 53 | #else 54 | #define _T(x) x 55 | #endif 56 | #endif 57 | #endif 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /src/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ########################################################################

# Builds one executable per *.c file found in this directory and installs both
# the source file and the resulting binary under share/clFFT/examples.
INCLUDE_DIRECTORIES(
    "${CMAKE_CURRENT_SOURCE_DIR}"
    "${OPENCL_INCLUDE_DIRS}"
    "${PROJECT_SOURCE_DIR}/include"
    "${PROJECT_BINARY_DIR}/include"
    )

LINK_DIRECTORIES("${PROJECT_BINARY_DIR}/package/lib${CLFFT_SUFFIX_LIB}")

# Collect every C example source in this directory
FILE(GLOB FILES "*.c")

FOREACH(FILE ${FILES})

    if( MSVC )
        if( MSVC_VERSION LESS 1800 )
            # Use C++ with Microsoft compiler
            # (presumably because these older compilers cannot build the examples as C - confirm)
            SET_SOURCE_FILES_PROPERTIES( ${FILE} PROPERTIES LANGUAGE CXX)
        endif ()
    endif( )

    # EXAMPLE  = file name without extension
    # DIR_NAME = name of the directory that contains the file
    GET_FILENAME_COMPONENT(EXAMPLE ${FILE} NAME_WE)
    GET_FILENAME_COMPONENT(FULL_DIR_NAME ${FILE} PATH)
    GET_FILENAME_COMPONENT(DIR_NAME ${FULL_DIR_NAME} NAME)
    # The target name is prefixed with example_<dir>_; the produced binary keeps the
    # plain example name through OUTPUT_NAME below.
    SET(EXAMPLE_NAME example_${DIR_NAME}_${EXAMPLE})
    ADD_EXECUTABLE(${EXAMPLE_NAME} ${FILE})

    TARGET_LINK_LIBRARIES(${EXAMPLE_NAME} clFFT ${OPENCL_LIBRARIES} ${CMAKE_DL_LIBS})

    SET_TARGET_PROPERTIES(${EXAMPLE_NAME}
        PROPERTIES
        OUTPUT_NAME ${EXAMPLE}
        RUNTIME_OUTPUT_DIRECTORY ${DIR_NAME})

    INSTALL(FILES ${FILE} DESTINATION "share/clFFT/examples")
    INSTALL(TARGETS ${EXAMPLE_NAME}
        RUNTIME DESTINATION "share/clFFT/examples")
ENDFOREACH()
2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | #include 20 | 21 | #include "test_constants.h" 22 | #include "fftw_transform.h" 23 | #include "cl_transform.h" 24 | #include "typedefs.h" 25 | #include "accuracy_test_common.h" 26 | #include 27 | #include 28 | 29 | /*****************************************************/ 30 | clfftResultLocation cl_placeness( placeness::placeness_t placeness ) 31 | { 32 | if( placeness == placeness::in_place ) 33 | return CLFFT_INPLACE; 34 | else if( placeness == placeness::out_of_place ) 35 | return CLFFT_OUTOFPLACE; 36 | else 37 | throw std::runtime_error( "invalid placeness" ); 38 | } 39 | 40 | /*****************************************************/ 41 | clfftLayout cl_layout( layout::buffer_layout_t layout_in ) 42 | { 43 | if( layout_in == layout::real ) 44 | return CLFFT_REAL; 45 | else if( layout_in == layout::hermitian_planar ) 46 | return CLFFT_HERMITIAN_PLANAR; 47 | else if( layout_in == layout::complex_planar ) 48 | return CLFFT_COMPLEX_PLANAR; 49 | else if( layout_in == layout::hermitian_interleaved ) 50 | return CLFFT_HERMITIAN_INTERLEAVED; 51 | else if( layout_in == layout::complex_interleaved ) 52 | return CLFFT_COMPLEX_INTERLEAVED; 53 | else 54 | throw std::runtime_error( "invalid layout_in" ); 55 | } 56 | 
# ########################################################################
# Copyright 2013 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ########################################################################


# client
# Sources and headers of the clFFT-client executable. Headers are listed so that
# they are part of the target's file list.
set( Client.Source client.cpp
     openCL.misc.cpp
     stdafx.cpp )

set( Client.Headers client.h
     openCL.misc.h
     ../statTimer/statisticalTimer.extern.h
     ../include/unicode.compatibility.h
     ../include/stdafx.h
     ../include/targetver.h
     ../include/clFFT.h )

set( Client.Files ${Client.Source} ${Client.Headers} )

set( RT_LIB "" )
if( WIN32 )
	add_definitions( "/D_CONSOLE" )
elseif( NOT APPLE )
	# Link with librt on non-Apple Unix platforms (presumably for the clock_gettime() call in
	# client.h's Timer - confirm). dlopen()/dlclose() support is not provided by this variable;
	# it comes from ${CMAKE_DL_LIBS} in target_link_libraries below.
	set( RT_LIB "-lrt" )
endif( )

# Include standard OpenCL headers
include_directories( ${Boost_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ../../../common ${PROJECT_BINARY_DIR}/include ../include )

add_executable( clFFT-client ${Client.Files} )

target_link_libraries( clFFT-client clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} ${CMAKE_DL_LIBS} ${RT_LIB} )

set_target_properties( clFFT-client PROPERTIES VERSION ${clFFT_VERSION} )
# The executable is staged in <build>/staging
set_target_properties( clFFT-client PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
if( APPLE )
	# properly deal with RPATH on mac
	set_target_properties( clFFT-client PROPERTIES INSTALL_RPATH "@loader_path/../lib${CLFFT_SUFFIX_LIB}")
endif()

install( TARGETS clFFT-client
         RUNTIME DESTINATION bin${CLFFT_SUFFIX_BIN}
         LIBRARY DESTINATION lib${CLFFT_SUFFIX_LIB}
         ARCHIVE DESTINATION lib${CLFFT_SUFFIX_LIB}/import
         )
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================

IF(CLFFT_INCLUDE_DIRS)
  # Already in cache, be silent
  set (CLFFT_FIND_QUIETLY TRUE)
ENDIF (CLFFT_INCLUDE_DIRS)

# Locate the installation prefix. The user-supplied CLFFT_ROOT is listed first
# so that it takes precedence over a copy that also happens to live in /usr/local.
FIND_PATH(CLFFT_ROOT_DIR
    NAMES include/clFFT.h
    HINTS ${CLFFT_ROOT} /usr/local/
    DOC "clFFT root directory.")

# Header directory below the prefix found above
FIND_PATH(_CLFFT_INCLUDE_DIRS
    NAMES clFFT.h
    HINTS ${CLFFT_ROOT_DIR}/include
    DOC "clFFT Include directory")

# Library below the prefix found above
FIND_LIBRARY(_CLFFT_LIBRARY
    NAMES clFFT
    HINTS ${CLFFT_ROOT_DIR}/lib)

# Public result variables documented in the header of this module
SET(CLFFT_INCLUDE_DIRS ${_CLFFT_INCLUDE_DIRS})
SET(CLFFT_LIBRARIES ${_CLFFT_LIBRARY})

# handle the QUIETLY and REQUIRED arguments and set CLFFT_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE (FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(CLFFT DEFAULT_MSG CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS)
MARK_AS_ADVANCED(CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS)
# Also hide the cache entries that the find_* calls above actually create
MARK_AS_ADVANCED(CLFFT_ROOT_DIR _CLFFT_INCLUDE_DIRS _CLFFT_LIBRARY)
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | ======================================================================== 18 | DYNAMIC LINK LIBRARY : StatTimer Project Overview 19 | ======================================================================== 20 | 21 | AppWizard has created this StatTimer DLL for you. 22 | 23 | This file contains a summary of what you will find in each of the files that 24 | make up your StatTimer application. 25 | 26 | 27 | StatTimer.vcxproj 28 | This is the main project file for VC++ projects generated using an Application Wizard. 29 | It contains information about the version of Visual C++ that generated the file, and 30 | information about the platforms, configurations, and project features selected with the 31 | Application Wizard. 32 | 33 | StatTimer.vcxproj.filters 34 | This is the filters file for VC++ projects generated using an Application Wizard. 35 | It contains information about the association between the files in your project 36 | and the filters. This association is used in the IDE to show grouping of files with 37 | similar extensions under a specific node (for e.g. ".cpp" files are associated with the 38 | "Source Files" filter). 39 | 40 | StatTimer.cpp 41 | This is the main DLL source file. 
42 | 43 | ///////////////////////////////////////////////////////////////////////////// 44 | Other standard files: 45 | 46 | StdAfx.h, StdAfx.cpp 47 | These files are used to build a precompiled header (PCH) file 48 | named StatTimer.pch and a precompiled types file named StdAfx.obj. 49 | 50 | ///////////////////////////////////////////////////////////////////////////// 51 | Other notes: 52 | 53 | AppWizard uses "TODO:" comments to indicate parts of the source code you 54 | should add to or customize. 55 | 56 | ///////////////////////////////////////////////////////////////////////////// 57 | -------------------------------------------------------------------------------- /src/client/client.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
// Wall-clock stopwatch with one implementation per platform. All variants share
// the same interface: Start() records a reference point and Sample() returns the
// number of seconds elapsed since the most recent Start().
#if defined( WIN32 ) || defined( _WIN32 )

struct Timer
{
    LARGE_INTEGER start, stop, freq;

public:
    Timer() { QueryPerformanceFrequency( &freq ); }

    void Start() { QueryPerformanceCounter(&start); }
    double Sample()
    {
        QueryPerformanceCounter ( &stop );
        // counter ticks divided by ticks-per-second
        double time = (double)(stop.QuadPart-start.QuadPart) / (double)(freq.QuadPart);
        return time;
    }
};

#elif defined(__APPLE__) || defined(__MACOSX)

#include <mach/clock.h>
#include <mach/mach.h>

struct Timer
{
    clock_serv_t clock;
    mach_timespec_t start, end;

public:
    Timer() { host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &clock); }
    ~Timer() { mach_port_deallocate(mach_task_self(), clock); }

    void Start() { clock_get_time(clock, &start); }
    double Sample()
    {
        clock_get_time(clock, &end);
        // Convert every field to double before combining them, so that no
        // intermediate result depends on the width of an integer type.
        return ((double)end.tv_sec - (double)start.tv_sec)
             + 1E-9 * ((double)end.tv_nsec - (double)start.tv_nsec);
    }
};

#else

#include <time.h>
#include <math.h>

struct Timer
{
    struct timespec start, end;

public:
    Timer() { }

    void Start() { clock_gettime(CLOCK_MONOTONIC, &start); }
    double Sample()
    {
        clock_gettime(CLOCK_MONOTONIC, &end);
        // Do the arithmetic in double: the former "1000000000L * seconds" product
        // was evaluated in long and overflows after about two seconds wherever
        // long is only 32 bits wide.
        return (double)(end.tv_sec - start.tv_sec)
             + 1E-9 * (double)(end.tv_nsec - start.tv_nsec);
    }
};

#endif
Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | ======================================================================== 18 | CONSOLE APPLICATION : AMD.clFFT Project Overview 19 | ======================================================================== 20 | 21 | AppWizard has created this AMD.clFFT application for you. 22 | 23 | This file contains a summary of what you will find in each of the files that 24 | make up your AMD.clFFT application. 25 | 26 | 27 | AMD.clFFT.vcxproj 28 | This is the main project file for VC++ projects generated using an Application Wizard. 29 | It contains information about the version of Visual C++ that generated the file, and 30 | information about the platforms, configurations, and project features selected with the 31 | Application Wizard. 32 | 33 | AMD.clFFT.vcxproj.filters 34 | This is the filters file for VC++ projects generated using an Application Wizard. 35 | It contains information about the association between the files in your project 36 | and the filters. This association is used in the IDE to show grouping of files with 37 | similar extensions under a specific node (for e.g. ".cpp" files are associated with the 38 | "Source Files" filter). 39 | 40 | AMD.clFFT.cpp 41 | This is the main application source file. 
42 | 43 | ///////////////////////////////////////////////////////////////////////////// 44 | Other standard files: 45 | 46 | StdAfx.h, StdAfx.cpp 47 | These files are used to build a precompiled header (PCH) file 48 | named AMD.clFFT.pch and a precompiled types file named StdAfx.obj. 49 | 50 | ///////////////////////////////////////////////////////////////////////////// 51 | Other notes: 52 | 53 | AppWizard uses "TODO:" comments to indicate parts of the source code you 54 | should add to or customize. 55 | 56 | ///////////////////////////////////////////////////////////////////////////// 57 | -------------------------------------------------------------------------------- /src/callback-client/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2015 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
# ########################################################################


# client
# Sources and headers of the clFFT-callback-client executable.
set( Client.Source callback-client.cpp
     openCL.misc.cpp
     stdafx.cpp )

set( Client.Headers client.h
     openCL.misc.h
     ../statTimer/statisticalTimer.extern.h
     ../include/unicode.compatibility.h
     ../include/stdafx.h
     ../include/targetver.h
     ../include/clFFT.h )

set( Client.Files ${Client.Source} ${Client.Headers} )

set( DL_LIB "" )
if( WIN32 )
	add_definitions( "/D_CONSOLE" )
elseif( APPLE )
	set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" )
else( )
	# dlopen()/dlclose() live in whatever library CMake reports through CMAKE_DL_LIBS
	# (empty on platforms that have no separate libdl), so use that instead of a
	# hard-coded "-ldl"; this matches how the clFFT-client target links.
	# librt is kept as a separate list item.
	set( DL_LIB ${CMAKE_DL_LIBS} rt )
endif( )

# Include standard OpenCL headers
include_directories( ${Boost_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ${FFTW_INCLUDE_DIRS} ../../../common ${PROJECT_BINARY_DIR}/include ../include )

add_executable( clFFT-callback-client ${Client.Files} )

target_link_libraries( clFFT-callback-client clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} ${FFTW_LIBRARIES} ${DL_LIB} )

set_target_properties( clFFT-callback-client PROPERTIES VERSION ${clFFT_VERSION} )
# The executable is staged in <build>/staging
set_target_properties( clFFT-callback-client PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
if( APPLE )
	# properly deal with RPATH on mac
	set_target_properties( clFFT-callback-client PROPERTIES INSTALL_RPATH "@loader_path/../lib${CLFFT_SUFFIX_LIB}")
endif()

install( TARGETS clFFT-callback-client
         RUNTIME DESTINATION bin${CLFFT_SUFFIX_BIN}
         LIBRARY DESTINATION lib${CLFFT_SUFFIX_LIB}
         ARCHIVE DESTINATION lib${CLFFT_SUFFIX_LIB}/import
         )
// Writes an empty string padded to a field width of tabIndex into the stream
// (i.e. tabIndex fill characters, a space unless the stream's fill was changed)
// and returns the same stream so that the caller can keep chaining insertions.
inline std::stringstream& clKernWrite(std::stringstream& rhs, const size_t tabIndex)
{
	// std::setw takes an int; make the narrowing from size_t explicit
	rhs << std::setw(static_cast<int>(tabIndex)) << "";
	return rhs;
}
41 | [M0 42 | M1 43 | M2] 44 | */ 45 | clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::Signature & params, std::string& strKernel, const size_t& lwSize, const size_t reShapeFactor); 46 | 47 | //generate transpose kernel with square 2d matrix of row major with blocks along the leading dimension 48 | //aka leading dimension batched 49 | /* 50 | Below is a matrix(row major) containing three square sub-matrices along a row 51 | [M0 M1 M2] 52 | */ 53 | clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTransposeNonSquareAction::Signature & params, std::string& strKernel, const size_t& lwSize, const size_t reShapeFactor); 54 | 55 | //swap lines. This kind of kernel is used in combination with the square transpose kernels to perform a non-square transpose with a 1:2 ratio 56 | clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature & params, std::string& strKernel, std::string& KernelFuncName, const size_t& lwSize, const size_t reShapeFactor); 57 | 58 | clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Signature & params, std::string& strKernel, std::string& KernelFuncName, const size_t& lwSize, const size_t reShapeFactor); 59 | 60 | void get_cycles(size_t *cycle_map, size_t num_reduced_row, size_t num_reduced_col); 61 | 62 | void permutation_calculation(size_t m, size_t n, std::vector > &permutationVec); 63 | }//end of namespace clfft_transpose_generator 64 | 65 | #endif -------------------------------------------------------------------------------- /src/scripts/perf/errorHandler.py: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
# ########################################################################
# Copyright 2013 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ########################################################################

#---------------------------------File Note------------------------------------
#Date: 27 January 2012
#This file defines all the error codes and the error handling mechanism
#--------------------------------Global Variables------------------------------

UINS_CAT = 100
WIN_REG_SEARCH_FAIL = 101
UNIMPL_APP = 200
SYS_ERR = 300
TIME_OUT = 400
DIM_INCO_FILE_FMT = 500 #incorrect file format for dimension
DIM_FILE_VAL_INCO = 501 #Value coming from dimension file is incorrect

#errorTable : Maps every error code defined above to its message.
#             Add a new error code and error message here.
errorTable = {
              UINS_CAT: 'Application is not able to find the installed catalyst',
              WIN_REG_SEARCH_FAIL: 'Windows Registry search for catalysts version is unsuccessful',
              UNIMPL_APP: 'Unimplemented Application requirement',
              SYS_ERR:    'System error occurred - Please check the source code',
              TIME_OUT: 'Operation is timed out',
              DIM_INCO_FILE_FMT: 'incorrect file format for dimension - Not able to find dimension',
              DIM_FILE_VAL_INCO: 'Value coming from dimension file is incorrect'
              }

#--------------------------------Class Definitions-----------------------------
class TimeoutException(Exception):
    """Marker exception for an operation that ran out of time."""
    pass

class ApplicationException(Exception):
    """Base class for handling all the application generated exceptions.

    Attributes:
        fileName: name of the file/module reporting the error.
        errno:    one of the error codes defined at the top of this module.
        mess:     the errorTable message for errno with msg appended.
        message:  'Application ERROR:' followed by the repr of
                  '<fileName>-<errno>-<mess>'.
    """

    def __init__(self, fileName, errno, msg = ""):
        self.fileName = fileName
        self.errno = errno
        # An unknown code must not raise KeyError while the exception itself is
        # being built, that would hide the error the caller is trying to report.
        self.mess = errorTable.get(errno, 'Unknown error code') + msg
        self.message = 'Application ERROR:'+repr(self.fileName+'-'+str(self.errno)+'-'+self.mess)

    def __str__(self):
        return repr(self.fileName+'-'+str(self.errno)+'-'+self.mess)


#--------------------------------Global Function-------------------------------
if __name__ == '__main__':
    #print(errorTable)
    try:
        raise ApplicationException('errorHandler', SYS_ERR)

    except Exception:
        # print() with a single string argument behaves the same on Python 2 and 3
        print('Generic exception')
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | #ifndef _STATISTICALTIMER_EXTERN_H_ 20 | #define _STATISTICALTIMER_EXTERN_H_ 21 | #include "../include/clFFT.h" 22 | #include "statisticalTimer.h" 23 | 24 | /** 25 | * \file clfft.StatisticalTimer.extern.h 26 | * \brief A timer class that provides a cross platform timer for use 27 | * in timing code progress with a high degree of accuracy. 28 | * This class is implemented entirely in the header, to facilitate inclusion into multiple 29 | * projects without needing to compile an object file for each project. 30 | */ 31 | 32 | // The following ifdef block is the standard way of creating macros which make exporting 33 | // from a DLL simpler. All files within this DLL are compiled with the STATTIMER_EXPORTS 34 | // symbol defined on the command line. this symbol should not be defined on any project 35 | // that uses this DLL. This way any other project whose source files include this file see 36 | // STATTIMER_API functions as being imported from a DLL, whereas this DLL sees symbols 37 | // defined with this macro as being exported. 
38 | #if defined( _WIN32 ) 39 | #if !defined( __cplusplus ) 40 | #define inline __inline 41 | #endif 42 | 43 | #if defined( CLFFT_STATIC ) 44 | #define STATTIMER_API 45 | #elif defined( STATTIMER_EXPORTS ) 46 | #define STATTIMER_API __declspec( dllexport ) 47 | #else 48 | #define STATTIMER_API __declspec( dllimport ) 49 | #endif 50 | #else 51 | #define STATTIMER_API 52 | #endif 53 | 54 | // The type of timer to be returned from ::getStatTimer( ) 55 | typedef enum clfftTimerType_ 56 | { 57 | CLFFT_GPU = 1, 58 | CLFFT_CPU, 59 | } clfftTimerType; 60 | 61 | // Table of typedef definitions for all exported functions from this shared module. 62 | // Clients of this module can use these typedefs to help create function pointers 63 | // that can be initialized to point to the functions exported from this module. 64 | typedef baseStatTimer* (*PFGETSTATTIMER)( const clfftTimerType type ); 65 | 66 | /** 67 | * \fn getInstance() 68 | * \brief This returns a reference to the singleton timer. Guarantees only 1 timer class is ever 69 | * instantiated within a compilable executable. 70 | */ 71 | extern "C" STATTIMER_API baseStatTimer* getStatTimer( const clfftTimerType type ); 72 | 73 | #endif // _STATISTICALTIMER_EXTERN_H_ 74 | -------------------------------------------------------------------------------- /src/statTimer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | 18 | # List the names of the files to compile for the external client . . . 19 | set( StatTimer.Source statisticalTimer.CPU.cpp 20 | statisticalTimer.GPU.cpp 21 | statisticalTimer.extern.cpp 22 | stdafx.cpp ) 23 | 24 | # Windows only uses dllmain 25 | if( MSVC ) 26 | set( StatTimer.Source ${StatTimer.Source} dllmain.cpp ) 27 | endif( ) 28 | 29 | set( StatTimer.Headers statisticalTimer.h 30 | statisticalTimer.extern.h 31 | statisticalTimer.CPU.h 32 | statisticalTimer.GPU.h 33 | stdafx.h 34 | targetver.h 35 | ../include/clFFT.h ) 36 | 37 | set( StatTimer.Files ${StatTimer.Source} ${StatTimer.Headers} ) 38 | 39 | # For a rainy day, add pre-compiled header support 40 | #if( MSVC ) 41 | # if (USE_MSVC_PCH) 42 | 43 | # set_source_files_properties(LungAnalysisPCH.cxx 44 | # PROPERTIES 45 | # COMPILE_FLAGS "/YcLungAnalysisPCH.h" 46 | # ) 47 | # foreach( src_file ${UPMC_LA_SRCS} ) 48 | # set_source_files_properties( 49 | # ${src_file} 50 | # PROPERTIES 51 | # COMPILE_FLAGS "/YuLungAnalysisPCH.h" 52 | # ) 53 | # endforeach( src_file ${UPMC_LA_SRCS} ) 54 | 55 | # list(APPEND UPMC_LA_SRCS LungAnalysisPCH.cxx) 56 | # list(APPEND UPMC_LA_HDRS LungAnalysisPCH.h) 57 | 58 | # endif(USE_MSVC_PCH) 59 | #endif (MSVC) 60 | 61 | add_definitions( "/DSTATTIMER_EXPORTS" ) 62 | 63 | # Include standard OpenCL headers 64 | include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../include ) 65 | 66 | add_library(StatTimer ${StatTimer.Files}) 67 | 
set_target_properties(StatTimer PROPERTIES VERSION ${clFFT_VERSION}) 68 | set_target_properties(StatTimer PROPERTIES SOVERSION ${clFFT_VERSION_MAJOR}) 69 | set_target_properties(StatTimer PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging") 70 | target_link_libraries(StatTimer ${OPENCL_LIBRARIES}) 71 | 72 | if( UNIX AND NOT APPLE ) 73 | # This library dependency is brought in by the high precision timer available in linux 74 | target_link_libraries( StatTimer -lrt ) 75 | endif( ) 76 | 77 | # CPack configuration; include the executable into the package 78 | install( TARGETS StatTimer 79 | EXPORT Library 80 | RUNTIME DESTINATION bin${CLFFT_SUFFIX_BIN} 81 | LIBRARY DESTINATION lib${CLFFT_SUFFIX_LIB} 82 | ARCHIVE DESTINATION lib${CLFFT_SUFFIX_LIB}/import 83 | ) 84 | -------------------------------------------------------------------------------- /src/gtest.cmake: -------------------------------------------------------------------------------- 1 | 2 | option(USE_SYSTEM_GTEST "Use system installed gtest when set to ON, or build gtest locally when set to OFF" OFF) 3 | 4 | if(USE_SYSTEM_GTEST) 5 | if( (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) ) 6 | message( STATUS "Cmake version 2.8 or greater needed to use GTest" ) 7 | else() 8 | # This will define GTEST_FOUND 9 | find_package( GTest ) 10 | endif() 11 | else() 12 | if(CMAKE_VERSION VERSION_LESS 3.2 AND CMAKE_GENERATOR MATCHES "Ninja") 13 | message(WARNING "Building GTest with Ninja has known issues with CMake older than 3.2") 14 | endif() 15 | 16 | include(ExternalProject) 17 | 18 | set(GTEST_LIBRARIES gtest gtest_main) 19 | # the binary dir must be know before creating the external project in order 20 | # to pass the byproducts 21 | set(prefix "${CMAKE_CURRENT_BINARY_DIR}/gtest-external-prefix") 22 | set(binary_dir "${prefix}/src/gtest-external-build") 23 | 24 | set(byproducts) 25 | foreach(lib ${GTEST_LIBRARIES}) 26 | set(${lib}_location 27 | 
${binary_dir}/${CMAKE_CFG_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${lib}${CMAKE_STATIC_LIBRARY_SUFFIX}) 28 | list(APPEND byproducts ${${lib}_location}) 29 | endforeach() 30 | 31 | if( MSVC ) 32 | if( MSVC_VERSION LESS 1800 ) 33 | set(EXTRA_FLAG "/D_VARIADIC_MAX=10 ") 34 | else() 35 | set(EXTRA_FLAG "") 36 | endif() 37 | else() 38 | set(EXTRA_FLAG "") 39 | endif() 40 | 41 | ExternalProject_Add( 42 | gtest-external 43 | URL https://github.com/google/googletest/archive/release-1.7.0.zip 44 | URL_MD5 EF5E700C8A0F3EE123E2E0209B8B4961 45 | PREFIX ${prefix} 46 | BINARY_DIR ${binary_dir} 47 | CMAKE_CACHE_ARGS 48 | -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER} 49 | -DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS} 50 | -DCMAKE_CXX_FLAGS_DEBUG:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_DEBUG} 51 | -DCMAKE_CXX_FLAGS_MINSIZEREL:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_MINSIZEREL} 52 | -DCMAKE_CXX_FLAGS_RELEASE:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_RELEASE} 53 | -DCMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_RELWITHDEBINFO} 54 | -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER} 55 | -DCMAKE_C_FLAGS:STRING=${CMAKE_C_FLAGS} 56 | -DCMAKE_C_FLAGS_DEBUG:STRING=${CMAKE_C_FLAGS_DEBUG} 57 | -DCMAKE_C_FLAGS_MINSIZEREL:STRING=${CMAKE_C_FLAGS_MINSIZEREL} 58 | -DCMAKE_C_FLAGS_RELEASE:STRING=${CMAKE_C_FLAGS_RELEASE} 59 | -DCMAKE_C_FLAGS_RELWITHDEBINFO:STRING=${CMAKE_C_FLAGS_RELWITHDEBINFO} 60 | -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} 61 | -Dgtest_force_shared_crt:BOOL=ON 62 | BUILD_BYPRODUCTS ${byproducts} 63 | INSTALL_COMMAND "") 64 | 65 | foreach(lib ${GTEST_LIBRARIES}) 66 | add_library(${lib} IMPORTED STATIC) 67 | add_dependencies(${lib} gtest-external) 68 | set_target_properties(${lib} PROPERTIES IMPORTED_LOCATION ${${lib}_location}) 69 | endforeach() 70 | 71 | ExternalProject_Get_Property(gtest-external source_dir) 72 | set(GTEST_INCLUDE_DIRS ${source_dir}/include) 73 | set(GTEST_FOUND ON) 74 | endif() 75 | 76 | # Hack to get googletest v1.6 to work with vs2012 77 | 
if( MSVC11 ) 78 | add_definitions( "/D_VARIADIC_MAX=10" ) 79 | endif( ) 80 | -------------------------------------------------------------------------------- /src/scripts/perf/performanceUtility.py: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
# ########################################################################

#This file contains a number of utility functions which are independent of
#any specific domain concept

import functools
import signal
from subprocess import check_output
import errorHandler
from datetime import datetime

def currentUser():
    """Return the first whitespace-separated token printed by the `who` command.

    Any failure (the command failing, or `who` printing nothing so that there
    is no first token) is reported on stdout and then re-raised unchanged.
    """
    try:
        # universal_newlines=True makes check_output return text instead of
        # bytes on Python 3; on Python 2 the result is a str either way.
        return check_output("who", shell = True, universal_newlines = True).split()[0]
    except:
        print('Unhandled Exception at performanceUtility::currentUser()')
        raise

#Details: Generate sorted numbers in radices of 2,3 and 5 upto a given upper limit number
def generate235Radices(maxSize):
    """Return the sorted list of all numbers 2^k * 3^j * 5^i that are <= maxSize.

    1 (all exponents zero) is included whenever maxSize >= 1; the list is
    empty when maxSize < 1.
    """
    sizeList = list()
    pow5 = 1
    while pow5 <= maxSize:
        pow35 = pow5                    # 5^i * 3^j
        while pow35 <= maxSize:
            size = pow35                # 5^i * 3^j * 2^k
            while size <= maxSize:
                sizeList.append(size)
                size *= 2
            pow35 *= 3
        pow5 *= 5
    sizeList.sort()
    return sizeList


def timeout(timeout_time, default):
    """Decorator factory limiting a call to timeout_time seconds using SIGALRM.

    When the alarm fires, the wrapped call is aborted and an
    errorHandler.ApplicationException with code TIME_OUT is raised. Any other
    exception from the wrapped function propagates unchanged.

    timeout_time: whole seconds, passed to signal.alarm().
    default:      accepted for interface compatibility; it is not used.
    """
    def timeout_function(f):
        @functools.wraps(f)
        def f2(*args, **kwargs):
            def timeout_handler(signum, frame):
                raise errorHandler.TimeoutException()

            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(timeout_time) # trigger alarm in timeout_time seconds
            retval = ""
            try:
                retval = f(*args, **kwargs)
            except errorHandler.TimeoutException:
                raise errorHandler.ApplicationException(__file__, errorHandler.TIME_OUT)
            finally:
                # Cancel the pending alarm BEFORE restoring the previous handler,
                # so a late alarm can never be delivered to the old handler.
                signal.alarm(0)
                signal.signal(signal.SIGALRM, old_handler)
            return retval
        return f2
    return timeout_function


def logTxtOutput(fileName, mode, txt):
    """Write txt, preceded by a '------' separator line, to '<fileName>-<YYYY-Mon-DD>.txt'.

    mode is passed straight to open(), so the caller decides between
    appending and overwriting.
    """
    todayFile =  fileName+'-'+datetime.now().strftime('%Y-%b-%d')+'.txt'
    with open(todayFile, mode) as f:
        f.write('------\n'+txt+'\n')

def log(filename, txt):
    """Append txt to filename as one line prefixed with the current ctime() and '# '."""
    with open(filename, 'a') as f:
        f.write(datetime.now().ctime()+'# '+txt+'\n')
15 | * ************************************************************************/ 16 | 17 | 18 | // clfft.lifetime.cpp : Functions that control the lifetime of the FFT library and their supporting functions 19 | // 20 | 21 | #include "stdafx.h" 22 | #include "private.h" 23 | #include "repo.h" 24 | #include "../include/sharedLibrary.h" 25 | #include "../statTimer/statisticalTimer.extern.h" 26 | 27 | // Allow AMD's implementation of FFT's to allocate internal resources 28 | clfftStatus clfftSetup( const clfftSetupData* sData ) 29 | { 30 | // Static data is not thread safe (to create), so we implement a lock to protect instantiation for the first call 31 | // Implemented outside of FFTRepo::getInstance to minimize lock overhead; this is only necessary on first creation 32 | scopedLock sLock( FFTRepo::lockRepo, _T( "FFTRepo::getInstance" ) ); 33 | 34 | // First invocation of this function will allocate the FFTRepo singleton; thereafter the object always exists 35 | FFTRepo& fftRepo = FFTRepo::getInstance( ); 36 | 37 | clfftInitRequestLibNoMemAlloc(); 38 | clfftInitBinaryCache(); 39 | 40 | // Discover and load the timer module if present 41 | fftRepo.timerHandle = LoadSharedLibrary( "lib", "StatTimer", true ); 42 | if( fftRepo.timerHandle ) 43 | { 44 | // Timer module discovered and loaded successfully 45 | // Initialize function pointers to call into the shared module 46 | PFGETSTATTIMER pfGetStatTimer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( fftRepo.timerHandle, "getStatTimer" ) ); 47 | 48 | // Create and initialize our timer class, if the external timer shared library loaded 49 | if( pfGetStatTimer ) 50 | { 51 | fftRepo.pStatTimer = reinterpret_cast< GpuStatTimer* > ( pfGetStatTimer( CLFFT_GPU ) ); 52 | } 53 | } 54 | 55 | // If the client has no setupData, we are done 56 | if( sData == NULL ) 57 | return CLFFT_SUCCESS; 58 | 59 | // Versioning checks commented out until necessary 60 | //// If the major version number between the client and library do 
not match, return mismatch 61 | //if( sData->major > clfftVersionMajor ) 62 | // return CLFFT_VERSION_MISMATCH; 63 | 64 | //// If the minor version number between the client and library do not match, return mismatch 65 | //if( sData->minor > clfftVersionMinor ) 66 | // return CLFFT_VERSION_MISMATCH; 67 | 68 | //// We ignore patch version number for version validation 69 | 70 | fftRepo.setupData = *sData; 71 | 72 | return CLFFT_SUCCESS; 73 | } 74 | 75 | // Allow AMD's implementation of FFT's to destroy internal resources 76 | clfftStatus clfftTeardown( ) 77 | { 78 | FFTRepo& fftRepo = FFTRepo::getInstance( ); 79 | fftRepo.releaseResources( ); 80 | 81 | FreeSharedLibrary( fftRepo.timerHandle ); 82 | 83 | return CLFFT_SUCCESS; 84 | } 85 | 86 | clfftStatus clfftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch ) 87 | { 88 | *major = clfftVersionMajor; 89 | *minor = clfftVersionMinor; 90 | *patch = clfftVersionPatch; 91 | 92 | return CLFFT_SUCCESS; 93 | } 94 | -------------------------------------------------------------------------------- /src/statTimer/statisticalTimer.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | #ifndef _STATISTICALTIMER_H_ 20 | #define _STATISTICALTIMER_H_ 21 | #include 22 | #include 23 | #include 24 | 25 | #include "../include/clFFT.h" 26 | 27 | /** 28 | * \file clfft.StatisticalTimer.h 29 | * \brief A timer class that provides a cross platform timer for use 30 | * in timing code progress with a high degree of accuracy. 31 | * This class is implemented entirely in the header, to facilitate inclusion into multiple 32 | * projects without needing to compile an object file for each project. 33 | */ 34 | 35 | // Definition of a functor object that is passed by reference into the Print statement 36 | // of the timing class. 37 | // Functor object to help with accumulating values in vectors 38 | template 39 | class flopsFunc: public std::function 40 | { 41 | public: 42 | virtual typename std::function::result_type operator( )( ) = 0; 43 | }; 44 | 45 | /** 46 | * \class StatisticalTimer 47 | * \brief Counter that provides a fairly accurate timing mechanism for both 48 | * windows and linux. This timer is used extensively in all the samples. 
49 | */ 50 | class baseStatTimer 51 | { 52 | protected: 53 | /** 54 | * \fn ~baseStatTimer() 55 | * \brief Destructor for StatisticalTimer that cleans up the class 56 | */ 57 | virtual ~baseStatTimer( ){ }; 58 | 59 | // friend std::ostream& operator<<( std::ostream& os, const baseStatTimer& s ); 60 | 61 | public: 62 | /** 63 | * \fn void Start( sTimerID id ) 64 | * \brief Start the timer 65 | * \sa Stop(), Reset() 66 | */ 67 | virtual void Start( size_t id ) = 0; 68 | 69 | /** 70 | * \fn void Stop( size_t id ) 71 | * \brief Stop the timer 72 | * \sa Start(), Reset() 73 | */ 74 | virtual void Stop( size_t id ) = 0; 75 | 76 | /** 77 | * \fn void Reset(void) 78 | * \brief Reset the timer to 0 79 | * \sa Start(), Stop() 80 | */ 81 | virtual void Clear( ) = 0; 82 | 83 | /** 84 | * \fn void Reset(void) 85 | * \brief Reset the timer to 0 86 | * \sa Start(), Stop() 87 | */ 88 | virtual void Reset( ) = 0; 89 | 90 | virtual void Reserve( size_t nEvents, size_t nSamples ) = 0; 91 | 92 | virtual size_t getUniqueID( const std::string& label, cl_uint groupID ) = 0; 93 | 94 | // Calculate the average/mean of data for a given event 95 | virtual void setNormalize( bool norm ) = 0; 96 | 97 | virtual void Print( ) = 0; 98 | 99 | // Using the stdDev of the entire population (of an id), eliminate those samples that fall 100 | // outside some specified multiple of the stdDev. This assumes that the population 101 | // form a gaussian curve. 102 | virtual size_t pruneOutliers( cl_double multiple ) = 0; 103 | virtual std::vector< size_t > pruneOutliers( size_t id , cl_double multiple ) = 0; 104 | }; 105 | 106 | #endif // _STATISTICALTIMER_H_ 107 | -------------------------------------------------------------------------------- /src/FindFFTW.cmake: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | 18 | # Locate the FFTW (http://www.fftw.org/) Framework. 19 | # 20 | # Defines the following variables: 21 | # 22 | # FFTW_FOUND - Found the FFTW framework 23 | # FFTW_INCLUDE_DIRS - Include directories 24 | # 25 | # Also defines the library variables below as normal 26 | # variables. These contain debug/optimized keywords when 27 | # a debugging library is found. 
28 | # 29 | # FFTW_LIBRARIES - libfftw 30 | # 31 | # Accepts the following variables as input: 32 | # 33 | # FFTW_ROOT - (as a CMake or environment variable) 34 | # The root directory of the fftw install prefix 35 | # 36 | # FIND_LIBRARY_USE_LIB64_PATHS - Global property that controls whether 37 | # findFFTW should search for 64bit or 32bit libs 38 | #----------------------------------------------- 39 | # Example Usage: 40 | # 41 | # find_package(FFTW REQUIRED) 42 | # include_directories(${FFTW_INCLUDE_DIRS}) 43 | # 44 | # add_executable(foo foo.cc) 45 | # target_link_libraries(foo ${FFTW_LIBRARIES}) 46 | # 47 | #----------------------------------------------- 48 | 49 | find_path(FFTW_INCLUDE_DIRS 50 | NAMES fftw3.h 51 | HINTS 52 | ${FFTW_ROOT}/include 53 | ${FFTW_ROOT}/api 54 | ${FFTW_ROOT} 55 | $ENV{FFTW_ROOT}/include 56 | $ENV{FFTW_ROOT}/api 57 | ENV FFTW_ROOT 58 | PATHS 59 | /usr/include 60 | /usr/local/include 61 | ) 62 | mark_as_advanced( FFTW_INCLUDE_DIRS ) 63 | 64 | find_library( FFTW_SINGLE_PRECISION_LIBRARIES 65 | NAMES fftw3f libfftw3f-3 66 | HINTS 67 | ${FFTW_ROOT}/lib 68 | ${FFTW_ROOT}/.libs 69 | ${FFTW_ROOT} 70 | $ENV{FFTW_ROOT}/lib 71 | $ENV{FFTW_ROOT}/.libs 72 | ENV FFTW_ROOT 73 | PATHS 74 | /usr/lib 75 | /usr/local/lib 76 | DOC "FFTW dynamic library" 77 | ) 78 | mark_as_advanced( FFTW_SINGLE_PRECISION_LIBRARIES ) 79 | 80 | find_library( FFTW_DOUBLE_PRECISION_LIBRARIES 81 | NAMES fftw3 libfftw3-3 82 | HINTS 83 | ${FFTW_ROOT}/lib 84 | ${FFTW_ROOT}/.libs 85 | ${FFTW_ROOT} 86 | $ENV{FFTW_ROOT}/lib 87 | $ENV{FFTW_ROOT}/.libs 88 | ENV FFTW_ROOT 89 | PATHS 90 | /usr/lib 91 | /usr/local/lib 92 | DOC "FFTW dynamic library" 93 | ) 94 | mark_as_advanced( FFTW_DOUBLE_PRECISION_LIBRARIES ) 95 | 96 | set( FFTW_LIBRARIES ${FFTW_SINGLE_PRECISION_LIBRARIES} ${FFTW_DOUBLE_PRECISION_LIBRARIES} ) 97 | mark_as_advanced( FFTW_LIBRARIES ) 98 | 99 | include( FindPackageHandleStandardArgs ) 100 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( FFTW DEFAULT_MSG FFTW_LIBRARIES 
FFTW_INCLUDE_DIRS ) 101 | 102 | if( NOT FFTW_FOUND ) 103 | message( STATUS "FindFFTW looked for single precision libraries named: fftw3f or libfftw3f-3" ) 104 | message( STATUS "FindFFTW looked for double precision libraries named: fftw3 or libfftw3-3" ) 105 | endif() 106 | -------------------------------------------------------------------------------- /src/FindOpenCL.cmake: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | 18 | # Locate an OpenCL implementation. 19 | # Currently supports AMD APP SDK (http://developer.amd.com/sdks/AMDAPPSDK/Pages/default.aspx/) 20 | # 21 | # Defines the following variables: 22 | # 23 | # OpenCL_FOUND - Found the OPENCL framework 24 | # OPENCL_INCLUDE_DIRS - Include directories 25 | # 26 | # Also defines the library variables below as normal 27 | # variables. These contain debug/optimized keywords when 28 | # a debugging library is found. 
29 | # 30 | # OPENCL_LIBRARIES - libopencl 31 | # 32 | # Accepts the following variables as input: 33 | # 34 | # OPENCL_ROOT - (as a CMake or environment variable) 35 | # The root directory of the OpenCL implementation found 36 | # 37 | # FIND_LIBRARY_USE_LIB64_PATHS - Global property that controls whether findOpenCL should search for 38 | # 64bit or 32bit libs 39 | #----------------------- 40 | # Example Usage: 41 | # 42 | # find_package(OPENCL REQUIRED) 43 | # include_directories(${OPENCL_INCLUDE_DIRS}) 44 | # 45 | # add_executable(foo foo.cc) 46 | # target_link_libraries(foo ${OPENCL_LIBRARIES}) 47 | # 48 | #----------------------- 49 | 50 | find_path(OPENCL_INCLUDE_DIRS 51 | NAMES OpenCL/cl.h CL/cl.h 52 | HINTS 53 | ${OPENCL_ROOT}/include 54 | $ENV{AMDAPPSDKROOT}/include 55 | $ENV{CUDA_PATH}/include 56 | PATHS 57 | /usr/include 58 | /usr/local/include 59 | /usr/local/cuda/include 60 | /opt/cuda/include 61 | DOC "OpenCL header file path" 62 | ) 63 | mark_as_advanced( OPENCL_INCLUDE_DIRS ) 64 | 65 | # Search for 64bit libs if FIND_LIBRARY_USE_LIB64_PATHS is set to true in the global environment, 32bit libs else 66 | get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ) 67 | 68 | if( LIB64 ) 69 | find_library( OPENCL_LIBRARIES 70 | NAMES OpenCL 71 | HINTS 72 | ${OPENCL_ROOT}/lib 73 | $ENV{AMDAPPSDKROOT}/lib 74 | $ENV{CUDA_PATH}/lib 75 | /usr/local/cuda/lib 76 | /opt/cuda/lib 77 | DOC "OpenCL dynamic library path" 78 | PATH_SUFFIXES x86_64 x64 79 | PATHS 80 | /usr/lib 81 | ) 82 | else( ) 83 | find_library( OPENCL_LIBRARIES 84 | NAMES OpenCL 85 | HINTS 86 | ${OPENCL_ROOT}/lib 87 | $ENV{AMDAPPSDKROOT}/lib 88 | $ENV{CUDA_PATH}/lib 89 | /usr/local/cuda/lib 90 | /opt/cuda/lib 91 | DOC "OpenCL dynamic library path" 92 | PATH_SUFFIXES x86 Win32 93 | PATHS 94 | /usr/lib 95 | ) 96 | endif( ) 97 | mark_as_advanced( OPENCL_LIBRARIES ) 98 | 99 | include( FindPackageHandleStandardArgs ) 100 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES 
OPENCL_INCLUDE_DIRS ) 101 | 102 | if( NOT OpenCL_FOUND ) 103 | message( STATUS "FindOpenCL looked for libraries named: OpenCL" ) 104 | endif() 105 | -------------------------------------------------------------------------------- /src/include/sharedLibrary.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/* ************************************************************************
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ************************************************************************/


#pragma once
#ifndef _SHAREDLIBRARY_H_
#define _SHAREDLIBRARY_H_
#include <iostream>
#include <string>

//	_WIN32 is defined for both 32 & 64 bit environments
#if defined( _WIN32 )
	#define WIN32_LEAN_AND_MEAN			// Exclude rarely-used stuff from Windows headers
	// Windows Header Files:
	#include <windows.h>
#else
	#include <dlfcn.h>
#endif

//	Loads a shared library and returns its handle, or NULL if it cannot be loaded.
//	On Windows the file name is libraryName + ".dll"; unixPrefix and quiet are not used there.
//	Elsewhere the file name is unixPrefix + libraryName + ".so" (".dylib" on Apple platforms),
//	and when quiet is false the dlerror( ) text of a failed load is written to std::cerr.
inline void* LoadSharedLibrary( std::string unixPrefix, std::string libraryName, bool quiet )
{
#if defined( _WIN32 )
	( void )unixPrefix;
	( void )quiet;

	libraryName += ".dll";

	//	HMODULE is actually the load address; function returns NULL if it cannot find the shared library
	//	The third argument is a DWORD flag set, so pass 0 rather than the pointer constant NULL
	HMODULE fileHandle	= ::LoadLibraryExA( libraryName.c_str( ), NULL, 0 );
#elif defined( __linux__ ) || defined( __APPLE__ ) || defined( __FreeBSD_kernel__ ) || defined( __FreeBSD__ )
	#if defined( __APPLE__ )
	const char* const suffix	= ".dylib";
	#else
	const char* const suffix	= ".so";
	#endif

	//	std::string is used directly; both name parts arrive as std::string
	const std::string fileName	= unixPrefix + libraryName + suffix;

	void* fileHandle = ::dlopen( fileName.c_str( ), RTLD_NOW );
	if( !quiet && !fileHandle )
	{
		//	dlerror( ) may return NULL, which must not be streamed as a C string
		const char* reason	= ::dlerror( );
		std::cerr << ( reason ? reason : "dlopen failed" ) << std::endl;
	}
#else
	#error "unsupported platform"
#endif

	return fileHandle;
}

//	If the function succeeds, the return value is nonzero.
//	If the function fails, the return value is zero.
//	A NULL handle is not released and yields zero. libHandle is always reset to NULL.
inline int FreeSharedLibrary( void*& libHandle )
{
	int result = 0;

#if defined( _WIN32 )
	if( libHandle != 0 )
		result = ::FreeLibrary( reinterpret_cast< HMODULE >( libHandle ) );
#else
	if( libHandle != 0 )
		result = ( ::dlclose( libHandle ) == 0 );
#endif

	libHandle = NULL;

	return result;
}

//	This takes a shared module handle returned from LoadSharedLibrary, and a text string of a symbol
//	to load from the module, and returns a pointer to that symbol.  If the symbol is not found, NULL
//	is returned.  If the module handle is NULL, NULL is returned.
inline void* LoadFunctionAddr( void* libHandle, std::string funcName )
{
	if( libHandle == NULL )
		return NULL;

#if defined( _WIN32 )
	HMODULE fileHandle = reinterpret_cast< HMODULE >( libHandle );

	void* pFunc	= reinterpret_cast< void* >( ::GetProcAddress( fileHandle, funcName.c_str( ) ) );
#else
	void* pFunc = ::dlsym( libHandle, funcName.c_str( ) );
#endif

	return pFunc;
}

#endif // _SHAREDLIBRARY_H_
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | # List the names of common files to compile across all platforms 18 | set( clFFT.Source transform.cpp 19 | accessors.cpp 20 | plan.cpp 21 | repo.cpp 22 | generator.stockham.cpp 23 | generator.transpose.gcn.cpp 24 | generator.transpose.cpp 25 | action.transpose.cpp 26 | generator.copy.cpp 27 | lifetime.cpp 28 | fft_binary_lookup.cpp 29 | md5sum.c 30 | enqueue.cpp 31 | stdafx.cpp ) 32 | 33 | # Windows only uses dllmain 34 | if( MSVC ) 35 | set( clFFT.Source ${clFFT.Source} dllmain.cpp ) 36 | endif( ) 37 | 38 | set( clFFT.Headers private.h 39 | action.h 40 | repo.h 41 | plan.h 42 | lock.h 43 | mainpage.h 44 | generator.h 45 | generator.stockham.h 46 | generator.transpose.gcn.h 47 | generator.transpose.h 48 | action.transpose.h 49 | fft_binary_lookup.h 50 | md5sum.h 51 | ../include/stdafx.h 52 | ../include/unicode.compatibility.h 53 | ../include/targetver.h 54 | ../include/clAmdFft.h 55 | ../include/clFFT.h ) 56 | 57 | set( clFFT.Files ${clFFT.Source} ${clFFT.Headers} ) 58 | 59 | # For a rainy day, add pre-compiled header support 60 | #if( MSVC ) 61 | # if (USE_MSVC_PCH) 62 | 63 | # set_source_files_properties(LungAnalysisPCH.cxx 64 | # PROPERTIES 65 | # COMPILE_FLAGS "/YcLungAnalysisPCH.h" 66 | # ) 67 | # foreach( src_file ${UPMC_LA_SRCS} ) 68 | # set_source_files_properties( 69 | # ${src_file} 70 | # PROPERTIES 71 | # COMPILE_FLAGS "/YuLungAnalysisPCH.h" 72 | # ) 73 | # endforeach( src_file ${UPMC_LA_SRCS} ) 74 | 75 | # list(APPEND UPMC_LA_SRCS LungAnalysisPCH.cxx) 76 | # list(APPEND UPMC_LA_HDRS LungAnalysisPCH.h) 77 | 78 | # endif(USE_MSVC_PCH) 79 | #endif (MSVC) 80 | 81 | # add_definitions( ${Boost_LIB_DIAGNOSTIC_DEFINITIONS} ) 82 | add_definitions( "/DCLFFT_EXPORTS" ) 83 | 84 | add_library( clFFT ${clFFT.Files} ) 85 | 86 | target_include_directories(clFFT 87 | 
PUBLIC 88 | $ 89 | $ 90 | $ 91 | PRIVATE 92 | ${OPENCL_INCLUDE_DIRS} 93 | ) 94 | 95 | target_link_libraries( clFFT ${OPENCL_LIBRARIES} ${CMAKE_DL_LIBS} ) 96 | 97 | set_target_properties(clFFT PROPERTIES VERSION ${clFFT_VERSION}) 98 | set_target_properties(clFFT PROPERTIES SOVERSION ${clFFT_VERSION_MAJOR}) 99 | set_target_properties(clFFT PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging") 100 | 101 | if(NOT BUILD_SHARED_LIBS) 102 | set_target_properties(clFFT PROPERTIES POSITION_INDEPENDENT_CODE ON) 103 | endif() 104 | 105 | if( CMAKE_COMPILER_IS_GNUCC ) 106 | configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/clFFT.pc.in 107 | ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc @ONLY ) 108 | 109 | install( FILES ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc 110 | DESTINATION lib${CLFFT_SUFFIX_LIB}/pkgconfig ) 111 | endif( ) 112 | 113 | # CPack configuration; include the executable into the package 114 | install( TARGETS clFFT 115 | EXPORT Library 116 | RUNTIME DESTINATION bin${CLFFT_SUFFIX_BIN} 117 | LIBRARY DESTINATION lib${CLFFT_SUFFIX_LIB} 118 | ARCHIVE DESTINATION lib${CLFFT_SUFFIX_LIB}/import 119 | ) 120 | -------------------------------------------------------------------------------- /src/tests/test_constants.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #include "test_constants.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "../client/openCL.misc.h" 24 | 25 | #if defined( _WIN32 ) && defined( _DEBUG ) 26 | #include 27 | #endif 28 | 29 | void handle_exception( const std::exception& except ) 30 | { 31 | std::string error_message(except.what()); 32 | 33 | std::cout << "--- Exception caught ---" << std::endl; 34 | 35 | if( error_message.find("problem too large for device") != std::string::npos || 36 | error_message.find("CLFFT_INVALID_BUFFER_SIZE" ) != std::string::npos || 37 | error_message.find("CLFFT_MEM_OBJECT_ALLOCATION_FAILURE" ) != std::string::npos || 38 | error_message.find("CLFFT_OUT_OF_HOST_MEMORY" ) != std::string::npos || 39 | error_message.find("CLFFT_OUT_OF_RESOURCES" ) != std::string::npos ) 40 | { 41 | std::cout << "Data set is too large for this device -- skipping test" << std::endl; 42 | //TODO put in (this problem size[data + stride]/max problem size/gpu or cpu) specifics 43 | } 44 | else if( error_message.find("system memory allocation failure") != std::string::npos ) 45 | { 46 | std::cout << "Framework was denied enough system memory to support the data set" 47 | << " -- skipping test" << std::endl; 48 | } 49 | else if( error_message.find("CLFFT_DEVICE_NO_DOUBLE") != std::string::npos ) 50 | { 51 | std::cout << "Device in context does not support double precision" 52 | << " -- skipping test" << std::endl; 53 | } 54 | else if( error_message.find("dereference null pointer") != std::string::npos ) 55 | { 56 | std::cout << error_message << std::endl; 57 | FAIL(); 58 | } 59 | else if( error_message.find("in-place transform, unmatched in/out layouts") 60 | != std::string::npos ) 61 | { 62 | std::cout << "Invalid arguments: for an in-place transform, " 63 | <<
"in/output layouts must be the same" << std::endl; 64 | FAIL(); 65 | } 66 | else if( error_message.find("device list is empty at transform") != std::string::npos ) 67 | { 68 | std::cout << "A clfft transform is requested, but the device list is empty" << std::endl; 69 | FAIL(); 70 | } 71 | else 72 | { 73 | std::cout << "Unrecognized exception: " << std::endl; 74 | std::cout << error_message << std::endl; 75 | /* 76 | #if defined( _WIN32 ) && defined( _DEBUG ) 77 | ::DebugBreak( ); 78 | #endif 79 | */ 80 | FAIL(); 81 | } 82 | } 83 | 84 | /*****************************************************/ 85 | size_t max_mem_available_on_cl_device(size_t device_index) { 86 | /* Returns CL_DEVICE_MAX_MEM_ALLOC_SIZE, as reported by clGetDeviceInfo, for entry device_index of the device list returned by initializeCL, or 0 when that index is not valid for the list. A non-zero result is kept in a function-local static and returned by every later call without querying again. */ 87 | static size_t g_device_max_mem_size = 0; 88 | 89 | // this is not thread-safe using globals, it is just quick fix for now, todo proper fix 90 | if (g_device_max_mem_size == 0) 91 | { 92 | std::vector< cl_device_id > device_id; 93 | cl_context tempContext = NULL; 94 | device_id = initializeCL( 95 | g_device_type, 96 | (cl_int)device_index, 97 | g_platform_id, 98 | tempContext, 99 | false 100 | ); 101 | 102 | cl_ulong device_max_to_allocate = 0; 103 | if (device_id.size() == 0 || device_index >= device_id.size()) 104 | { /* index out of range (size() itself is not a valid index): leave the limit at 0 */ 105 | } 106 | else 107 | { 108 | OPENCL_V_THROW(::clGetDeviceInfo(device_id[device_index], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &device_max_to_allocate, NULL), 109 | "Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )"); 110 | } 111 | 112 | cl_command_queue tempQueue = NULL; 113 | cl_event tempEvent = NULL; 114 | ::cleanupCL(&tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent); 115 | 116 | g_device_max_mem_size = static_cast< size_t >(device_max_to_allocate); 117 | } 118 | 119 | return g_device_max_mem_size; 120 | } 121 | -------------------------------------------------------------------------------- /src/tests/copyTestDependencies.cmake.in: -------------------------------------------------------------------------------- 1 | # Customized install script for fftw
test program; analyzes all the shared library dependencies and installs 2 | # the dependencies into the package 3 | include( GetPrerequisites ) 4 | 5 | # message( testLocation ": @testLocation@" ) 6 | 7 | # The Microsoft IDE presents a challenge because the full configuration is not known at cmake time 8 | # This logic allows us to 'substitute' the proper configuration at install time; MATCHES is case-sensitive, so the names are spelled as CMake spells them (RelWithDebInfo) 9 | if( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Debug" ) 10 | string( REPLACE "\$(Configuration)" "Debug" fixedTestLocation "@testLocation@" ) 11 | elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Release" ) 12 | string( REPLACE "\$(Configuration)" "Release" fixedTestLocation "@testLocation@" ) 13 | elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "MinSizeRel" ) 14 | string( REPLACE "\$(Configuration)" "MinSizeRel" fixedTestLocation "@testLocation@" ) 15 | elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "RelWithDebInfo" ) 16 | string( REPLACE "\$(Configuration)" "RelWithDebInfo" fixedTestLocation "@testLocation@" ) 17 | endif( ) 18 | 19 | # message( fixedTestLocation ": ${fixedTestLocation}" ) 20 | # Get the directory that the test executable resides in; this helps get_prerequisites( ) find dependent libraries 21 | get_filename_component( testDir "${fixedTestLocation}" PATH ) 22 | # message( testDir ": ${testDir}" ) 23 | 24 | set( installPath "" ) 25 | if( WIN32 ) 26 | set( installPath "${CMAKE_INSTALL_PREFIX}/bin@CLFFT_SUFFIX_BIN@" ) 27 | else( ) 28 | set( installPath "${CMAKE_INSTALL_PREFIX}/lib@CLFFT_SUFFIX_LIB@" ) 29 | endif( ) 30 | 31 | # Only search for dependencies that have ROOT defined 32 | set( depList "" ) 33 | 34 | #This logic assumes that FindFFTW.cmake has been called 35 | get_filename_component( fftwDirSingle "@FFTW_SINGLE_PRECISION_LIBRARIES@" PATH ) 36 | get_filename_component( fftwDirDouble "@FFTW_DOUBLE_PRECISION_LIBRARIES@" PATH ) 37 | 38 | if( EXISTS "${fftwDirSingle}" ) 39 | list( APPEND depList "${fftwDirSingle}" ) 40 | # message( "fftwDirSingle: ${fftwDirSingle}" )
41 | endif( ) 42 | 43 | string( COMPARE NOTEQUAL "${fftwDirSingle}" "${fftwDirDouble}" fftwDiffDirs ) 44 | if( ${fftwDiffDirs} AND EXISTS "${fftwDirDouble}" ) 45 | list( APPEND depList "${fftwDirDouble}" ) 46 | # message( "fftwDirDouble: ${fftwDirDouble}" ) 47 | endif( ) 48 | 49 | #This logic assumes that FindGTest.cmake has been called 50 | get_filename_component( gtestDir "@GTEST_LIBRARY@" PATH ) 51 | get_filename_component( gtestDirDebug "@GTEST_LIBRARY_DEBUG@" PATH ) 52 | 53 | if( EXISTS "${gtestDir}" ) 54 | list( APPEND depList "${gtestDir}" ) 55 | # message( "gtestDir: ${gtestDir}" ) 56 | endif( ) 57 | 58 | string( COMPARE NOTEQUAL "${gtestDir}" "${gtestDirDebug}" gtestDiffDirs ) 59 | if( ${gtestDiffDirs} AND EXISTS "${gtestDirDebug}" ) 60 | list( APPEND depList "${gtestDirDebug}" ) 61 | # message( "gtestDirDebug: ${gtestDirDebug}" ) 62 | endif( ) 63 | 64 | #This logic assumes that FindOpenCL.cmake has been called 65 | get_filename_component( openclDir "@OPENCL_LIBRARIES@" PATH ) 66 | 67 | if( EXISTS "${openclDir}" ) 68 | list( APPEND depList "${openclDir}" ) 69 | # message( "openclDir: ${openclDir}" ) 70 | endif( ) 71 | 72 | if( EXISTS "${testDir}" ) 73 | list( APPEND depList "${testDir}" ) 74 | endif( ) 75 | 76 | # message( "depList: ${depList}" ) 77 | 78 | # This retrieves a list of shared library dependencies from the target; they are not full path names 79 | # Skip system dependencies and skip recursion 80 | get_prerequisites( ${fixedTestLocation} testDependencies 1 0 "" "${depList}" ) 81 | 82 | # Loop on queried library dependencies and copy them into package 83 | foreach( dep ${testDependencies} ) 84 | # This converts the dependency into a full path 85 | gp_resolve_item( "${fixedTestLocation}" "${dep}" "" "${depList}" dep_test_path ) 86 | 87 | # In linux, the dep_test_path may point to a symbolic link, we also need to copy real file 88 | get_filename_component( dep_realpath "${dep_test_path}" REALPATH ) 89 | get_filename_component( dep_name 
"${dep_test_path}" NAME ) 90 | # message( STATUS "depName: ${dep_name}" ) 91 | # message( STATUS "depFullPath: ${dep_test_path}" ) 92 | # message( STATUS "dep_realpath: ${dep_realpath}" ) 93 | 94 | if( NOT EXISTS ${installPath}/${dep_name} ) 95 | file( INSTALL ${dep_test_path} ${dep_realpath} 96 | USE_SOURCE_PERMISSIONS 97 | DESTINATION ${installPath} 98 | ) 99 | endif( ) 100 | endforeach( ) 101 | -------------------------------------------------------------------------------- /src/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ######################################################################## 16 | 17 | 18 | # List the names of the files to compile 19 | set( clFFT.Test.Source 20 | test_constants.cpp 21 | buffer_memory.cpp 22 | buffer.cpp 23 | unit_test.cpp 24 | accuracy_test_common.cpp 25 | accuracy_test_pow2.cpp 26 | accuracy_test_pow3.cpp 27 | accuracy_test_pow5.cpp 28 | accuracy_test_pow7.cpp 29 | accuracy_test_mixed_radices.cpp 30 | accuracy_test_random.cpp 31 | accuracy_test_directed.cpp 32 | accuracy_test_mixed_callback.cpp 33 | accuracy_test_pow2_precallback.cpp 34 | accuracy_test_pow3_precallback.cpp 35 | accuracy_test_pow5_precallback.cpp 36 | accuracy_test_pow7_precallback.cpp 37 | accuracy_test_postcallback.cpp 38 | gtest_main.cpp 39 | ${PROJECT_SOURCE_DIR}/client/openCL.misc.cpp 40 | c-compliance.c 41 | ) 42 | 43 | set( clFFT.Test.Headers 44 | ${PROJECT_SOURCE_DIR}/include/clFFT.h 45 | ${PROJECT_SOURCE_DIR}/include/unicode.compatibility.h 46 | ${PROJECT_SOURCE_DIR}/include/convenienceFunctions.h 47 | ${PROJECT_SOURCE_DIR}/library/private.h 48 | ${PROJECT_SOURCE_DIR}/client/openCL.misc.h 49 | accuracy_test_common.h 50 | test_constants.h 51 | buffer_memory.h 52 | buffer.h 53 | cl_transform.h 54 | fftw_transform.h 55 | typedefs.h 56 | ) 57 | 58 | set( clFFT.Test.Files ${clFFT.Test.Source} ${clFFT.Test.Headers} ) 59 | 60 | set( LD_PTHREAD "" ) 61 | if( MINGW ) 62 | # -std=c++0x causes g++ to go into strict ANSI mode, which doesn't declare non-standard functions 63 | # Googletest for mingw appears to have a dependency on _stricmp and off64_t 64 | set( CMAKE_CXX_FLAGS "-std=gnu++0x ${CMAKE_CXX_FLAGS}" ) 65 | elseif( CMAKE_COMPILER_IS_GNUCXX ) 66 | set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) 67 | set( LD_PTHREAD "-lpthread" ) 68 | elseif( APPLE ) 69 | set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) 70 | add_definitions( -DGTEST_USE_OWN_TR1_TUPLE ) 71 | endif( ) 72 | 73 | # Include standard OpenCL headers 74 | include_directories( 
${Boost_INCLUDE_DIRS} ${GTEST_INCLUDE_DIRS} ${FFTW_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include ) 75 | 76 | add_executable( Test ${clFFT.Test.Files} ) 77 | 78 | # If the runtime is being built by the project, use it, otherwise link to a runtime library specified in the install prefix 79 | if( CLFFT_BUILD_RUNTIME ) 80 | target_link_libraries( Test clFFT ${Boost_LIBRARIES} ${GTEST_LIBRARIES} ${FFTW_LIBRARIES} ${OPENCL_LIBRARIES} ${LD_PTHREAD} ${CMAKE_DL_LIBS}) 81 | else( ) 82 | # Search for 64bit libs if FIND_LIBRARY_USE_LIB64_PATHS is set to true in the global environment, 32bit libs else 83 | get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ) 84 | if( LIB64 ) 85 | set( clFFT.library "${CMAKE_INSTALL_PREFIX}/lib64" ) 86 | else( ) 87 | set( clFFT.library "${CMAKE_INSTALL_PREFIX}/lib32" ) 88 | endif( ) 89 | 90 | if( WIN32 ) 91 | set( clFFT.library "${clFFT.library}/import/clFFT${CMAKE_STATIC_LIBRARY_SUFFIX}" ) 92 | else( ) 93 | set( clFFT.library "${clFFT.library}/${CMAKE_SHARED_LIBRARY_PREFIX}clFFT${CMAKE_SHARED_LIBRARY_SUFFIX}" ) 94 | endif( ) 95 | 96 | target_link_libraries( Test ${clFFT.library} ${Boost_LIBRARIES} ${GTEST_LIBRARIES} ${FFTW_LIBRARIES} ${OPENCL_LIBRARIES} ${LD_PTHREAD} ${CMAKE_DL_LIBS}) 97 | 98 | endif( ) 99 | 100 | # The following set_target_properties is to get around a bug in cmake 2.8.2, where the suffix after the first '.' 
is dropped 101 | IF( (MSVC_VERSION VERSION_EQUAL 1600) AND (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} VERSION_LESS 2.8.3) ) 102 | message( STATUS "Detected vs2010 and Cmake version less than 2.8.3; renaming Test with underscores " ) 103 | set_target_properties( Test PROPERTIES OUTPUT_NAME "clFFT_Test" ) 104 | ENDIF( ) 105 | 106 | set_target_properties( Test PROPERTIES VERSION ${clFFT_VERSION} ) 107 | set_target_properties( Test PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) 108 | 109 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # Appveyor OS list 2 | # Windows Server 2012 R2 (x64) <== Appveyor default image 3 | # Visual Studio 2015 4 | 5 | # os: expands the build matrix to include multiple os's 6 | os: 7 | - Windows Server 2012 8 | 9 | # compiler: expands the build matrix to include multiple compilers (per os) 10 | platform: 11 | - x64 12 | 13 | configuration: 14 | - Release 15 | 16 | # Only clone the top level commit; don't bother with history 17 | shallow_clone: true 18 | 19 | # environment: specifies additional global variables to define per row in build matrix 20 | environment: 21 | global: 22 | CLFFT_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\nmake\\release" 23 | OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl" 24 | FFTW_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\fftw" 25 | BOOST_ROOT: "C:\\Libraries\\boost_1_58_0" 26 | OPENCL_REGISTRY: "https://www.khronos.org/registry/cl" 27 | 28 | init: 29 | - echo init step 30 | - cmake --version 31 | - C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM% 32 | # Uncomment the following to display Remote Desktop connection details 33 | # - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) 34 | 35 | # We need to create an opencl import 
library that clfft can link against 36 | # Vendor based OpenCL packages are hard to use because of download size, registration requirements 37 | # and unattended installs not well supported 38 | install: 39 | - echo Installing OpenCL 40 | - ps: mkdir $env:OPENCL_ROOT 41 | - ps: pushd $env:OPENCL_ROOT 42 | - ps: $opencl_registry = $env:OPENCL_REGISTRY 43 | # This downloads the source to the Khronos ICD library 44 | - git clone --depth 1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git 45 | - ps: mv ./OpenCL-ICD-Loader/* . 46 | # This downloads all the opencl header files 47 | # The cmake build files expect a directory called inc 48 | - ps: mkdir inc/CL 49 | - git clone --depth 1 https://github.com/KhronosGroup/OpenCL-Headers.git inc/CL 50 | - ps: wget $opencl_registry/api/2.1/cl.hpp -OutFile inc/CL/cl.hpp 51 | # - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom } 52 | # Create the static import lib in a directory called lib, so findopencl() will find it 53 | - ps: mkdir lib 54 | - ps: pushd lib 55 | - cmake -G "NMake Makefiles" .. 
56 | - nmake 57 | - ps: popd 58 | # Switch to OpenCL 1.2 headers 59 | - ps: pushd inc/CL 60 | - git fetch origin opencl12:opencl12 61 | - git checkout opencl12 62 | - ps: popd 63 | # Rename the inc directory to include, so FindOpencl() will find it 64 | - ps: ren inc include 65 | - ps: popd 66 | 67 | - echo Installing FFTW 68 | - ps: mkdir $env:FFTW_ROOT 69 | - ps: pushd $env:FFTW_ROOT 70 | # This downloads the windows 64-bit pre-compiled dlls 71 | - ps: wget ftp://ftp.fftw.org/pub/fftw/fftw-3.3.4-dll64.zip -OutFile fftw-3.3.4-dll64.zip 72 | - ps: 7z x fftw-3.3.4-dll64.zip 73 | # - ps: pushd fftw-3.3.4-dll64 74 | - ps: lib /machine:x64 /def:libfftw3-3.def 75 | - ps: lib /machine:x64 /def:libfftw3f-3.def 76 | - ps: lib /machine:x64 /def:libfftw3l-3.def 77 | 78 | # before_build is used to run configure steps 79 | before_build: 80 | - echo before_build step 81 | # Boost 1.58 is not installed in typical fashion, help FindBoost() find binary libs with BOOST_LIBRARYDIR 82 | - ps: $env:BOOST_LIBRARYDIR = "$env:BOOST_ROOT/lib64-msvc-12.0" 83 | - ps: mkdir $env:CLFFT_ROOT 84 | - ps: pushd $env:CLFFT_ROOT 85 | - cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DBoost_NO_SYSTEM_PATHS=OFF -DOPENCL_ROOT=%OPENCL_ROOT% %APPVEYOR_BUILD_FOLDER%/src 86 | 87 | # build_script invokes the compiler 88 | build_script: 89 | - echo build_script step 90 | - nmake package 91 | 92 | after_build: 93 | - echo after_build step 94 | - ps: ls $env:CLFFT_ROOT 95 | - ps: mv $env:CLFFT_ROOT\*.zip $env:APPVEYOR_BUILD_FOLDER 96 | 97 | # Appveyor will save a copy of the package in its own storage 98 | artifacts: 99 | - path: '*.zip' 100 | name: binary_zip 101 | type: zip 102 | 103 | # on_finish always executes regardless of passed or failed builds 104 | on_finish: 105 | - echo on_finish step 106 | 107 | # Appveyor will push the artifacts it has saved to GitHub 'releases' tab 108 | deploy: 109 | provider: GitHub 110 | auth_token: 111 | secure:
dRXIWJKpU7h2RsHX7RqmyYCtCw+Q9O3X5MArloY6p34GZC1w7bp+jQYTZqbdO7bw 112 | artifact: binary_zip 113 | draft: true 114 | prerelease: true 115 | on: 116 | appveyor_repo_tag: true 117 | 118 | # Uncomment the following to pause the VM and wait for RDP connetion to debug 119 | # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) 120 | -------------------------------------------------------------------------------- /src/examples/fft1d.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #include 18 | #include 19 | 20 | /* No need to explicitly include the OpenCL headers */ 21 | #include 22 | 23 | int main( void ) 24 | { 25 | cl_int err; 26 | cl_platform_id platform = 0; 27 | cl_device_id device = 0; 28 | cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 }; 29 | cl_context ctx = 0; 30 | cl_command_queue queue = 0; 31 | cl_mem bufX; 32 | float *X; 33 | cl_event event = NULL; 34 | int ret = 0; 35 | size_t N = 16; 36 | char platform_name[128]; 37 | char device_name[128]; 38 | 39 | /* FFT library related declarations */ 40 | clfftPlanHandle planHandle; 41 | clfftDim dim = CLFFT_1D; 42 | size_t clLengths[1] = {N}; 43 | 44 | /* Setup OpenCL environment. */ 45 | err = clGetPlatformIDs( 1, &platform, NULL ); 46 | 47 | size_t ret_param_size = 0; 48 | err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 49 | sizeof(platform_name), platform_name, 50 | &ret_param_size); 51 | printf("Platform found: %s\n", platform_name); 52 | 53 | err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL ); 54 | 55 | err = clGetDeviceInfo(device, CL_DEVICE_NAME, 56 | sizeof(device_name), device_name, 57 | &ret_param_size); 58 | printf("Device found on the above platform: %s\n", device_name); 59 | 60 | props[1] = (cl_context_properties)platform; 61 | ctx = clCreateContext( props, 1, &device, NULL, NULL, &err ); 62 | queue = clCreateCommandQueue( ctx, device, 0, &err ); 63 | 64 | /* Setup clFFT. */ 65 | clfftSetupData fftSetup; 66 | err = clfftInitSetupData(&fftSetup); 67 | err = clfftSetup(&fftSetup); 68 | 69 | /* Allocate host & initialize data. */ 70 | /* Only allocation shown for simplicity.
*/ 71 | X = (float *)malloc(N * 2 * sizeof(*X)); 72 | 73 | /* print input array */ 74 | printf("\nPerforming fft on an one dimensional array of size N = %lu\n", (unsigned long)N); 75 | int print_iter = 0; 76 | while(print_iter 23 | #include 24 | #include 25 | 26 | uint32_t float_as_hex( float a ); 27 | uint64_t float_as_hex( double a ); 28 | uint32_t nan_as_hex( float a ); 29 | uint64_t nan_as_hex( double a ); 30 | 31 | /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/ 32 | /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/ 33 | template 34 | class buffer_memory { 35 | private: 36 | // Each array will have a cookie of this size placed before and after it. 37 | // We will initialize the cookies to NaN. 38 | // The user can confirm the cookies after operating on the data to confirm that 39 | // his or her operations are respecting the boundaries of the memory. 40 | size_t cookie_size; 41 | 42 | // requested_floats is the number of floats the user requested originally. 43 | // This never changes, even if the memory size is increased. 44 | size_t requested_floats; 45 | 46 | // With this and cookie_size, we can calculate the size of memory the user can access. 47 | // Note that this will be in units of T (so 4 bytes or 8 bytes depending on float or double). 48 | size_t memory_size_including_cookies; 49 | 50 | // Interesting stuff goes here. 
51 | std::vector memory; 52 | 53 | public: 54 | /*****************************************************/ 55 | // requested_number_of_floats should already take into account any strides, 56 | // batch size, data layout (real, complex, hermitian, interleaved, planar) 57 | buffer_memory( size_t requested_number_of_floats ) 58 | : cookie_size( 4 ) 59 | , requested_floats( requested_number_of_floats ) 60 | , memory_size_including_cookies( requested_number_of_floats + 2 * cookie_size ) 61 | , memory( memory_size_including_cookies ) 62 | { 63 | clear(); 64 | } 65 | 66 | /*****************************************************/ 67 | ~buffer_memory() { 68 | } 69 | 70 | /*****************************************************/ 71 | buffer_memory & operator=( const buffer_memory & that ) 72 | { 73 | this->cookie_size = that.cookie_size; 74 | this->requested_floats = that.requested_floats; 75 | this->memory_size_including_cookies = that.memory_size_including_cookies; 76 | this->memory = that.memory; 77 | 78 | return *this; 79 | } 80 | 81 | /*****************************************************/ 82 | void check_memory_boundaries() { 83 | for( size_t i = 0; i < cookie_size; ++i) { 84 | // we need to compare hex values instead of float values so that we don't get float ambiguities 85 | if( float_as_hex(memory[i]) != nan_as_hex(memory[0]) || 86 | float_as_hex( memory[ memory.size()-1-i ] ) != nan_as_hex(memory[0]) ) 87 | throw std::runtime_error("some operation wrote beyond bounds of memory"); 88 | } 89 | } 90 | 91 | /*****************************************************/ 92 | void clear() 93 | { 94 | memset(&memory[0], ~0x0, memory_size_including_cookies * sizeof(T)); 95 | } 96 | 97 | /*****************************************************/ 98 | // note that this is in units of T (float or double) 99 | // also see: size_in_bytes() 100 | size_t size() 101 | { 102 | return size_in_bytes() / sizeof(T); 103 | } 104 | 105 | /*****************************************************/ 106 | // 
returns the amount of memory currently allocated to the buffer in bytes 107 | size_t size_in_bytes() 108 | { 109 | return (memory_size_including_cookies - 2 * cookie_size) * sizeof(T); 110 | } 111 | 112 | /*****************************************************/ 113 | // N.B. memory will be cleared after this 114 | void increase_allocated_memory( size_t amount ) 115 | { 116 | size_t new_memory_size = memory_size_including_cookies + amount; 117 | 118 | memory.resize( new_memory_size ); 119 | memory_size_including_cookies = new_memory_size; 120 | 121 | clear(); 122 | } 123 | 124 | /*****************************************************/ 125 | T* ptr() 126 | { 127 | return &memory[0] + cookie_size; 128 | } 129 | 130 | /*****************************************************/ 131 | T& operator[]( size_t index ) { 132 | if( index >= size() ) 133 | throw std::runtime_error( "operator[] write out of bounds" ); 134 | return memory[0 + cookie_size + index]; 135 | } 136 | }; 137 | 138 | #endif 139 | -------------------------------------------------------------------------------- /src/callback-client/openCL.misc.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | #if !defined( OPENCL_MISC_H ) 20 | #define OPENCL_MISC_H 21 | #include 22 | #include 23 | #include "unicode.compatibility.h" 24 | 25 | // Creating a portable definition of countof 26 | #if defined( _MSC_VER ) 27 | #define countOf _countof 28 | #else 29 | #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) ) 30 | #endif 31 | 32 | /* 33 | * \brief OpenCL platform and device discovery 34 | * Creates a list of OpenCL platforms 35 | * and their associated devices 36 | */ 37 | int discoverCLPlatforms( cl_device_type deviceType, 38 | std::vector< cl_platform_id >& platforms, 39 | std::vector< std::vector< cl_device_id > >& devices ); 40 | 41 | void prettyPrintCLPlatforms(std::vector< cl_platform_id >& platforms, 42 | std::vector< std::vector< cl_device_id > >& devices); 43 | 44 | /* 45 | * \brief OpenCL related initialization 46 | * Create Context, Device list 47 | * Load CL file, compile, link CL source 48 | * Build program and kernel objects 49 | */ 50 | std::vector< cl_device_id > initializeCL( cl_device_type deviceType, 51 | cl_int deviceId, 52 | cl_int platformId, 53 | cl_context& context, 54 | bool printclInfo ); 55 | 56 | /* 57 | * \brief OpenCL memory buffer creation 58 | */ 59 | int createOpenCLMemoryBuffer( 60 | cl_context& context, 61 | const size_t bufferSizeBytes, 62 | const cl_uint numBuffers, 63 | cl_mem buffer[], 64 | cl_mem_flags accessibility 65 | ); 66 | 67 | /* 68 | * \brief OpenCL command queue creation 69 | * Create Command Queue 70 | * Create OpenCL memory buffer objects 71 | */ 72 | void createOpenCLCommandQueue( cl_context& context, 73 | cl_uint commandQueueFlags, 74 | cl_command_queue& commandQueue, 75 | std::vector< cl_device_id > devices, 76 | const size_t bufferSizeBytesIn, 77 | const cl_uint numBuffersIn, 78 | cl_mem clMemBufferIn[], 79 | const size_t bufferSizeBytesOut, 80 | const cl_uint numBuffersOut, 81 | cl_mem
clMemBufferOut[] ); 82 | 83 | /* 84 | * \brief release OpenCL memory buffer 85 | */ 86 | int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[] ); 87 | 88 | std::string prettyPrintclFFTStatus( const cl_int& status ); 89 | 90 | // This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition. 91 | // If an error occurs, we throw. 92 | // Note: std::runtime_error does not take unicode strings as input, so only strings supported 93 | inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno ) 94 | { 95 | switch( res ) 96 | { 97 | case CL_SUCCESS: /**< No error */ 98 | break; 99 | default: 100 | { 101 | std::stringstream tmp; 102 | tmp << "OPENCL_V_THROWERROR< "; 103 | tmp << prettyPrintclFFTStatus( res ); 104 | tmp << " > ("; 105 | tmp << lineno; 106 | tmp << "): "; 107 | tmp << msg; 108 | std::string errorm (tmp.str()); 109 | std::cout << errorm<< std::endl; 110 | throw std::runtime_error( errorm ); 111 | } 112 | } 113 | 114 | return res; 115 | } 116 | #define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__) 117 | 118 | inline cl_int OpenCL_V_Warn(cl_int res, const std::string& msg, size_t lineno) 119 | { 120 | switch (res) 121 | { 122 | case CL_SUCCESS: /**< No error */ 123 | break; 124 | case CL_DEVICE_NOT_FOUND: 125 | // This happens all the time when discovering the OpenCL capabilities of the system, 126 | // so do nothing here. 127 | break; 128 | default: 129 | { 130 | std::stringstream tmp; 131 | tmp << "OPENCL_V_WARN< "; 132 | tmp << prettyPrintclFFTStatus(res); 133 | tmp << " > ("; 134 | tmp << lineno; 135 | tmp << "): "; 136 | tmp << msg; 137 | std::string errorm(tmp.str()); 138 | std::cout << errorm << std::endl; 139 | } 140 | } 141 | 142 | return res; 143 | } 144 | #define OPENCL_V_WARN(_status,_message) OpenCL_V_Warn (_status, _message, __LINE__); 145 | 146 | /* 147 | * \brief Release OpenCL resources (Context, Memory etc.) 
148 | */ 149 | int cleanupCL( cl_context* context, cl_command_queue* commandQueue, const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[]); 150 | 151 | #endif 152 | -------------------------------------------------------------------------------- /src/client/openCL.misc.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #ifdef _MSC_VER 18 | #pragma warning(disable : 4996) 19 | #endif 20 | 21 | #pragma once 22 | #if !defined( OPENCL_MISC_H ) 23 | #define OPENCL_MISC_H 24 | #include 25 | #include 26 | #include "unicode.compatibility.h" 27 | 28 | // Creating a portable defintion of countof 29 | #if defined( _MSC_VER ) 30 | #define countOf _countof 31 | #else 32 | #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) ) 33 | #endif 34 | 35 | /* 36 | * \brief OpenCL platform and device discovery 37 | * Creates a list of OpenCL platforms 38 | * and their associated devices 39 | */ 40 | int discoverCLPlatforms( cl_device_type deviceType, 41 | std::vector< cl_platform_id >& platforms, 42 | std::vector< std::vector< cl_device_id > >& devices ); 43 | 44 | void prettyPrintCLPlatforms(std::vector< cl_platform_id >& platforms, 45 | std::vector< std::vector< cl_device_id > >& devices); 46 | 47 | /* 48 | * \brief OpenCL related initialization 49 | * Create Context, Device list 50 | * Load CL file, compile, link CL source 51 | * Build program and kernel objects 52 | */ 53 | std::vector< cl_device_id > initializeCL( cl_device_type deviceType, 54 | cl_int deviceId, 55 | cl_int platformId, 56 | cl_context& context, 57 | bool printclInfo ); 58 | 59 | /* 60 | * \brief OpenCL memory buffer creation 61 | */ 62 | int createOpenCLMemoryBuffer( 63 | cl_context& context, 64 | const size_t bufferSizeBytes, 65 | const cl_uint numBuffers, 66 | cl_mem buffer[], 67 | cl_mem_flags accessibility 68 | ); 69 | 70 | /* 71 | * \brief OpenCL command queue creation 72 | * Create Command Queue 73 | * Create OpenCL memory buffer objects 74 | */ 75 | void createOpenCLCommandQueue( cl_context& context, 76 | cl_uint commandQueueFlags, 77 | cl_command_queue& commandQueue, 78 | std::vector< cl_device_id > devices, 79 | const size_t bufferSizeBytesIn, 80 | const cl_uint numBuffersIn, 81 | cl_mem clMemBufferIn[], 82 | const size_t 
bufferSizeBytesOut, 83 | const cl_uint numBuffersOut, 84 | cl_mem clMemBufferOut[] ); 85 | 86 | /* 87 | * \brief release OpenCL memory buffer 88 | */ 89 | int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[] ); 90 | 91 | std::string prettyPrintclFFTStatus( const cl_int& status ); 92 | 93 | // This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition. 94 | // If an error occurs, we throw. 95 | // Note: std::runtime_error does not take unicode strings as input, so only strings supported 96 | inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno ) 97 | { 98 | switch( res ) 99 | { 100 | case CL_SUCCESS: /**< No error */ 101 | break; 102 | default: 103 | { 104 | std::stringstream tmp; 105 | tmp << "OPENCL_V_THROWERROR< "; 106 | tmp << prettyPrintclFFTStatus( res ); 107 | tmp << " > ("; 108 | tmp << lineno; 109 | tmp << "): "; 110 | tmp << msg; 111 | std::string errorm (tmp.str()); 112 | std::cout << errorm<< std::endl; 113 | throw std::runtime_error( errorm ); 114 | } 115 | } 116 | 117 | return res; 118 | } 119 | #define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__) 120 | 121 | inline cl_int OpenCL_V_Warn(cl_int res, const std::string& msg, size_t lineno) 122 | { 123 | switch (res) 124 | { 125 | case CL_SUCCESS: /**< No error */ 126 | break; 127 | case CL_DEVICE_NOT_FOUND: 128 | // This happens all the time when discovering the OpenCL capabilities of the system, 129 | // so do nothing here. 
130 | break; 131 | default: 132 | { 133 | std::stringstream tmp; 134 | tmp << "OPENCL_V_WARN< "; 135 | tmp << prettyPrintclFFTStatus(res); 136 | tmp << " > ("; 137 | tmp << lineno; 138 | tmp << "): "; 139 | tmp << msg; 140 | std::string errorm(tmp.str()); 141 | std::cout << errorm << std::endl; 142 | } 143 | } 144 | 145 | return res; 146 | } 147 | #define OPENCL_V_WARN(_status,_message) OpenCL_V_Warn (_status, _message, __LINE__); 148 | 149 | /* 150 | * \brief Release OpenCL resources (Context, Memory etc.) 151 | */ 152 | int cleanupCL( cl_context* context, cl_command_queue* commandQueue, const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent ); 153 | 154 | #endif 155 | -------------------------------------------------------------------------------- /src/examples/fft2d.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #include 18 | #include 19 | 20 | /* No need to explicitely include the OpenCL headers */ 21 | #include 22 | 23 | int main( void ) 24 | { 25 | cl_int err; 26 | cl_platform_id platform = 0; 27 | cl_device_id device = 0; 28 | cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 }; 29 | cl_context ctx = 0; 30 | cl_command_queue queue = 0; 31 | cl_mem bufX; 32 | float *X; 33 | cl_event event = NULL; 34 | int ret = 0; 35 | 36 | const size_t N0 = 8, N1 = 8; 37 | char platform_name[128]; 38 | char device_name[128]; 39 | 40 | /* FFT library realted declarations */ 41 | clfftPlanHandle planHandle; 42 | clfftDim dim = CLFFT_2D; 43 | size_t clLengths[2] = {N0, N1}; 44 | 45 | /* Setup OpenCL environment. */ 46 | err = clGetPlatformIDs( 1, &platform, NULL ); 47 | 48 | size_t ret_param_size = 0; 49 | err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 50 | sizeof(platform_name), platform_name, 51 | &ret_param_size); 52 | printf("Platform found: %s\n", platform_name); 53 | 54 | err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL ); 55 | 56 | err = clGetDeviceInfo(device, CL_DEVICE_NAME, 57 | sizeof(device_name), device_name, 58 | &ret_param_size); 59 | printf("Device found on the above platform: %s\n", device_name); 60 | 61 | props[1] = (cl_context_properties)platform; 62 | ctx = clCreateContext( props, 1, &device, NULL, NULL, &err ); 63 | queue = clCreateCommandQueue( ctx, device, 0, &err ); 64 | 65 | /* Setup clFFT. */ 66 | clfftSetupData fftSetup; 67 | err = clfftInitSetupData(&fftSetup); 68 | err = clfftSetup(&fftSetup); 69 | 70 | /* Allocate host & initialize data. */ 71 | /* Only allocation shown for simplicity. 
*/ 72 | size_t buffer_size = N0 * N1 * 2 * sizeof(*X); 73 | X = (float *)malloc(buffer_size); 74 | 75 | /* print input array just using the 76 | * indices to fill the array with data */ 77 | printf("\nPerforming fft on an two dimensional array of size N0 x N1 : %lu x %lu\n", (unsigned long)N0, (unsigned long)N1); 78 | size_t i, j; 79 | 80 | i = j = 0; 81 | for (i=0; i 18 | #include 19 | 20 | /* No need to explicitely include the OpenCL headers */ 21 | #include 22 | 23 | int main( void ) 24 | { 25 | cl_int err; 26 | cl_platform_id platform = 0; 27 | cl_device_id device = 0; 28 | cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 }; 29 | cl_context ctx = 0; 30 | cl_command_queue queue = 0; 31 | cl_mem bufX; 32 | float *X; 33 | cl_event event = NULL; 34 | int ret = 0; 35 | 36 | const size_t N0 = 4, N1 = 4, N2 = 4; 37 | char platform_name[128]; 38 | char device_name[128]; 39 | 40 | /* FFT library realted declarations */ 41 | clfftPlanHandle planHandle; 42 | clfftDim dim = CLFFT_3D; 43 | size_t clLengths[3] = {N0, N1, N2}; 44 | 45 | /* Setup OpenCL environment. */ 46 | err = clGetPlatformIDs( 1, &platform, NULL ); 47 | 48 | size_t ret_param_size = 0; 49 | err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 50 | sizeof(platform_name), platform_name, 51 | &ret_param_size); 52 | printf("Platform found: %s\n", platform_name); 53 | 54 | err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL ); 55 | 56 | err = clGetDeviceInfo(device, CL_DEVICE_NAME, 57 | sizeof(device_name), device_name, 58 | &ret_param_size); 59 | printf("Device found on the above platform: %s\n", device_name); 60 | 61 | props[1] = (cl_context_properties)platform; 62 | ctx = clCreateContext( props, 1, &device, NULL, NULL, &err ); 63 | queue = clCreateCommandQueue( ctx, device, 0, &err ); 64 | 65 | /* Setup clFFT. */ 66 | clfftSetupData fftSetup; 67 | err = clfftInitSetupData(&fftSetup); 68 | err = clfftSetup(&fftSetup); 69 | 70 | /* Allocate host & initialize data. 
*/ 71 | /* Only allocation shown for simplicity. */ 72 | size_t buffer_size = N0 * N1 * N2 * 2 * sizeof(*X); 73 | X = (float *)malloc(buffer_size); 74 | 75 | /* print input array just using the 76 | * indices to fill the array with data */ 77 | printf("\nPerforming fft on an two dimensional array of size N0 x N1 x N2 : %lu x %lu x %lu\n", (unsigned long)N0, (unsigned long)N1, (unsigned long)N2); 78 | size_t i, j, k; 79 | i = j = k = 0; 80 | for (i=0; i 25 | #include "stdafx.h" 26 | #include "../statTimer/statisticalTimer.extern.h" 27 | #include "../include/unicode.compatibility.h" 28 | 29 | #include 30 | 31 | typedef unsigned char uint24_t[3]; 32 | 33 | #define CALLBCKSTR(...) #__VA_ARGS__ 34 | #define STRINGIFY(...) CALLBCKSTR(__VA_ARGS__) 35 | 36 | #define BATCH_LENGTH 1024 37 | 38 | #define ConvertToFloat typedef unsigned char uint24_t[3]; \n \ 39 | float convert24To32bit(__global void* in, uint inoffset, __global void* userdata) \n \ 40 | { \n \ 41 | __global uint24_t* inData = (__global uint24_t*)in; \n \ 42 | float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \ 43 | return val; \n \ 44 | } 45 | 46 | #define ConvertToFloat_KERNEL typedef unsigned char uint24_t[3]; \n \ 47 | __kernel void convert24To32bit (__global void *input, __global void *output) \n \ 48 | { \n \ 49 | uint inoffset = get_global_id(0); \n \ 50 | __global uint24_t* inData = (__global uint24_t*)input; \n \ 51 | float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \ 52 | *((__global float*)output + inoffset) = val; \n \ 53 | } \n 54 | 55 | #define MagnitudeExtraction void extractMagnitude(__global void *output, uint outoffset, __global void *userdata, float2 fftoutput) \n \ 56 | { \n \ 57 | float magnitude = sqrt(fftoutput.x * fftoutput.x + fftoutput.y * fftoutput.y); \n \ 58 | *((__global float*)output + outoffset) = magnitude; \n \ 59 | } \n 60 | 61 | #define MagnitudeExtraction_KERNEL __kernel void 
extractMagnitude(__global float2 *output, __global float *magoutput) \n \ 62 | { \n \ 63 | uint outoffset = get_global_id(0); \n \ 64 | float magnitude = sqrt(output[outoffset].x * output[outoffset].x + output[outoffset].y * output[outoffset].y); \n \ 65 | *(magoutput + outoffset) = magnitude; \n \ 66 | } \n 67 | 68 | template < typename T > 69 | void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 70 | clfftDim dim, clfftPrecision precision, cl_uint profile_count); 71 | 72 | template < typename T > 73 | void runR2C_FFT_WithCallback(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue, 74 | size_t* inlengths, clfftDim dim, clfftPrecision precision, 75 | size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count); 76 | 77 | template < typename T > 78 | void runR2C_FFT_PreAndPostprocessKernel(std::auto_ptr< clfftSetupData > setupData, cl_context context, 79 | cl_command_queue commandQueue, cl_device_id device_id, 80 | size_t* inlengths, clfftDim dim, clfftPrecision precision, 81 | size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count); 82 | 83 | fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size, 84 | clfftLayout in_layout, clfftDim dim); 85 | 86 | template < typename T1, typename T2> 87 | bool compare(T1 *refData, std::vector< T2 > data, 88 | size_t length, const float epsilon = 1e-6f); 89 | 90 | #ifdef WIN32 91 | 92 | struct Timer 93 | { 94 | LARGE_INTEGER start, stop, freq; 95 | 96 | public: 97 | Timer() { QueryPerformanceFrequency( &freq ); } 98 | 99 | void Start() { QueryPerformanceCounter(&start); } 100 | double Sample() 101 | { 102 | QueryPerformanceCounter ( &stop ); 103 | double time = (double)(stop.QuadPart-start.QuadPart) / (double)(freq.QuadPart); 104 | return time; 105 | } 106 | }; 107 | 108 | #elif defined(__APPLE__) || defined(__MACOSX) 109 | 110 | #include 111 | #include 112 | 113 | 
struct Timer 114 | { 115 | clock_serv_t clock; 116 | mach_timespec_t start, end; 117 | 118 | public: 119 | Timer() { host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &clock); } 120 | ~Timer() { mach_port_deallocate(mach_task_self(), clock); } 121 | 122 | void Start() { clock_get_time(clock, &start); } 123 | double Sample() 124 | { 125 | clock_get_time(clock, &end); 126 | double time = 1000000000L * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; 127 | return time * 1E-9; 128 | } 129 | }; 130 | 131 | #else 132 | 133 | #include 134 | #include 135 | 136 | struct Timer 137 | { 138 | struct timespec start, end; 139 | 140 | public: 141 | Timer() { } 142 | 143 | void Start() { clock_gettime(CLOCK_MONOTONIC, &start); } 144 | double Sample() 145 | { 146 | clock_gettime(CLOCK_MONOTONIC, &end); 147 | double time = 1000000000L * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; 148 | return time * 1E-9; 149 | } 150 | }; 151 | 152 | #endif 153 | 154 | #endif 155 | -------------------------------------------------------------------------------- /ReleaseNotes.txt: -------------------------------------------------------------------------------- 1 | 2 | clFFT Contents 3 | -------------- 4 | The clFFT library is an open source OpenCL library 5 | implementation of discrete Fast Fourier Transforms. 
It: 6 | 7 | * Provides a fast and accurate platform for calculating 8 | discrete FFTs 9 | * Supports 1D, 2D, and 3D transforms with a batch size 10 | that can be greater than 1 11 | * Supports complex and real transforms 12 | * Supports planar (real and complex components in 13 | separate arrays) and interleaved (real and complex 14 | components as a pair contiguous in memory) formats 15 | * Supports dimension lengths that can be any mix of 16 | powers of 2, 3, 5, 7, 11 and 13 17 | * Supports single and double precision floating-point 18 | formats 19 | * Supports in-place or out-of-place transforms 20 | 21 | 22 | clFFT - Release Notes - version 2.12.2 23 | -------------------------------------- 24 | 25 | This is a patch update release to v2.12.1. 26 | It has the following: 27 | 28 | * Fix for thread safety issues 29 | * Fix for failures in certain 2D transforms 30 | * Fix for failures on POCL runtime 31 | * Other minor improvements 32 | 33 | Details on the issues are available on GitHub. 34 | 35 | 36 | clFFT - Release Notes - version 2.12.1 37 | -------------------------------------- 38 | 39 | New features of this release: 40 | 41 | * Fixes for bugs in decomposition logic of very large problem sizes 42 | with environment flag CLFFT_REQUEST_LIB_NOMEMALLOC=1 43 | * Validation of sizes that are powers of 2,3,5 and combinations 44 | thereof up to a maximum size close to 25GB on S9170 45 | 46 | Driver notes: 47 | 48 | * This library version has been tested with Catalyst 49 | Pro driver version 15.302 on Firepro W9100 & S9170. 50 | 51 | 52 | clFFT - Release Notes - version 2.12.0 53 | -------------------------------------- 54 | 55 | New features of this release: 56 | 57 | * Addition of radix 11 & 13 enables support for powers of 11 & 13 size 58 | transforms 59 | * Support for 1D large size transforms with no extra memory allocation 60 | requirement with environment flag CLFFT_REQUEST_LIB_NOMEMALLOC=1 61 | for complex FFTs of powers of 3,5,10 sizes.
Note that support for 62 | power-of-2 size has been available since version 2.10. 63 | 64 | Driver notes: 65 | 66 | * This library version has been tested with Catalyst 67 | Pro driver version 15.201 on Firepro W9100. 68 | 69 | 70 | clFFT - Release Notes - version 2.10.2 71 | -------------------------------------- 72 | 73 | This is a patch update release to v2.10.1. 74 | It has the following: 75 | 76 | * Fixes for accuracy/stability issues noted in large size real FFTs 77 | 78 | 79 | clFFT - Release Notes - version 2.10.1 80 | -------------------------------------- 81 | 82 | This is a patch update release to v2.10.0. 83 | It has the following: 84 | 85 | * Performance fixes for slowdown observed vs v2.8. Specifically, 86 | 2D transforms for certain sizes showed significant slowdown. 87 | This issue has been fixed in this release. The fix also makes the 88 | performance in general better than v2.8. 89 | * Code reorganization in transposes and performance improvements. 90 | There is a modest performance improvement with inplace transforms 91 | for large power-of-2 sizes. 
92 | * Several minor enhancements and bug fixes 93 | 94 | 95 | clFFT - Release Notes - version 2.10.0 96 | -------------------------------------- 97 | 98 | New features of this release: 99 | 100 | * Post-callback feature that enables custom post-processing 101 | of output data directly by the library with user callback function 102 | * Support for in-place transposes for power-of-2 sizes enables really 103 | large 1D transforms as well as supporting no additional memory 104 | allocation, by library, for a range of problem sizes 105 | 106 | 107 | clFFT - Release Notes - version 2.8.0 108 | -------------------------------------- 109 | 110 | New features of this release: 111 | 112 | * Support for power-of-7 size transforms 113 | * Pre-callback feature that enables custom pre-processing 114 | of input data directly by the library with user callback function 115 | * Support for 1D large size transforms with no extra memory allocation 116 | requirement for certain sizes 117 | 118 | Driver notes: 119 | 120 | * This library version has been tested with Catalyst 121 | Pro driver version 14.502 on Firepro W9100. 122 | 123 | 124 | clFFT - Release Notes - version 2.6.1 125 | -------------------------------------- 126 | 127 | This is a patch update release to v2.6.0. It has the following. 128 | 129 | * Fix for client application crash with Linux 14.502 driver 130 | * Performance fixes for real forward and backward transform when 131 | enabling ECC 132 | * Performance fixes for complex transforms over a small range in 133 | power-of-2 sizes 134 | * Improvement in API timing in client 135 | * Several minor enhancements and bug fixes 136 | 137 | Driver notes: 138 | 139 | * This library version has been tested with Catalyst 140 | Pro driver version 14.502 on Firepro W9100. 
141 | 142 | 143 | clFFT - Release Notes - version 2.6.0 144 | -------------------------------------- 145 | 146 | New features of this release: 147 | 148 | * Significant uplift of 1D real forward transform 149 | performance for power-of-2 sizes 150 | * Significant uplift of large 2D/3D real transform 151 | performance for power-of-2 sizes 152 | 153 | Known issues: 154 | 155 | * Size limit on 1D real transforms remain at 2^24 156 | for single precision and 2^22 for double 157 | precision 158 | 159 | Driver notes: 160 | 161 | * This library version has been tested with Catalyst 162 | Pro driver version 14.301 on Firepro W9100. 163 | 164 | 165 | clFFT - Release Notes - version 2.4.0 166 | -------------------------------------- 167 | 168 | New features of this release: 169 | 170 | * Significant uplift of 1D complex transform performance 171 | * 1D large size limit relaxation for complex transforms 172 | * 2D/3D size limit relaxation on real and complex transforms 173 | * Binary caching feature 174 | * Several minor fixes and improvements 175 | -------------------------------------------------------------------------------- /src/statTimer/statisticalTimer.CPU.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | #ifndef _STATISTICALTIMER_CPU_H_ 20 | #define _STATISTICALTIMER_CPU_H_ 21 | #include 22 | #include 23 | #include 24 | #ifdef __FreeBSD__ 25 | #include 26 | #endif 27 | #include "statisticalTimer.h" 28 | 29 | /** 30 | * \file clfft.StatisticalTimer.CPU.h 31 | * \brief A timer class that provides a cross platform timer for use 32 | * in timing code progress with a high degree of accuracy. 33 | * This class is implemented entirely in the header, to facilitate inclusion into multiple 34 | * projects without needing to compile an object file for each project. 35 | */ 36 | 37 | class CpuStatTimer : public baseStatTimer 38 | { 39 | // Private typedefs 40 | typedef std::vector< cl_ulong > clkVector; 41 | typedef std::pair< std::string, cl_uint > labelPair; 42 | typedef std::vector< labelPair > stringVector; 43 | 44 | // In order to calculate statistics , we need to keep a history of our timings 45 | stringVector labelID; 46 | clkVector clkStart; 47 | std::vector< clkVector > clkTicks; 48 | 49 | // How many clockticks in a second 50 | cl_ulong clkFrequency; 51 | 52 | // For linux; the resolution of a high-precision timer 53 | // Mingw32 does not define timespec; can use windows timers 54 | #if !defined( _WIN32 ) 55 | timespec res; 56 | #endif 57 | 58 | // Saved sizes for our vectors, used in Reset() to reallocate vectors 59 | clkVector::size_type nEvents, nSamples; 60 | 61 | // This setting controls whether the Timer should convert samples into time by dividing by the 62 | // clock frequency 63 | bool normalize; 64 | 65 | /** 66 | * \fn StatisticalTimer() 67 | * \brief Constructor for StatisticalTimer that initializes the class 68 | * This is private so that user code cannot create their own instantiation. Instead, you 69 | * must go through getInstance( ) to get a reference to the class. 
70 | */ 71 | CpuStatTimer( ); 72 | 73 | /** 74 | * \fn ~StatisticalTimer() 75 | * \brief Destructor for StatisticalTimer that cleans up the class 76 | */ 77 | ~CpuStatTimer( ); 78 | 79 | /** 80 | * \fn StatisticalTimer(const StatisticalTimer& ) 81 | * \brief Copy constructors do not make sense for a singleton, disallow copies 82 | */ 83 | CpuStatTimer( const CpuStatTimer& ); 84 | 85 | /** 86 | * \fn operator=( const StatisticalTimer& ) 87 | * \brief Assignment operator does not make sense for a singleton, disallow assignments 88 | */ 89 | CpuStatTimer& operator=( const CpuStatTimer& ); 90 | 91 | friend std::ostream& operator<<( std::ostream& os, const CpuStatTimer& s ); 92 | 93 | /** 94 | * \fn void AddSample( const size_t id, const cl_ulong n ) 95 | * \brief Explicitely add a timing sample into the class 96 | */ 97 | void AddSample( const size_t id, const cl_ulong n ); 98 | 99 | // Calculate the average/mean of data for a given event 100 | cl_double getMean( size_t id ) const; 101 | 102 | // Calculate the variance of data for a given event 103 | // Variance - average of the squared differences between data points and the mean 104 | cl_double getVariance( size_t id ) const; 105 | 106 | // Sqrt of variance, also in units of the original data 107 | cl_double getStdDev( size_t id ) const; 108 | 109 | /** 110 | * \fn double getAverageTime(size_t id) const 111 | * \return Return the arithmetic mean of all the samples that have been saved 112 | */ 113 | cl_double getAverageTime( size_t id ) const; 114 | 115 | /** 116 | * \fn double getMinimumTime(size_t id) const 117 | * \return Return the arithmetic min of all the samples that have been saved 118 | */ 119 | cl_double getMinimumTime( size_t id ) const; 120 | 121 | public: 122 | /** 123 | * \fn getInstance() 124 | * \brief This returns a reference to the singleton timer. Guarantees only 1 timer class is ever 125 | * instantiated within a compilable executable. 
126 | */ 127 | static CpuStatTimer& getInstance( ); 128 | 129 | /** 130 | * \fn void Start( size_t id ) 131 | * \brief Start the timer 132 | * \sa Stop(), Reset() 133 | */ 134 | void Start( size_t id ); 135 | 136 | /** 137 | * \fn void Stop( size_t id ) 138 | * \brief Stop the timer 139 | * \sa Start(), Reset() 140 | */ 141 | void Stop( size_t id ); 142 | 143 | /** 144 | * \fn void Clear(void) 145 | * \brief Clear the timer (NOTE(review): how this differs from Reset() is not visible in this header; confirm against the implementation) 146 | * \sa Start(), Stop(), Reset() 147 | */ 148 | void Clear( ); 149 | 150 | /** 151 | * \fn void Reset(void) 152 | * \brief Reset the timer to 0 153 | * \sa Start(), Stop() 154 | */ 155 | void Reset( ); 156 | 157 | void Reserve( size_t nEvents, size_t nSamples ); 158 | 159 | size_t getUniqueID( const std::string& label, cl_uint groupID ); 160 | 161 | // Presumably sets the 'normalize' flag (whether samples are converted into time by dividing by the clock frequency); verify against the implementation 162 | void setNormalize( bool norm ); 163 | 164 | void Print( ); 165 | 166 | // Using the stdDev of the entire population (of an id), eliminate those samples that fall 167 | // outside some specified multiple of the stdDev. This assumes that the population 168 | // forms a Gaussian curve. 169 | size_t pruneOutliers( cl_double multiple ); 170 | std::vector< size_t > pruneOutliers( size_t id , cl_double multiple ); 171 | }; 172 | 173 | #endif // _STATISTICALTIMER_CPU_H_ 174 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributor guidelines 2 | 3 | Contributing code to this project is intended to be lightweight and intuitive to users familiar with GitHub to actively encourage contributions, but a process is documented and should be followed to prevent chaos, confusion and despair. 4 | 5 | ## The mechanics of contributing code 6 | Firstly, in order to contribute code to this project, a contributor must have a valid and current [GitHub account](https://help.github.com/articles/set-up-git) available to use.
Given an account, 7 | * The potential contributor forks this project into his/her account following the traditional [forking](https://help.github.com/articles/fork-a-repo) model native to GitHub 8 | * After forking, the contributor [clones their repository](https://help.github.com/articles/create-a-repo) locally on their machine 9 | * Code is developed and checked into the contributor's repository. These commits are eventually pushed upstream to their GitHub repository 10 | * The contributor then issues a [pull-request](https://help.github.com/articles/using-pull-requests) against the **develop** branch of this repository, following the [git flow](http://nvie.com/posts/a-successful-git-branching-model/) workflow, which is well suited for working with GitHub 11 | * A [git extension](https://github.com/nvie/gitflow) has been developed to ease the use of the 'git flow' methodology, but requires manual installation by the user. Refer to the project's wiki 12 | 13 | At this point, the repository maintainers will be notified by GitHub that a 'pull request' exists pending against their repository. A code review should be completed within a few days, depending on the scope of submitted code, and the code will either be accepted, rejected or commented on for extra feedback. 14 | 15 | ## Code submission guidelines 16 | We want to ensure that the project code base maintains a level of quality over time, such that future contributors find it as easy to jump into the code as hopefully it is today. As such, pull requests should 17 | * remember that clMath is a project licensed under the [Apache License, Version 2.0]( http://www.apache.org/licenses/LICENSE-2.0 ). If you are not already familiar, please review the license before issuing a pull request. We intend this project to be open to external contributors, and encourage developers to contribute code back that they believe will provide value to the overall community.
We will interpret an explicit 'pull request' back to this repository as an implicit acknowledgement from the contributor that they wish to share the code with the community under the terms of the Apache license v2.0. 18 | * follow the [code style guidelines]( ) of the project as posted to the project wiki. Unfortunately, there were no unifying code guidelines defined between the BLAS & FFT projects, but code submissions should not mix styles within an individual file. We have since defined and posted a code style guideline for the projects and we expect the code to slowly transition to the new 19 | guidelines over time 20 | * separate check-ins that modify a file's style from the ones that add/change/delete code. 21 | * target the **develop** branch in the repository 22 | * ensure that the [code properly builds]( https://github.com/clMathLibraries/clFFT/wiki/Build ) 23 | * not break existing test cases 24 | * we encourage contributors to [run all tests]( https://github.com/clMathLibraries/clFFT/wiki/Testing ) on their end before the pull-request 25 | * if possible, upload the test results associated with the pull request to a personal [gist repository]( https://gist.github.com/ ) and insert a link to the test results in the pull request so that collaborators can browse the results 26 | * if no test results are provided with the pull request, official collaborators will run the test suite on their test machines against the patch before we will accept the pull-request 27 | * if we detect failing test cases, we will request that the code associated with the pull request be fixed before the pull request will be merged 28 | * if new functionality is introduced with the pull request, sufficient test cases should be added to verify the new functionality is correct 29 | * new tests should integrate with the existing [googletest framework]( https://code.google.com/p/googletest/wiki/Primer ) located in the src/tests directory of the repo 30 | * if the collaborators feel the
new tests do not provide sufficient coverage, feedback on the pull request will be left with suggestions on how to improve the tests before the pull request will be merged 31 | 32 | Pull requests will be reviewed by the set of collaborators that are assigned for the repository. Pull requests may be accepted, declined or a conversation may start on the pull request thread with feedback. If the pull request is trivial and all the submission guidelines defined above are honored, the pull request may be accepted without delay. If the pull request is good, but the guidelines defined above are not followed, the collaborators may leave feedback on the pull request and engage in a conversation with the contributor about what they can do to improve the pull request. At any time, collaborators may decline a pull request if they decide the contribution is not appropriate for the project, or the feedback from reviewers on a pull request is not being addressed in an appropriate amount of time. 33 | 34 | ## Is it possible to become an official collaborator of the repository? 35 | Yes, we hope to promote trusted members of the community, who have proven themselves to be competent and request to take on the extra responsibility to be official collaborators of the project. When an individual requests to be an official collaborator, current project collaborators will browse through the history of the requester's prior pull requests and take a vote amongst themselves on whether the requester should be promoted to collaborator. These individuals will then have the right to approve/decline pull requests and help shape the direction that the project takes. It is worth noting that on GitHub everybody has read-only access to the source and that everybody has the ability to issue a pull request to contribute to the project. The benefit of being a repository collaborator is that you are able to manage other people's pull requests.
36 | 37 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Ubuntu name decoder ring; https://en.wikipedia.org/wiki/List_of_Ubuntu_releases 2 | # Ubuntu 12.04 LTS (Precise Pangolin) 3 | # Ubuntu 12.10 (Quantal Quetzal) 4 | # Ubuntu 13.04 (Raring Ringtail) 5 | # Ubuntu 13.10 (Saucy Salamander) 6 | # Ubuntu 14.04 LTS (Trusty Tahr) <== Travis CI VM image (see 'dist: trusty' below) 7 | # Ubuntu 14.10 (Utopic Unicorn) 8 | # Ubuntu 15.04 (Vivid Vervet) 9 | # Ubuntu 15.10 (Wily Werewolf) 10 | # Ubuntu 16.04 LTS (Xenial Xerus) 11 | 12 | # language: instructs travis what compilers && environment to set up in build matrix 13 | language: cpp 14 | 15 | # sudo: false instructs travis to build our project in a docker VM (faster) 16 | # Cannot yet install fglrx packages with 'false' 17 | sudo: required # false 18 | dist: trusty 19 | 20 | # os: expands the build matrix to include multiple os's 21 | # NOTE: linux was previously disabled because of sporadic failures on building boost (needs investigation); it is currently enabled below 22 | os: 23 | - linux 24 | - osx 25 | 26 | # compiler: expands the build matrix to include multiple compilers (per os) 27 | compiler: 28 | - gcc 29 | - clang 30 | 31 | addons: 32 | # apt: is disabled on osx builds 33 | # apt: needed by docker framework to install project dependencies without 34 | # sudo.
Apt uses published Ubuntu PPAs from https://launchpad.net/ 35 | # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json 36 | apt: 37 | sources: 38 | # ubuntu-toolchain-r-test contains newer versions of gcc to install 39 | # - ubuntu-toolchain-r-test 40 | # llvm-toolchain-precise-3.6 contains newer versions of clang to install 41 | # - llvm-toolchain-precise-3.6 42 | # kubuntu-backports contains newer versions of cmake to install 43 | - kubuntu-backports 44 | # boost-latest contains boost v1.55 45 | - boost-latest 46 | packages: 47 | # g++-4.8 is minimum version considered to be the first good c++11 gnu compiler 48 | # - g++-4.8 49 | # - clang-3.6 50 | # We require v2.8.12 minimum 51 | - cmake 52 | # I'm finding problems between pre-compiled versions of boost ublas, with gtest 53 | # stl_algobase.h: error: no matching function for call to swap() 54 | - libboost-program-options1.55-dev 55 | # - libboost-serialization1.55-dev 56 | # - libboost-filesystem1.55-dev 57 | # - libboost-system1.55-dev 58 | # - libboost-regex1.55-dev 59 | # The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater 60 | # - opencl-headers 61 | # Uncomment one of the following when fglrx modules are added to the apt whitelist 62 | # - fglrx 63 | # - fglrx=2:8.960-0ubuntu1 64 | # - fglrx=2:13.350.1-0ubuntu0.0.1 65 | 66 | # env: specifies additional global variables to define per row in build matrix 67 | env: 68 | global: 69 | - CLFFT_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release 70 | - OPENCL_REGISTRY=https://www.khronos.org/registry/cl 71 | - OPENCL_ROOT=${TRAVIS_BUILD_DIR}/bin/opencl 72 | 73 | # The following filters our build matrix; we are interested in linux-gcc & osx-clang 74 | matrix: 75 | exclude: 76 | - os: linux 77 | compiler: clang 78 | - os: osx 79 | compiler: gcc 80 | 81 | before_install: 82 | # Remove the following linux clause when fglrx can be installed with sudo: false 83 | - if [ ${TRAVIS_OS_NAME} == "linux" ];
then 84 | sudo apt-get update -qq && 85 | sudo apt-get install -qq libboost-all-dev; 86 | export BUILD_BOOST="OFF"; 87 | fi 88 | - if [ ${TRAVIS_OS_NAME} == "linux" ]; then 89 | export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers"; 90 | fi 91 | - if [ ${TRAVIS_OS_NAME} == "osx" ]; then 92 | brew update; 93 | brew outdated boost || brew upgrade boost; 94 | brew outdated cmake || brew upgrade cmake; 95 | fi 96 | # - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi 97 | - cmake --version; 98 | - ${CC} --version; 99 | - ${CXX} --version; 100 | 101 | install: 102 | # The following linux logic is necessary because of Travis's move to the GCE platform, which does not 103 | # currently contain packages for fglrx: https://github.com/travis-ci/travis-ci/issues/5221 104 | # We build our own linkable .so file 105 | - if [ ${TRAVIS_OS_NAME} == "linux" ]; then 106 | mkdir -p ${OPENCL_ROOT}; 107 | pushd ${OPENCL_ROOT}; 108 | travis_retry git clone --depth 1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git; 109 | mv ./OpenCL-ICD-Loader/* .; 110 | travis_retry git clone --depth 1 https://github.com/KhronosGroup/OpenCL-Headers.git inc/CL; 111 | pushd inc/CL; 112 | travis_retry wget -w 1 -np -nd -nv -A h,hpp ${OPENCL_REGISTRY}/api/2.1/cl.hpp; 113 | popd; 114 | mkdir -p lib; 115 | pushd lib; 116 | cmake -G "Unix Makefiles" ..; 117 | make; 118 | cp ./bin/libOpenCL.so .; 119 | popd; 120 | pushd inc/CL; 121 | travis_retry git fetch origin opencl12:opencl12; 122 | git checkout opencl12; 123 | popd; 124 | mv inc/ include/; 125 | popd; 126 | fi 127 | 128 | # osx image does not contain cl.hpp file; download from Khronos 129 | # - if [ ${TRAVIS_OS_NAME} == "osx" ]; then 130 | # pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/; 131 | # sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp; 132 | # popd; 133 | # fi 134 | 135 | # Use before_script: to run configure steps 136 | before_script: 137 | - mkdir -p 
${CLFFT_ROOT} 138 | - pushd ${CLFFT_ROOT} 139 | - cmake -DCMAKE_BUILD_TYPE=Release -DBoost_NO_SYSTEM_PATHS=OFF -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src 140 | 141 | # use script: to execute build steps 142 | script: 143 | - make package 144 | 145 | deploy: 146 | provider: releases 147 | prerelease: true 148 | draft: true 149 | skip_cleanup: true 150 | api_key: 151 | secure: "Bx51QUNYPnJlkJS7D97huRxYzs26kOZst76S0uTmBWp8ZU7gAm6mY79pFbXW9mkL2r6EPX5l5p+pPOwo1kmN/eB3jicamA07oLcI0ZQw4XLxszVEiOLMNXSLFYnsee0RkVX4fu453XXOFDoQupN/JKHUtp4VReJHIiWgQPnvF3Xu7kBLGecKr9IrWV23ig+7z3oiTEO+MTIR/z9mmHnzdAf4K0Nh+9BdtI2QVuVbpRZxPRqLIakMHoYw18h2SdY44wYK+sC+AroU9QWCr5t26GJDzWdu0nrv05ChQqaC128z8+hs+jaPbx39ByBH3BxD05FrKtCO3W0O6VycHewFitrMeMCkXizDn3XzXRkw8bM8OpqFPW/++f5DqxgN+Yh9eIb2vY1QiBjaQ6VIvd/9egVILw6/cxVXlym9iNVwuO2ZG0COKMHoLP/2ZlvPbuoXYgEtZYqqUnvBHVu1SxzrABxR+Cp44McbuB2EFLHYxjA7msF/h+yilfE5e4FdD91MpjR26ASmP3rMx9xUe6h28rGgei3RXNNeF0vnMO65qwfImuUbQ3/on+KW6LlMKFptqq3Twp7wcBdh433bvsrwCW0BDP0eanWuImpS0z3reqcTJFrl0rzplpQmJAW38dcSWTI7MvlDKcMHLJMNy+2or7mIGCj1m9o9Yr6INC7W2Rg=" 152 | file: ${CLFFT_ROOT}/*.tar.gz 153 | file_glob: true 154 | on: 155 | all_branches: true 156 | tags: true 157 | -------------------------------------------------------------------------------- /src/library/lock.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | #if !defined( CLFFT_lock_H ) 20 | #define CLFFT_lock_H 21 | 22 | #if defined( _WIN32 ) 23 | #include <windows.h> 24 | #else 25 | #include <pthread.h> 26 | #endif 27 | 28 | #include "private.h" 29 | 30 | #if defined( _WIN32 ) 31 | 32 | // lockRAII provides an abstraction for the concept of a mutex; it wraps all mutex functions in generic methods 33 | // On windows, the mutex is implemented as a CRITICAL_SECTION, as this is the fastest intraprocess mutex 34 | // available. 35 | // The template argument 'debugPrint' activates debugging information, but if not active the compiler optimizes 36 | // the print statements out 37 | template< bool debugPrint > 38 | class lockRAII 39 | { 40 | CRITICAL_SECTION cs; 41 | tstring csName; 42 | tstringstream tstream; 43 | 44 | // Does not make sense to create a copy of a lock object; private method 45 | lockRAII( const lockRAII& rhs ): csName( rhs.csName ) 46 | { 47 | tstream << std::hex << std::showbase; 48 | ::InitializeCriticalSection( &cs ); 49 | } 50 | 51 | public: 52 | lockRAII( ) 53 | { 54 | tstream << std::hex << std::showbase; 55 | ::InitializeCriticalSection( &cs ); 56 | } 57 | 58 | lockRAII( const tstring& name ): csName( name ) 59 | { 60 | tstream << std::hex << std::showbase; 61 | ::InitializeCriticalSection( &cs ); 62 | } 63 | 64 | ~lockRAII( ) 65 | { 66 | ::DeleteCriticalSection( &cs ); 67 | } 68 | 69 | tstring& getName( ) 70 | { 71 | return csName; 72 | } 73 | 74 | void setName( const tstring& name ) 75 | { 76 | csName = name; 77 | } 78 | 79 | void enter( ) 80 | { 81 | if( debugPrint ) 82 | { 83 | tstream.str( _T( "" ) ); 84 | tstream << _T( "Attempting CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl; 85 | tout << tstream.str( ); 86 | } 87 | 88 | ::EnterCriticalSection( &cs ); 89 | 90 | if( debugPrint ) 91
| { 92 | tstream.str( _T( "" ) ); 93 | tstream << _T( "Acquired CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl; 94 | tstream << _T( "\tOwningThread( " ) << cs.OwningThread << _T( " )" ) << std::endl; 95 | tstream << _T( "\tLockcount( " ) << cs.LockCount << _T( " )" ) << std::endl; 96 | tstream << _T( "\tRecursionCount( " ) << cs.RecursionCount << _T( " )" ) << std::endl; 97 | tout << tstream.str( ); 98 | } 99 | } 100 | 101 | void leave( ) 102 | { 103 | if( debugPrint ) 104 | { 105 | tstream.str( _T( "" ) ); 106 | tstream << _T( "Releasing CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl; 107 | tstream << _T( "\tOwningThread( " ) << cs.OwningThread << _T( " )" ) << std::endl; 108 | tstream << _T( "\tLockcount( " ) << cs.LockCount << _T( " )" ) << std::endl; 109 | tstream << _T( "\tRecursionCount( " ) << cs.RecursionCount << _T( " )" ) << std::endl << std::endl; 110 | tout << tstream.str( ); 111 | } 112 | 113 | ::LeaveCriticalSection( &cs ); 114 | } 115 | }; 116 | 117 | #else 118 | // lockRAII provides an abstraction for the concept of a mutex; it wraps all mutex functions in generic methods 119 | // On non-windows platforms, the mutex is implemented as a pthread_mutex_t created with the PTHREAD_MUTEX_RECURSIVE attribute 120 | // The template argument 'debugPrint' activates debugging information, but if not active the compiler optimizes 121 | // the print statements out 122 | template< bool debugPrint > 123 | class lockRAII 124 | { 125 | pthread_mutex_t mutex; 126 | pthread_mutexattr_t mAttr; 127 | tstring mutexName; 128 | tstringstream tstream; 129 | 130 | // Does not make sense to create a copy of a lock object; private method. NOTE(review): unlike the windows variant, this copy constructor does not initialize the mutex 131 | lockRAII( const lockRAII& rhs ): mutexName( rhs.mutexName ) 132 | { 133 | tstream << std::hex << std::showbase; 134 | } 135 | 136 | public: 137 | lockRAII( ) 138 | { 139 | tstream << std::hex << std::showbase; 140 | pthread_mutexattr_init( &mAttr ); 141 | pthread_mutexattr_settype( &mAttr, PTHREAD_MUTEX_RECURSIVE ); 142 | pthread_mutex_init( &mutex, &mAttr ); 143 | } 144 | 145 | lockRAII( const
tstring& name ): mutexName( name ) 146 | { 147 | tstream << std::hex << std::showbase; 148 | pthread_mutexattr_init( &mAttr ); 149 | pthread_mutexattr_settype( &mAttr, PTHREAD_MUTEX_RECURSIVE ); 150 | pthread_mutex_init( &mutex, &mAttr ); 151 | } 152 | 153 | ~lockRAII( ) 154 | { 155 | pthread_mutex_destroy( &mutex ); 156 | pthread_mutexattr_destroy( &mAttr ); 157 | } 158 | 159 | tstring& getName( ) 160 | { 161 | return mutexName; 162 | } 163 | 164 | void setName( const tstring& name ) 165 | { 166 | mutexName = name; 167 | } 168 | 169 | void enter( ) 170 | { 171 | if( debugPrint ) 172 | { 173 | tstream.str( _T( "" ) ); 174 | tstream << _T( "Attempting pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl; 175 | tout << tstream.str( ); 176 | } 177 | 178 | ::pthread_mutex_lock( &mutex ); 179 | 180 | if( debugPrint ) 181 | { 182 | tstream.str( _T( "" ) ); 183 | tstream << _T( "Acquired pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl; 184 | //tstream << _T( "\tOwningThread( " ) << mutex.OwningThread << _T( " )" ) << std::endl; 185 | //tstream << _T( "\tLockcount( " ) << mutex.LockCount << _T( " )" ) << std::endl; 186 | //tstream << _T( "\tRecursionCount( " ) << mutex.RecursionCount << _T( " )" ) << std::endl; 187 | tout << tstream.str( ); 188 | } 189 | } 190 | 191 | void leave( ) 192 | { 193 | if( debugPrint ) 194 | { 195 | tstream.str( _T( "" ) ); 196 | tstream << _T( "Releasing pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl; 197 | //tstream << _T( "\tOwningThread( " ) << mutex.OwningThread << _T( " )" ) << std::endl; 198 | //tstream << _T( "\tLockcount( " ) << mutex.LockCount << _T( " )" ) << std::endl; 199 | //tstream << _T( "\tRecursionCount( " ) << mutex.RecursionCount << _T( " )" ) << std::endl << std::endl; 200 | tout << tstream.str( ); 201 | } 202 | 203 | ::pthread_mutex_unlock( &mutex ); 204 | } 205 | }; 206 | #endif 207 | 208 | // Class used to make sure that we enter and leave critical sections in pairs 209 | // The template
logic logs our lock (CRITICAL_SECTION or pthread mutex) actions; if the template parameter is false, 210 | // the branch is constant and the compiler will optimize the branch out 211 | template< bool debugPrint > 212 | class scopedLock 213 | { 214 | lockRAII< debugPrint >* sLock; 215 | tstring sLockName; 216 | tstringstream tstream; 217 | 218 | public: 219 | scopedLock( lockRAII< debugPrint >& lock, const tstring& name ): sLock( &lock ), sLockName( name ) 220 | { 221 | if( debugPrint ) 222 | { 223 | tstream.str( _T( "" ) ); 224 | tstream << _T( "Entering scopedLock( " ) << sLockName << _T( " )" ) << std::endl << std::endl; 225 | tout << tstream.str( ); 226 | } 227 | 228 | sLock->enter( ); 229 | } 230 | 231 | ~scopedLock( ) 232 | { 233 | sLock->leave( ); 234 | 235 | if( debugPrint ) 236 | { 237 | tstream.str( _T( "" ) ); 238 | tstream << _T( "Left scopedLock( " ) << sLockName << _T( " )" ) << std::endl << std::endl; 239 | tout << tstream.str( ); 240 | } 241 | } 242 | }; 243 | 244 | // Convenience macros that pin the 'debugPrint' template argument to false (debugging print statements disabled); change the argument to true to enable them 245 | #define lockRAII lockRAII< false > 246 | #define scopedLock scopedLock< false > 247 | 248 | #endif // CLFFT_lock_H 249 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Build Status 2 | | Build branch | master | develop | 3 | |-----|-----|-----| 4 | | GCC/Clang x64 | [![Build Status](https://travis-ci.org/clMathLibraries/clFFT.svg?branch=master)](https://travis-ci.org/clMathLibraries/clFFT/branches) | [![Build Status](https://travis-ci.org/clMathLibraries/clFFT.svg?branch=develop)](https://travis-ci.org/clMathLibraries/clFFT/branches) | 5 | | Visual Studio x64 | |[![Build status](https://ci.appveyor.com/api/projects/status/facii32v72y98opv/branch/develop?svg=true)](https://ci.appveyor.com/project/kknox/clfft-whc3m/branch/develop) | 6 | 7 | clFFT 8 | ===== 9 | clFFT is a software library containing FFT functions
written 10 | in OpenCL. In addition to GPU devices, the library also supports 11 | running on CPU devices to facilitate debugging and heterogeneous 12 | programming. 13 | 14 | Pre-built binaries are available [here][binary_release]. 15 | 16 | ## What's New 17 | 18 | - Support for powers of 11&13 size transforms 19 | - Support for 1D large size transforms with no extra memory allocation 20 | requirement with environment flag CLFFT_REQUEST_LIB_NOMEMALLOC=1 21 | for complex FFTs of powers of 2,3,5,10 sizes 22 | 23 | 24 | ## Note 25 | 26 | - clFFT requires platform/runtime that supports OpenCL 1.2 27 | 28 | ## Introduction to clFFT 29 | 30 | The FFT is an implementation of the Discrete Fourier Transform (DFT) 31 | that makes use of symmetries in the FFT definition to reduce the 32 | mathematical intensity required from O(N^2) to O(N log2(N)) when the 33 | sequence length N is the product of small prime factors. Currently, 34 | there is no standard API for FFT routines. Hardware vendors usually 35 | provide a set of high-performance FFTs optimized for their systems: no 36 | two vendors employ the same interfaces for their FFT routines. clFFT 37 | provides a set of FFT routines that are optimized for AMD graphics 38 | processors, but also are functional across CPU and other compute 39 | devices. 40 | 41 | The clFFT library is an open source OpenCL library implementation of 42 | discrete Fast Fourier Transforms. The library: 43 | 44 | - provides a fast and accurate platform for calculating discrete FFTs. 45 | 46 | - works on CPU or GPU backends. 47 | 48 | - supports in-place or out-of-place transforms. 49 | 50 | - supports 1D, 2D, and 3D transforms with a batch size that can be 51 | greater than 1. 52 | 53 | - supports planar (real and complex components in separate arrays) and 54 | interleaved (real and complex components as a pair contiguous in 55 | memory) formats. 
56 | 57 | - supports dimension lengths that can be any combination of powers of 58 | 2, 3, 5, 7, 11 and 13. 59 | 60 | - Supports single and double precision floating point formats. 61 | 62 | ## clFFT library user documentation 63 | 64 | [Library and API documentation][] for developers is available online as 65 | a GitHub Pages website 66 | 67 | ### Google Groups 68 | 69 | Two mailing lists exist for the clMath projects: 70 | 71 | - [clmath@googlegroups.com][] - group whose focus is to answer 72 | questions on using the library or reporting issues 73 | 74 | - [clmath-developers@googlegroups.com][] - group whose focus is for 75 | developers interested in contributing to the library code 76 | 77 | ### API semantic versioning 78 | Good software is typically the result of the loop of feedback and iteration; software 79 | interfaces no less so. clFFT follows the [semantic versioning] guidelines. The version 80 | number used is of the form MAJOR.MINOR.PATCH. 81 | 82 | ## clFFT Wiki 83 | 84 | The [project wiki][clmath@googlegroups.com] contains helpful 85 | documentation, including a [build 86 | primer][clmath-developers@googlegroups.com] 87 | 88 | ## Contributing code 89 | 90 | Please refer to and read the [Contributing][] document for guidelines on 91 | how to contribute code to this open source project. The code in the 92 | /master branch is considered to be stable, and all pull-requests must 93 | be made against the /develop branch. 
94 | 95 | ## License 96 | 97 | The source for clFFT is licensed under the [Apache License, Version 98 | 2.0][] 99 | 100 | ## Example 101 | 102 | The following simple example shows how to use clFFT to compute a simple 1D 103 | forward transform 104 | ```c 105 | #include <stdlib.h> 106 | 107 | /* No need to explicitly include the OpenCL headers */ 108 | #include <clFFT.h> 109 | 110 | int main( void ) 111 | { 112 | cl_int err; 113 | cl_platform_id platform = 0; 114 | cl_device_id device = 0; 115 | cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 }; 116 | cl_context ctx = 0; 117 | cl_command_queue queue = 0; 118 | cl_mem bufX; 119 | float *X; 120 | cl_event event = NULL; 121 | int ret = 0; 122 | size_t N = 16; 123 | 124 | /* FFT library related declarations */ 125 | clfftPlanHandle planHandle; 126 | clfftDim dim = CLFFT_1D; 127 | size_t clLengths[1] = {N}; 128 | 129 | /* Setup OpenCL environment. */ 130 | err = clGetPlatformIDs( 1, &platform, NULL ); 131 | err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL ); 132 | 133 | props[1] = (cl_context_properties)platform; 134 | ctx = clCreateContext( props, 1, &device, NULL, NULL, &err ); 135 | queue = clCreateCommandQueue( ctx, device, 0, &err ); 136 | 137 | /* Setup clFFT. */ 138 | clfftSetupData fftSetup; 139 | err = clfftInitSetupData(&fftSetup); 140 | err = clfftSetup(&fftSetup); 141 | 142 | /* Allocate host & initialize data. */ 143 | /* Only allocation shown for simplicity. */ 144 | X = (float *)malloc(N * 2 * sizeof(*X)); 145 | 146 | /* Prepare OpenCL memory objects and place data inside them. */ 147 | bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err ); 148 | 149 | err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, 150 | N * 2 * sizeof( *X ), X, 0, NULL, NULL ); 151 | 152 | /* Create a default plan for a complex FFT. */ 153 | err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths); 154 | 155 | /* Set plan parameters.
*/ 156 | err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE); 157 | err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED); 158 | err = clfftSetResultLocation(planHandle, CLFFT_INPLACE); 159 | 160 | /* Bake the plan. */ 161 | err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL); 162 | 163 | /* Execute the plan. */ 164 | err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL); 165 | 166 | /* Wait for calculations to be finished. */ 167 | err = clFinish(queue); 168 | 169 | /* Fetch results of calculations. */ 170 | err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL ); 171 | 172 | /* Release OpenCL memory objects. */ 173 | clReleaseMemObject( bufX ); 174 | 175 | free(X); 176 | 177 | /* Release the plan. */ 178 | err = clfftDestroyPlan( &planHandle ); 179 | 180 | /* Release clFFT library. */ 181 | clfftTeardown( ); 182 | 183 | /* Release OpenCL working objects. */ 184 | clReleaseCommandQueue( queue ); 185 | clReleaseContext( ctx ); 186 | 187 | return ret; 188 | } 189 | ``` 190 | 191 | ## Build dependencies 192 | 193 | ### Library for Windows 194 | To develop the clFFT library code on a Windows operating system, ensure to install the following packages on your system: 195 | 196 | - Windows® 7/8.1 197 | 198 | - Visual Studio 2012 or later 199 | 200 | - Latest CMake 201 | 202 | - An OpenCL SDK, such as APP SDK 3.0 203 | 204 | ### Library for Linux 205 | To develop the clFFT library code on a Linux operating system, ensure to install the following packages on your system: 206 | - GCC 4.6 and onwards 207 | 208 | - Latest CMake 209 | 210 | - An OpenCL SDK, such as APP SDK 3.0 211 | 212 | ### Library for Mac OSX 213 | To develop the clFFT library code on Mac OS X, it is recommended to generate Unix makefiles with cmake.
214 | 215 | ### Test infrastructure 216 | To test the developed clFFT library code, ensure to install the following packages on your system: 217 | 218 | - Googletest v1.6 219 | 220 | - Latest FFTW 221 | 222 | - Latest Boost 223 | 224 | ### Performance infrastructure 225 | To measure the performance of the clFFT library code, ensure that the Python package is installed on your system. 226 | 227 | [Library and API documentation]: http://clmathlibraries.github.io/clFFT/ 228 | [clmath@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki 229 | [clmath-developers@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki/Build 230 | [Contributing]: CONTRIBUTING.md 231 | [Apache License, Version 2.0]: http://www.apache.org/licenses/LICENSE-2.0 232 | [binary_release]: https://github.com/clMathLibraries/clFFT/releases 233 | [semantic versioning]: http://semver.org/ --------------------------------------------------------------------------------