├── CHANGELOG
├── src
    ├── library
    │   ├── plan.cpp
    │   ├── clFFT.pc.in
    │   ├── action.transpose.h
    │   ├── generator.transpose.gcn.h
    │   ├── stdafx.cpp
    │   ├── dllmain.cpp
    │   ├── generator.h
    │   ├── md5sum.h
    │   ├── ReadMe.txt
    │   ├── generator.transpose.h
    │   ├── lifetime.cpp
    │   ├── CMakeLists.txt
    │   └── lock.h
    ├── scripts
    │   └── perf
    │   │   ├── manual.pdf
    │   │   ├── CMakeLists.txt
    │   │   ├── errorHandler.py
    │   │   └── performanceUtility.py
    ├── clFFTConfig.cmake.in
    ├── tests
    │   ├── c-compliance.c
    │   ├── typedefs.h
    │   ├── buffer_memory.cpp
    │   ├── accuracy_test_common.cpp
    │   ├── test_constants.cpp
    │   ├── copyTestDependencies.cmake.in
    │   ├── CMakeLists.txt
    │   └── buffer_memory.h
    ├── include
    │   ├── clFFT.version.h.in
    │   ├── convenienceFunctions.h
    │   ├── targetver.h
    │   ├── clAmdFft.version.h
    │   ├── stdafx.h
    │   ├── unicode.compatibility.h
    │   └── sharedLibrary.h
    ├── statTimer
    │   ├── stdafx.cpp
    │   ├── targetver.h
    │   ├── dllmain.cpp
    │   ├── statisticalTimer.extern.cpp
    │   ├── stdafx.h
    │   ├── ReadMe.txt
    │   ├── statisticalTimer.extern.h
    │   ├── CMakeLists.txt
    │   ├── statisticalTimer.h
    │   └── statisticalTimer.CPU.h
    ├── client
    │   ├── stdafx.cpp
    │   ├── CMakeLists.txt
    │   ├── client.h
    │   └── openCL.misc.h
    ├── callback-client
    │   ├── stdafx.cpp
    │   ├── CMakeLists.txt
    │   ├── openCL.misc.h
    │   └── client.h
    ├── clFFTConfigVersion.cmake.in
    ├── cuFFT-client
    │   └── CMakeLists.txt
    ├── examples
    │   ├── CMakeLists.txt
    │   ├── fft1d.c
    │   ├── fft2d.c
    │   └── fft3d.c
    ├── FindclFFT.cmake
    ├── gtest.cmake
    ├── FindFFTW.cmake
    └── FindOpenCL.cmake
├── docs
    ├── realfft_1dlen.jpg
    ├── realfft_ex_n7.jpg
    ├── realfft_ex_n8.jpg
    ├── realfft_fwdinv.jpg
    ├── realfft_expl_01.jpg
    ├── realfft_expl_02.jpg
    ├── realfft_expl_03.jpg
    ├── realfft_expl_04.jpg
    ├── realfft_expl_05.jpg
    ├── realfft_expl_06.jpg
    ├── realfft_expl_07.jpg
    ├── realfft_expl_08.jpg
    └── performance
    │   ├── cuFFT_7.0
    │       └── Tesla_K40
    │       │   ├── R2C_3D_double.csv
    │       │   ├── R2C_3D_single.csv
    │       │   ├── C2C_3D_double.csv
    │       │   ├── C2C_3D_single.csv
    │       │   ├── R2C_2D_double.csv
    │       │   ├── R2C_2D_single.csv
    │       │   ├── C2C_2D_double.csv
    │       │   ├── C2C_2D_single.csv
    │       │   ├── R2C_1D_double.csv
    │       │   ├── R2C_1D_single.csv
    │       │   ├── C2C_1D_single.csv
    │       │   └── C2C_1D_double.csv
    │   └── clFFT_2.6.0
    │       └── FirePro_W9100
    │           ├── C2C_3D_double.csv
    │           ├── C2C_3D_single.csv
    │           ├── R2C_3D_single.csv
    │           ├── R2C_3D_double.csv
    │           ├── C2C_2D_double.csv
    │           ├── C2C_2D_single.csv
    │           ├── R2C_2D_double.csv
    │           ├── R2C_2D_single.csv
    │           ├── R2C_1D_double.csv
    │           ├── C2C_1D_single.csv
    │           ├── C2C_1D_double.csv
    │           └── R2C_1D_single.csv
├── NOTICE
├── .gitignore
├── .gitattributes
├── appveyor.yml
├── ReleaseNotes.txt
├── CONTRIBUTING.md
├── .travis.yml
└── README.md


/CHANGELOG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/CHANGELOG


--------------------------------------------------------------------------------
/src/library/plan.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/src/library/plan.cpp


--------------------------------------------------------------------------------
/docs/realfft_1dlen.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_1dlen.jpg


--------------------------------------------------------------------------------
/docs/realfft_ex_n7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_ex_n7.jpg


--------------------------------------------------------------------------------
/docs/realfft_ex_n8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_ex_n8.jpg


--------------------------------------------------------------------------------
/docs/realfft_fwdinv.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_fwdinv.jpg


--------------------------------------------------------------------------------
/docs/realfft_expl_01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_01.jpg


--------------------------------------------------------------------------------
/docs/realfft_expl_02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_02.jpg


--------------------------------------------------------------------------------
/docs/realfft_expl_03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_03.jpg


--------------------------------------------------------------------------------
/docs/realfft_expl_04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_04.jpg


--------------------------------------------------------------------------------
/docs/realfft_expl_05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_05.jpg


--------------------------------------------------------------------------------
/docs/realfft_expl_06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_06.jpg


--------------------------------------------------------------------------------
/docs/realfft_expl_07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_07.jpg


--------------------------------------------------------------------------------
/docs/realfft_expl_08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/docs/realfft_expl_08.jpg


--------------------------------------------------------------------------------
/src/scripts/perf/manual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arrayfire/clFFT/HEAD/src/scripts/perf/manual.pdf


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | AMD clFFT
2 |     Copyright 2013 Advanced Micro Devices, Inc.
3 | 
4 |     This product includes software developed at
5 |     Advanced Micro Devices, Inc. (http://www.amd.com).
6 | 


--------------------------------------------------------------------------------
/src/clFFTConfig.cmake.in:
--------------------------------------------------------------------------------
1 | include(${CMAKE_CURRENT_LIST_DIR}/clFFTTargets.cmake)
2 | get_filename_component(CLFFT_INCLUDE_DIRS ${CMAKE_CURRENT_LIST_DIR}/@reldir@/include ABSOLUTE)
3 | set(CLFFT_LIBRARIES clFFT)
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files
 2 | *.slo
 3 | *.lo
 4 | *.o
 5 | 
 6 | # Compiled Dynamic libraries
 7 | *.so
 8 | *.dylib
 9 | 
10 | # Compiled Static libraries
11 | *.lai
12 | *.la
13 | *.a
14 | 
15 | # ignore build directory if name is 'build'
16 | build/
17 | 
18 | # ignore tilde files
19 | *~
20 | 


--------------------------------------------------------------------------------
/src/library/clFFT.pc.in:
--------------------------------------------------------------------------------
 1 | prefix=@CMAKE_INSTALL_PREFIX@
 2 | exec_prefix=${prefix}/bin@CLFFT_SUFFIX_BIN@
 3 | includedir=${prefix}/include
 4 | libdir=${prefix}/lib@CLFFT_SUFFIX_LIB@
 5 | 
 6 | Name: clFFT
 7 | Description: Open source OpenCL FFT library
 8 | Version: @clFFT_VERSION@
 9 | URL: https://github.com/clMathLibraries/clFFT
10 | 
11 | Cflags: -I${includedir}
12 | Libs: -L${libdir} -lclFFT
13 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/R2C_3D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,2,2097152,g,5,3,in,double,cuFFT3D,1.25686
 3 | 4,4,4,262144,g,5,3,in,double,cuFFT3D,6.68634
 4 | 8,8,8,32768,g,5,3,in,double,cuFFT3D,44.328471
 5 | 16,16,16,4096,g,5,3,in,double,cuFFT3D,64.499246
 6 | 32,32,32,512,g,5,3,in,double,cuFFT3D,83.623983
 7 | 64,64,64,64,g,5,3,in,double,cuFFT3D,98.194147
 8 | 128,128,128,8,g,5,3,in,double,cuFFT3D,114.979824
 9 | 256,256,256,1,g,5,3,in,double,cuFFT3D,128.095675
10 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/R2C_3D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,2,2097152,g,5,3,in,single,cuFFT3D,1.948795
 3 | 4,4,4,262144,g,5,3,in,single,cuFFT3D,10.53232
 4 | 8,8,8,32768,g,5,3,in,single,cuFFT3D,88.707531
 5 | 16,16,16,4096,g,5,3,in,single,cuFFT3D,125.457423
 6 | 32,32,32,512,g,5,3,in,single,cuFFT3D,162.2312
 7 | 64,64,64,64,g,5,3,in,single,cuFFT3D,186.305497
 8 | 128,128,128,8,g,5,3,in,single,cuFFT3D,218.05084
 9 | 256,256,256,1,g,5,3,in,single,cuFFT3D,255.37672
10 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/C2C_3D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,2,4194304,g,1,1,in,double,cuFFT3D,28.162064
 3 | 4,4,4,524288,g,1,1,in,double,cuFFT3D,57.518869
 4 | 8,8,8,65536,g,1,1,in,double,cuFFT3D,87.648677
 5 | 16,16,16,8192,g,1,1,in,double,cuFFT3D,118.044529
 6 | 32,32,32,1024,g,1,1,in,double,cuFFT3D,146.907326
 7 | 64,64,64,128,g,1,1,in,double,cuFFT3D,172.036879
 8 | 128,128,128,16,g,1,1,in,double,cuFFT3D,199.233594
 9 | 256,256,256,2,g,1,1,in,double,cuFFT3D,220.430123
10 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/C2C_3D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,2,4194304,g,1,1,in,single,cuFFT3D,58.42527
 3 | 4,4,4,524288,g,1,1,in,single,cuFFT3D,112.28049
 4 | 8,8,8,65536,g,1,1,in,single,cuFFT3D,169.252406
 5 | 16,16,16,8192,g,1,1,in,single,cuFFT3D,228.567003
 6 | 32,32,32,1024,g,1,1,in,single,cuFFT3D,289.213888
 7 | 64,64,64,128,g,1,1,in,single,cuFFT3D,343.15019
 8 | 128,128,128,16,g,1,1,in,single,cuFFT3D,392.626589
 9 | 256,256,256,2,g,1,1,in,single,cuFFT3D,438.342723
10 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_3D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,2,4194304,g,1,1,in,double,clFFT3D_2.6,40.0694
 3 | 4,4,4,524288,g,1,1,in,double,clFFT3D_2.6,81.8712
 4 | 8,8,8,65536,g,1,1,in,double,clFFT3D_2.6,123.015
 5 | 16,16,16,8192,g,1,1,in,double,clFFT3D_2.6,146.233
 6 | 32,32,32,1024,g,1,1,in,double,clFFT3D_2.6,174.584
 7 | 64,64,64,128,g,1,1,in,double,clFFT3D_2.6,158.831
 8 | 128,128,128,16,g,1,1,in,double,clFFT3D_2.6,120.786
 9 | 256,256,256,2,g,1,1,in,double,clFFT3D_2.6,17.0093
10 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_3D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,2,4194304,g,1,1,in,single,clFFT3D_2.6,75.6796
 3 | 4,4,4,524288,g,1,1,in,single,clFFT3D_2.6,154.346
 4 | 8,8,8,65536,g,1,1,in,single,clFFT3D_2.6,227.783
 5 | 16,16,16,8192,g,1,1,in,single,clFFT3D_2.6,280.944
 6 | 32,32,32,1024,g,1,1,in,single,clFFT3D_2.6,356.416
 7 | 64,64,64,128,g,1,1,in,single,clFFT3D_2.6,232.128
 8 | 128,128,128,16,g,1,1,in,single,clFFT3D_2.6,164.313
 9 | 256,256,256,2,g,1,1,in,single,clFFT3D_2.6,21.5838
10 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_3D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,2,2097152,g,5,3,in,single,clFFT3D_2.6,0.04324405
 3 | 4,4,4,262144,g,5,3,in,single,clFFT3D_2.6,0.40443
 4 | 8,8,8,32768,g,5,3,in,single,clFFT3D_2.6,2.791705
 5 | 16,16,16,4096,g,5,3,in,single,clFFT3D_2.6,12.79715
 6 | 32,32,32,512,g,5,3,in,single,clFFT3D_2.6,49.1747
 7 | 64,64,64,64,g,5,3,in,single,clFFT3D_2.6,160.058
 8 | 128,128,128,8,g,5,3,in,single,clFFT3D_2.6,180.044
 9 | 256,256,256,1,g,5,3,in,single,clFFT3D_2.6,245.51
10 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_3D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,2,2097152,g,5,3,in,double,clFFT3D_2.6,0.04589015
 3 | 4,4,4,262144,g,5,3,in,double,clFFT3D_2.6,0.3984265
 4 | 8,8,8,32768,g,5,3,in,double,clFFT3D_2.6,2.61428
 5 | 16,16,16,4096,g,5,3,in,double,clFFT3D_2.6,12.5894
 6 | 32,32,32,512,g,5,3,in,double,clFFT3D_2.6,41.4404
 7 | 64,64,64,64,g,5,3,in,double,clFFT3D_2.6,82.7885
 8 | 128,128,128,8,g,5,3,in,double,clFFT3D_2.6,92.5995
 9 | 256,256,256,1,g,5,3,in,double,clFFT3D_2.6,125.071
10 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | *.sln    merge=union
 7 | *.csproj merge=union
 8 | *.vbproj merge=union
 9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 | 
12 | # Standard to msysgit
13 | *.doc	 diff=astextplain
14 | *.DOC	 diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot  diff=astextplain
18 | *.DOT  diff=astextplain
19 | *.pdf  diff=astextplain
20 | *.PDF	 diff=astextplain
21 | *.rtf	 diff=astextplain
22 | *.RTF	 diff=astextplain
23 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/R2C_2D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,1,4194304,g,5,3,in,double,cuFFT2D,1.112218
 3 | 4,4,1,1048576,g,5,3,in,double,cuFFT2D,5.131769
 4 | 8,8,1,262144,g,5,3,in,double,cuFFT2D,31.630199
 5 | 16,16,1,65536,g,5,3,in,double,cuFFT2D,47.836545
 6 | 32,32,1,16384,g,5,3,in,double,cuFFT2D,68.144852
 7 | 64,64,1,4096,g,5,3,in,double,cuFFT2D,84.450258
 8 | 128,128,1,1024,g,5,3,in,double,cuFFT2D,99.624748
 9 | 256,256,1,256,g,5,3,in,double,cuFFT2D,112.566884
10 | 512,512,1,64,g,5,3,in,double,cuFFT2D,100.980881
11 | 1024,1024,1,16,g,5,3,in,double,cuFFT2D,110.616129
12 | 2048,2048,1,4,g,5,3,in,double,cuFFT2D,120.502623
13 | 4096,4096,1,1,g,5,3,in,double,cuFFT2D,125.728096
14 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/R2C_2D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,1,4194304,g,5,3,in,single,cuFFT2D,1.791532
 3 | 4,4,1,1048576,g,5,3,in,single,cuFFT2D,8.29206
 4 | 8,8,1,262144,g,5,3,in,single,cuFFT2D,71.79269
 5 | 16,16,1,65536,g,5,3,in,single,cuFFT2D,102.621816
 6 | 32,32,1,16384,g,5,3,in,single,cuFFT2D,133.768603
 7 | 64,64,1,4096,g,5,3,in,single,cuFFT2D,164.967666
 8 | 128,128,1,1024,g,5,3,in,single,cuFFT2D,196.094691
 9 | 256,256,1,256,g,5,3,in,single,cuFFT2D,226.455545
10 | 512,512,1,64,g,5,3,in,single,cuFFT2D,198.34244
11 | 1024,1024,1,16,g,5,3,in,single,cuFFT2D,217.726856
12 | 2048,2048,1,4,g,5,3,in,single,cuFFT2D,237.499682
13 | 4096,4096,1,1,g,5,3,in,single,cuFFT2D,254.357062
14 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/C2C_2D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,1,8388608,g,1,1,in,double,cuFFT2D,27.974814
 3 | 4,4,1,2097152,g,1,1,in,double,cuFFT2D,57.028381
 4 | 8,8,1,524288,g,1,1,in,double,cuFFT2D,87.651529
 5 | 16,16,1,131072,g,1,1,in,double,cuFFT2D,117.355353
 6 | 32,32,1,32768,g,1,1,in,double,cuFFT2D,145.958473
 7 | 64,64,1,8192,g,1,1,in,double,cuFFT2D,176.854846
 8 | 128,128,1,2048,g,1,1,in,double,cuFFT2D,204.919105
 9 | 256,256,1,512,g,1,1,in,double,cuFFT2D,232.14229
10 | 512,512,1,128,g,1,1,in,double,cuFFT2D,170.83545
11 | 1024,1024,1,32,g,1,1,in,double,cuFFT2D,187.166338
12 | 2048,2048,1,8,g,1,1,in,double,cuFFT2D,192.282189
13 | 4096,4096,1,2,g,1,1,in,double,cuFFT2D,164.26726
14 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/C2C_2D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,1,8388608,g,1,1,in,single,cuFFT2D,58.660739
 3 | 4,4,1,2097152,g,1,1,in,single,cuFFT2D,110.939092
 4 | 8,8,1,524288,g,1,1,in,single,cuFFT2D,167.463965
 5 | 16,16,1,131072,g,1,1,in,single,cuFFT2D,226.538742
 6 | 32,32,1,32768,g,1,1,in,single,cuFFT2D,285.72516
 7 | 64,64,1,8192,g,1,1,in,single,cuFFT2D,346.266161
 8 | 128,128,1,2048,g,1,1,in,single,cuFFT2D,400.480557
 9 | 256,256,1,512,g,1,1,in,single,cuFFT2D,455.021399
10 | 512,512,1,128,g,1,1,in,single,cuFFT2D,336.292353
11 | 1024,1024,1,32,g,1,1,in,single,cuFFT2D,368.983107
12 | 2048,2048,1,8,g,1,1,in,single,cuFFT2D,402.543841
13 | 4096,4096,1,2,g,1,1,in,single,cuFFT2D,436.652575
14 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_2D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,1,8388608,g,1,1,in,double,clFFT2D_2.6,40.4909
 3 | 4,4,1,2097152,g,1,1,in,double,clFFT2D_2.6,81.961
 4 | 8,8,1,524288,g,1,1,in,double,clFFT2D_2.6,123.012
 5 | 16,16,1,131072,g,1,1,in,double,clFFT2D_2.6,162.167
 6 | 32,32,1,32768,g,1,1,in,double,clFFT2D_2.6,155.011
 7 | 64,64,1,8192,g,1,1,in,double,clFFT2D_2.6,211.648
 8 | 128,128,1,2048,g,1,1,in,double,clFFT2D_2.6,210.676
 9 | 256,256,1,512,g,1,1,in,double,clFFT2D_2.6,112.635
10 | 512,512,1,128,g,1,1,in,double,clFFT2D_2.6,123.928
11 | 1024,1024,1,32,g,1,1,in,double,clFFT2D_2.6,144.293
12 | 2048,2048,1,8,g,1,1,in,double,clFFT2D_2.6,152.674
13 | 4096,4096,1,2,g,1,1,in,double,clFFT2D_2.6,119.07
14 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_2D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,1,8388608,g,1,1,in,single,clFFT2D_2.6,73.8863
 3 | 4,4,1,2097152,g,1,1,in,single,clFFT2D_2.6,154.754
 4 | 8,8,1,524288,g,1,1,in,single,clFFT2D_2.6,243.805
 5 | 16,16,1,131072,g,1,1,in,single,clFFT2D_2.6,328.784
 6 | 32,32,1,32768,g,1,1,in,single,clFFT2D_2.6,379.601
 7 | 64,64,1,8192,g,1,1,in,single,clFFT2D_2.6,314.629
 8 | 128,128,1,2048,g,1,1,in,single,clFFT2D_2.6,360.231
 9 | 256,256,1,512,g,1,1,in,single,clFFT2D_2.6,166.78
10 | 512,512,1,128,g,1,1,in,single,clFFT2D_2.6,297.307
11 | 1024,1024,1,32,g,1,1,in,single,clFFT2D_2.6,304.312
12 | 2048,2048,1,8,g,1,1,in,single,clFFT2D_2.6,318.457
13 | 4096,4096,1,2,g,1,1,in,single,clFFT2D_2.6,308.812
14 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_2D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,1,4194304,g,5,3,in,double,clFFT2D_2.6,0.0441201
 3 | 4,4,1,1048576,g,5,3,in,double,clFFT2D_2.6,0.33538
 4 | 8,8,1,262144,g,5,3,in,double,clFFT2D_2.6,2.028745
 5 | 16,16,1,65536,g,5,3,in,double,clFFT2D_2.6,10.6405
 6 | 32,32,1,16384,g,5,3,in,double,clFFT2D_2.6,40.02365
 7 | 64,64,1,4096,g,5,3,in,double,clFFT2D_2.6,85.764
 8 | 128,128,1,1024,g,5,3,in,double,clFFT2D_2.6,99.798
 9 | 256,256,1,256,g,5,3,in,double,clFFT2D_2.6,142.3255
10 | 512,512,1,64,g,5,3,in,double,clFFT2D_2.6,162.6435
11 | 1024,1024,1,16,g,5,3,in,double,clFFT2D_2.6,163.1355
12 | 2048,2048,1,4,g,5,3,in,double,clFFT2D_2.6,158.7915
13 | 4096,4096,1,1,g,5,3,in,double,clFFT2D_2.6,91.1925
14 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_2D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,2,1,4194304,g,5,3,in,single,clFFT2D_2.6,0.04122405
 3 | 4,4,1,1048576,g,5,3,in,single,clFFT2D_2.6,0.314967
 4 | 8,8,1,262144,g,5,3,in,single,clFFT2D_2.6,1.886985
 5 | 16,16,1,65536,g,5,3,in,single,clFFT2D_2.6,9.93125
 6 | 32,32,1,16384,g,5,3,in,single,clFFT2D_2.6,43.75745
 7 | 64,64,1,4096,g,5,3,in,single,clFFT2D_2.6,158.5295
 8 | 128,128,1,1024,g,5,3,in,single,clFFT2D_2.6,179.4235
 9 | 256,256,1,256,g,5,3,in,single,clFFT2D_2.6,266.48
10 | 512,512,1,64,g,5,3,in,single,clFFT2D_2.6,320.154
11 | 1024,1024,1,16,g,5,3,in,single,clFFT2D_2.6,360.564
12 | 2048,2048,1,4,g,5,3,in,single,clFFT2D_2.6,388.305
13 | 4096,4096,1,1,g,5,3,in,single,clFFT2D_2.6,318.994
14 | 


--------------------------------------------------------------------------------
/src/tests/c-compliance.c:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #include "clFFT.h"
19 | 


--------------------------------------------------------------------------------
/src/library/action.transpose.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | #pragma once
18 | #if !defined( AMD_CLFFT_action_transpose_H )
19 | #define AMD_CLFFT_action_transpose_H
20 | #include "private.h"
21 | #include "repo.h"
22 | #include "plan.h"
23 | 
24 | #endif
25 | 
26 | 


--------------------------------------------------------------------------------
/src/library/generator.transpose.gcn.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | #pragma once
18 | #if !defined( AMD_CLFFT_generator_transpose_H )
19 | #define AMD_CLFFT_generator_transpose_H
20 | #include "private.h"
21 | #include "repo.h"
22 | #include "plan.h"
23 | 
24 | #endif
25 | 
26 | 


--------------------------------------------------------------------------------
/src/include/clFFT.version.h.in:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | /* the configured version and settings for clFFT
19 |  */
20 | #define clfftVersionMajor @clFFT_VERSION_MAJOR@
21 | #define clfftVersionMinor @clFFT_VERSION_MINOR@
22 | #define clfftVersionPatch @clFFT_VERSION_PATCH@
23 | 
24 | #cmakedefine CLFFT_STATIC
25 | 


--------------------------------------------------------------------------------
/src/library/stdafx.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // stdafx.cpp : source file that includes just the standard includes
19 | // clFFT.pch will be the pre-compiled header
20 | // stdafx.obj will contain the pre-compiled type information
21 | 
22 | #include "stdafx.h"
23 | 
24 | // Reference any additional headers you need in STDAFX.H and not in this file
25 | 


--------------------------------------------------------------------------------
/src/statTimer/stdafx.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // stdafx.cpp : source file that includes just the standard includes
19 | // clfft.pch will be the pre-compiled header
20 | // stdafx.obj will contain the pre-compiled type information
21 | 
22 | #include "stdafx.h"
23 | 
24 | // Reference any additional headers you need in STDAFX.H and not in this file
25 | 


--------------------------------------------------------------------------------
/src/client/stdafx.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // stdafx.cpp : source file that includes just the standard includes
19 | // clFFT.pch will be the pre-compiled header
20 | // stdafx.obj will contain the pre-compiled type information
21 | 
22 | #include "stdafx.h"
23 | 
24 | // TODO: reference any additional headers you need in STDAFX.H
25 | // and not in this file
26 | 


--------------------------------------------------------------------------------
/src/callback-client/stdafx.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // stdafx.cpp : source file that includes just the standard includes
19 | // clFFT.pch will be the pre-compiled header
20 | // stdafx.obj will contain the pre-compiled type information
21 | 
22 | #include "stdafx.h"
23 | 
24 | // TODO: reference any additional headers you need in STDAFX.H
25 | // and not in this file
26 | 


--------------------------------------------------------------------------------
/src/include/convenienceFunctions.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | /*****************************************************/
//	Packs the leading bytes of the object representation of 'a' into an unsigned int.
//	For a 32-bit float this yields the raw bit pattern of the value.
//
//	The bytes are copied through unsigned char pointers rather than by dereferencing
//	a casted pointer: character-type access is always permitted by the aliasing rules,
//	and bounding the copy by the smaller of the two sizes means we never read past the
//	end of 'a' when T is narrower than unsigned int (the untouched bytes of the result
//	stay zero).
template< typename T >
unsigned int float_as_hex( T a ) {
	unsigned int bits = 0;

	const unsigned char* from = reinterpret_cast< const unsigned char* >( &a );
	unsigned char* to = reinterpret_cast< unsigned char* >( &bits );

	//	Copy no more than either object can hold
	const unsigned int count = static_cast< unsigned int >(
		sizeof( T ) < sizeof( bits ) ? sizeof( T ) : sizeof( bits ) );

	for( unsigned int i = 0; i < count; ++i )
		to[ i ] = from[ i ];

	return bits;
}
23 | 
24 | /*****************************************************/
//	Builds a T whose leading object-representation bytes are the bytes of 'a'.
//	For T = float this reinterprets a 32-bit pattern as the float it encodes.
//
//	The bytes are copied through unsigned char pointers rather than by dereferencing
//	a casted pointer: character-type access is always permitted by the aliasing rules,
//	and bounding the copy by the smaller of the two sizes means we never read past the
//	end of 'a' when T is wider than unsigned int (the remaining bytes of the result
//	keep their value-initialized, zero, contents).
template< typename T >
T hex_as_float( unsigned int a ) {
	T value = T( );

	const unsigned char* from = reinterpret_cast< const unsigned char* >( &a );
	unsigned char* to = reinterpret_cast< unsigned char* >( &value );

	//	Copy no more than either object can hold
	const unsigned int count = static_cast< unsigned int >(
		sizeof( T ) < sizeof( a ) ? sizeof( T ) : sizeof( a ) );

	for( unsigned int i = 0; i < count; ++i )
		to[ i ] = from[ i ];

	return value;
}


--------------------------------------------------------------------------------
/src/statTimer/targetver.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | #pragma once
18 | 
19 | // Including SDKDDKVer.h defines the highest available Windows platform.
20 | 
21 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
22 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
23 | 
24 | #include <SDKDDKVer.h>
25 | 


--------------------------------------------------------------------------------
/src/include/targetver.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #pragma once
19 | 
20 | // Including SDKDDKVer.h defines the highest available Windows platform.
21 | 
22 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
23 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
24 | 
25 | #include <SDKDDKVer.h>
26 | 


--------------------------------------------------------------------------------
/src/scripts/perf/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2013 Advanced Micro Devices, Inc.
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
# Python scripts gathered under one name; the only use visible in this file is the
# install() rule that follows, which is currently commented out.
set( GRAPHING_SCRIPTS
	measurePerformance.py
	plotPerformance.py
	fftPerformanceTesting.py
	errorHandler.py
	performanceUtility.py
)
23 | # if( WIN32 )
24 | # 	install( FILES ${GRAPHING_SCRIPTS} DESTINATION bin${CLFFT_SUFFIX_BIN} )
25 | # else ( )
26 | # 	install( FILES ${GRAPHING_SCRIPTS} DESTINATION share/clFFT )
27 | # endif( )
28 | 
29 | 


--------------------------------------------------------------------------------
/src/library/dllmain.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // dllmain.cpp : Defines the entry point for the DLL application.
19 | #include "stdafx.h"
20 | 
21 | BOOL APIENTRY DllMain( HMODULE hModule,
22 | 					   DWORD  ul_reason_for_call,
23 | 					   LPVOID lpReserved
24 | 					 )
25 | {
26 | 	switch (ul_reason_for_call)
27 | 	{
28 | 	case DLL_PROCESS_ATTACH:
29 | 	case DLL_THREAD_ATTACH:
30 | 	case DLL_THREAD_DETACH:
31 | 	case DLL_PROCESS_DETACH:
32 | 		break;
33 | 	}
34 | 	return TRUE;
35 | }
36 | 
37 | 


--------------------------------------------------------------------------------
/src/statTimer/dllmain.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | // dllmain.cpp : Defines the entry point for the DLL application.
18 | #include "stdafx.h"
19 | 
20 | BOOL APIENTRY DllMain( HMODULE hModule,
21 |                        DWORD  ul_reason_for_call,
22 |                        LPVOID lpReserved
23 | 					 )
24 | {
25 | 	switch (ul_reason_for_call)
26 | 	{
27 | 	case DLL_PROCESS_ATTACH:
28 | 	case DLL_THREAD_ATTACH:
29 | 	case DLL_THREAD_DETACH:
30 | 	case DLL_PROCESS_DETACH:
31 | 		break;
32 | 	}
33 | 	return TRUE;
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/src/tests/typedefs.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #pragma once
19 | #if !defined( CLFFT_TYPEDEFS_H )
20 | #define CLFFT_TYPEDEFS_H
21 | 
22 | #include "test_constants.h"
23 | #include "fftw_transform.h"
24 | #include "cl_transform.h"
25 | 
// Shorthand names for the float and double instantiations of the clfft, buffer
// and fftw class templates, so callers can pick a precision by name.

// clfft template bound to float / cl_float and to double / cl_double
typedef clfft<float, cl_float> clfft_single;
typedef clfft<double, cl_double> clfft_double;
// buffer template bound to float and to double elements
typedef buffer<float> buffer_single;
typedef buffer<double> buffer_double;
// fftw template bound to float / fftwf_complex and to double / fftw_complex
typedef fftw<float, fftwf_complex> fftw_single;
typedef fftw<double, fftw_complex> fftw_double;
32 | 
33 | #endif


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_1D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,1,1,8388608,g,5,3,in,double,clFFT1D_2.6,14.0024
 3 | 4,1,1,4194304,g,5,3,in,double,clFFT1D_2.6,35.5083
 4 | 8,1,1,2097152,g,5,3,in,double,clFFT1D_2.6,82.012
 5 | 16,1,1,1048576,g,5,3,in,double,clFFT1D_2.6,107.0085
 6 | 32,1,1,524288,g,5,3,in,double,clFFT1D_2.6,128.5305
 7 | 64,1,1,262144,g,5,3,in,double,clFFT1D_2.6,224.9175
 8 | 128,1,1,131072,g,5,3,in,double,clFFT1D_2.6,217.2845
 9 | 256,1,1,65536,g,5,3,in,double,clFFT1D_2.6,318.731
10 | 512,1,1,32768,g,5,3,in,double,clFFT1D_2.6,302.7885
11 | 1024,1,1,16384,g,5,3,in,double,clFFT1D_2.6,270.694
12 | 2048,1,1,8192,g,5,3,in,double,clFFT1D_2.6,244.8945
13 | 4096,1,1,4096,g,5,3,in,double,clFFT1D_2.6,96.934
14 | 8192,1,1,2048,g,5,3,in,double,clFFT1D_2.6,51.7095
15 | 16384,1,1,1024,g,5,3,in,double,clFFT1D_2.6,65.2255
16 | 32768,1,1,512,g,5,3,in,double,clFFT1D_2.6,76.0235
17 | 65536,1,1,256,g,5,3,in,double,clFFT1D_2.6,93.041
18 | 131072,1,1,128,g,5,3,in,double,clFFT1D_2.6,97.4785
19 | 262144,1,1,64,g,5,3,in,double,clFFT1D_2.6,98.3995
20 | 524288,1,1,32,g,5,3,in,double,clFFT1D_2.6,100.0525
21 | 1048576,1,1,16,g,5,3,in,double,clFFT1D_2.6,94.2235
22 | 2097152,1,1,8,g,5,3,in,double,clFFT1D_2.6,92.7895
23 | 4194304,1,1,4,g,5,3,in,double,clFFT1D_2.6,89.985
24 | 


--------------------------------------------------------------------------------
/src/library/generator.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #pragma once
19 | #if !defined( AMD_CLFFT_generator_H )
20 | #define AMD_CLFFT_generator_H
21 | 
//	Enum to help provide descriptive names to array indices, when indexing into our various vectors.
//	No enumerator has an explicit value, so they are numbered consecutively from 0 and
//	ENDGENERATORS ends up equal to the number of generators listed before it.
enum clfftGenerators
{
    Stockham, // Using the Stockham autosort frameworks
    Transpose_GCN,
    Transpose_SQUARE,
    Transpose_NONSQUARE,
    Copy,
    ENDGENERATORS			///< This value will always be last, and marks the length of clfftGenerators
};
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/R2C_1D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,1,1,8388608,g,5,3,in,double,cuFFT1D,0.906726
 3 | 4,1,1,4194304,g,5,3,in,double,cuFFT1D,3.146765
 4 | 8,1,1,2097152,g,5,3,in,double,cuFFT1D,19.294895
 5 | 16,1,1,1048576,g,5,3,in,double,cuFFT1D,32.758462
 6 | 32,1,1,524288,g,5,3,in,double,cuFFT1D,45.237741
 7 | 64,1,1,262144,g,5,3,in,double,cuFFT1D,55.76315
 8 | 128,1,1,131072,g,5,3,in,double,cuFFT1D,66.019701
 9 | 256,1,1,65536,g,5,3,in,double,cuFFT1D,75.670117
10 | 512,1,1,32768,g,5,3,in,double,cuFFT1D,86.578216
11 | 1024,1,1,16384,g,5,3,in,double,cuFFT1D,95.679203
12 | 2048,1,1,8192,g,5,3,in,double,cuFFT1D,104.06715
13 | 4096,1,1,4096,g,5,3,in,double,cuFFT1D,107.265346
14 | 8192,1,1,2048,g,5,3,in,double,cuFFT1D,88.545477
15 | 16384,1,1,1024,g,5,3,in,double,cuFFT1D,89.413243
16 | 32768,1,1,512,g,5,3,in,double,cuFFT1D,94.809626
17 | 65536,1,1,256,g,5,3,in,double,cuFFT1D,96.783663
18 | 131072,1,1,128,g,5,3,in,double,cuFFT1D,92.781627
19 | 262144,1,1,64,g,5,3,in,double,cuFFT1D,97.037165
20 | 524288,1,1,32,g,5,3,in,double,cuFFT1D,101.732314
21 | 1048576,1,1,16,g,5,3,in,double,cuFFT1D,102.10937
22 | 2097152,1,1,8,g,5,3,in,double,cuFFT1D,102.590091
23 | 4194304,1,1,4,g,5,3,in,double,cuFFT1D,106.90631
24 | 8388608,1,1,2,g,5,3,in,double,cuFFT1D,112.769815
25 | 16777216,1,1,1,g,5,3,in,double,cuFFT1D,110.213005
26 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/R2C_1D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,1,1,8388608,g,5,3,in,single,cuFFT1D,1.732317
 3 | 4,1,1,4194304,g,5,3,in,single,cuFFT1D,6.739837
 4 | 8,1,1,2097152,g,5,3,in,single,cuFFT1D,46.082513
 5 | 16,1,1,1048576,g,5,3,in,single,cuFFT1D,67.164278
 6 | 32,1,1,524288,g,5,3,in,single,cuFFT1D,88.070397
 7 | 64,1,1,262144,g,5,3,in,single,cuFFT1D,108.72499
 8 | 128,1,1,131072,g,5,3,in,single,cuFFT1D,129.659424
 9 | 256,1,1,65536,g,5,3,in,single,cuFFT1D,151.242882
10 | 512,1,1,32768,g,5,3,in,single,cuFFT1D,168.221379
11 | 1024,1,1,16384,g,5,3,in,single,cuFFT1D,187.566483
12 | 2048,1,1,8192,g,5,3,in,single,cuFFT1D,204.435883
13 | 4096,1,1,4096,g,5,3,in,single,cuFFT1D,222.020044
14 | 8192,1,1,2048,g,5,3,in,single,cuFFT1D,241.883832
15 | 16384,1,1,1024,g,5,3,in,single,cuFFT1D,214.359681
16 | 32768,1,1,512,g,5,3,in,single,cuFFT1D,204.094289
17 | 65536,1,1,256,g,5,3,in,single,cuFFT1D,216.740851
18 | 131072,1,1,128,g,5,3,in,single,cuFFT1D,219.115177
19 | 262144,1,1,64,g,5,3,in,single,cuFFT1D,222.507641
20 | 524288,1,1,32,g,5,3,in,single,cuFFT1D,206.73413
21 | 1048576,1,1,16,g,5,3,in,single,cuFFT1D,217.324265
22 | 2097152,1,1,8,g,5,3,in,single,cuFFT1D,215.676848
23 | 4194304,1,1,4,g,5,3,in,single,cuFFT1D,228.664256
24 | 8388608,1,1,2,g,5,3,in,single,cuFFT1D,239.209548
25 | 16777216,1,1,1,g,5,3,in,single,cuFFT1D,257.114237
26 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/C2C_1D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,1,1,16777216,g,1,1,in,single,cuFFT1D,57.417066
 3 | 4,1,1,8388608,g,1,1,in,single,cuFFT1D,112.284839
 4 | 8,1,1,4194304,g,1,1,in,single,cuFFT1D,167.42753
 5 | 16,1,1,2097152,g,1,1,in,single,cuFFT1D,223.01212
 6 | 32,1,1,1048576,g,1,1,in,single,cuFFT1D,280.25624
 7 | 64,1,1,524288,g,1,1,in,single,cuFFT1D,340.974729
 8 | 128,1,1,262144,g,1,1,in,single,cuFFT1D,393.559297
 9 | 256,1,1,131072,g,1,1,in,single,cuFFT1D,443.052168
10 | 512,1,1,65536,g,1,1,in,single,cuFFT1D,508.837809
11 | 1024,1,1,32768,g,1,1,in,single,cuFFT1D,565.195013
12 | 2048,1,1,16384,g,1,1,in,single,cuFFT1D,615.589675
13 | 4096,1,1,8192,g,1,1,in,single,cuFFT1D,686.664408
14 | 8192,1,1,4096,g,1,1,in,single,cuFFT1D,453.598643
15 | 16384,1,1,2048,g,1,1,in,single,cuFFT1D,400.493769
16 | 32768,1,1,1024,g,1,1,in,single,cuFFT1D,422.59182
17 | 65536,1,1,512,g,1,1,in,single,cuFFT1D,420.857829
18 | 131072,1,1,256,g,1,1,in,single,cuFFT1D,397.358856
19 | 262144,1,1,128,g,1,1,in,single,cuFFT1D,334.527752
20 | 524288,1,1,64,g,1,1,in,single,cuFFT1D,351.140673
21 | 1048576,1,1,32,g,1,1,in,single,cuFFT1D,359.615475
22 | 2097152,1,1,16,g,1,1,in,single,cuFFT1D,376.128268
23 | 4194304,1,1,8,g,1,1,in,single,cuFFT1D,393.90095
24 | 8388608,1,1,4,g,1,1,in,single,cuFFT1D,403.230912
25 | 16777216,1,1,2,g,1,1,in,single,cuFFT1D,422.502973
26 | 


--------------------------------------------------------------------------------
/docs/performance/cuFFT_7.0/Tesla_K40/C2C_1D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,1,1,16777216,g,1,1,in,double,cuFFT1D,27.724106
 3 | 4,1,1,8388608,g,1,1,in,double,cuFFT1D,57.747175
 4 | 8,1,1,4194304,g,1,1,in,double,cuFFT1D,88.106149
 5 | 16,1,1,2097152,g,1,1,in,double,cuFFT1D,117.536091
 6 | 32,1,1,1048576,g,1,1,in,double,cuFFT1D,146.175977
 7 | 64,1,1,524288,g,1,1,in,double,cuFFT1D,174.841499
 8 | 128,1,1,262144,g,1,1,in,double,cuFFT1D,204.371887
 9 | 256,1,1,131072,g,1,1,in,double,cuFFT1D,237.097699
10 | 512,1,1,65536,g,1,1,in,double,cuFFT1D,266.799358
11 | 1024,1,1,32768,g,1,1,in,double,cuFFT1D,289.812625
12 | 2048,1,1,16384,g,1,1,in,double,cuFFT1D,268.214622
13 | 4096,1,1,8192,g,1,1,in,double,cuFFT1D,168.595754
14 | 8192,1,1,4096,g,1,1,in,double,cuFFT1D,165.250054
15 | 16384,1,1,2048,g,1,1,in,double,cuFFT1D,174.020168
16 | 32768,1,1,1024,g,1,1,in,double,cuFFT1D,174.773738
17 | 65536,1,1,512,g,1,1,in,double,cuFFT1D,155.236793
18 | 131072,1,1,256,g,1,1,in,double,cuFFT1D,161.97944
19 | 262144,1,1,128,g,1,1,in,double,cuFFT1D,165.983672
20 | 524288,1,1,64,g,1,1,in,double,cuFFT1D,173.099901
21 | 1048576,1,1,32,g,1,1,in,double,cuFFT1D,170.025358
22 | 2097152,1,1,16,g,1,1,in,double,cuFFT1D,179.184631
23 | 4194304,1,1,8,g,1,1,in,double,cuFFT1D,181.914462
24 | 8388608,1,1,4,g,1,1,in,double,cuFFT1D,156.583882
25 | 16777216,1,1,2,g,1,1,in,double,cuFFT1D,163.086671
26 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_1D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,1,1,16777216,g,1,1,in,single,clFFT1D_2.6,76.9949
 3 | 4,1,1,8388608,g,1,1,in,single,clFFT1D_2.6,152.554
 4 | 8,1,1,4194304,g,1,1,in,single,clFFT1D_2.6,200.656
 5 | 16,1,1,2097152,g,1,1,in,single,clFFT1D_2.6,270.405
 6 | 32,1,1,1048576,g,1,1,in,single,clFFT1D_2.6,314.084
 7 | 64,1,1,524288,g,1,1,in,single,clFFT1D_2.6,490.901
 8 | 128,1,1,262144,g,1,1,in,single,clFFT1D_2.6,599.706
 9 | 256,1,1,131072,g,1,1,in,single,clFFT1D_2.6,671.885
10 | 512,1,1,65536,g,1,1,in,single,clFFT1D_2.6,787.181
11 | 1024,1,1,32768,g,1,1,in,single,clFFT1D_2.6,840.822
12 | 2048,1,1,16384,g,1,1,in,single,clFFT1D_2.6,869.563
13 | 4096,1,1,8192,g,1,1,in,single,clFFT1D_2.6,580.775
14 | 8192,1,1,4096,g,1,1,in,single,clFFT1D_2.6,398.025
15 | 16384,1,1,2048,g,1,1,in,single,clFFT1D_2.6,451.457
16 | 32768,1,1,1024,g,1,1,in,single,clFFT1D_2.6,459.492
17 | 65536,1,1,512,g,1,1,in,single,clFFT1D_2.6,417.588
18 | 131072,1,1,256,g,1,1,in,single,clFFT1D_2.6,401.075
19 | 262144,1,1,128,g,1,1,in,single,clFFT1D_2.6,345.829
20 | 524288,1,1,64,g,1,1,in,single,clFFT1D_2.6,372.671
21 | 1048576,1,1,32,g,1,1,in,single,clFFT1D_2.6,361.599
22 | 2097152,1,1,16,g,1,1,in,single,clFFT1D_2.6,335.494
23 | 4194304,1,1,8,g,1,1,in,single,clFFT1D_2.6,347.831
24 | 8388608,1,1,4,g,1,1,in,single,clFFT1D_2.6,324.39
25 | 16777216,1,1,2,g,1,1,in,single,clFFT1D_2.6,310.083
26 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/C2C_1D_double.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,1,1,16777216,g,1,1,in,double,clFFT1D_2.6,40.0275
 3 | 4,1,1,8388608,g,1,1,in,double,clFFT1D_2.6,81.2218
 4 | 8,1,1,4194304,g,1,1,in,double,clFFT1D_2.6,122.268
 5 | 16,1,1,2097152,g,1,1,in,double,clFFT1D_2.6,146.554
 6 | 32,1,1,1048576,g,1,1,in,double,clFFT1D_2.6,142.564
 7 | 64,1,1,524288,g,1,1,in,double,clFFT1D_2.6,255.158
 8 | 128,1,1,262144,g,1,1,in,double,clFFT1D_2.6,226.868
 9 | 256,1,1,131072,g,1,1,in,double,clFFT1D_2.6,339.785
10 | 512,1,1,65536,g,1,1,in,double,clFFT1D_2.6,380.501
11 | 1024,1,1,32768,g,1,1,in,double,clFFT1D_2.6,353.783
12 | 2048,1,1,16384,g,1,1,in,double,clFFT1D_2.6,355.406
13 | 4096,1,1,8192,g,1,1,in,double,clFFT1D_2.6,220.831
14 | 8192,1,1,4096,g,1,1,in,double,clFFT1D_2.6,221.184
15 | 16384,1,1,2048,g,1,1,in,double,clFFT1D_2.6,250.442
16 | 32768,1,1,1024,g,1,1,in,double,clFFT1D_2.6,215.115
17 | 65536,1,1,512,g,1,1,in,double,clFFT1D_2.6,196.085
18 | 131072,1,1,256,g,1,1,in,double,clFFT1D_2.6,177.969
19 | 262144,1,1,128,g,1,1,in,double,clFFT1D_2.6,183.749
20 | 524288,1,1,64,g,1,1,in,double,clFFT1D_2.6,175.629
21 | 1048576,1,1,32,g,1,1,in,double,clFFT1D_2.6,146.653
22 | 2097152,1,1,16,g,1,1,in,double,clFFT1D_2.6,144.641
23 | 4194304,1,1,8,g,1,1,in,double,clFFT1D_2.6,147.301
24 | 8388608,1,1,4,g,1,1,in,double,clFFT1D_2.6,115.146
25 | 16777216,1,1,2,g,1,1,in,double,clFFT1D_2.6,110.471
26 | 


--------------------------------------------------------------------------------
/docs/performance/clFFT_2.6.0/FirePro_W9100/R2C_1D_single.csv:
--------------------------------------------------------------------------------
 1 | lengthx,lengthy,lengthz,batch,device,inlay,outlay,place,precision,label,GFLOPS
 2 | 2,1,1,8388608,g,5,3,in,single,clFFT1D_2.6,34.1305
 3 | 4,1,1,4194304,g,5,3,in,single,clFFT1D_2.6,90.9585
 4 | 8,1,1,2097152,g,5,3,in,single,clFFT1D_2.6,125.277
 5 | 16,1,1,1048576,g,5,3,in,single,clFFT1D_2.6,219.2965
 6 | 32,1,1,524288,g,5,3,in,single,clFFT1D_2.6,194.6665
 7 | 64,1,1,262144,g,5,3,in,single,clFFT1D_2.6,431.73
 8 | 128,1,1,131072,g,5,3,in,single,clFFT1D_2.6,445.3705
 9 | 256,1,1,65536,g,5,3,in,single,clFFT1D_2.6,638.46
10 | 512,1,1,32768,g,5,3,in,single,clFFT1D_2.6,668.6
11 | 1024,1,1,16384,g,5,3,in,single,clFFT1D_2.6,738.26
12 | 2048,1,1,8192,g,5,3,in,single,clFFT1D_2.6,783.31
13 | 4096,1,1,4096,g,5,3,in,single,clFFT1D_2.6,487.8415
14 | 8192,1,1,2048,g,5,3,in,single,clFFT1D_2.6,112.0415
15 | 16384,1,1,1024,g,5,3,in,single,clFFT1D_2.6,134.027
16 | 32768,1,1,512,g,5,3,in,single,clFFT1D_2.6,163.1195
17 | 65536,1,1,256,g,5,3,in,single,clFFT1D_2.6,195.164
18 | 131072,1,1,128,g,5,3,in,single,clFFT1D_2.6,210.9695
19 | 262144,1,1,64,g,5,3,in,single,clFFT1D_2.6,215.927
20 | 524288,1,1,32,g,5,3,in,single,clFFT1D_2.6,225.0265
21 | 1048576,1,1,16,g,5,3,in,single,clFFT1D_2.6,187.0255
22 | 2097152,1,1,8,g,5,3,in,single,clFFT1D_2.6,194.0675
23 | 4194304,1,1,4,g,5,3,in,single,clFFT1D_2.6,193.9135
24 | 8388608,1,1,2,g,5,3,in,single,clFFT1D_2.6,182.1115
25 | 16777216,1,1,1,g,5,3,in,single,clFFT1D_2.6,170.7285
26 | 


--------------------------------------------------------------------------------
/src/include/clAmdFft.version.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
/*! @file clAmdFft.version.h
 * @note clAmdFft.version.h is a deprecated header file.
 * This header is provided to help projects that were written with the older clAmdFft codebase, to help them
 * port to the new API at their own schedule.  It will not be maintained or updated, and will be removed after
 * a reasonable amount of time has passed.  All new code should be written against clFFT.h.
 * Older projects should migrate to the new header at their earliest convenience.
 */

/* The configured version and settings for clFFT, exposed under the legacy clAmdFft names
 * as three separate components: major, minor and patch.
 */
#define clAmdFftVersionMajor 2
#define clAmdFftVersionMinor 0
#define clAmdFftVersionPatch 0
30 | 


--------------------------------------------------------------------------------
/src/statTimer/statisticalTimer.extern.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // StatTimer.cpp : Defines the exported functions for the DLL application.
19 | //
20 | 
21 | #include "stdafx.h"
22 | #include "statisticalTimer.extern.h"
23 | #include "statisticalTimer.CPU.h"
24 | #include "statisticalTimer.GPU.h"
25 | 
26 | //	Even though the individual getInstance functions of the timer classes return references,
27 | //	we convert those to pointers before returning from here so that the clients can initialize
28 | //	their local variables to NULL, which refernces do not allow.
29 | baseStatTimer* getStatTimer( const clfftTimerType type )
30 | {
31 | 	if( type == CLFFT_CPU )
32 | 		return	&CpuStatTimer::getInstance( );
33 | 
34 | 	return	&GpuStatTimer::getInstance( );
35 | }
36 | 


--------------------------------------------------------------------------------
/src/tests/buffer_memory.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #include <stdint.h>
19 | 
20 | /*****************************************************/
21 | /*****************************************************/
//	Returns the raw bit pattern of a float as a 32-bit unsigned integer.
//	The bytes are copied through unsigned char pointers instead of dereferencing a
//	casted pointer, which keeps the access within the language's aliasing rules.
//	The copy length is bounded by the smaller of the two objects so neither is overrun.
uint32_t float_as_hex( float a ) {
	uint32_t bits = 0;

	const unsigned char* from = reinterpret_cast< const unsigned char* >( &a );
	unsigned char* to = reinterpret_cast< unsigned char* >( &bits );

	const unsigned int count = static_cast< unsigned int >(
		sizeof( a ) < sizeof( bits ) ? sizeof( a ) : sizeof( bits ) );

	for( unsigned int i = 0; i < count; ++i )
		to[ i ] = from[ i ];

	return bits;
}
25 | 
26 | /*****************************************************/
27 | /*****************************************************/
//	Returns the raw bit pattern of a double as a 64-bit unsigned integer.
//	The bytes are copied through unsigned char pointers instead of dereferencing a
//	casted pointer, which keeps the access within the language's aliasing rules.
//	The copy length is bounded by the smaller of the two objects so neither is overrun.
uint64_t float_as_hex( double a ) {
	uint64_t bits = 0;

	const unsigned char* from = reinterpret_cast< const unsigned char* >( &a );
	unsigned char* to = reinterpret_cast< unsigned char* >( &bits );

	const unsigned int count = static_cast< unsigned int >(
		sizeof( a ) < sizeof( bits ) ? sizeof( a ) : sizeof( bits ) );

	for( unsigned int i = 0; i < count; ++i )
		to[ i ] = from[ i ];

	return bits;
}
31 | 
32 | /*****************************************************/
33 | /*****************************************************/
//	Returns a 32-bit pattern with every bit set. The float argument only selects
//	this overload; its value is not used.
uint32_t nan_as_hex( float a ) {
	(void)a;
	return ~static_cast< uint32_t >( 0 );
}
38 | 
39 | /*****************************************************/
40 | /*****************************************************/
//	Returns a 64-bit pattern with every bit set. The double argument only selects
//	this overload; its value is not used.
uint64_t nan_as_hex( double a ) {
	(void)a;
	return ~static_cast< uint64_t >( 0 );
}


--------------------------------------------------------------------------------
/src/clFFTConfigVersion.cmake.in:
--------------------------------------------------------------------------------
 1 | # This is a basic version file for the Config-mode of find_package().
 2 | # It is used by write_basic_package_version_file() as input file for configure_file()
 3 | # to create a version-file which can be installed along a config.cmake file.
 4 | #
 5 | # The created file sets PACKAGE_VERSION_EXACT if the current version string and
 6 | # the requested version string are exactly the same and it sets
 7 | # PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
 8 | # but only if the requested major version is the same as the current one.
 9 | 
10 | 
set(PACKAGE_VERSION "@clFFT_VERSION@")

# Major component of the installed version, parsed from PACKAGE_VERSION itself.
# The previous check compared against ${clFFT_VERSION_MAJOR}, a variable of the
# clFFT build tree that is not defined when a consuming project evaluates this
# file, so a versioned find_package(clFFT <ver>) could not be reported as
# compatible.
if("${PACKAGE_VERSION}" MATCHES "^([0-9]+)\\.")
  set(_clFFT_installed_major "${CMAKE_MATCH_1}")
else()
  set(_clFFT_installed_major "${PACKAGE_VERSION}")
endif()

if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}" )
  # Installed version is older than the requested one.
  set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
  # Newer or equal: compatible only within the same major version.
  if("${PACKAGE_FIND_VERSION_MAJOR}" STREQUAL "${_clFFT_installed_major}")
    set(PACKAGE_VERSION_COMPATIBLE TRUE)
  else()
    set(PACKAGE_VERSION_COMPATIBLE FALSE)
  endif()

  if( "${PACKAGE_FIND_VERSION}" STREQUAL "${PACKAGE_VERSION}")
      set(PACKAGE_VERSION_EXACT TRUE)
  endif()
endif()

# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
if("${CMAKE_SIZEOF_VOID_P}"  STREQUAL ""  OR "@CMAKE_SIZEOF_VOID_P@" STREQUAL "")
   return()
endif()

# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
if(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "@CMAKE_SIZEOF_VOID_P@")
  math(EXPR installedBits "@CMAKE_SIZEOF_VOID_P@ * 8")
  set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
  set(PACKAGE_VERSION_UNSUITABLE TRUE)
endif()
38 | 


--------------------------------------------------------------------------------
/src/statTimer/stdafx.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // stdafx.h : include file for standard system include files,
19 | // or project specific include files that are used frequently, but
20 | // are changed infrequently
21 | //
22 | 
23 | #pragma once
24 | 
25 | #define _CRT_SECURE_NO_WARNINGS
26 | 
27 | //#include <iostream>
28 | //#include <sstream>
29 | //#include <fstream>
30 | //#include <iomanip>
31 | //#include <cstring>
32 | //#include <memory>
33 | #include <vector>
34 | //#include <cstring>
35 | //#include <stdarg.h>
36 | #include <assert.h>
37 | //#include <complex>
38 | 
39 | //	_WIN32 is defined for both 32 & 64 bit environments
40 | #if defined( _WIN32 )
41 | //	#include <tchar.h>
42 | 	#include "targetver.h"
43 | 
44 | #if !defined( NOMINMAX )
45 | 	#define NOMINMAX
46 | #endif
47 | 
48 | 	#define WIN32_LEAN_AND_MEAN			// Exclude rarely-used stuff from Windows headers
49 | 	// Windows Header Files:
50 | 	#include <windows.h>
51 | #endif
52 | 


--------------------------------------------------------------------------------
/src/include/stdafx.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // stdafx.h : include file for standard system include files,
19 | // or project specific include files that are used frequently, but
20 | // are changed infrequently
21 | //
22 | 
23 | #pragma once
24 | 
25 | #define _CRT_SECURE_NO_WARNINGS
26 | 
27 | #include <iostream>
28 | #include <sstream>
29 | #include <fstream>
30 | #include <iomanip>
31 | #include <cstring>
32 | #include <memory>
33 | #include <vector>
34 | #include <valarray>
35 | #include <cstring>
36 | #include <stdarg.h>
37 | #include <assert.h>
38 | #include <complex>
39 | 
40 | //	_WIN32 is defined for both 32 & 64 bit environments
41 | #if defined( _WIN32 )
42 | 	#include <tchar.h>
43 | 	#include "targetver.h"
44 | 
45 | #if !defined( NOMINMAX )
46 | 	#define NOMINMAX
47 | #endif
48 | 
49 |     #define WIN32_LEAN_AND_MEAN			// Exclude rarely-used stuff from Windows headers
50 | 	// Windows Header Files:
51 | 	#include <windows.h>
52 | #endif
53 | 


--------------------------------------------------------------------------------
/src/library/md5sum.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
 3 |  * MD5 Message-Digest Algorithm (RFC 1321).
 4 |  *
 5 |  * Homepage:
 6 |  * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
 7 |  *
 8 |  * Author:
 9 |  * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
10 |  *
11 |  * This software was written by Alexander Peslyak in 2001.  No copyright is
12 |  * claimed, and the software is hereby placed in the public domain.
13 |  * In case this attempt to disclaim copyright and place the software in the
14 |  * public domain is deemed null and void, then the software is
15 |  * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
16 |  * general public under the following terms:
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without
19 |  * modification, are permitted.
20 |  *
21 |  * There's ABSOLUTELY NO WARRANTY, express or implied.
22 |  *
23 |  * See md5.c for more information.
24 |  */
25 | 
#ifndef _MD5_SUM_H
#define _MD5_SUM_H

/* When HAVE_OPENSSL is defined the MD5_* API is taken from OpenSSL's header;
 * otherwise the declarations below provide the same names. */
#ifdef HAVE_OPENSSL
#include <openssl/md5.h>
#else

/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned int MD5_u32plus;

/* Hashing context handed to every MD5_* function declared below. */
typedef struct {
	MD5_u32plus lo, hi;
	MD5_u32plus a, b, c, d;
	unsigned char buffer[64];
	MD5_u32plus block[16];
} MD5_CTX;

/* Context-based interface operating on an MD5_CTX.
 * NOTE(review): presumably used as Init -> Update (repeatable) -> Final, with
 * Final writing a 16-byte digest to `result`, as in OpenSSL's MD5 API; the
 * implementation is not visible from this header -- confirm. */
extern void MD5_Init(MD5_CTX *ctx);
extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
extern void MD5_Final(unsigned char *result, MD5_CTX *ctx);

#endif // HAVE_OPENSSL

/* One-shot helper taking `size` bytes at `data` and an output buffer `md5sum`.
 * NOTE(review): the output format and the required length of `md5sum` are not
 * visible from this header -- check the definition before sizing the buffer. */
void md5sum (const void * data, unsigned long size, char * md5sum);

#endif // _MD5_SUM_H
52 | 


--------------------------------------------------------------------------------
/src/cuFFT-client/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2015 Advanced Micro Devices, Inc.
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
# Build script for the cuFFT comparison client; requires the CUDA toolkit.
cmake_minimum_required(VERSION 2.8)
find_package(CUDA REQUIRED)

#  client
set( cuFFT-client.Source	cuFFT-client.cpp )

set( cuFFT-client.Files ${cuFFT-client.Source} )

# Pass options to NVCC
# One -gencode pair per targeted architecture (sm_20 through sm_52).
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -gencode arch=compute_20,code=sm_20; -gencode arch=compute_30,code=sm_30; -gencode arch=compute_35,code=sm_35; -gencode arch=compute_37,code=sm_37; -gencode arch=compute_50,code=sm_50; -gencode arch=compute_52,code=sm_52)

include_directories( ./ ../common/inc/ )

CUDA_ADD_EXECUTABLE( cuFFT-client ${cuFFT-client.Files} )

# Adds the cuFFT library to the target's link line.
CUDA_ADD_CUFFT_TO_TARGET( cuFFT-client )

target_link_libraries( cuFFT-client ${CUDA_LIBRARIES})

# Set output directory to bin
# NOTE(review): CUDA_GENERATED_OUTPUT_DIR is assigned after CUDA_ADD_EXECUTABLE
# has already been called, so it may not influence this target -- confirm
# against the FindCUDA documentation. BITNESS is not set in this file and is
# presumably provided by an including script -- verify.
if( MSVC )
	set(CUDA_GENERATED_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BITNESS})
else()
	set(CUDA_GENERATED_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BITNESS}/${CMAKE_BUILD_TYPE})
endif()
42 | 


--------------------------------------------------------------------------------
/src/include/unicode.compatibility.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #pragma once
19 | #if !defined( amd_unicode_h )
20 | #define amd_unicode_h
21 | 
 22 | //	Typedefs to support Unicode and ANSI compilation
23 | #if defined( _UNICODE )
24 | 	typedef std::wstring		tstring;
25 | 	typedef std::wstringstream	tstringstream;
26 | 	typedef std::wifstream		tifstream;
27 | 	typedef std::wofstream		tofstream;
28 | 	typedef std::wfstream		tfstream;
29 | 	static std::wostream&	tout	= std::wcout;
30 | 	static std::wostream&	terr	= std::wcerr;
31 | #else
32 | 	typedef std::string tstring;
33 | 	typedef std::stringstream tstringstream;
34 | 	typedef std::ifstream		tifstream;
35 | 	typedef std::ofstream		tofstream;
36 | 	typedef std::fstream		tfstream;
37 | 	static std::ostream&	tout	= std::cout;
38 | 	static std::ostream&	terr	= std::cerr;
39 | #endif
40 | 
41 | //	These macros help linux cope with the conventions of windows tchar.h file
42 | #if defined( _WIN32 )
43 | 	#include <tchar.h>
44 | 	#include <windows.h>
45 | #else
46 | 	#if defined( __GNUC__ )
47 | 		typedef char TCHAR;
48 | 		typedef char _TCHAR;
49 | 		#define _tmain main
50 | 
51 | 		#if defined( UNICODE )
52 | 			#define _T(x)	L ## x
53 | 		#else
54 | 			#define _T(x)	x
55 | 		#endif
56 | 	#endif
57 | #endif
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/src/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2013 Advanced Micro Devices, Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
17 | INCLUDE_DIRECTORIES(
18 |     "${CMAKE_CURRENT_SOURCE_DIR}"
19 |     "${OPENCL_INCLUDE_DIRS}"
20 |     "${PROJECT_SOURCE_DIR}/include"
21 |     "${PROJECT_BINARY_DIR}/include"
22 |     )
23 | 
24 | LINK_DIRECTORIES("${PROJECT_BINARY_DIR}/package/lib${CLFFT_SUFFIX_LIB}")
25 | 
26 | FILE(GLOB FILES "*.c")
27 | 
# Create one executable target per globbed example source file.
FOREACH(FILE ${FILES})

    if( MSVC )
    	if( MSVC_VERSION LESS 1800 )
    		# Use C++ with Microsoft compiler
    		# (presumably because these older MSVC releases lack the C
    		# features the examples use -- TODO confirm)
    		SET_SOURCE_FILES_PROPERTIES( ${FILE} PROPERTIES LANGUAGE CXX)
    	endif ()
    endif( )

    # EXAMPLE  = file name without directory or extension
    # DIR_NAME = name of the directory that contains the file
    GET_FILENAME_COMPONENT(EXAMPLE ${FILE} NAME_WE)
    GET_FILENAME_COMPONENT(FULL_DIR_NAME ${FILE} PATH)
    GET_FILENAME_COMPONENT(DIR_NAME ${FULL_DIR_NAME} NAME)
    # The target name carries the directory prefix; the produced binary is
    # named after the source file alone (see OUTPUT_NAME below).
    SET(EXAMPLE_NAME example_${DIR_NAME}_${EXAMPLE})
    ADD_EXECUTABLE(${EXAMPLE_NAME} ${FILE})

    TARGET_LINK_LIBRARIES(${EXAMPLE_NAME} clFFT ${OPENCL_LIBRARIES} ${CMAKE_DL_LIBS})

    SET_TARGET_PROPERTIES(${EXAMPLE_NAME}
        PROPERTIES
        OUTPUT_NAME ${EXAMPLE}
        RUNTIME_OUTPUT_DIRECTORY ${DIR_NAME})

    # Install both the example source and the built executable.
    INSTALL(FILES ${FILE} DESTINATION "share/clFFT/examples")
    INSTALL(TARGETS ${EXAMPLE_NAME}
        RUNTIME DESTINATION "share/clFFT/examples")
ENDFOREACH()
54 | 


--------------------------------------------------------------------------------
/src/tests/accuracy_test_common.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #include <gtest/gtest.h>
19 | #include<math.h>
20 | 
21 | #include "test_constants.h"
22 | #include "fftw_transform.h"
23 | #include "cl_transform.h"
24 | #include "typedefs.h"
25 | #include "accuracy_test_common.h"
26 | #include <stdexcept>
27 | #include <vector>
28 | 
29 | /*****************************************************/
30 | clfftResultLocation cl_placeness( placeness::placeness_t placeness )
31 | {
32 | 	if( placeness == placeness::in_place )
33 | 		return CLFFT_INPLACE;
34 | 	else if( placeness == placeness::out_of_place )
35 | 		return CLFFT_OUTOFPLACE;
36 | 	else
37 | 		throw std::runtime_error( "invalid placeness" );
38 | }
39 | 
40 | /*****************************************************/
41 | clfftLayout cl_layout( layout::buffer_layout_t layout_in )
42 | {
43 | 	if( layout_in == layout::real )
44 | 		return CLFFT_REAL;
45 | 	else if( layout_in == layout::hermitian_planar )
46 | 		return CLFFT_HERMITIAN_PLANAR;
47 | 	else if( layout_in == layout::complex_planar )
48 | 		return CLFFT_COMPLEX_PLANAR;
49 | 	else if( layout_in == layout::hermitian_interleaved )
50 | 		return CLFFT_HERMITIAN_INTERLEAVED;
51 | 	else if( layout_in == layout::complex_interleaved )
52 | 		return CLFFT_COMPLEX_INTERLEAVED;
53 | 	else
54 | 		throw std::runtime_error( "invalid layout_in" );
55 | }
56 | 


--------------------------------------------------------------------------------
/src/client/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2013 Advanced Micro Devices, Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
17 | 
18 | #  client
19 | set( Client.Source	client.cpp
20 |                     openCL.misc.cpp
21 |                     stdafx.cpp )
22 | 
23 | set( Client.Headers client.h
24 |                     openCL.misc.h
25 |                     ../statTimer/statisticalTimer.extern.h
26 |                     ../include/unicode.compatibility.h
27 |                     ../include/stdafx.h
28 |                     ../include/targetver.h
29 |                     ../include/clFFT.h )
30 | 
31 | set( Client.Files ${Client.Source} ${Client.Headers} )
32 | 
33 | set( RT_LIB "" )
34 | if( WIN32 )
35 | 	add_definitions( "/D_CONSOLE" )
36 | elseif( NOT APPLE )
 37 | 	# Link with librt (needed for clock_gettime() on older glibc); libdl is added via ${CMAKE_DL_LIBS} below
38 | 	set( RT_LIB "-lrt" )
39 | endif( )
40 | 
41 | # Include standard OpenCL headers
42 | include_directories( ${Boost_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ../../../common ${PROJECT_BINARY_DIR}/include ../include )
43 | 
44 | add_executable( clFFT-client ${Client.Files} )
45 | 
46 | target_link_libraries( clFFT-client clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} ${CMAKE_DL_LIBS} ${RT_LIB} )
47 | 
48 | set_target_properties( clFFT-client PROPERTIES VERSION ${clFFT_VERSION} )
49 | set_target_properties( clFFT-client PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
50 | if( APPLE )
51 |     # properly deal with RPATH on mac
52 |     set_target_properties( clFFT-client PROPERTIES INSTALL_RPATH "@loader_path/../lib${CLFFT_SUFFIX_LIB}")
53 | endif()
54 | 
55 | install( TARGETS clFFT-client
56 |         RUNTIME DESTINATION bin${CLFFT_SUFFIX_BIN}
57 |         LIBRARY DESTINATION lib${CLFFT_SUFFIX_LIB}
58 |         ARCHIVE DESTINATION lib${CLFFT_SUFFIX_LIB}/import
59 |         )
60 | 


--------------------------------------------------------------------------------
/src/FindclFFT.cmake:
--------------------------------------------------------------------------------
 1 | # - Find clFFT, AMD's OpenCL FFT library
 2 | 
 3 | # This script defines the following variables:
 4 | # CLFFT_INCLUDE_DIRS    - Location of clFFT's include directory.
 5 | # CLFFT_LIBRARIES       - Location of clFFT's libraries
 6 | # CLFFT_FOUND           - True if clFFT has been located
 7 | #
 8 | # If your clFFT installation is not in a standard installation directory, you
 9 | # may provide a hint to where it may be found. Simply set the value CLFFT_ROOT
 10 | # to the directory containing 'include/clFFT.h' prior to calling this script.
11 | #
12 | # By default this script will attempt to find the 32-bit version of clFFT.
13 | # If you desire to use the 64-bit version instead, set
14 | #   set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
15 | # prior to calling this script.
16 | #
17 | #=============================================================================
18 | # Copyright 2014 Brian Kloppenborg
19 | #
20 | # Licensed under the Apache License, Version 2.0 (the "License");
21 | # you may not use this file except in compliance with the License.
22 | # You may obtain a copy of the License at
23 | #
24 | # http://www.apache.org/licenses/LICENSE-2.0
25 | #
26 | # Unless required by applicable law or agreed to in writing, software
27 | # distributed under the License is distributed on an "AS IS" BASIS,
28 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29 | # See the License for the specific language governing permissions and
30 | # limitations under the License.
31 | #=============================================================================
32 | 
IF(CLFFT_INCLUDE_DIRS)
  # Already located by an earlier call, be silent
  set (CLFFT_FIND_QUIETLY TRUE)
ENDIF (CLFFT_INCLUDE_DIRS)

# Installation prefix: the directory that contains include/clFFT.h.
FIND_PATH(CLFFT_ROOT_DIR
    NAMES include/clFFT.h
    HINTS /usr/local/ ${CLFFT_ROOT} 
    DOC "clFFT root directory.")
    
FIND_PATH(_CLFFT_INCLUDE_DIRS
    NAMES clFFT.h
    HINTS ${CLFFT_ROOT_DIR}/include
    DOC "clFFT Include directory")

FIND_LIBRARY(_CLFFT_LIBRARY
    NAMES clFFT
    HINTS ${CLFFT_ROOT_DIR}/lib)

# Public result variables documented at the top of this module.
SET(CLFFT_INCLUDE_DIRS ${_CLFFT_INCLUDE_DIRS})
SET(CLFFT_LIBRARIES ${_CLFFT_LIBRARY})

# handle the QUIETLY and REQUIRED arguments and set CLFFT_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE (FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(CLFFT DEFAULT_MSG CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS)

# Only cache entries can be marked advanced. CLFFT_LIBRARIES and
# CLFFT_INCLUDE_DIRS are plain variables; the cache entries are the ones
# created by FIND_PATH / FIND_LIBRARY above. CLFFT_ROOT_DIR stays visible so
# users can still adjust the installation prefix.
MARK_AS_ADVANCED(_CLFFT_INCLUDE_DIRS _CLFFT_LIBRARY)
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/src/statTimer/ReadMe.txt:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2013 Advanced Micro Devices, Inc.
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
17 | ========================================================================
18 |     DYNAMIC LINK LIBRARY : StatTimer Project Overview
19 | ========================================================================
20 | 
21 | AppWizard has created this StatTimer DLL for you.
22 | 
23 | This file contains a summary of what you will find in each of the files that
24 | make up your StatTimer application.
25 | 
26 | 
27 | StatTimer.vcxproj
28 |     This is the main project file for VC++ projects generated using an Application Wizard.
29 |     It contains information about the version of Visual C++ that generated the file, and
30 |     information about the platforms, configurations, and project features selected with the
31 |     Application Wizard.
32 | 
33 | StatTimer.vcxproj.filters
34 |     This is the filters file for VC++ projects generated using an Application Wizard. 
35 |     It contains information about the association between the files in your project 
36 |     and the filters. This association is used in the IDE to show grouping of files with
 37 |     similar extensions under a specific node (e.g. ".cpp" files are associated with the
38 |     "Source Files" filter).
39 | 
40 | StatTimer.cpp
41 |     This is the main DLL source file.
42 | 
43 | /////////////////////////////////////////////////////////////////////////////
44 | Other standard files:
45 | 
46 | StdAfx.h, StdAfx.cpp
47 |     These files are used to build a precompiled header (PCH) file
48 |     named StatTimer.pch and a precompiled types file named StdAfx.obj.
49 | 
50 | /////////////////////////////////////////////////////////////////////////////
51 | Other notes:
52 | 
53 | AppWizard uses "TODO:" comments to indicate parts of the source code you
54 | should add to or customize.
55 | 
56 | /////////////////////////////////////////////////////////////////////////////
57 | 


--------------------------------------------------------------------------------
/src/client/client.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #pragma once
19 | #if !defined( CLIENT_H )
20 | #define CLIENT_H
21 | 
22 | //	Boost headers that we want to use
23 | //	#define BOOST_PROGRAM_OPTIONS_DYN_LINK
24 | #include <boost/program_options.hpp>
25 | 
26 | #ifdef WIN32
27 | 
28 | struct Timer
29 | {
30 |     LARGE_INTEGER start, stop, freq;
31 | 
32 | public:
33 |     Timer() { QueryPerformanceFrequency( &freq ); }
34 | 
35 |     void Start() { QueryPerformanceCounter(&start); }
36 |     double Sample()
37 |     {
38 |         QueryPerformanceCounter  ( &stop );
39 |         double time = (double)(stop.QuadPart-start.QuadPart) / (double)(freq.QuadPart);
40 |         return time;
41 |     }
42 | };
43 | 
44 | #elif defined(__APPLE__) || defined(__MACOSX)
45 | 
46 | #include <mach/clock.h>
47 | #include <mach/mach.h>
48 | 
49 | struct Timer
50 | {
51 |     clock_serv_t clock;
52 |     mach_timespec_t start, end;
53 | 
54 | public:
55 |     Timer() { host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &clock); }
56 |     ~Timer() { mach_port_deallocate(mach_task_self(), clock); }
57 | 
58 |     void Start() { clock_get_time(clock, &start); }
59 |     double Sample()
60 |     {
61 |         clock_get_time(clock, &end);
62 |         double time = 1000000000L * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec;
63 |         return time * 1E-9;
64 |     }
65 | };
66 | 
67 | #else
68 | 
69 | #include <time.h>
70 | #include <math.h>
71 | 
// Stopwatch built on clock_gettime( CLOCK_MONOTONIC ).
struct Timer
{
    struct timespec start, end;

public:
    // Zero both stamps so Sample() never reads indeterminate values when it
    // is called before Start().
    Timer() : start(), end() { }

    // Record the time stamp that Sample() measures from.
    void Start() { clock_gettime(CLOCK_MONOTONIC, &start); }

    // Return the seconds elapsed since the last Start().
    double Sample()
    {
        clock_gettime(CLOCK_MONOTONIC, &end);
        // Do the nanosecond arithmetic in long long: with a 32-bit long,
        // 1000000000L * seconds overflows after roughly two seconds.
        const long long elapsed_ns =
            1000000000LL * (long long)(end.tv_sec - start.tv_sec)
            + (long long)(end.tv_nsec - start.tv_nsec);
        return (double)elapsed_ns * 1E-9;
    }
};
87 | 
88 | #endif
89 | 
90 | #endif
91 | 


--------------------------------------------------------------------------------
/src/library/ReadMe.txt:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2013 Advanced Micro Devices, Inc.
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
17 | ========================================================================
18 |     CONSOLE APPLICATION : AMD.clFFT Project Overview
19 | ========================================================================
20 | 
21 | AppWizard has created this AMD.clFFT application for you.
22 | 
23 | This file contains a summary of what you will find in each of the files that
24 | make up your AMD.clFFT application.
25 | 
26 | 
27 | AMD.clFFT.vcxproj
28 |     This is the main project file for VC++ projects generated using an Application Wizard.
29 |     It contains information about the version of Visual C++ that generated the file, and
30 |     information about the platforms, configurations, and project features selected with the
31 |     Application Wizard.
32 | 
33 | AMD.clFFT.vcxproj.filters
34 |     This is the filters file for VC++ projects generated using an Application Wizard. 
35 |     It contains information about the association between the files in your project 
36 |     and the filters. This association is used in the IDE to show grouping of files with
 37 |     similar extensions under a specific node (e.g. ".cpp" files are associated with the
38 |     "Source Files" filter).
39 | 
40 | AMD.clFFT.cpp
41 |     This is the main application source file.
42 | 
43 | /////////////////////////////////////////////////////////////////////////////
44 | Other standard files:
45 | 
46 | StdAfx.h, StdAfx.cpp
47 |     These files are used to build a precompiled header (PCH) file
48 |     named AMD.clFFT.pch and a precompiled types file named StdAfx.obj.
49 | 
50 | /////////////////////////////////////////////////////////////////////////////
51 | Other notes:
52 | 
53 | AppWizard uses "TODO:" comments to indicate parts of the source code you
54 | should add to or customize.
55 | 
56 | /////////////////////////////////////////////////////////////////////////////
57 | 


--------------------------------------------------------------------------------
/src/callback-client/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2015 Advanced Micro Devices, Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
17 | 
18 | # Source and header files that make up the clFFT-callback-client executable
19 | set( Client.Source	callback-client.cpp
20 |                     openCL.misc.cpp
21 |                     stdafx.cpp )
22 | 
23 | set( Client.Headers client.h
24 |                     openCL.misc.h
25 |                     ../statTimer/statisticalTimer.extern.h
26 |                     ../include/unicode.compatibility.h
27 |                     ../include/stdafx.h
28 |                     ../include/targetver.h
29 |                     ../include/clFFT.h )
30 | 
31 | set( Client.Files ${Client.Source} ${Client.Headers} )
32 | # Extra system libraries for the link line; only filled in by the else( ) branch below
33 | set( DL_LIB "" )
34 | if( WIN32 )
35 | 	add_definitions( "/D_CONSOLE" )
36 | elseif( APPLE )
37 | 	set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" )
38 | else( )
39 | 	# Neither Windows nor Apple: link libdl for dlopen()/dlclose(), and librt as well
40 | 	set( DL_LIB "-ldl -lrt" )
41 | endif( )
42 | 
43 | # Header search paths: Boost, OpenCL, FFTW, ../../../common, generated headers in the build tree, ../include
44 | include_directories( ${Boost_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ${FFTW_INCLUDE_DIRS} ../../../common ${PROJECT_BINARY_DIR}/include ../include )
45 | 
46 | add_executable( clFFT-callback-client ${Client.Files} )
47 | 
48 | target_link_libraries( clFFT-callback-client clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} ${FFTW_LIBRARIES} ${DL_LIB} )
49 | # Version the binary and emit it into the staging directory of the build tree
50 | set_target_properties( clFFT-callback-client PROPERTIES VERSION ${clFFT_VERSION} )
51 | set_target_properties( clFFT-callback-client PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
52 | if( APPLE )
53 |     # On mac, set the install RPATH relative to the binary's own location (@loader_path)
54 |     set_target_properties( clFFT-callback-client PROPERTIES INSTALL_RPATH "@loader_path/../lib${CLFFT_SUFFIX_LIB}")
55 | endif()
56 | # Install rules for the client binary
57 | install( TARGETS clFFT-callback-client
58 |         RUNTIME DESTINATION bin${CLFFT_SUFFIX_BIN}
59 |         LIBRARY DESTINATION lib${CLFFT_SUFFIX_LIB}
60 |         ARCHIVE DESTINATION lib${CLFFT_SUFFIX_LIB}/import
61 |         )
62 | 


--------------------------------------------------------------------------------
/src/library/generator.transpose.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 | * Copyright 2016 Advanced Micro Devices, Inc.
 3 | *
 4 | * Licensed under the Apache License, Version 2.0 (the "License");
 5 | * you may not use this file except in compliance with the License.
 6 | * You may obtain a copy of the License at
 7 | *
 8 | * http://www.apache.org/licenses/LICENSE-2.0
 9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | * ************************************************************************/
16 | 
17 | #pragma once
18 | #if !defined( AMD_CLFFT_GENERATOR_TRANSPOSE_HEADER )
19 | #define AMD_CLFFT_GENERATOR_TRANSPOSE_HEADER
20 | #include <iomanip>
21 | #include "private.h"
22 | #include "repo.h"
23 | #include "plan.h"
24 | #include "generator.stockham.h"
25 | #include "action.h"
26 | 
27 | #define AVAIL_MEM_SIZE 32768 
28 | // Pads the stream with tabIndex fill characters (spaces by default) and returns it for chaining.
29 | inline std::stringstream& clKernWrite(std::stringstream& rhs, const size_t tabIndex)
30 | {
31 | 	rhs.width(static_cast<int>(tabIndex));	// same field width std::setw would install
32 | 	rhs << "";	// an empty string emits only the padding and resets the width
33 | 	return rhs;
34 | }
35 | namespace clfft_transpose_generator
36 | {
37 | //generate transpose kernel with square 2d matrix of row major with arbitrary batch size
38 | /*
39 | Below is a matrix(row major) containing three square sub matrix along column
40 | The transpose will be done within each sub matrix.
41 | [M0
42 | M1
43 | M2]
44 | */
45 | clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::Signature & params, std::string& strKernel, const size_t& lwSize, const size_t reShapeFactor);
46 | 
47 | //generate transpose kernel with square 2d matrix of row major with blocks along the leading dimension
48 | //aka leading dimension batched
49 | /*
50 | Below is a matrix(row major) containing three square sub matrix along row
51 | [M0 M1 M2]
52 | */
53 | clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTransposeNonSquareAction::Signature & params, std::string& strKernel, const size_t& lwSize, const size_t reShapeFactor);
54 | 
55 | //swap lines. Kernels of this kind are used in combination with square transpose kernels to perform non-square transposes with a 1:2 ratio
56 | clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature & params, std::string& strKernel, std::string& KernelFuncName, const size_t& lwSize, const size_t reShapeFactor);
57 | //presumably the general-ratio counterpart of genSwapKernel -- TODO confirm against the implementation
58 | clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Signature & params, std::string& strKernel, std::string& KernelFuncName, const size_t& lwSize, const size_t reShapeFactor);
59 | 
60 | void get_cycles(size_t *cycle_map, size_t num_reduced_row, size_t num_reduced_col);
61 | 
62 | void permutation_calculation(size_t m, size_t n, std::vector<std::vector<size_t> > &permutationVec);
63 | }//end of namespace clfft_transpose_generator
64 | 
65 | #endif


--------------------------------------------------------------------------------
/src/scripts/perf/errorHandler.py:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2013 Advanced Micro Devices, Inc.
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
17 | #---------------------------------File Note------------------------------------
18 | #Date: 27 January 2012
19 | #This file defines all the error code and error handler mechanism
20 | #--------------------------------Global Variables------------------------------
21 | 
22 | UINS_CAT = 100 #installed catalyst could not be found
23 | WIN_REG_SEARCH_FAIL = 101 #Windows Registry search for the catalyst version failed
24 | UNIMPL_APP = 200 #unimplemented application requirement
25 | SYS_ERR = 300 #system error
26 | TIME_OUT = 400 #operation timed out
27 | DIM_INCO_FILE_FMT = 500 #incorrect file format for dimension
28 | DIM_FILE_VAL_INCO = 501 #Value coming from dimension file is incorrect
29 | 
30 | #errorTable : Maps every error code above to its message. To add a new error,
31 | #             define its code above and add its message here
32 | """Error table is defined as private to this module""" # NOTE: bare string statement (no effect); the name errorTable has no leading underscore
33 | errorTable = {
34 |               UINS_CAT: 'Application is not able to find the installed catalyst',
35 |               WIN_REG_SEARCH_FAIL: 'Windows Registry search for catalysts version is unsuccessful',
36 |               UNIMPL_APP: 'Unimplemented Application requirement',
37 |               SYS_ERR:    'System error occurred - Please check the source code',
38 |               TIME_OUT: 'Operation is timed out',
39 |               DIM_INCO_FILE_FMT: 'incorrect file format for dimension - Not able to find dimension',
40 |               DIM_FILE_VAL_INCO: 'Value coming from dimension file is incorrect'
41 |               }
42 | 
43 | #--------------------------------Class Definitions-----------------------------
44 | class TimeoutException(Exception): 
45 |     pass
46 | 
47 | """Base class for handling all the application generated exception"""
48 | class ApplicationException(Exception):
49 |     
50 |     def __init__(self, fileName, errno, msg = ""):
51 |         self.fileName = fileName
52 |         self.errno = errno
53 |         self.mess = errorTable[errno] + msg
54 |         self.message = 'Application ERROR:'+repr(self.fileName+'-'+str(self.errno)+'-'+self.mess)
55 |         
56 |     def __str__(self):
57 |         return repr(self.fileName+'-'+str(self.errno)+'-'+self.mess)
58 |     
59 | 
60 | #--------------------------------Global Function-------------------------------
61 | if __name__ == '__main__':
62 |     #print errorTable
63 |     try:
64 |         raise ApplicationException('errorHandler', SYS_ERR)
65 | 
66 |     except:
67 |         print 'Generic exception'
68 | 
69 | 


--------------------------------------------------------------------------------
/src/statTimer/statisticalTimer.extern.h:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | #pragma once
19 | #ifndef _STATISTICALTIMER_EXTERN_H_
20 | #define _STATISTICALTIMER_EXTERN_H_
21 | #include "../include/clFFT.h"
22 | #include "statisticalTimer.h"
23 | 
24 | /**
25 |  * \file statisticalTimer.extern.h
26 |  * \brief Declares the exported interface of the StatTimer shared module: a cross platform
27 |  * timer for use in timing code progress with a high degree of accuracy.
28 |  *	Clients obtain a timer through getStatTimer( ), which is exported from the module
29 |  *	(see STATTIMER_API below) rather than implemented in this header.
30 |  */
31 | 
32 | // The following ifdef block is the standard way of creating macros which make exporting
33 | // from a DLL simpler. All files within this DLL are compiled with the STATTIMER_EXPORTS
34 | // symbol defined on the command line. this symbol should not be defined on any project
35 | // that uses this DLL. This way any other project whose source files include this file see
36 | // STATTIMER_API functions as being imported from a DLL, whereas this DLL sees symbols
37 | // defined with this macro as being exported.
38 | #if defined( _WIN32 )
39 | 	#if !defined( __cplusplus )
40 | 		#define inline __inline
41 | 	#endif
42 | 
43 |     #if defined( CLFFT_STATIC )
44 |         #define STATTIMER_API
45 |     #elif defined( STATTIMER_EXPORTS )
46 |         #define STATTIMER_API __declspec( dllexport )
47 |     #else
48 |         #define STATTIMER_API __declspec( dllimport )
49 |     #endif
50 | #else
51 | 	#define STATTIMER_API
52 | #endif
53 | 
54 | //	The type of timer to be returned from ::getStatTimer( )
55 | typedef enum clfftTimerType_
56 | {
57 | 	CLFFT_GPU			= 1,
58 | 	CLFFT_CPU,
59 | } clfftTimerType;
60 | 
61 | //	Table of typedef definitions for all exported functions from this shared module.
62 | //	Clients of this module can use these typedefs to help create function pointers
63 | //	that can be initialized to point to the functions exported from this module.
64 | typedef baseStatTimer* (*PFGETSTATTIMER)( const clfftTimerType type );
65 | 
66 | 	/**
67 | 	* \fn getStatTimer( const clfftTimerType type )
68 | 	* \brief Returns a pointer to the timer of the requested type; per the original note this is a
69 | 	*	singleton, so only 1 timer class is ever instantiated within a compilable executable.
70 | 	*/
71 | extern "C" STATTIMER_API baseStatTimer* getStatTimer( const clfftTimerType type );
72 | 
73 | #endif // _STATISTICALTIMER_EXTERN_H_
74 | 


--------------------------------------------------------------------------------
/src/statTimer/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2013 Advanced Micro Devices, Inc.
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
17 | 
18 | # List the names of the files to compile for the StatTimer library . . .
19 | set( StatTimer.Source 	statisticalTimer.CPU.cpp
20 | 								statisticalTimer.GPU.cpp
21 | 								statisticalTimer.extern.cpp
22 | 								stdafx.cpp )
23 | 
24 | # dllmain.cpp is only added to the sources when building with MSVC
25 | if( MSVC )
26 | 	set( StatTimer.Source ${StatTimer.Source} dllmain.cpp )
27 | endif( )
28 | 
29 | set( StatTimer.Headers	statisticalTimer.h
30 | 								statisticalTimer.extern.h
31 | 								statisticalTimer.CPU.h
32 | 								statisticalTimer.GPU.h
33 | 								stdafx.h 
34 | 								targetver.h 
35 | 								../include/clFFT.h )
36 | 								
37 | set( StatTimer.Files ${StatTimer.Source} ${StatTimer.Headers} )
38 | 
39 | # For a rainy day, add pre-compiled header support
40 | #if( MSVC )
41 | #	if (USE_MSVC_PCH)
42 | 	
43 | #		set_source_files_properties(LungAnalysisPCH.cxx
44 | #			PROPERTIES
45 | #			COMPILE_FLAGS "/YcLungAnalysisPCH.h"
46 | #			)
47 | #		foreach( src_file ${UPMC_LA_SRCS} )
48 | #			set_source_files_properties(
49 | #				${src_file}
50 | #				PROPERTIES
51 | #				COMPILE_FLAGS "/YuLungAnalysisPCH.h"
52 | #				)
53 | #		endforeach( src_file ${UPMC_LA_SRCS} )
54 | 		
55 | #		list(APPEND UPMC_LA_SRCS LungAnalysisPCH.cxx)
56 | #		list(APPEND UPMC_LA_HDRS LungAnalysisPCH.h)
57 | 
58 | #	endif(USE_MSVC_PCH)
59 | #endif (MSVC)
60 | 
61 | add_definitions( "/DSTATTIMER_EXPORTS" ) # selects the dllexport branch of STATTIMER_API in statisticalTimer.extern.h
62 | 
63 | # Header search paths: OpenCL, generated headers in the build tree, and ../include
64 | include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../include )
65 | 
66 | add_library(StatTimer ${StatTimer.Files})
67 | set_target_properties(StatTimer PROPERTIES VERSION ${clFFT_VERSION})
68 | set_target_properties(StatTimer PROPERTIES SOVERSION ${clFFT_VERSION_MAJOR})
69 | set_target_properties(StatTimer PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging")
70 | target_link_libraries(StatTimer ${OPENCL_LIBRARIES})
71 | 
72 | if( UNIX AND NOT APPLE )
73 | 	# This library dependency is brought in by the high precision timer available in linux
74 | 	target_link_libraries( StatTimer -lrt )
75 | endif( )
76 | 
77 | # Install rules for the StatTimer library; the target is also added to the "Library" export set
78 | install( TARGETS StatTimer
79 |         EXPORT Library
80 |         RUNTIME DESTINATION bin${CLFFT_SUFFIX_BIN}
81 |         LIBRARY DESTINATION lib${CLFFT_SUFFIX_LIB}
82 |         ARCHIVE DESTINATION lib${CLFFT_SUFFIX_LIB}/import
83 |         )
84 | 


--------------------------------------------------------------------------------
/src/gtest.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | option(USE_SYSTEM_GTEST "Use system installed gtest when set to ON, or build gtest locally when set to OFF" OFF)
 3 | 
 4 | if(USE_SYSTEM_GTEST)
 5 |   if( (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) )
 6 |     message( STATUS "Cmake version 2.8 or greater needed to use GTest" )
 7 |   else()
 8 |     # This will define GTEST_FOUND
 9 |     find_package( GTest )
10 |   endif()
11 | else()
12 |   if(CMAKE_VERSION VERSION_LESS 3.2 AND CMAKE_GENERATOR MATCHES "Ninja")
13 |     message(WARNING "Building GTest with Ninja has known issues with CMake older than 3.2")
14 |   endif()
15 |   # Local build: googletest is downloaded and built through ExternalProject
16 |   include(ExternalProject)
17 | 
18 |   set(GTEST_LIBRARIES gtest gtest_main)
19 |   # the binary dir must be known before creating the external project in order
20 |   # to pass the byproducts
21 |   set(prefix "${CMAKE_CURRENT_BINARY_DIR}/gtest-external-prefix")
22 |   set(binary_dir "${prefix}/src/gtest-external-build")
23 |   # Compute the expected static-library path of each gtest library; these are the byproducts
24 |   set(byproducts)
25 |   foreach(lib ${GTEST_LIBRARIES})
26 |     set(${lib}_location
27 |       ${binary_dir}/${CMAKE_CFG_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${lib}${CMAKE_STATIC_LIBRARY_SUFFIX})
28 |     list(APPEND byproducts ${${lib}_location})
29 |   endforeach()
30 |   # EXTRA_FLAG is prepended to the per-configuration C++ flags below; only MSVC older than 1800 sets it
31 |   if( MSVC )
32 | 	if( MSVC_VERSION LESS 1800 )  
33 |       set(EXTRA_FLAG "/D_VARIADIC_MAX=10 ")
34 | 	else()
35 | 	  set(EXTRA_FLAG "")
36 | 	endif()
37 |   else()
38 |     set(EXTRA_FLAG "")
39 |   endif()
40 |   # Forward this project's compilers, flags and build type to the gtest build; nothing is installed
41 |   ExternalProject_Add(
42 |     gtest-external
43 |     URL https://github.com/google/googletest/archive/release-1.7.0.zip
44 |     URL_MD5 EF5E700C8A0F3EE123E2E0209B8B4961
45 |     PREFIX ${prefix}
46 |     BINARY_DIR ${binary_dir}
47 |     CMAKE_CACHE_ARGS
48 |       -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
49 |       -DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}
50 |       -DCMAKE_CXX_FLAGS_DEBUG:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_DEBUG}
51 |       -DCMAKE_CXX_FLAGS_MINSIZEREL:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_MINSIZEREL}
52 |       -DCMAKE_CXX_FLAGS_RELEASE:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_RELEASE}
53 |       -DCMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_RELWITHDEBINFO}
54 |       -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER}
55 |       -DCMAKE_C_FLAGS:STRING=${CMAKE_C_FLAGS}
56 |       -DCMAKE_C_FLAGS_DEBUG:STRING=${CMAKE_C_FLAGS_DEBUG}
57 |       -DCMAKE_C_FLAGS_MINSIZEREL:STRING=${CMAKE_C_FLAGS_MINSIZEREL}
58 |       -DCMAKE_C_FLAGS_RELEASE:STRING=${CMAKE_C_FLAGS_RELEASE}
59 |       -DCMAKE_C_FLAGS_RELWITHDEBINFO:STRING=${CMAKE_C_FLAGS_RELWITHDEBINFO}
60 |       -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
61 |       -Dgtest_force_shared_crt:BOOL=ON
62 |     BUILD_BYPRODUCTS ${byproducts}
63 |     INSTALL_COMMAND "")
64 |   # Expose the built static libraries as imported targets that depend on the external build
65 |   foreach(lib ${GTEST_LIBRARIES})
66 |     add_library(${lib} IMPORTED STATIC)
67 |     add_dependencies(${lib} gtest-external)
68 |     set_target_properties(${lib} PROPERTIES IMPORTED_LOCATION ${${lib}_location})
69 |   endforeach()
70 | 
71 |   ExternalProject_Get_Property(gtest-external source_dir)
72 |   set(GTEST_INCLUDE_DIRS ${source_dir}/include)
73 |   set(GTEST_FOUND ON)
74 | endif()
75 | 
76 | # Hack to get googletest to work with vs2012 (MSVC11); applies to this project's own sources
77 | if( MSVC11 )
78 |   add_definitions( "/D_VARIADIC_MAX=10" )
79 | endif( )
80 | 


--------------------------------------------------------------------------------
/src/scripts/perf/performanceUtility.py:
--------------------------------------------------------------------------------
 1 | # ########################################################################
 2 | # Copyright 2013 Advanced Micro Devices, Inc.
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ########################################################################
16 | 
17 | #This file contains a number of utilities function which could be independent of
18 | #any specific domain concept
19 | 
20 | import signal
21 | from subprocess import check_output
22 | import errorHandler
23 | from datetime import datetime
24 | 
25 | def currentUser():
26 |     try:
27 |         return check_output("who", shell = True).split()[0];
28 |     except:
29 |         print 'Unhandled Exception at performanceUtility::currentUser()'
30 |         raise
31 |     
32 | #Details: Generate sorted numbers in radices of 2,3 and 5 upto a given upper limit number
33 | def generate235Radices(maxSize):
34 |     sizeList = list()
35 |     i = 0
36 |     j = 0
37 |     k = 0
38 |     SUM = int()
39 |     sumj = int()
40 |     sumk = int()
41 |     sumi = 1
42 |     while(True):
43 |         sumj = 1
44 |         j = 0
45 |         while(True):
46 |             sumk = 1
47 |             k = 0
48 |             while(True):
49 |                 SUM = sumi*sumj*sumk
50 |                 if ( SUM > maxSize ): break
51 |                 sizeList.append(SUM)
52 |                 k += 1
53 |                 sumk *= 2
54 |             if (k == 0): break
55 |             j += 1
56 |             sumj *= 3
57 |         if ( j == 0 and k == 0): break
58 |         i += 1
59 |         sumi *= 5
60 |     sizeList.sort()
61 |     return sizeList
62 | 
63 | 
64 | def timeout(timeout_time, default):
65 |     def timeout_function(f):
66 |         def f2(args):
67 |             def timeout_handler(signum, frame):
68 |                 raise errorHandler.TimeoutException()
69 |  
70 |             old_handler = signal.signal(signal.SIGALRM, timeout_handler) 
71 |             signal.alarm(timeout_time) # triger alarm in timeout_time seconds
72 |             retval = ""
73 |             try: 
74 |                 retval = f(args)
75 |             except errorHandler.TimeoutException:
76 |                 raise errorHandler.ApplicationException(__file__, errorHandler.TIME_OUT)
77 |             except:
78 |                 signal.alarm(0)
79 |                 raise
80 |             finally:
81 |                 #print 'executing finally'
82 |                 signal.signal(signal.SIGALRM, old_handler) 
83 |             signal.alarm(0)
84 |             return retval
85 |         return f2
86 |     return timeout_function
87 | 
88 | 
89 | def logTxtOutput(fileName, mode, txt):
90 |     todayFile =  fileName+'-'+datetime.now().strftime('%Y-%b-%d')+'.txt'
91 |     with open(todayFile, mode) as f:
92 |         f.write('------\n'+txt+'\n')
93 |         
94 | def log(filename, txt):
95 |     with open(filename, 'a') as f:
96 |         f.write(datetime.now().ctime()+'# '+txt+'\n')
97 |         


--------------------------------------------------------------------------------
/src/library/lifetime.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright 2013 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  * http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  * ************************************************************************/
16 | 
17 | 
18 | // clfft.lifetime.cpp : Functions that control the lifetime of the FFT library and their supporting functions
19 | //
20 | 
21 | #include "stdafx.h"
22 | #include "private.h"
23 | #include "repo.h"
24 | #include "../include/sharedLibrary.h"
25 | #include "../statTimer/statisticalTimer.extern.h"
26 | 
27 | //	Initialize the library: create the FFTRepo singleton, load the optional StatTimer module and store the client's setup data
28 | clfftStatus	clfftSetup( const clfftSetupData* sData )
29 | {
30 | 	//	Static data is not thread safe (to create), so we implement a lock to protect instantiation for the first call
31 | 	//	Implemented outside of FFTRepo::getInstance to minimize lock overhead; this is only necessary on first creation
32 | 	scopedLock sLock( FFTRepo::lockRepo, _T( "FFTRepo::getInstance" ) );
33 | 
34 | 	//	First invocation of this function will allocate the FFTRepo singleton; thereafter the object always exists
35 | 	FFTRepo& fftRepo	= FFTRepo::getInstance( );
36 | 
37 | 	clfftInitRequestLibNoMemAlloc();
38 | 	clfftInitBinaryCache();
39 | 
40 | 	//	Discover and load the timer module if present; when the handle is null the block below is skipped and pStatTimer is not set
41 | 	fftRepo.timerHandle = LoadSharedLibrary( "lib", "StatTimer", true );
42 | 	if( fftRepo.timerHandle )
43 | 	{
44 | 		//	Timer module discovered and loaded successfully
45 | 		//	Initialize function pointers to call into the shared module
46 | 		PFGETSTATTIMER pfGetStatTimer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( fftRepo.timerHandle, "getStatTimer" ) );
47 | 
48 | 		//	Fetch the GPU timer from the module if the entry point was resolved; the returned pointer is stored as GpuStatTimer*
49 | 		if( pfGetStatTimer )
50 | 		{
51 | 			fftRepo.pStatTimer = reinterpret_cast< GpuStatTimer* > ( pfGetStatTimer( CLFFT_GPU ) );
52 | 		}
53 | 	}
54 | 
55 | 	//	If the client passed no setupData, the repo's setupData is not modified and we are done
56 | 	if( sData == NULL )
57 | 		return CLFFT_SUCCESS;
58 | 
59 | 	//	Versioning checks commented out until necessary
60 | 	////	If the major version number between the client and library do not match, return mismatch
61 | 	//if( sData->major > clfftVersionMajor )
62 | 	//	return CLFFT_VERSION_MISMATCH;
63 | 
64 | 	////	If the minor version number between the client and library do not match, return mismatch
65 | 	//if( sData->minor > clfftVersionMinor )
66 | 	//	return CLFFT_VERSION_MISMATCH;
67 | 
68 | 	////	We ignore patch version number for version validation
69 | 
70 | 	fftRepo.setupData	= *sData;	//	keep a copy of the client's settings
71 | 
72 | 	return	CLFFT_SUCCESS;
73 | }
74 | 
75 | //	Release the library's internal resources, unload the timer module and clear the handles that pointed into it
76 | clfftStatus	clfftTeardown( )
77 | {
78 | 	FFTRepo& fftRepo	= FFTRepo::getInstance( );
79 | 	fftRepo.releaseResources( );
80 | 	FreeSharedLibrary( fftRepo.timerHandle );
81 | 	fftRepo.timerHandle	= NULL;	//	a repeated teardown must not release the same handle twice
82 | 	fftRepo.pStatTimer	= NULL;	//	the timer was obtained from the module that was just released
83 | 	return	CLFFT_SUCCESS;
84 | }
85 | //	Report the library version through the three output pointers; CLFFT_INVALID_ARG_VALUE if any of them is null
86 | clfftStatus clfftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch )
87 | {
88 | 	if( major == NULL || minor == NULL || patch == NULL ) return CLFFT_INVALID_ARG_VALUE;
89 | 	*major	= clfftVersionMajor;
90 | 	*minor	= clfftVersionMinor;
91 | 	*patch	= clfftVersionPatch;
92 | 	return	CLFFT_SUCCESS;
93 | }
94 | 


--------------------------------------------------------------------------------
/src/statTimer/statisticalTimer.h:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | 
 18 | #pragma once
 19 | #ifndef _STATISTICALTIMER_H_
 20 | #define _STATISTICALTIMER_H_
 21 | #include <vector>
 22 | #include <functional>
 23 | #include <string>
 24 | 
 25 | #include "../include/clFFT.h"
 26 | 
 27 | /**
 28 |  * \file clfft.StatisticalTimer.h
 29 |  * \brief A timer class that provides a cross platform timer for use
 30 |  * in timing code progress with a high degree of accuracy.
 31 |  *	This class is implemented entirely in the header, to facilitate inclusion into multiple
 32 |  *	projects without needing to compile an object file for each project.
 33 |  */
 34 | 
 35 | //	Abstract functor interface: derives from std::function<R(A)> and declares a pure virtual,
 36 | //	zero-argument operator( ) returning R (spelled directly; std::function's result_type member
 37 | //	typedef was removed in C++20).  NOTE(review): the original notes say it is passed into Print( ).
 38 | template<typename A, typename R >
 39 | class flopsFunc: public std::function<R(A)>
 40 | {
 41 | public:
 42 | 	virtual R operator( )( ) = 0;
 43 | };
 44 | 
 45 | /**
 46 |  * \class baseStatTimer
 47 |  * \brief Abstract interface (all operations are pure virtual) of the statistical timers;
 48 |  * instances are obtained through getStatTimer( ) in statisticalTimer.extern.h.
 49 |  */
 50 | class baseStatTimer
 51 | {
 52 | protected:
 53 | 	/**
 54 | 	 * \fn ~baseStatTimer()
 55 | 	 * \brief Protected virtual destructor: outside code cannot delete a timer through this interface
 56 | 	 */
 57 | 	virtual ~baseStatTimer( ){ };
 58 | 
 59 | //	friend std::ostream& operator<<( std::ostream& os, const baseStatTimer& s );
 60 | 
 61 | public:
 62 | 	/**
 63 | 	 * \fn void Start( size_t id )
 64 | 	 * \brief Start the timer
 65 | 	 * \sa Stop(), Reset()
 66 | 	 */
 67 | 	virtual void Start( size_t id ) = 0;
 68 | 
 69 | 	/**
 70 | 	 * \fn void Stop( size_t id )
 71 | 	 * \brief Stop the timer
 72 | 	 * \sa Start(), Reset()
 73 | 	 */
 74 | 	virtual void Stop( size_t id ) = 0;
 75 | 
 76 | 	/**
 77 | 	 * \fn void Clear( )
 78 | 	 * \brief Clear the timer -- NOTE(review): how this differs from Reset( ) is defined by the implementations
 79 | 	 * \sa Start(), Stop(), Reset()
 80 | 	 */
 81 | 	virtual void Clear( ) = 0;
 82 | 
 83 | 	/**
 84 | 	 * \fn void Reset( )
 85 | 	 * \brief Reset the timer to 0
 86 | 	 * \sa Start(), Stop()
 87 | 	 */
 88 | 	virtual void Reset( ) = 0;
 89 | 
 90 | 	virtual void Reserve( size_t nEvents, size_t nSamples ) = 0;
 91 | 
 92 | 	virtual size_t getUniqueID( const std::string& label, cl_uint groupID ) = 0;
 93 | 
 94 | 	//	Turn normalization on or off -- NOTE(review): the original note read "Calculate the average/mean of data for a given event"; confirm the exact effect
 95 | 	virtual void	setNormalize( bool norm ) = 0;
 96 | 
 97 | 	virtual void Print( ) = 0;
 98 | 
 99 | 	//	Using the stdDev of the entire population (of an id), eliminate those samples that fall
100 | 	//	outside some specified multiple of the stdDev.  This assumes that the population
101 | 	//	form a gaussian curve.
102 | 	virtual size_t	pruneOutliers( cl_double multiple ) = 0;
103 | 	virtual std::vector< size_t > pruneOutliers( size_t id , cl_double multiple ) = 0;
104 | };
105 | 
106 | #endif // _STATISTICALTIMER_H_
107 | 


--------------------------------------------------------------------------------
/src/FindFFTW.cmake:
--------------------------------------------------------------------------------
  1 | # ########################################################################
  2 | # Copyright 2013 Advanced Micro Devices, Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ########################################################################
 16 | 
 17 | 
 18 | # Locate the FFTW (http://www.fftw.org/) Framework.
 19 | #
 20 | # Defines the following variables:
 21 | #
 22 | #   FFTW_FOUND - Found the FFTW framework
 23 | #   FFTW_INCLUDE_DIRS - Include directories
 24 | #
 25 | # Also defines the library variables below as normal
 26 | # variables.  These contain debug/optimized keywords when
 27 | # a debugging library is found.
 28 | #
 29 | #   FFTW_LIBRARIES - libfftw
 30 | #
 31 | # Accepts the following variables as input:
 32 | #
 33 | #   FFTW_ROOT - (as a CMake or environment variable)
 34 | #                The root directory of the fftw install prefix
 35 | #
 36 | #   FIND_LIBRARY_USE_LIB64_PATHS - Global property that controls whether 
 37 | #               findFFTW should search for 64bit or 32bit libs
 38 | #-----------------------------------------------
 39 | # Example Usage:
 40 | #
 41 | #    find_package(FFTW REQUIRED)
 42 | #    include_directories(${FFTW_INCLUDE_DIRS})
 43 | #
 44 | #    add_executable(foo foo.cc)
 45 | #    target_link_libraries(foo ${FFTW_LIBRARIES})
 46 | #
 47 | #-----------------------------------------------
 48 | 
 49 | # Header directory.  HINTS are derived from FFTW_ROOT (CMake or environment variable).
 50 | find_path( FFTW_INCLUDE_DIRS
 51 |     NAMES fftw3.h
 52 |     HINTS
 53 |         ${FFTW_ROOT}/include
 54 |         ${FFTW_ROOT}/api
 55 |         ${FFTW_ROOT}
 56 |         $ENV{FFTW_ROOT}/include
 57 |         $ENV{FFTW_ROOT}/api
 58 |         ENV FFTW_ROOT
 59 |     PATHS /usr/include /usr/local/include
 60 | )
 61 | 
 62 | # Single precision library.
 63 | find_library( FFTW_SINGLE_PRECISION_LIBRARIES
 64 |     NAMES fftw3f libfftw3f-3
 65 |     HINTS
 66 |         ${FFTW_ROOT}/lib
 67 |         ${FFTW_ROOT}/.libs
 68 |         ${FFTW_ROOT}
 69 |         $ENV{FFTW_ROOT}/lib
 70 |         $ENV{FFTW_ROOT}/.libs
 71 |         ENV FFTW_ROOT
 72 |     PATHS /usr/lib /usr/local/lib
 73 |     DOC "FFTW dynamic library"
 74 | )
 75 | 
 76 | # Double precision library.
 77 | find_library( FFTW_DOUBLE_PRECISION_LIBRARIES
 78 |     NAMES fftw3 libfftw3-3
 79 |     HINTS
 80 |         ${FFTW_ROOT}/lib
 81 |         ${FFTW_ROOT}/.libs
 82 |         ${FFTW_ROOT}
 83 |         $ENV{FFTW_ROOT}/lib
 84 |         $ENV{FFTW_ROOT}/.libs
 85 |         ENV FFTW_ROOT
 86 |     PATHS /usr/lib /usr/local/lib
 87 |     DOC "FFTW dynamic library"
 88 | )
 89 | 
 90 | mark_as_advanced(
 91 |     FFTW_INCLUDE_DIRS
 92 |     FFTW_SINGLE_PRECISION_LIBRARIES
 93 |     FFTW_DOUBLE_PRECISION_LIBRARIES )
 94 | 
 95 | # FFTW_LIBRARIES combines both precisions.  The found-check below tests each library variable on its own:
 96 | # a combined list such as "<var>-NOTFOUND;/path/libfftw3.so" does not end in -NOTFOUND and would test as true.
 97 | set( FFTW_LIBRARIES ${FFTW_SINGLE_PRECISION_LIBRARIES} ${FFTW_DOUBLE_PRECISION_LIBRARIES} )
 98 | mark_as_advanced( FFTW_LIBRARIES )
 99 | include( FindPackageHandleStandardArgs )
100 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( FFTW DEFAULT_MSG FFTW_SINGLE_PRECISION_LIBRARIES FFTW_DOUBLE_PRECISION_LIBRARIES FFTW_INCLUDE_DIRS )
101 | 
102 | if( NOT FFTW_FOUND )
103 |     message( STATUS "FindFFTW looked for single precision libraries named: fftw3f or libfftw3f-3" )
104 |     message( STATUS "FindFFTW looked for double precision libraries named: fftw3 or libfftw3-3" )
105 | endif()
106 | 


--------------------------------------------------------------------------------
/src/FindOpenCL.cmake:
--------------------------------------------------------------------------------
  1 | # ########################################################################
  2 | # Copyright 2013 Advanced Micro Devices, Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ########################################################################
 16 | 
 17 | 
 18 | # Locate an OpenCL implementation.
 19 | # Search hints cover the AMD APP SDK (http://developer.amd.com/sdks/AMDAPPSDK/Pages/default.aspx/) and the NVIDIA CUDA toolkit (CUDA_PATH)
 20 | #
 21 | # Defines the following variables:
 22 | #
 23 | #   OpenCL_FOUND - Found the OPENCL framework
 24 | #   OPENCL_INCLUDE_DIRS - Include directories
 25 | #
 26 | # Also defines the library variables below as normal
 27 | # variables.  These contain debug/optimized keywords when
 28 | # a debugging library is found.
 29 | #
 30 | #   OPENCL_LIBRARIES - libopencl
 31 | #
 32 | # Accepts the following variables as input:
 33 | #
 34 | #   OPENCL_ROOT - (as a CMake or environment variable)
 35 | #                The root directory of the OpenCL implementation found
 36 | #
 37 | #   FIND_LIBRARY_USE_LIB64_PATHS - Global property that controls whether findOpenCL should search for
 38 | #                              64bit or 32bit libs
 39 | #-----------------------
 40 | # Example Usage:
 41 | #
 42 | #    find_package(OpenCL REQUIRED)
 43 | #    include_directories(${OPENCL_INCLUDE_DIRS})
 44 | #
 45 | #    add_executable(foo foo.cc)
 46 | #    target_link_libraries(foo ${OPENCL_LIBRARIES})
 47 | #
 48 | #-----------------------
 49 | 
 50 | # Directory that holds the OpenCL headers (OpenCL/cl.h or CL/cl.h)
 51 | find_path( OPENCL_INCLUDE_DIRS
 52 |     NAMES
 53 |         OpenCL/cl.h
 54 |         CL/cl.h
 55 |     HINTS
 56 |         ${OPENCL_ROOT}/include
 57 |         $ENV{AMDAPPSDKROOT}/include
 58 |         $ENV{CUDA_PATH}/include
 59 |     PATHS
 60 |         /usr/include
 61 |         /usr/local/include
 62 |         /usr/local/cuda/include
 63 |         /opt/cuda/include
 64 |     DOC "OpenCL header file path"
 65 | )
 66 | mark_as_advanced( OPENCL_INCLUDE_DIRS )
 67 | 
 68 | # The global property FIND_LIBRARY_USE_LIB64_PATHS decides which architecture specific
 69 | # sub-directories are tried below every library search location
 70 | get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS )
 71 | if( LIB64 )
 72 |     set( _opencl_lib_suffixes x86_64 x64 )
 73 | else( )
 74 |     set( _opencl_lib_suffixes x86 Win32 )
 75 | endif( )
 76 | 
 77 | # One search serves both cases; only the PATH_SUFFIXES differ
 78 | find_library( OPENCL_LIBRARIES
 79 |     NAMES OpenCL
 80 |     HINTS
 81 |         ${OPENCL_ROOT}/lib
 82 |         $ENV{AMDAPPSDKROOT}/lib
 83 |         $ENV{CUDA_PATH}/lib
 84 |         /usr/local/cuda/lib
 85 |         /opt/cuda/lib
 86 |     PATH_SUFFIXES ${_opencl_lib_suffixes}
 87 |     PATHS
 88 |         /usr/lib
 89 |     DOC "OpenCL dynamic library path"
 90 | )
 91 | unset( _opencl_lib_suffixes )
 92 | mark_as_advanced( OPENCL_LIBRARIES )
 93 | 
 94 | include( FindPackageHandleStandardArgs )
 95 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS )
 96 | 
 97 | if( NOT OpenCL_FOUND )
 98 |     message( STATUS "FindOpenCL looked for libraries named: OpenCL" )
 99 | endif()
105 | 


--------------------------------------------------------------------------------
/src/include/sharedLibrary.h:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | 
 18 | #pragma once
 19 | #ifndef _SHAREDLIBRARY_H_
 20 | #define _SHAREDLIBRARY_H_
 21 | #include <iostream>
 22 | #include <string>
 23 | //	_WIN32 is defined for both 32 & 64 bit environments
 24 | #if defined( _WIN32 )
 25 | 	#define WIN32_LEAN_AND_MEAN			// Exclude rarely-used stuff from Windows headers
 26 | 	// Windows Header Files:
 27 | 	#include <windows.h>
 28 | #else
 29 | 	#include <dlfcn.h>
 30 | #endif
 31 | 
 32 | //	Loads a shared library and returns its module handle, or NULL when it cannot be loaded.
 33 | //	  unixPrefix  - text prepended to the file name on non-Windows platforms; not used on Windows
 34 | //	  libraryName - base name of the library; the platform suffix (.dll, .so or .dylib) is appended
 35 | //	  quiet       - when false, a failed ::dlopen( ) prints ::dlerror( ) to std::cerr; not used on Windows
 36 | inline void* LoadSharedLibrary( std::string unixPrefix, std::string libraryName, bool quiet )
 37 | {
 38 | #if defined( _WIN32 )
 39 | 	libraryName += ".dll";
 40 | 
 41 | 	//	HMODULE is actually the load address; function returns NULL if it cannot find the shared library
 42 | 	HMODULE fileHandle	= ::LoadLibraryExA( libraryName.c_str( ), NULL, 0 );
 43 | #elif defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 44 | 	//	__FreeBSD__ is accepted next to __FreeBSD_kernel__ so that toolchains defining only the former build too
 45 | #if defined(__APPLE__)
 46 | 	const char* const librarySuffix = ".dylib";
 47 | #else
 48 | 	const char* const librarySuffix = ".so";
 49 | #endif
 50 | 
 51 | 	//	Plain std::string matches the parameter types and ::dlopen( ); it keeps this header independent
 52 | 	//	of the tstring typedef, which is declared elsewhere
 53 | 	const std::string fullName = unixPrefix + libraryName + librarySuffix;
 54 | 
 55 | 	void* fileHandle = ::dlopen( fullName.c_str( ), RTLD_NOW );
 56 | 	if( !quiet && !fileHandle )
 57 | 	{
 58 | 		std::cerr << ::dlerror( ) << std::endl;
 59 | 	}
 60 | #else
 61 | 	#error "unsupported platform"
 62 | #endif
 63 | 
 64 | 	return fileHandle;
 65 | }
 69 | 
 70 | //	Unloads a module obtained from LoadSharedLibrary and resets the caller's handle to NULL.
 71 | //	Returns nonzero on success; returns zero on failure or when libHandle is already NULL.
 72 | inline int FreeSharedLibrary( void*& libHandle )
 73 | {
 74 | 	int status	= 0;
 75 | 
 76 | 	if( libHandle != 0 )
 77 | 	{
 78 | #if defined( _WIN32 )
 79 | 		status = ::FreeLibrary( reinterpret_cast< HMODULE >( libHandle ) );
 80 | #else
 81 | 		//	dlclose( ) reports success with 0; convert to the nonzero-on-success convention
 82 | 		status = ( ::dlclose( libHandle ) == 0 );
 83 | #endif
 84 | 	}
 85 | 
 86 | 	libHandle	= NULL;
 87 | 	return status;
 88 | }
 88 | 
 89 | //	Looks up the symbol named funcName in a module handle returned from LoadSharedLibrary.
 90 | //	Returns a pointer to the symbol, or NULL when the symbol is not found or when the
 91 | //	module handle itself is NULL.
 92 | inline void* LoadFunctionAddr( void* libHandle, std::string funcName )
 93 | {
 94 | 	void* address = NULL;
 95 | 
 96 | 	if( libHandle != NULL )
 97 | 	{
 98 | #if defined( _WIN32 )
 99 | 		HMODULE module = reinterpret_cast< HMODULE >( libHandle );
100 | 		address = reinterpret_cast< void* >( ::GetProcAddress( module, funcName.c_str( ) ) );
101 | #else
102 | 		address = ::dlsym( libHandle, funcName.c_str( ) );
103 | #endif
104 | 	}
105 | 
106 | 	return address;
107 | }
107 | 
108 | #endif // _SHAREDLIBRARY_H_
109 | 


--------------------------------------------------------------------------------
/src/library/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # ########################################################################
  2 | # Copyright 2013 Advanced Micro Devices, Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ########################################################################
 16 | 
 17 | # Source files compiled on every platform
 18 | set( clFFT.Source
 19 | 	transform.cpp
 20 | 	accessors.cpp
 21 | 	plan.cpp
 22 | 	repo.cpp
 23 | 	generator.stockham.cpp
 24 | 	generator.transpose.gcn.cpp
 25 | 	generator.transpose.cpp
 26 | 	action.transpose.cpp
 27 | 	generator.copy.cpp
 28 | 	lifetime.cpp
 29 | 	fft_binary_lookup.cpp
 30 | 	md5sum.c
 31 | 	enqueue.cpp
 32 | 	stdafx.cpp )
 33 | # dllmain.cpp is added for MSVC builds only
 34 | if( MSVC )
 35 | 	list( APPEND clFFT.Source dllmain.cpp )
 36 | endif( )
 37 | # Headers are listed so that they become part of the clFFT.Files list handed to add_library( )
 38 | set( clFFT.Headers
 39 | 	private.h
 40 | 	action.h
 41 | 	repo.h
 42 | 	plan.h
 43 | 	lock.h
 44 | 	mainpage.h
 45 | 	generator.h
 46 | 	generator.stockham.h
 47 | 	generator.transpose.gcn.h
 48 | 	generator.transpose.h
 49 | 	action.transpose.h
 50 | 	fft_binary_lookup.h
 51 | 	md5sum.h
 52 | 	../include/stdafx.h
 53 | 	../include/unicode.compatibility.h
 54 | 	../include/targetver.h
 55 | 	../include/clAmdFft.h
 56 | 	../include/clFFT.h )
 57 | set( clFFT.Files ${clFFT.Source} ${clFFT.Headers} )
 58 | 
 59 | # For a rainy day, add pre-compiled header support
 60 | #if( MSVC )
 61 | #	if (USE_MSVC_PCH)
 62 | 
 63 | #		set_source_files_properties(LungAnalysisPCH.cxx
 64 | #			PROPERTIES
 65 | #			COMPILE_FLAGS "/YcLungAnalysisPCH.h"
 66 | #			)
 67 | #		foreach( src_file ${UPMC_LA_SRCS} )
 68 | #			set_source_files_properties(
 69 | #				${src_file}
 70 | #				PROPERTIES
 71 | #				COMPILE_FLAGS "/YuLungAnalysisPCH.h"
 72 | #				)
 73 | #		endforeach( src_file ${UPMC_LA_SRCS} )
 74 | 
 75 | #		list(APPEND UPMC_LA_SRCS LungAnalysisPCH.cxx)
 76 | #		list(APPEND UPMC_LA_HDRS LungAnalysisPCH.h)
 77 | 
 78 | #	endif(USE_MSVC_PCH)
 79 | #endif (MSVC)
 80 | 
 81 | #	add_definitions( ${Boost_LIB_DIAGNOSTIC_DEFINITIONS} )
 82 | add_definitions( "/DCLFFT_EXPORTS" )
 83 | 
 84 | add_library( clFFT ${clFFT.Files} )
 85 | 
 86 | # Consumers see the public headers; the OpenCL headers are needed only to build the library itself
 87 | target_include_directories( clFFT
 88 |     PUBLIC
 89 |         $<INSTALL_INTERFACE:include>
 90 |         $<BUILD_INTERFACE:${clFFT_SOURCE_DIR}/include>
 91 |         $<BUILD_INTERFACE:${clFFT_BINARY_DIR}/include>
 92 |     PRIVATE
 93 |         ${OPENCL_INCLUDE_DIRS} )
 94 | 
 95 | target_link_libraries( clFFT ${OPENCL_LIBRARIES} ${CMAKE_DL_LIBS} )
 96 | 
 97 | set_target_properties( clFFT PROPERTIES
 98 |     VERSION ${clFFT_VERSION}
 99 |     SOVERSION ${clFFT_VERSION_MAJOR}
100 |     RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
101 | 
102 | # A static clFFT is still compiled as position independent code
103 | if( NOT BUILD_SHARED_LIBS )
104 |     set_property( TARGET clFFT PROPERTY POSITION_INDEPENDENT_CODE ON )
105 | endif( )
106 | 
107 | # The pkg-config file is generated and installed for GNU C compilers only
108 | if( CMAKE_COMPILER_IS_GNUCC )
109 |     configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/clFFT.pc.in ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc @ONLY )
110 |     install( FILES ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc DESTINATION lib${CLFFT_SUFFIX_LIB}/pkgconfig )
111 | endif( )
112 | 
113 | # CPack configuration; include the library into the package
114 | install( TARGETS clFFT
115 |     EXPORT Library
116 |     RUNTIME DESTINATION bin${CLFFT_SUFFIX_BIN}
117 |     LIBRARY DESTINATION lib${CLFFT_SUFFIX_LIB}
118 |     ARCHIVE DESTINATION lib${CLFFT_SUFFIX_LIB}/import )
120 | 


--------------------------------------------------------------------------------
/src/tests/test_constants.cpp:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | #include "test_constants.h"
 18 | #include <gtest/gtest.h>
 19 | #include <stdexcept>
 20 | #include <string>
 21 | #include <sstream>
 22 | #include <iostream>
 23 | #include "../client/openCL.misc.h"
 24 | 
 25 | #if defined( _WIN32 ) && defined( _DEBUG )
 26 | #include <Windows.h>
 27 | #endif
 28 | 
 29 | //	Prints a description of an exception caught by a test.  Messages that indicate insufficient
 30 | //	device or system memory, or missing double precision support, are only reported; every other
 31 | //	message ends in the googletest FAIL( ) macro.
 32 | void handle_exception( const std::exception& except )
 33 | {
 34 | 	const std::string what_text( except.what() );
 35 | 	const std::string::size_type absent = std::string::npos;
 36 | 
 37 | 	std::cout << "--- Exception caught ---" << std::endl;
 38 | 
 39 | 	//	Any one of these markers means the data set does not fit on the device
 40 | 	static const char* const too_large_markers[] = {
 41 | 		"problem too large for device",
 42 | 		"CLFFT_INVALID_BUFFER_SIZE",
 43 | 		"CLFFT_MEM_OBJECT_ALLOCATION_FAILURE",
 44 | 		"CLFFT_OUT_OF_HOST_MEMORY",
 45 | 		"CLFFT_OUT_OF_RESOURCES"
 46 | 	};
 47 | 	const size_t marker_count = sizeof( too_large_markers ) / sizeof( too_large_markers[0] );
 48 | 
 49 | 	for( size_t i = 0; i < marker_count; ++i )
 50 | 	{
 51 | 		if( what_text.find( too_large_markers[i] ) != absent )
 52 | 		{
 53 | 			std::cout << "Data set is too large for this device -- skipping test" << std::endl;
 54 | 			//TODO put in (this problem size[data + stride]/max problem size/gpu or cpu) specifics
 55 | 			return;
 56 | 		}
 57 | 	}
 58 | 
 59 | 	if( what_text.find("system memory allocation failure") != absent )
 60 | 	{
 61 | 		std::cout << "Framework was denied enough system memory to support the data set"
 62 | 			<< " -- skipping test" << std::endl;
 63 | 		return;
 64 | 	}
 65 | 
 66 | 	if( what_text.find("CLFFT_DEVICE_NO_DOUBLE") != absent )
 67 | 	{
 68 | 		std::cout << "Device in context does not support double precision"
 69 | 			<< " -- skipping test" << std::endl;
 70 | 		return;
 71 | 	}
 72 | 
 73 | 	//	Everything from here on is reported as a test failure
 74 | 	if( what_text.find("dereference null pointer") != absent )
 75 | 	{
 76 | 		std::cout << what_text << std::endl;
 77 | 	}
 78 | 	else if( what_text.find("in-place transform, unmatched in/out layouts") != absent )
 79 | 	{
 80 | 		std::cout << "Invalid arguments: for an in-place transform, "
 81 | 			<< "in/output layouts must be the same" << std::endl;
 82 | 	}
 83 | 	else if( what_text.find("device list is empty at transform") != absent )
 84 | 	{
 85 | 		std::cout << "A clfft transform is requested, but the device list is empty" << std::endl;
 86 | 	}
 87 | 	else
 88 | 	{
 89 | 		std::cout << "Unrecognized exception: " << std::endl;
 90 | 		std::cout << what_text << std::endl;
 91 | 	}
 92 | 
 93 | 	FAIL();
 94 | }
 83 | 
 84 | /*****************************************************/
 85 | //	Returns the CL_DEVICE_MAX_MEM_ALLOC_SIZE reported for entry device_index of the device list
 86 | //	produced by initializeCL( ), or 0 when that list has no such entry.
 87 | //	A nonzero answer is cached in a function-local static, so later calls return the cached value
 88 | //	regardless of the device_index they pass.
 89 | size_t max_mem_available_on_cl_device(size_t device_index) {
 90 | 
 91 | 	static size_t g_device_max_mem_size  = 0;
 92 | 
 93 | 	// this is not thread-safe using globals, it is just quick fix for now, todo proper fix
 94 | 	if (g_device_max_mem_size == 0)
 95 | 	{
 96 | 		std::vector< cl_device_id >	device_id;
 97 | 		cl_context tempContext = NULL;
 98 | 		device_id = initializeCL(
 99 | 			g_device_type,
100 | 			(cl_int)device_index,
101 | 			g_platform_id,
102 | 			tempContext,
103 | 			false
104 | 			);
105 | 
106 | 		cl_ulong device_max_to_allocate = 0;
107 | 
108 | 		// Valid indices are 0 .. size()-1; an index equal to size() must not reach operator[]
109 | 		if (device_index < device_id.size())
110 | 		{
111 | 			OPENCL_V_THROW(::clGetDeviceInfo(device_id[device_index], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &device_max_to_allocate, NULL),
112 | 				"Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )");
113 | 		}
114 | 
115 | 		// Release the temporary context; no queue or event was created here, so NULL handles are passed
116 | 		cl_command_queue tempQueue = NULL;
117 | 		cl_event tempEvent = NULL;
118 | 		::cleanupCL(&tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent);
119 | 
120 | 		g_device_max_mem_size = static_cast<size_t>(device_max_to_allocate);
121 | 	}
122 | 
123 | 	return g_device_max_mem_size;
124 | }
121 | 


--------------------------------------------------------------------------------
/src/tests/copyTestDependencies.cmake.in:
--------------------------------------------------------------------------------
  1 | # Customized install script for fftw test program; analyzes all the shared library dependencies and installs
  2 | # the dependencies into the package
  3 | include( GetPrerequisites )
  4 | 
  5 | #    message( testLocation ": @testLocation@" )
  6 | 
  7 | # The Microsoft IDE presents a challenge because the full configuration is not known at cmake time
  8 | # This logic allows us to 'substitute' the proper configuration at install time
  9 | if( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Debug" )
 10 |     string( REPLACE "\$(Configuration)" "Debug" fixedTestLocation "@testLocation@" )
 11 | elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Release" )
 12 |     string( REPLACE "\$(Configuration)" "Release" fixedTestLocation "@testLocation@" )
 13 | elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "MinSizeRel" )
 14 |     string( REPLACE "\$(Configuration)" "MinSizeRel" fixedTestLocation "@testLocation@" )
 15 | elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Rel[Ww]ithDebInfo" )  # CMake spells it RelWithDebInfo; the old lower-case 'w' spelling is still accepted
 16 |     string( REPLACE "\$(Configuration)" "RelWithDebInfo" fixedTestLocation "@testLocation@" )
 17 | endif( )
 18 | 
 19 | #    message( fixedTestLocation ": ${fixedTestLocation}" )
 20 | # Get the directory that the test executable resides in; this helps get_prerequisites( ) find dependent libraries
 21 | get_filename_component( testDir "${fixedTestLocation}" PATH )
 22 | #    message( testDir ": ${testDir}" )
 23 | 
 24 | # Dependencies are copied into the bin directory on Windows and into the lib directory elsewhere
 25 | if( WIN32 )
 26 |     set( installPath "${CMAKE_INSTALL_PREFIX}/bin@CLFFT_SUFFIX_BIN@" )
 27 | else( )
 28 |     set( installPath "${CMAKE_INSTALL_PREFIX}/lib@CLFFT_SUFFIX_LIB@" )
 29 | endif( )
 30 | 
 31 | # depList collects the existing directories that are searched for dependent libraries
 32 | set( depList "" )
 33 | 
 34 | # FFTW; assumes that FindFFTW.cmake has been called
 35 | get_filename_component( fftwDirSingle "@FFTW_SINGLE_PRECISION_LIBRARIES@" PATH )
 36 | get_filename_component( fftwDirDouble "@FFTW_DOUBLE_PRECISION_LIBRARIES@" PATH )
 37 | string( COMPARE NOTEQUAL "${fftwDirSingle}" "${fftwDirDouble}" fftwDiffDirs )
 38 | 
 39 | if( EXISTS "${fftwDirSingle}" )
 40 |     list( APPEND depList "${fftwDirSingle}" )
 41 | endif( )
 42 | # The double precision directory is added only when it differs from the single precision one
 43 | if( ${fftwDiffDirs} )
 44 |     if( EXISTS "${fftwDirDouble}" )
 45 |         list( APPEND depList "${fftwDirDouble}" )
 46 |     endif( )
 47 | endif( )
 48 | 
 49 | # googletest; assumes that FindGTest.cmake has been called
 50 | get_filename_component( gtestDir "@GTEST_LIBRARY@" PATH )
 51 | get_filename_component( gtestDirDebug "@GTEST_LIBRARY_DEBUG@" PATH )
 52 | string( COMPARE NOTEQUAL "${gtestDir}" "${gtestDirDebug}" gtestDiffDirs )
 53 | 
 54 | if( EXISTS "${gtestDir}" )
 55 |     list( APPEND depList "${gtestDir}" )
 56 | endif( )
 57 | # The debug directory is added only when it differs from the release one
 58 | if( ${gtestDiffDirs} )
 59 |     if( EXISTS "${gtestDirDebug}" )
 60 |         list( APPEND depList "${gtestDirDebug}" )
 61 |     endif( )
 62 | endif( )
 63 | 
 64 | # OpenCL; assumes that FindOpenCL.cmake has been called
 65 | get_filename_component( openclDir "@OPENCL_LIBRARIES@" PATH )
 66 | if( EXISTS "${openclDir}" )
 67 |     list( APPEND depList "${openclDir}" )
 68 | endif( )
 69 | 
 70 | # Finally the directory of the test executable itself
 71 | if( EXISTS "${testDir}" )
 72 |     list( APPEND depList "${testDir}" )
 73 | endif( )
 74 | 
 75 | # Query the shared library dependencies of the test executable; the names returned are not full paths.
 76 | # System dependencies are excluded (1) and the query does not recurse (0).
 77 | get_prerequisites( ${fixedTestLocation} testDependencies 1 0 "" "${depList}" )
 78 | 
 79 | # Copy every dependency that is not yet present in the install directory
 80 | foreach( prerequisite ${testDependencies} )
 81 |     # Resolve the dependency name into a full path
 82 |     gp_resolve_item( "${fixedTestLocation}" "${prerequisite}" "" "${depList}" resolvedPath )
 83 | 
 84 |     # On linux the resolved path may be a symbolic link, so the real file is installed as well
 85 |     get_filename_component( realFile "${resolvedPath}" REALPATH )
 86 |     get_filename_component( fileName "${resolvedPath}" NAME )
 87 | 
 88 |     if( NOT EXISTS ${installPath}/${fileName} )
 89 |         file( INSTALL ${resolvedPath} ${realFile}
 90 |               DESTINATION ${installPath}
 91 |               USE_SOURCE_PERMISSIONS )
 92 |     endif( )
 93 | endforeach( )
101 | 


--------------------------------------------------------------------------------
/src/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # ########################################################################
  2 | # Copyright 2013 Advanced Micro Devices, Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ########################################################################
 16 | 
 17 | 
 18 | # Sources of the test executable; the accuracy suites share the accuracy_test_<suite>.cpp naming scheme
 19 | set( clFFT.Test.Source
 20 |     test_constants.cpp
 21 |     buffer_memory.cpp
 22 |     buffer.cpp
 23 |     unit_test.cpp )
 24 | 
 25 | foreach( suite
 26 |     common pow2 pow3 pow5 pow7 mixed_radices random directed mixed_callback
 27 |     pow2_precallback pow3_precallback pow5_precallback pow7_precallback postcallback )
 28 |     list( APPEND clFFT.Test.Source accuracy_test_${suite}.cpp )
 29 | endforeach( )
 30 | 
 31 | list( APPEND clFFT.Test.Source
 32 |     gtest_main.cpp
 33 |     ${PROJECT_SOURCE_DIR}/client/openCL.misc.cpp
 34 |     c-compliance.c )
 35 | 
 36 | # Headers: project wide ones first, then the headers local to the test directory
 37 | set( clFFT.Test.Headers
 38 |     ${PROJECT_SOURCE_DIR}/include/clFFT.h
 39 |     ${PROJECT_SOURCE_DIR}/include/unicode.compatibility.h
 40 |     ${PROJECT_SOURCE_DIR}/include/convenienceFunctions.h
 41 |     ${PROJECT_SOURCE_DIR}/library/private.h
 42 |     ${PROJECT_SOURCE_DIR}/client/openCL.misc.h
 43 |     accuracy_test_common.h
 44 |     test_constants.h
 45 |     buffer_memory.h
 46 |     buffer.h
 47 |     cl_transform.h
 48 |     fftw_transform.h
 49 |     typedefs.h )
 50 | 
 51 | set( clFFT.Test.Files ${clFFT.Test.Source} ${clFFT.Test.Headers} )
 59 | 
 60 | set( LD_PTHREAD "" )
 61 | if( MINGW )
 62 |     # -std=c++0x causes g++ to go into strict ANSI mode, which doesn't declare non-standard functions
 63 |     # Googletest for mingw appears to have a dependency on _stricmp and off64_t
 64 | 	set( CMAKE_CXX_FLAGS "-std=gnu++0x ${CMAKE_CXX_FLAGS}" )
 65 | elseif( CMAKE_COMPILER_IS_GNUCXX )
 66 | 	set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" )
 67 | 	set( LD_PTHREAD "-lpthread" )
 68 | elseif( APPLE )
 69 | 	set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" )
 70 | 	add_definitions( -DGTEST_USE_OWN_TR1_TUPLE )
 71 | endif( )
 72 | 
 73 | # Include standard OpenCL headers
 74 | include_directories( ${Boost_INCLUDE_DIRS} ${GTEST_INCLUDE_DIRS} ${FFTW_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS}  ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include )
 75 | 
 76 | add_executable( Test ${clFFT.Test.Files} )
 77 | 
 78 | # If the runtime is being built by the project, use it, otherwise link to a runtime library specified in the install prefix
 79 | if( CLFFT_BUILD_RUNTIME )
 80 | 	target_link_libraries( Test clFFT ${Boost_LIBRARIES} ${GTEST_LIBRARIES} ${FFTW_LIBRARIES} ${OPENCL_LIBRARIES} ${LD_PTHREAD} ${CMAKE_DL_LIBS})
 81 | else( )
 82 | 	# Link against an installed runtime.  The directory must match the install( ) rules of the
 83 | 	# library project, which place the shared library in lib${CLFFT_SUFFIX_LIB} and the import
 84 | 	# library in lib${CLFFT_SUFFIX_LIB}/import.  Hard-coded lib64/lib32 names only match when
 85 | 	# the suffix happens to be 64 or 32.
 86 | 	# NOTE(review): assumes CLFFT_SUFFIX_LIB is visible in this directory scope -- confirm
 87 | 	set( clFFT.library "${CMAKE_INSTALL_PREFIX}/lib${CLFFT_SUFFIX_LIB}" )
 88 | 
 89 | 	# Windows links through the import library, other platforms link the shared library itself
 90 | 	if( WIN32 )
 91 | 		set( clFFT.library "${clFFT.library}/import/clFFT${CMAKE_STATIC_LIBRARY_SUFFIX}" )
 92 | 	else( )
 93 | 		set( clFFT.library "${clFFT.library}/${CMAKE_SHARED_LIBRARY_PREFIX}clFFT${CMAKE_SHARED_LIBRARY_SUFFIX}" )
 94 | 	endif( )
 95 | 
 96 | 	target_link_libraries( Test ${clFFT.library} ${Boost_LIBRARIES} ${GTEST_LIBRARIES} ${FFTW_LIBRARIES} ${OPENCL_LIBRARIES} ${LD_PTHREAD} ${CMAKE_DL_LIBS})
 97 | 
 98 | endif( )
 99 | 
100 | # The following set_target_properties is to get around a bug in cmake 2.8.2, where the suffix after the first '.' is dropped
101 | IF( (MSVC_VERSION VERSION_EQUAL 1600) AND (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} VERSION_LESS 2.8.3) )
102 | 	message( STATUS "Detected vs2010 and Cmake version less than 2.8.3; renaming Test with underscores " )
103 | 	set_target_properties( Test PROPERTIES OUTPUT_NAME "clFFT_Test" )
104 | ENDIF( )
105 | 
106 | set_target_properties( Test PROPERTIES VERSION ${clFFT_VERSION} )
107 | set_target_properties( Test PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
108 | 
109 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
  1 | # Appveyor OS list
  2 | # Windows Server 2012 R2 (x64) <== Appveyor default image
  3 | # Visual Studio 2015
  4 | 
  5 | # os: expands the build matrix to include multiple os's
  6 | os:
  7 |   - Windows Server 2012
  8 | 
  9 | # platform: expands the build matrix to include multiple target platforms (per os)
 10 | platform:
 11 |   - x64
 12 | 
 13 | configuration:
 14 |   - Release
 15 | 
 16 | # Only clone the top level commit; don't bother with history
 17 | shallow_clone: true
 18 | 
 19 | # environment: specifies additional global variables to define per row in build matrix
 20 | environment:
 21 |   global:
 22 |     CLFFT_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\nmake\\release"
 23 |     OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl"
 24 |     FFTW_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\fftw"
 25 |     BOOST_ROOT: "C:\\Libraries\\boost_1_58_0"
 26 |     OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"
 27 | 
 28 | init:
 29 |   - echo init step
 30 |   - cmake --version
 31 |   - C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM%
 32 |   # Uncomment the following to display Remote Desktop connection details
 33 |   # - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
 34 | 
 35 | # We need to create an opencl import library that clfft can link against
 36 | # Vendor based OpenCL packages are hard to use because of download size, registration requirements
 37 | # and unattended installs not well supported
 38 | install:
 39 |   - echo Installing OpenCL
 40 |   - ps: mkdir $env:OPENCL_ROOT
 41 |   - ps: pushd $env:OPENCL_ROOT
 42 |   - ps: $opencl_registry = $env:OPENCL_REGISTRY
 43 |   # This downloads the source to the Khronos ICD library
 44 |   - git clone --depth 1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
 45 |   - ps: mv ./OpenCL-ICD-Loader/* .
 46 |   # This downloads all the opencl header files
 47 |   # The cmake build files expect a directory called inc
 48 |   - ps: mkdir inc/CL
 49 |   - git clone --depth 1 https://github.com/KhronosGroup/OpenCL-Headers.git inc/CL
 50 |   - ps: wget $opencl_registry/api/2.1/cl.hpp -OutFile inc/CL/cl.hpp
 51 |   # - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom }
 52 |   # Create the static import lib in a directory called lib, so findopencl() will find it
 53 |   - ps: mkdir lib
 54 |   - ps: pushd lib
 55 |   - cmake -G "NMake Makefiles" ..
 56 |   - nmake
 57 |   - ps: popd
 58 |   # Switch to OpenCL 1.2 headers
 59 |   - ps: pushd inc/CL
 60 |   - git fetch origin opencl12:opencl12
 61 |   - git checkout opencl12
 62 |   - ps: popd
 63 |   # Rename the inc directory to include, so FindOpencl() will find it
 64 |   - ps: ren inc include
 65 |   - ps: popd
 66 | 
 67 |   - echo Installing FFTW
 68 |   - ps: mkdir $env:FFTW_ROOT
 69 |   - ps: pushd $env:FFTW_ROOT
 70 |   # This downloads the windows 64-bit pre-compiled dlls
 71 |   - ps: wget ftp://ftp.fftw.org/pub/fftw/fftw-3.3.4-dll64.zip -OutFile fftw-3.3.4-dll64.zip
 72 |   - ps: 7z x fftw-3.3.4-dll64.zip
 73 | #  - ps: pushd fftw-3.3.4-dll64
 74 |   - ps: lib /machine:x64 /def:libfftw3-3.def
 75 |   - ps: lib /machine:x64 /def:libfftw3f-3.def
 76 |   - ps: lib /machine:x64 /def:libfftw3l-3.def
 77 | 
 78 | # before_build is used to run configure steps
 79 | before_build:
 80 |   - echo before_build step
 81 |   # Boost 1.58 is not installed in typical fashion, help FindBoost() find binary libs with BOOST_LIBRARYDIR
 82 |   - ps: $env:BOOST_LIBRARYDIR = "$env:BOOST_ROOT/lib64-msvc-12.0"
 83 |   - ps: mkdir $env:CLFFT_ROOT
 84 |   - ps: pushd $env:CLFFT_ROOT
 85 |   - cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DBoost_NO_SYSTEM_PATHS=OFF -DOPENCL_ROOT=%OPENCL_ROOT% %APPVEYOR_BUILD_FOLDER%/src
 86 | 
 87 | # build_script invokes the compiler
 88 | build_script:
 89 |   - echo build_script step
 90 |   - nmake package
 91 | 
 92 | after_build:
 93 |   - echo after_build step
 94 |   - ps: ls $env:CLFFT_ROOT
 95 |   - ps: mv $env:CLFFT_ROOT\*.zip $env:APPVEYOR_BUILD_FOLDER
 96 | 
 97 | # AppVeyor will save a copy of the package in its own storage
 98 | artifacts:
 99 |   - path: '*.zip'
100 |     name: binary_zip
101 |     type: zip
102 | 
103 | # on_finish always executes regardless of passed or failed builds
104 | on_finish:
105 |   - echo on_finish step
106 | 
107 | # Appveyor will push the artifacts it has saved to GitHub 'releases' tab
108 | deploy:
109 |   provider: GitHub
110 |   auth_token:
111 |     secure: dRXIWJKpU7h2RsHX7RqmyYCtCw+Q9O3X5MArloY6p34GZC1w7bp+jQYTZqbdO7bw
112 |   artifact: binary_zip
113 |   draft: true
114 |   prerelease: true
115 |   on:
116 |     appveyor_repo_tag: true
117 | 
118 |   # Uncomment the following to pause the VM and wait for an RDP connection to debug
119 |   # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
120 | 


--------------------------------------------------------------------------------
/src/examples/fft1d.c:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | #include <stdio.h>
 18 | #include <stdlib.h>
 19 | 
 20 | /* No need to explicitly include the OpenCL headers */
 21 | #include <clFFT.h>
 22 | 
 23 | int main( void )
 24 | {
 25 |     cl_int err;
 26 |     cl_platform_id platform = 0;
 27 |     cl_device_id device = 0;
 28 |     cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
 29 |     cl_context ctx = 0;
 30 |     cl_command_queue queue = 0;
 31 |     cl_mem bufX;
 32 |     float *X;
 33 |     cl_event event = NULL;
 34 |     int ret = 0;
 35 |     size_t N = 16;
 36 |     char platform_name[128];
 37 |     char device_name[128];
 38 | 
 39 |     /* FFT library realted declarations */
 40 |     clfftPlanHandle planHandle;
 41 |     clfftDim dim = CLFFT_1D;
 42 |     size_t clLengths[1] = {N};
 43 | 
 44 |     /* Setup OpenCL environment. */
 45 |     err = clGetPlatformIDs( 1, &platform, NULL );
 46 | 
 47 |     size_t ret_param_size = 0;
 48 |     err = clGetPlatformInfo(platform, CL_PLATFORM_NAME,
 49 |             sizeof(platform_name), platform_name,
 50 |             &ret_param_size);
 51 |     printf("Platform found: %s\n", platform_name);
 52 | 
 53 |     err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL );
 54 | 
 55 |     err = clGetDeviceInfo(device, CL_DEVICE_NAME,
 56 |             sizeof(device_name), device_name,
 57 |             &ret_param_size);
 58 |     printf("Device found on the above platform: %s\n", device_name);
 59 | 
 60 |     props[1] = (cl_context_properties)platform;
 61 |     ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
 62 |     queue = clCreateCommandQueue( ctx, device, 0, &err );
 63 | 
 64 |     /* Setup clFFT. */
 65 |     clfftSetupData fftSetup;
 66 |     err = clfftInitSetupData(&fftSetup);
 67 |     err = clfftSetup(&fftSetup);
 68 | 
 69 |     /* Allocate host & initialize data. */
 70 |     /* Only allocation shown for simplicity. */
 71 |     X = (float *)malloc(N * 2 * sizeof(*X));
 72 | 
 73 |     /* print input array */
 74 |     printf("\nPerforming fft on an one dimensional array of size N = %lu\n", (unsigned long)N);
 75 |     int print_iter = 0;
 76 |     while(print_iter<N) {
 77 |         float x = (float)print_iter;
 78 |         float y = (float)print_iter*3;
 79 |         X[2*print_iter  ] = x;
 80 |         X[2*print_iter+1] = y;
 81 |         printf("(%f, %f) ", x, y);
 82 |         print_iter++;
 83 |     }
 84 |     printf("\n\nfft result: \n");
 85 | 
 86 |     /* Prepare OpenCL memory objects and place data inside them. */
 87 |     bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err );
 88 | 
 89 |     err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0,
 90 |             N * 2 * sizeof( *X ), X, 0, NULL, NULL );
 91 | 
 92 |     /* Create a default plan for a complex FFT. */
 93 |     err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
 94 | 
 95 |     /* Set plan parameters. */
 96 |     err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
 97 |     err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
 98 |     err = clfftSetResultLocation(planHandle, CLFFT_INPLACE);
 99 | 
100 |     /* Bake the plan. */
101 |     err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);
102 | 
103 |     /* Execute the plan. */
104 |     err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);
105 | 
106 |     /* Wait for calculations to be finished. */
107 |     err = clFinish(queue);
108 | 
109 |     /* Fetch results of calculations. */
110 |     err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL );
111 | 
112 |     /* print output array */
113 |     print_iter = 0;
114 |     while(print_iter<N) {
115 |         printf("(%f, %f) ", X[2*print_iter], X[2*print_iter+1]);
116 |         print_iter++;
117 |     }
118 |     printf("\n");
119 | 
120 |     /* Release OpenCL memory objects. */
121 |     clReleaseMemObject( bufX );
122 | 
123 |     free(X);
124 | 
125 |     /* Release the plan. */
126 |     err = clfftDestroyPlan( &planHandle );
127 | 
128 |     /* Release clFFT library. */
129 |     clfftTeardown( );
130 | 
131 |     /* Release OpenCL working objects. */
132 |     clReleaseCommandQueue( queue );
133 |     clReleaseContext( ctx );
134 | 
135 |     return ret;
136 | }
137 | 


--------------------------------------------------------------------------------
/src/tests/buffer_memory.h:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | 
 18 | #pragma once
 19 | #if !defined( CLFFT_BUFFER_MEMORY_H )
 20 | #define CLFFT_BUFFER_MEMORY_H
 21 | 
 22 | #include <cstring>
 23 | #include <stdexcept>
 24 | #include <vector>
 25 | #include <stdint.h>
 26 | uint32_t float_as_hex( float a );
 27 | uint64_t float_as_hex( double a );
 28 | uint32_t nan_as_hex( float a );
 29 | uint64_t nan_as_hex( double a );
 30 | 
 31 | /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
 32 | /*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
 33 | // Host-side test buffer of T (float or double) guarded by "cookies": the
 34 | // user-visible region is preceded and followed by cookie_size sentinel elements
 35 | // whose bytes are all 0xFF.  check_memory_boundaries() throws when a sentinel
 36 | // no longer matches nan_as_hex(), i.e. something wrote outside the user region.
 37 | template <class T>
 38 | class buffer_memory {
 39 | private:
 40 | 	// Number of sentinel elements placed before and after the user region.
 41 | 	size_t cookie_size;
 42 | 
 43 | 	// Number of elements the user requested originally.
 44 | 	// This never changes, even if the memory size is increased.
 45 | 	size_t requested_floats;
 46 | 
 47 | 	// Total element count (user region plus both cookies), in units of T.
 48 | 	size_t memory_size_including_cookies;
 49 | 
 50 | 	// Backing storage, laid out as [cookie | user region | cookie].
 51 | 	std::vector<T> memory;
 52 | 
 53 | public:
 54 | 	// requested_number_of_floats should already take into account any strides,
 55 | 	// batch size, data layout (real, complex, hermitian, interleaved, planar).
 56 | 	// The whole allocation, cookies included, starts out filled with 0xFF bytes.
 57 | 	buffer_memory( size_t requested_number_of_floats )
 58 | 		: cookie_size( 4 )
 59 | 		, requested_floats( requested_number_of_floats )
 60 | 		, memory_size_including_cookies( requested_number_of_floats + 2 * cookie_size )
 61 | 		, memory( memory_size_including_cookies )
 62 | 	{
 63 | 		clear();
 64 | 	}
 65 | 
 66 | 	~buffer_memory() {
 67 | 	}
 68 | 
 69 | 	// Member-wise copy of the bookkeeping sizes and the contents (cookies included).
 70 | 	buffer_memory<T> & operator=( const buffer_memory<T> & that )
 71 | 	{
 72 | 		this->cookie_size = that.cookie_size;
 73 | 		this->requested_floats = that.requested_floats;
 74 | 		this->memory_size_including_cookies = that.memory_size_including_cookies;
 75 | 		this->memory = that.memory;
 76 | 		return *this;
 77 | 	}
 78 | 
 79 | 	// Throws std::runtime_error if any leading or trailing cookie element no
 80 | 	// longer holds the bit pattern reported by nan_as_hex().
 81 | 	void check_memory_boundaries() {
 82 | 		// Compare bit patterns rather than float values: a NaN never compares equal.
 83 | 		for( size_t i = 0; i < cookie_size; ++i ) {
 84 | 			const T & front = memory[ i ];
 85 | 			const T & back = memory[ memory.size() - 1 - i ];
 86 | 			if( float_as_hex( front ) != nan_as_hex( memory[0] ) ||
 87 | 				float_as_hex( back ) != nan_as_hex( memory[0] ) )
 88 | 				throw std::runtime_error("some operation wrote beyond bounds of memory");
 89 | 		}
 90 | 	}
 91 | 
 92 | 	// Overwrites every byte of the allocation, cookies included, with 0xFF
 93 | 	// (an all-ones bit pattern is a NaN for both float and double).
 94 | 	void clear()
 95 | 	{
 96 | 		if( !memory.empty() )	// &memory[0] is only valid for a non-empty vector
 97 | 			std::memset( &memory[0], 0xFF, memory.size() * sizeof(T) );
 98 | 	}
 99 | 
100 | 	// Size of the user-visible region in units of T; also see: size_in_bytes()
101 | 	size_t size() const
102 | 	{
103 | 		return size_in_bytes() / sizeof(T);
104 | 	}
105 | 
106 | 	// Size of the user-visible region (cookies excluded) in bytes.
107 | 	size_t size_in_bytes() const
108 | 	{
109 | 		return (memory_size_including_cookies - 2 * cookie_size) * sizeof(T);
110 | 	}
111 | 
112 | 	// Grows the allocation by `amount` elements of T.
113 | 	// N.B. the entire buffer, previous contents included, is cleared afterwards.
114 | 	void increase_allocated_memory( size_t amount )
115 | 	{
116 | 		size_t new_memory_size = memory_size_including_cookies + amount;
117 | 		memory.resize( new_memory_size );
118 | 		memory_size_including_cookies = new_memory_size;
119 | 
120 | 		clear();
121 | 	}
122 | 
123 | 	// Pointer to the first user-visible element (just past the leading cookie).
124 | 	T* ptr()
125 | 	{
126 | 		return &memory[0] + cookie_size;
127 | 	}
128 | 
129 | 	// Bounds-checked access to user-visible element `index`;
130 | 	// throws std::runtime_error when index >= size().
131 | 	T& operator[]( size_t index ) {
132 | 		if( index >= size() )
133 | 			throw std::runtime_error( "operator[] write out of bounds" );
134 | 		return memory[ cookie_size + index ];
135 | 	}
136 | };
137 | 
138 | #endif
139 | 


--------------------------------------------------------------------------------
/src/callback-client/openCL.misc.h:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | 
 18 | #pragma once
 19 | #if !defined( OPENCL_MISC_H )
 20 | #define OPENCL_MISC_H
 21 | #include <memory>
 22 | #include <stdexcept>
 23 | #include "unicode.compatibility.h"
 24 | 
 25 | //	Creating a portable definition of countof
 26 | #if defined( _MSC_VER )
 27 | 	#define countOf _countof
 28 | #else
 29 | 	#define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) )
 30 | #endif
 31 | 
 32 | /*
 33 |  * \brief OpenCL platform and device discovery
 34 |  *        Creates a list of OpenCL platforms
 35 |  *        and their associated devices 
 36 |  */
 37 | int discoverCLPlatforms( cl_device_type deviceType,
 38 | 					     std::vector< cl_platform_id >& platforms,
 39 | 						 std::vector< std::vector< cl_device_id > >& devices );
 40 | 
 41 | void prettyPrintCLPlatforms(std::vector< cl_platform_id >& platforms,
 42 | 	std::vector< std::vector< cl_device_id > >& devices);
 43 | 
 44 | /*
 45 |  * \brief OpenCL related initialization
 46 |  *        Create Context, Device list
 47 |  *        Load CL file, compile, link CL source
 48 |  *		  Build program and kernel objects
 49 |  */
 50 | std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
 51 | 										  cl_int deviceId,
 52 | 										  cl_int platformId,
 53 | 										  cl_context& context,
 54 | 										  bool printclInfo );
 55 | 
 56 | /*
 57 |  * \brief OpenCL memory buffer creation
 58 |  */
 59 | int createOpenCLMemoryBuffer(
 60 | 		cl_context& context,
 61 | 		const size_t bufferSizeBytes,
 62 | 		const cl_uint numBuffers,
 63 | 		cl_mem buffer[],
 64 | 		cl_mem_flags accessibility
 65 | 		);
 66 | 
 67 | /*
 68 |  * \brief OpenCL command queue creation
 69 |  *        Create Command Queue
 70 |  *        Create OpenCL memory buffer objects
 71 |  */
 72 | void createOpenCLCommandQueue( cl_context& context,
 73 | 							   cl_uint commandQueueFlags,
 74 | 							   cl_command_queue& commandQueue,
 75 | 							   std::vector< cl_device_id > devices,
 76 | 							   const size_t bufferSizeBytesIn,
 77 | 							   const cl_uint numBuffersIn,
 78 | 							   cl_mem clMemBufferIn[],
 79 | 							   const size_t bufferSizeBytesOut,
 80 | 							   const cl_uint numBuffersOut,
 81 | 							   cl_mem clMemBufferOut[] );
 82 | 
 83 | /*
 84 |  * \brief release OpenCL memory buffer
 85 |  */
 86 | int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[] );
 87 | 
 88 | std::string prettyPrintclFFTStatus( const cl_int& status );
 89 | 
 90 | //	Validates a status code, either returned directly by a wrapped OpenCL call or held in a variable.
 91 | //	CL_SUCCESS is handed straight back to the caller; any other value is formatted together with the
 92 | //	line number and message, echoed on std::cout and thrown as std::runtime_error.
 93 | //	Note: std::runtime_error does not take unicode strings as input, so only strings supported
 94 | inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno )
 95 | {
 96 | 	//	Nothing to report on success.
 97 | 	if( res == CL_SUCCESS )
 98 | 		return	res;
 99 | 
100 | 	std::stringstream report;
101 | 	report << "OPENCL_V_THROWERROR< "
102 | 	       << prettyPrintclFFTStatus( res )
103 | 	       << " > ("
104 | 	       << lineno
105 | 	       << "): "
106 | 	       << msg;
107 | 
108 | 	//	Echo the report on std::cout, then raise it as an exception.
109 | 	const std::string text( report.str() );
110 | 	std::cout << text << std::endl;
111 | 	throw	std::runtime_error( text );
112 | }
113 | 
114 | //	Wraps OpenCL_V_Throw and passes the line number of the call site (__LINE__);
115 | //	unlike OPENCL_V_WARN, the expansion carries no trailing semicolon.
116 | #define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__)
117 | 
118 | //	Non-throwing status check: unexpected status codes are only reported on std::cout.
119 | //	The status is always handed back to the caller unchanged.
120 | inline cl_int OpenCL_V_Warn(cl_int res, const std::string& msg, size_t lineno)
121 | {
122 | 	//	CL_SUCCESS is no error; CL_DEVICE_NOT_FOUND happens all the time when discovering
123 | 	//	the OpenCL capabilities of the system, so neither of them produces a warning.
124 | 	const bool quiet = ( res == CL_SUCCESS ) || ( res == CL_DEVICE_NOT_FOUND );
125 | 
126 | 	if( !quiet )
127 | 	{
128 | 		std::stringstream report;
129 | 		report << "OPENCL_V_WARN< "
130 | 		       << prettyPrintclFFTStatus(res)
131 | 		       << " > ("
132 | 		       << lineno
133 | 		       << "): "
134 | 		       << msg;
135 | 
136 | 		const std::string text(report.str());
137 | 		std::cout << text << std::endl;
138 | 	}
139 | 
140 | 	return	res;
141 | }
142 | 
143 | //	NOTE: the expansion already ends in ';', so this macro cannot be used inside an expression.
144 | #define OPENCL_V_WARN(_status,_message) OpenCL_V_Warn (_status, _message, __LINE__);
145 | 
146 | /*
147 |  * \brief Release OpenCL resources (Context, Memory etc.)
148 |  */
149 | int cleanupCL( cl_context* context, cl_command_queue* commandQueue, const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[]);
150 | 
151 | #endif
152 | 


--------------------------------------------------------------------------------
/src/client/openCL.misc.h:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | #ifdef _MSC_VER
 18 | #pragma warning(disable : 4996)
 19 | #endif
 20 | 
 21 | #pragma once
 22 | #if !defined( OPENCL_MISC_H )
 23 | #define OPENCL_MISC_H
 24 | #include <memory>
 25 | #include <stdexcept>
 26 | #include "unicode.compatibility.h"
 27 | 
 28 | //	Creating a portable definition of countof
 29 | #if defined( _MSC_VER )
 30 | 	#define countOf _countof
 31 | #else
 32 | 	#define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) )
 33 | #endif
 34 | 
 35 | /*
 36 |  * \brief OpenCL platform and device discovery
 37 |  *        Creates a list of OpenCL platforms
 38 |  *        and their associated devices 
 39 |  */
 40 | int discoverCLPlatforms( cl_device_type deviceType,
 41 | 					     std::vector< cl_platform_id >& platforms,
 42 | 						 std::vector< std::vector< cl_device_id > >& devices );
 43 | 
 44 | void prettyPrintCLPlatforms(std::vector< cl_platform_id >& platforms,
 45 | 	std::vector< std::vector< cl_device_id > >& devices);
 46 | 
 47 | /*
 48 |  * \brief OpenCL related initialization
 49 |  *        Create Context, Device list
 50 |  *        Load CL file, compile, link CL source
 51 |  *		  Build program and kernel objects
 52 |  */
 53 | std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
 54 | 										  cl_int deviceId,
 55 | 										  cl_int platformId,
 56 | 										  cl_context& context,
 57 | 										  bool printclInfo );
 58 | 
 59 | /*
 60 |  * \brief OpenCL memory buffer creation
 61 |  */
 62 | int createOpenCLMemoryBuffer(
 63 | 		cl_context& context,
 64 | 		const size_t bufferSizeBytes,
 65 | 		const cl_uint numBuffers,
 66 | 		cl_mem buffer[],
 67 | 		cl_mem_flags accessibility
 68 | 		);
 69 | 
 70 | /*
 71 |  * \brief OpenCL command queue creation
 72 |  *        Create Command Queue
 73 |  *        Create OpenCL memory buffer objects
 74 |  */
 75 | void createOpenCLCommandQueue( cl_context& context,
 76 | 							   cl_uint commandQueueFlags,
 77 | 							   cl_command_queue& commandQueue,
 78 | 							   std::vector< cl_device_id > devices,
 79 | 							   const size_t bufferSizeBytesIn,
 80 | 							   const cl_uint numBuffersIn,
 81 | 							   cl_mem clMemBufferIn[],
 82 | 							   const size_t bufferSizeBytesOut,
 83 | 							   const cl_uint numBuffersOut,
 84 | 							   cl_mem clMemBufferOut[] );
 85 | 
 86 | /*
 87 |  * \brief release OpenCL memory buffer
 88 |  */
 89 | int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[] );
 90 | 
 91 | std::string prettyPrintclFFTStatus( const cl_int& status );
 92 | 
 93 | //	Validates a status code, either returned directly by a wrapped OpenCL call or held in a variable.
 94 | //	CL_SUCCESS is handed straight back to the caller; any other value is formatted together with the
 95 | //	line number and message, echoed on std::cout and thrown as std::runtime_error.
 96 | //	Note: std::runtime_error does not take unicode strings as input, so only strings supported
 97 | inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno )
 98 | {
 99 | 	//	Nothing to report on success.
100 | 	if( res == CL_SUCCESS )
101 | 		return	res;
102 | 
103 | 	std::stringstream report;
104 | 	report << "OPENCL_V_THROWERROR< "
105 | 	       << prettyPrintclFFTStatus( res )
106 | 	       << " > ("
107 | 	       << lineno
108 | 	       << "): "
109 | 	       << msg;
110 | 
111 | 	//	Echo the report on std::cout, then raise it as an exception.
112 | 	const std::string text( report.str() );
113 | 	std::cout << text << std::endl;
114 | 	throw	std::runtime_error( text );
115 | }
116 | 
117 | //	Wraps OpenCL_V_Throw and passes the line number of the call site (__LINE__);
118 | //	unlike OPENCL_V_WARN, the expansion carries no trailing semicolon.
119 | #define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__)
120 | 
121 | //	Non-throwing status check: unexpected status codes are only reported on std::cout.
122 | //	The status is always handed back to the caller unchanged.
123 | inline cl_int OpenCL_V_Warn(cl_int res, const std::string& msg, size_t lineno)
124 | {
125 | 	//	CL_SUCCESS is no error; CL_DEVICE_NOT_FOUND happens all the time when discovering
126 | 	//	the OpenCL capabilities of the system, so neither of them produces a warning.
127 | 	const bool quiet = ( res == CL_SUCCESS ) || ( res == CL_DEVICE_NOT_FOUND );
128 | 
129 | 	if( !quiet )
130 | 	{
131 | 		std::stringstream report;
132 | 		report << "OPENCL_V_WARN< "
133 | 		       << prettyPrintclFFTStatus(res)
134 | 		       << " > ("
135 | 		       << lineno
136 | 		       << "): "
137 | 		       << msg;
138 | 
139 | 		const std::string text(report.str());
140 | 		std::cout << text << std::endl;
141 | 	}
142 | 
143 | 	return	res;
144 | }
145 | 
146 | //	NOTE: the expansion already ends in ';', so this macro cannot be used inside an expression.
147 | #define OPENCL_V_WARN(_status,_message) OpenCL_V_Warn (_status, _message, __LINE__);
148 | 
149 | /*
150 |  * \brief Release OpenCL resources (Context, Memory etc.)
151 |  */
152 | int cleanupCL( cl_context* context, cl_command_queue* commandQueue, const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent );
153 | 
154 | #endif
155 | 


--------------------------------------------------------------------------------
/src/examples/fft2d.c:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | #include <stdio.h>
 18 | #include <stdlib.h>
 19 | 
 20 | /* No need to explicitly include the OpenCL headers */
 21 | #include <clFFT.h>
 22 | 
 23 | int main( void )
 24 | {
 25 |     cl_int err;
 26 |     cl_platform_id platform = 0;
 27 |     cl_device_id device = 0;
 28 |     cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
 29 |     cl_context ctx = 0;
 30 |     cl_command_queue queue = 0;
 31 |     cl_mem bufX;
 32 |     float *X;
 33 |     cl_event event = NULL;
 34 |     int ret = 0;
 35 | 
 36 |     const size_t N0 = 8, N1 = 8;
 37 |     char platform_name[128];
 38 |     char device_name[128];
 39 | 
 40 |     /* FFT library realted declarations */
 41 |     clfftPlanHandle planHandle;
 42 |     clfftDim dim = CLFFT_2D;
 43 |     size_t clLengths[2] = {N0, N1};
 44 | 
 45 |     /* Setup OpenCL environment. */
 46 |     err = clGetPlatformIDs( 1, &platform, NULL );
 47 | 
 48 |     size_t ret_param_size = 0;
 49 |     err = clGetPlatformInfo(platform, CL_PLATFORM_NAME,
 50 |             sizeof(platform_name), platform_name,
 51 |             &ret_param_size);
 52 |     printf("Platform found: %s\n", platform_name);
 53 | 
 54 |     err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL );
 55 | 
 56 |     err = clGetDeviceInfo(device, CL_DEVICE_NAME,
 57 |             sizeof(device_name), device_name,
 58 |             &ret_param_size);
 59 |     printf("Device found on the above platform: %s\n", device_name);
 60 | 
 61 |     props[1] = (cl_context_properties)platform;
 62 |     ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
 63 |     queue = clCreateCommandQueue( ctx, device, 0, &err );
 64 | 
 65 |     /* Setup clFFT. */
 66 |     clfftSetupData fftSetup;
 67 |     err = clfftInitSetupData(&fftSetup);
 68 |     err = clfftSetup(&fftSetup);
 69 | 
 70 |     /* Allocate host & initialize data. */
 71 |     /* Only allocation shown for simplicity. */
 72 |     size_t buffer_size  = N0 * N1 * 2 * sizeof(*X);
 73 |     X = (float *)malloc(buffer_size);
 74 | 
 75 |     /* print input array just using the
 76 |      * indices to fill the array with data */
 77 |     printf("\nPerforming fft on an two dimensional array of size N0 x N1 : %lu x %lu\n", (unsigned long)N0, (unsigned long)N1);
 78 | 	size_t i, j;
 79 | 
 80 |     i = j = 0;
 81 |     for (i=0; i<N0; ++i) {
 82 |         for (j=0; j<N1; ++j) {
 83 |             float x = 0.5f;
 84 |             float y = 0.5f;
 85 |             size_t idx = 2*(j+i*N1);
 86 |             X[idx] = x;
 87 |             X[idx+1] = y;
 88 |             printf("(%f, %f) ", x, y);
 89 |         }
 90 |         printf("\n");
 91 |     }
 92 | 
 93 |     /* Prepare OpenCL memory objects and place data inside them. */
 94 |     bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, buffer_size, NULL, &err );
 95 | 
 96 |     err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
 97 | 
 98 |     /* Create a default plan for a complex FFT. */
 99 |     err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
100 | 
101 |     /* Set plan parameters. */
102 |     err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
103 |     err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
104 |     err = clfftSetResultLocation(planHandle, CLFFT_INPLACE);
105 | 
106 |     /* Bake the plan. */
107 |     err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);
108 | 
109 |     /* Execute the plan. */
110 |     err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);
111 | 
112 |     /* Wait for calculations to be finished. */
113 |     err = clFinish(queue);
114 | 
115 |     /* Fetch results of calculations. */
116 |     err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
117 | 
118 |     /* print output array */
119 |     printf("\n\nfft result: \n");
120 |     i = j = 0;
121 |     for (i=0; i<N0; ++i) {
122 |         for (j=0; j<N1; ++j) {
123 |             size_t idx = 2*(j+i*N1);
124 |             printf("(%f, %f) ", X[idx], X[idx+1]);
125 |         }
126 |         printf("\n");
127 |     }
128 |     printf("\n");
129 | 
130 |     /* Release OpenCL memory objects. */
131 |     clReleaseMemObject( bufX );
132 | 
133 |     free(X);
134 | 
135 |     /* Release the plan. */
136 |     err = clfftDestroyPlan( &planHandle );
137 | 
138 |     /* Release clFFT library. */
139 |     clfftTeardown( );
140 | 
141 |     /* Release OpenCL working objects. */
142 |     clReleaseCommandQueue( queue );
143 |     clReleaseContext( ctx );
144 | 
145 |     return ret;
146 | }
147 | 


--------------------------------------------------------------------------------
/src/examples/fft3d.c:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | #include <stdio.h>
 18 | #include <stdlib.h>
 19 | 
 20 | /* No need to explicitly include the OpenCL headers */
 21 | #include <clFFT.h>
 22 | 
 23 | /* Print a diagnostic when an OpenCL/clFFT call failed; returns 1 on failure, 0 on success. */
 24 | static int fft3d_failed( cl_int status, const char *what )
 25 | {
 26 |     if (status == CL_SUCCESS)
 27 |         return 0;
 28 |     fprintf(stderr, "%s failed with error code %d\n", what, (int)status);
 29 |     return 1;
 30 | }
 31 | 
 32 | /*
 33 |  * Example: in-place, single precision, complex-interleaved forward 3D FFT of
 34 |  * an N0 x N1 x N2 array, run with clFFT on the first platform's default device.
 35 |  *
 36 |  * Returns 0 on success and 1 if any OpenCL/clFFT call or the host allocation
 37 |  * fails; everything acquired up to the failure is released before returning.
 38 |  */
 39 | int main( void )
 40 | {
 41 |     cl_int err;
 42 |     cl_platform_id platform = 0;
 43 |     cl_device_id device = 0;
 44 |     cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
 45 |     cl_context ctx = 0;
 46 |     cl_command_queue queue = 0;
 47 |     cl_mem bufX = 0;
 48 |     float *X = NULL;
 49 |     int ret = 0;
 50 | 
 51 |     const size_t N0 = 4, N1 = 4, N2 = 4;
 52 |     /* Interleaved complex data: two floats (real, imaginary) per element. */
 53 |     const size_t buffer_size = N0 * N1 * N2 * 2 * sizeof(*X);
 54 |     char platform_name[128];
 55 |     char device_name[128];
 56 |     size_t ret_param_size = 0;
 57 |     size_t i, j, k;
 58 | 
 59 |     /* FFT library related declarations */
 60 |     clfftPlanHandle planHandle = 0;
 61 |     clfftDim dim = CLFFT_3D;
 62 |     size_t clLengths[3] = {N0, N1, N2};
 63 |     clfftSetupData fftSetup;
 64 |     int fftInitialized = 0; /* non-zero once clfftSetup() has succeeded */
 65 |     int planCreated = 0;    /* non-zero once planHandle refers to a plan */
 66 | 
 67 |     /* Setup OpenCL environment. */
 68 |     err = clGetPlatformIDs( 1, &platform, NULL );
 69 |     if ((ret = fft3d_failed(err, "clGetPlatformIDs")) != 0) goto cleanup;
 70 | 
 71 |     err = clGetPlatformInfo(platform, CL_PLATFORM_NAME,
 72 |             sizeof(platform_name), platform_name,
 73 |             &ret_param_size);
 74 |     if ((ret = fft3d_failed(err, "clGetPlatformInfo")) != 0) goto cleanup;
 75 |     printf("Platform found: %s\n", platform_name);
 76 | 
 77 |     err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL );
 78 |     if ((ret = fft3d_failed(err, "clGetDeviceIDs")) != 0) goto cleanup;
 79 | 
 80 |     err = clGetDeviceInfo(device, CL_DEVICE_NAME,
 81 |             sizeof(device_name), device_name,
 82 |             &ret_param_size);
 83 |     if ((ret = fft3d_failed(err, "clGetDeviceInfo")) != 0) goto cleanup;
 84 |     printf("Device found on the above platform: %s\n", device_name);
 85 | 
 86 |     props[1] = (cl_context_properties)platform;
 87 |     ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
 88 |     if ((ret = fft3d_failed(err, "clCreateContext")) != 0) goto cleanup;
 89 |     queue = clCreateCommandQueue( ctx, device, 0, &err );
 90 |     if ((ret = fft3d_failed(err, "clCreateCommandQueue")) != 0) goto cleanup;
 91 | 
 92 |     /* Setup clFFT. */
 93 |     err = clfftInitSetupData(&fftSetup);
 94 |     if ((ret = fft3d_failed(err, "clfftInitSetupData")) != 0) goto cleanup;
 95 |     err = clfftSetup(&fftSetup);
 96 |     if ((ret = fft3d_failed(err, "clfftSetup")) != 0) goto cleanup;
 97 |     fftInitialized = 1;
 98 | 
 99 |     /* Allocate host memory. */
100 |     X = (float *)malloc(buffer_size);
101 |     if (X == NULL) {
102 |         fprintf(stderr, "malloc of %lu bytes failed\n", (unsigned long)buffer_size);
103 |         ret = 1;
104 |         goto cleanup;
105 |     }
106 | 
107 |     /* Fill the input with a single non-zero sample at the origin and
108 |      * print it while doing so. */
109 |     printf("\nPerforming fft on a three dimensional array of size N0 x N1 x N2 : %lu x %lu x %lu\n", (unsigned long)N0, (unsigned long)N1, (unsigned long)N2);
110 |     for (i=0; i<N0; ++i) {
111 |         for (j=0; j<N1; ++j) {
112 |             for (k=0; k<N2; ++k) {
113 |                 const size_t idx = 2*(k+j*N2+i*N1*N2);
114 |                 const float v = (i==0 && j==0 && k==0) ? 0.5f : 0.0f;
115 |                 X[idx] = v;
116 |                 X[idx+1] = v;
117 |                 printf("(%f, %f) ", X[idx], X[idx+1]);
118 |             }
119 |             printf("\n");
120 |         }
121 |         printf("\n");
122 |     }
123 | 
124 |     /* Prepare OpenCL memory objects and place data inside them. */
125 |     bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, buffer_size, NULL, &err );
126 |     if ((ret = fft3d_failed(err, "clCreateBuffer")) != 0) goto cleanup;
127 | 
128 |     err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
129 |     if ((ret = fft3d_failed(err, "clEnqueueWriteBuffer")) != 0) goto cleanup;
130 | 
131 |     /* Create a default plan for a complex FFT. */
132 |     err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
133 |     if ((ret = fft3d_failed(err, "clfftCreateDefaultPlan")) != 0) goto cleanup;
134 |     planCreated = 1;
135 | 
136 |     /* Set plan parameters. */
137 |     err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
138 |     if ((ret = fft3d_failed(err, "clfftSetPlanPrecision")) != 0) goto cleanup;
139 |     err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
140 |     if ((ret = fft3d_failed(err, "clfftSetLayout")) != 0) goto cleanup;
141 |     err = clfftSetResultLocation(planHandle, CLFFT_INPLACE);
142 |     if ((ret = fft3d_failed(err, "clfftSetResultLocation")) != 0) goto cleanup;
143 | 
144 |     /* Bake the plan. */
145 |     err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);
146 |     if ((ret = fft3d_failed(err, "clfftBakePlan")) != 0) goto cleanup;
147 | 
148 |     /* Execute the plan. */
149 |     err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);
150 |     if ((ret = fft3d_failed(err, "clfftEnqueueTransform")) != 0) goto cleanup;
151 | 
152 |     /* Wait for calculations to be finished. */
153 |     err = clFinish(queue);
154 |     if ((ret = fft3d_failed(err, "clFinish")) != 0) goto cleanup;
155 | 
156 |     /* Fetch results of calculations. */
157 |     err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
158 |     if ((ret = fft3d_failed(err, "clEnqueueReadBuffer")) != 0) goto cleanup;
159 | 
160 |     /* print output array */
161 |     printf("\n\nfft result: \n");
162 |     for (i=0; i<N0; ++i) {
163 |         for (j=0; j<N1; ++j) {
164 |             for (k=0; k<N2; ++k) {
165 |                 const size_t idx = 2*(k+j*N2+i*N1*N2);
166 |                 printf("(%f, %f) ", X[idx], X[idx+1]);
167 |             }
168 |             printf("\n");
169 |         }
170 |         printf("\n");
171 |     }
172 |     printf("\n");
173 | 
174 | cleanup:
175 |     /* Shared exit path: release only what was actually acquired. Handles that
176 |      * were never created are still 0 / NULL and are skipped. */
177 |     if (bufX != 0) clReleaseMemObject( bufX );
178 |     free(X);
179 |     if (planCreated) clfftDestroyPlan( &planHandle );
180 |     if (fftInitialized) clfftTeardown( );
181 |     if (queue != 0) clReleaseCommandQueue( queue );
182 |     if (ctx != 0) clReleaseContext( ctx );
183 | 
184 |     return ret;
185 | }
186 | 


--------------------------------------------------------------------------------
/src/callback-client/client.h:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | 
 18 | #pragma once
 19 | #if !defined( CLIENT_H )
 20 | #define CLIENT_H
 21 | 
 22 | //	Boost headers that we want to use
 23 | //	#define BOOST_PROGRAM_OPTIONS_DYN_LINK
 24 | #include <boost/program_options.hpp>
 25 | #include "stdafx.h"
 26 | #include "../statTimer/statisticalTimer.extern.h"
 27 | #include "../include/unicode.compatibility.h"
 28 | 
 29 | #include <fftw3.h>
 30 | 
 31 | typedef unsigned char uint24_t[3]; 
 32 | 
 33 | #define CALLBCKSTR(...) #__VA_ARGS__
 34 | #define STRINGIFY(...) 	CALLBCKSTR(__VA_ARGS__)
 35 | 
 36 | #define BATCH_LENGTH 1024 
 37 | 
 38 | #define ConvertToFloat typedef unsigned char uint24_t[3]; \n \
 39 | 						float convert24To32bit(__global void* in, uint inoffset, __global void* userdata) \n \
 40 | 						{ \n \
 41 | 						__global uint24_t* inData =  (__global uint24_t*)in; \n \
 42 | 						float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \
 43 | 						return val;  \n \
 44 | 						}
 45 | 
 46 | #define ConvertToFloat_KERNEL typedef unsigned char uint24_t[3]; \n \
 47 | 							__kernel void convert24To32bit (__global void *input, __global void *output) \n \
 48 | 							 { \n \
 49 | 								uint inoffset = get_global_id(0); \n \
 50 | 								__global uint24_t* inData =  (__global uint24_t*)input; \n \
 51 | 								float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \
 52 | 								*((__global float*)output + inoffset) = val;  \n \
 53 | 							} \n
 54 | 
 55 | #define MagnitudeExtraction void extractMagnitude(__global void *output, uint outoffset, __global void *userdata, float2 fftoutput) \n \
 56 | 							{ \n \
 57 | 								float magnitude = sqrt(fftoutput.x * fftoutput.x + fftoutput.y * fftoutput.y); \n \
 58 | 								*((__global float*)output + outoffset) = magnitude; \n \
 59 | 							} \n
 60 | 
 61 | #define MagnitudeExtraction_KERNEL __kernel void extractMagnitude(__global float2 *output, __global float *magoutput) \n \
 62 | 							{ \n \
 63 | 								uint outoffset = get_global_id(0); \n \
 64 | 								float magnitude = sqrt(output[outoffset].x * output[outoffset].x + output[outoffset].y * output[outoffset].y); \n \
 65 | 								*(magoutput + outoffset) = magnitude; \n \
 66 | 							} \n
 67 | 
 68 | template < typename T >
 69 | void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 
 70 | 				   clfftDim dim, clfftPrecision precision,  cl_uint profile_count);
 71 | 
 72 | template < typename T >
 73 | void runR2C_FFT_WithCallback(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
 74 | 						size_t* inlengths, clfftDim dim, clfftPrecision precision,
 75 | 						size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
 76 | 
 77 | template < typename T >
 78 | void runR2C_FFT_PreAndPostprocessKernel(std::auto_ptr< clfftSetupData > setupData, cl_context context, 
 79 | 							cl_command_queue commandQueue, cl_device_id device_id,
 80 | 							size_t* inlengths, clfftDim dim, clfftPrecision precision,
 81 | 							size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
 82 | 
 83 | fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size,
 84 | 									clfftLayout in_layout, clfftDim dim);
 85 | 
 86 | template < typename T1, typename T2>
 87 | bool compare(T1 *refData, std::vector< T2 > data,
 88 |              size_t length, const float epsilon = 1e-6f);
 89 | 
 90 | #ifdef WIN32
 91 | 
 92 | struct Timer  // Stopwatch built on the Win32 performance counter.
 93 | {
 94 |     LARGE_INTEGER start, stop, freq;
 95 | 
 96 | public:
 97 |     // Query the counter frequency once, at construction.
 98 |     Timer() { QueryPerformanceFrequency( &freq ); }
 99 |     void Start() { QueryPerformanceCounter( &start ); }
100 |     double Sample()  // Ticks elapsed since Start(), divided by the counter frequency.
101 |     {
102 |         QueryPerformanceCounter( &stop );
103 |         const double ticks = (double)( stop.QuadPart - start.QuadPart );
104 |         return ticks / (double)( freq.QuadPart );
105 |     }
106 | };
107 | 
108 | #elif defined(__APPLE__) || defined(__MACOSX)
109 | 
110 | #include <mach/clock.h>
111 | #include <mach/mach.h>
112 | 
113 | struct Timer  // Stopwatch built on the Mach SYSTEM_CLOCK service.
114 | {
115 |     clock_serv_t clock;
116 |     mach_timespec_t start, end;
117 | 
118 | public:
119 |     Timer() { host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &clock); }
120 |     ~Timer() { mach_port_deallocate(mach_task_self(), clock); }
121 | 
122 |     void Start() { clock_get_time(clock, &start); }
123 |     double Sample()  // Seconds elapsed since the last Start().
124 |     {
125 |         clock_get_time(clock, &end);
126 |         // Sum the terms as doubles: 1000000000L * seconds wraps where long is 32 bits.
127 |         return (double)(end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec) * 1E-9;
128 |     }
129 | };
130 | 
131 | #else
132 | 
133 | #include <time.h>
134 | #include <math.h>
135 | 
136 | struct Timer  // Stopwatch built on clock_gettime(CLOCK_MONOTONIC).
137 | {
138 |     struct timespec start, end;
139 |     // Start() must be called before Sample(); the constructor leaves both stamps unset.
140 | public:
141 |     Timer() { }
142 |     // Record the reference time stamp.
143 |     void Start() { clock_gettime(CLOCK_MONOTONIC, &start); }
144 |     double Sample()  // Seconds elapsed since the last Start().
145 |     {
146 |         clock_gettime(CLOCK_MONOTONIC, &end);
147 |         // Sum the terms as doubles: 1000000000L * seconds overflows a 32-bit long after ~2 s.
148 |         return (double)(end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec) * 1E-9;
149 |     }
150 | };
151 | 
152 | #endif
153 | 
154 | #endif
155 | 


--------------------------------------------------------------------------------
/ReleaseNotes.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | clFFT Contents
  3 | --------------
  4 | The clFFT library is an open source OpenCL library
  5 | implementation of discrete Fast Fourier Transforms. It:
  6 | 
  7 | * Provides a fast and accurate platform for calculating
  8 |   discrete FFTs
  9 | * Supports 1D, 2D, and 3D transforms with a batch size
 10 |   that can be greater than 1
 11 | * Supports complex and real transforms
 12 | * Supports planar (real and imaginary components in
 13 |   separate arrays) and interleaved (real and imaginary
 14 |   components as a pair contiguous in memory) formats
 15 | * Supports dimension lengths that can be any mix of
 16 |   powers of 2, 3, 5, 7, 11 and 13
 17 | * Supports single and double precision floating-point
 18 |   formats
 19 | * Supports in-place or out-of-place transforms
 20 | 
 21 | 
 22 | clFFT - Release Notes - version 2.12.2
 23 | --------------------------------------
 24 | 
 25 | This is a patch update release to v2.12.1.
 26 | It has the following:
 27 | 
 28 | * Fix for thread safety issues
 29 | * Fix for failures in certain 2D transforms
 30 | * Fix for failures on POCL runtime
 31 | * Other minor improvements
 32 | 
 33 | Details on issues available on GitHub.
 34 | 
 35 | 
 36 | clFFT - Release Notes - version 2.12.1
 37 | --------------------------------------
 38 | 
 39 | New features of this release:
 40 | 
 41 | * Fixes for bugs in decomposition logic of very large problem sizes
 42 |   with environment flag CLFFT_REQUEST_LIB_NOMEMALLOC=1
 43 | * Validation of sizes that are powers of 2,3,5 and combinations
 44 |   thereof up to a maximum size close to 25GB on S9170
 45 | 
 46 | Driver notes:
 47 | 
 48 | * This library version has been tested with Catalyst
 49 |   Pro driver version 15.302 on Firepro W9100 & S9170.
 50 | 
 51 | 
 52 | clFFT - Release Notes - version 2.12.0
 53 | --------------------------------------
 54 | 
 55 | New features of this release:
 56 | 
 57 | * Addition of radix 11 & 13 enables support for powers of 11 & 13 size
 58 |   transforms
 59 | * Support for 1D large size transforms with no extra memory allocation
 60 |   requirement with environment flag CLFFT_REQUEST_LIB_NOMEMALLOC=1
 61 |   for complex FFTs of powers of 3,5,10 sizes. Note that support for
 62 |   power-of-2 size has been available since version 2.10.
 63 | 
 64 | Driver notes:
 65 | 
 66 | * This library version has been tested with Catalyst
 67 |   Pro driver version 15.201 on Firepro W9100.
 68 | 
 69 | 
 70 | clFFT - Release Notes - version 2.10.2
 71 | --------------------------------------
 72 | 
 73 | This is a patch update release to v2.10.1.
 74 | It has the following:
 75 | 
 76 | * Fixes for accuracy/stability issues noted in large size real FFTs
 77 | 
 78 | 
 79 | clFFT - Release Notes - version 2.10.1
 80 | --------------------------------------
 81 | 
 82 | This is a patch update release to v2.10.0.
 83 | It has the following:
 84 | 
 85 | * Performance fixes for slowdown observed vs v2.8. Specifically,
 86 |   2D transforms for certain sizes showed significant slowdown.
 87 |   This issue has been fixed in this release. The fix also makes the
 88 |   performance in general better than v2.8.
 89 | * Code reorganization in transposes and performance improvements.
 90 |   There is a modest performance improvement with inplace transforms
 91 |   for large power-of-2 sizes.
 92 | * Several minor enhancements and bug fixes 
 93 | 
 94 | 
 95 | clFFT - Release Notes - version 2.10.0
 96 | --------------------------------------
 97 | 
 98 | New features of this release:
 99 | 
100 | * Post-callback feature that enables custom post-processing
101 |   of output data directly by the library with user callback function
102 | * Support for in-place transposes for power-of-2 sizes enables really
103 |   large 1D transforms as well as supporting no additional memory
104 |   allocation, by library, for a range of problem sizes
105 | 
106 | 
107 | clFFT - Release Notes - version 2.8.0
108 | --------------------------------------
109 | 
110 | New features of this release:
111 | 
112 | * Support for power-of-7 size transforms
113 | * Pre-callback feature that enables custom pre-processing
114 |   of input data directly by the library with user callback function
115 | * Support for 1D large size transforms with no extra memory allocation
116 |   requirement for certain sizes
117 | 
118 | Driver notes:
119 | 
120 | * This library version has been tested with Catalyst
121 |   Pro driver version 14.502 on Firepro W9100.
122 | 
123 | 
124 | clFFT - Release Notes - version 2.6.1
125 | --------------------------------------
126 | 
127 | This is a patch update release to v2.6.0. It has the following.
128 | 
129 | * Fix for client application crash with Linux 14.502 driver
130 | * Performance fixes for real forward and backward transform when
131 |   enabling ECC
132 | * Performance fixes for complex transforms over a small range in
133 |   power-of-2 sizes
134 | * Improvement in API timing in client
135 | * Several minor enhancements and bug fixes
136 | 
137 | Driver notes:
138 | 
139 | * This library version has been tested with Catalyst
140 |   Pro driver version 14.502 on Firepro W9100.
141 | 
142 | 
143 | clFFT - Release Notes - version 2.6.0
144 | --------------------------------------
145 | 
146 | New features of this release:
147 | 
148 | * Significant uplift of 1D real forward transform
149 |   performance for power-of-2 sizes
150 | * Significant uplift of large 2D/3D real transform
151 |   performance for power-of-2 sizes
152 | 
153 | Known issues:
154 | 
155 | * Size limit on 1D real transforms remains at 2^24
156 |   for single precision and 2^22 for double
157 |   precision
158 | 
159 | Driver notes:
160 | 
161 | * This library version has been tested with Catalyst
162 |   Pro driver version 14.301 on Firepro W9100.
163 | 
164 | 
165 | clFFT - Release Notes - version 2.4.0
166 | --------------------------------------
167 | 
168 | New features of this release:
169 | 
170 | * Significant uplift of 1D complex transform performance
171 | * 1D large size limit relaxation for complex transforms
172 | * 2D/3D size limit relaxation on real and complex transforms
173 | * Binary caching feature
174 | * Several minor fixes and improvements
175 | 


--------------------------------------------------------------------------------
/src/statTimer/statisticalTimer.CPU.h:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | 
 18 | #pragma once
 19 | #ifndef _STATISTICALTIMER_CPU_H_
 20 | #define _STATISTICALTIMER_CPU_H_
 21 | #include <iosfwd>
 22 | #include <vector>
 23 | #include <algorithm>
 24 | #ifdef __FreeBSD__
 25 | #include <sys/timespec.h>
 26 | #endif
 27 | #include "statisticalTimer.h"
 28 | 
 29 | /**
 30 |  * \file statisticalTimer.CPU.h
 31 |  * \brief A timer class that provides a cross platform timer for use
 32 |  * in timing code progress with a high degree of accuracy.
 33 |  *	This class is implemented entirely in the header, to facilitate inclusion into multiple
 34 |  *	projects without needing to compile an object file for each project.
 35 |  */
 36 | 
 37 | class CpuStatTimer : public baseStatTimer	//	CPU timer singleton; obtain it through getInstance( )
 38 | {
 39 | 	//	Private typedefs
 40 | 	typedef std::vector< cl_ulong > clkVector;
 41 | 	typedef	std::pair< std::string, cl_uint > labelPair;
 42 | 	typedef	std::vector< labelPair > stringVector;
 43 | 
 44 | 	//	In order to calculate statistics <std. dev.>, we need to keep a history of our timings
 45 | 	stringVector	labelID;
 46 | 	clkVector	clkStart;
 47 | 	std::vector< clkVector >	clkTicks;
 48 | 
 49 | 	//	How many clockticks in a second
 50 | 	cl_ulong	clkFrequency;
 51 | 
 52 | 	//	For linux; the resolution of a high-precision timer
 53 | 	//  Mingw32 does not define timespec; can use windows timers
 54 | #if !defined( _WIN32 )
 55 | 	timespec res;
 56 | #endif
 57 | 
 58 | 	//	Saved sizes for our vectors, used in Reset() to reallocate vectors
 59 | 	clkVector::size_type	nEvents, nSamples;
 60 | 
 61 | 	//	This setting controls whether the Timer should convert samples into time by dividing by the
 62 | 	//	clock frequency
 63 | 	bool normalize;
 64 | 
 65 | 	/**
 66 | 	 * \fn CpuStatTimer()
 67 | 	 * \brief Constructor for CpuStatTimer that initializes the class
 68 | 	 *	This is private so that user code cannot create their own instantiation.  Instead, you
 69 | 	 *	must go through getInstance( ) to get a reference to the class.
 70 | 	 */
 71 | 	CpuStatTimer( );
 72 | 
 73 | 	/**
 74 | 	 * \fn ~CpuStatTimer()
 75 | 	 * \brief Destructor for CpuStatTimer that cleans up the class
 76 | 	 */
 77 | 	~CpuStatTimer( );
 78 | 
 79 | 	/**
 80 | 	 * \fn CpuStatTimer( const CpuStatTimer& )
 81 | 	 * \brief Copy construction does not make sense for a singleton, disallow copies
 82 | 	 */
 83 | 	CpuStatTimer( const CpuStatTimer& );
 84 | 
 85 | 	/**
 86 | 	 * \fn operator=( const CpuStatTimer& )
 87 | 	 * \brief Assignment operator does not make sense for a singleton, disallow assignments
 88 | 	 */
 89 | 	CpuStatTimer& operator=( const CpuStatTimer& );
 90 | 
 91 | 	friend std::ostream& operator<<( std::ostream& os, const CpuStatTimer& s );
 92 | 
 93 | 	/**
 94 | 	 * \fn void AddSample( const size_t id, const cl_ulong n )
 95 | 	 * \brief Explicitly add a timing sample into the class
 96 | 	 */
 97 | 	void AddSample( const size_t id, const cl_ulong n );
 98 | 
 99 | 	//	Calculate the average/mean of data for a given event
100 | 	cl_double	getMean( size_t id ) const;
101 | 
102 | 	//	Calculate the variance of data for a given event
103 | 	//	Variance - average of the squared differences between data points and the mean
104 | 	cl_double	getVariance( size_t id ) const;
105 | 
106 | 	//	Sqrt of variance, also in units of the original data
107 | 	cl_double	getStdDev( size_t id ) const;
108 | 
109 | 	/**
110 | 	 * \fn cl_double getAverageTime(size_t id) const
111 | 	 * \return Return the arithmetic mean of all the samples that have been saved
112 | 	 */
113 | 	cl_double getAverageTime( size_t id ) const;
114 | 
115 | 	/**
116 | 	 * \fn cl_double getMinimumTime(size_t id) const
117 | 	 * \return Return the minimum of all the samples that have been saved
118 | 	 */
119 | 	cl_double getMinimumTime( size_t id ) const;
120 | 
121 | public:
122 | 	/**
123 | 	 * \fn getInstance()
124 | 	 * \brief This returns a reference to the singleton timer.  Guarantees only 1 timer class is ever
125 | 	 *	instantiated within a compilable executable.
126 | 	 */
127 | 	static CpuStatTimer& getInstance( );
128 | 
129 | 	/**
130 | 	 * \fn void Start( size_t id )
131 | 	 * \brief Start the timer
132 | 	 * \sa Stop(), Reset()
133 | 	 */
134 | 	void Start( size_t id );
135 | 
136 | 	/**
137 | 	 * \fn void Stop( size_t id )
138 | 	 * \brief Stop the timer
139 | 	 * \sa Start(), Reset()
140 | 	 */
141 | 	void Stop( size_t id );
142 | 
143 | 	/**
144 | 	 * \fn void Clear(void)
145 | 	 * \brief Clear the timer; NOTE(review): how this differs from Reset() is not visible in this header
146 | 	 * \sa Start(), Stop(), Reset()
147 | 	 */
148 | 	void Clear( );
149 | 
150 | 	/**
151 | 	 * \fn void Reset(void)
152 | 	 * \brief Reset the timer to 0
153 | 	 * \sa Start(), Stop(), Clear()
154 | 	 */
155 | 	void Reset( );
156 | 
157 | 	void Reserve( size_t nEvents, size_t nSamples );
158 | 
159 | 	size_t getUniqueID( const std::string& label, cl_uint groupID );
160 | 
161 | 	//	NOTE(review): presumably sets the normalize flag (divide samples by the clock frequency) - confirm
162 | 	void	setNormalize( bool norm );
163 | 
164 | 	void Print( );
165 | 
166 | 	//	Using the stdDev of the entire population (of an id), eliminate those samples that fall
167 | 	//	outside some specified multiple of the stdDev.  This assumes that the population
168 | 	//	forms a gaussian curve.
169 | 	size_t	pruneOutliers( cl_double multiple );
170 | 	std::vector< size_t > pruneOutliers( size_t id , cl_double multiple );
171 | };
172 | 
173 | #endif // _STATISTICALTIMER_CPU_H_
174 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | ## Contributor guidelines
 2 | 
 3 | Contributing code to this project is intended to be lightweight and intuitive to users familiar with GitHub, in order to actively encourage contributions; nevertheless, a process is documented and should be followed to prevent chaos, confusion and despair.
 4 | 
 5 | ## The mechanics of contributing code
 6 | Firstly, in order to contribute code to this project, a contributor must have a valid and current [GitHub account](https://help.github.com/articles/set-up-git) available to use.  Given an account,
 7 | * The potential contributor forks this project into his/her account following the traditional [forking](https://help.github.com/articles/fork-a-repo) model native to GitHub
 8 | * After forking, the contributor [clones their repository](https://help.github.com/articles/create-a-repo) locally on their machine
 9 | * Code is developed and checked into the contributor's repository.  These commits are eventually pushed upstream to their GitHub repository
10 | * The contributor then issues a [pull-request](https://help.github.com/articles/using-pull-requests) against the **develop** branch of this repository, following the [git flow](http://nvie.com/posts/a-successful-git-branching-model/) workflow, which is well suited for working with GitHub
11 |     * A [git extension](https://github.com/nvie/gitflow) has been developed to ease the use of the 'git flow' methodology, but requires manual installation by the user.  Refer to the project's wiki
12 | 
13 | At this point, the repository maintainers will be notified by GitHub that a 'pull request' exists pending against their repository.  A code review should be completed within a few days, depending on the scope of submitted code, and the code will either be accepted, rejected or commented on for extra feedback.
14 | 
15 | ## Code submission guidelines
16 | We want to ensure that the project code base maintains a level of quality over time, such that future contributors find it as easy to jump into the code as hopefully it is today.  As such, pull requests should 
17 | * remember that clMath is a project licensed under the [Apache License, Version 2.0]( http://www.apache.org/licenses/LICENSE-2.0 ).  If you are not already familiar, please review the license before issuing a pull request.  We intend this project to be open to external contributors, and encourage developers to contribute code back that they believe will provide value to the overall community.  We will interpret an explicit 'pull request' back to this repository as an implicit acknowledgement from the contributor that they wish to share the code with the community under the terms of the Apache license v2.0.
18 | * follow the [code style guidelines]( ) of the project as posted to the project wiki.  Unfortunately, there were no unifying code guidelines defined between the BLAS & FFT projects, but code submissions should not mix styles within an individual file.  We have since defined and posted a code style guideline for the projects and we expect the code to slowly transition to the new
19 | guidelines over time
20 |     *  separate check-ins that modify a file's style from the ones that add/change/delete code.
21 | * target the **develop** branch in the repository
22 | * ensure that the [code properly builds]( https://github.com/clMathLibraries/clFFT/wiki/Build )
23 | * not break existing test cases
24 |     * we encourage contributors to [run all tests]( https://github.com/clMathLibraries/clFFT/wiki/Testing ) on their end before the pull-request
25 |         * if possible, upload the test results associated with the pull request to a personal [gist repository]( https://gist.github.com/ ) and insert a link to the test results in the pull request so that collaborators can browse the results
26 |         * if no test results are provided with the pull request, official collaborators will run the test suite on their test machines against the patch before we will accept the pull-request
27 |             * if we detect failing test cases, we will request that the code associated with the pull request be fixed before the pull request will be merged
28 |     * if new functionality is introduced with the pull request, sufficient test cases should be added to verify the new functionality is correct
29 |         * new tests should integrate with the existing [googletest framework]( https://code.google.com/p/googletest/wiki/Primer ) located in the src/tests directory of the repo
30 |         * if the collaborators feel the new tests do not provide sufficient coverage, feedback on the pull request will be left with suggestions on how to improve the tests before the pull request will be merged
31 | 
32 | Pull requests will be reviewed by the set of collaborators that are assigned for the repository.  Pull requests may be accepted, declined or a conversation may start on the pull request thread with feedback.  If the pull request is trivial and all the submission guidelines defined above are honored, the pull request may be accepted without delay.  If the pull request is good, but the guidelines defined above are not followed, the collaborators may leave feedback on the pull request and engage in a conversation with the contributor with what they can do to improve the pull request.  At any time, collaborators may decline a pull request if they decide the contribution is not appropriate for the project, or the feedback from reviewers on a pull request is not being addressed in an appropriate amount of time.
33 | 
34 | ## Is it possible to become an official collaborator of the repository?
35 | Yes, we hope to promote trusted members of the community, who have proven themselves to be competent and request to take on the extra responsibility, to be official collaborators of the project.  When an individual requests to be an official collaborator, current project collaborators will browse through the history of the requester's prior pull requests and take a vote amongst themselves on whether the requester should be promoted to collaborator.  These individuals will then have the right to approve/decline pull requests and help shape the path that the project takes.  It is worth noting that on GitHub everybody has read-only access to the source and that everybody has the ability to issue a pull request to contribute to the project.  The benefit of being a repository collaborator is that you are able to manage other people's pull requests.
36 | 
37 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
  1 | # Ubuntu name decoder ring; https://en.wikipedia.org/wiki/List_of_Ubuntu_releases
  2 | # Ubuntu 12.04 LTS (Precise Pangolin) <== Travis CI VM image
  3 | # Ubuntu 12.10 (Quantal Quetzal)
  4 | # Ubuntu 13.04 (Raring Ringtail)
  5 | # Ubuntu 13.10 (Saucy Salamander)
  6 | # Ubuntu 14.04 LTS (Trusty Tahr)
  7 | # Ubuntu 14.10 (Utopic Unicorn)
  8 | # Ubuntu 15.04 (Vivid Vervet)
  9 | # Ubuntu 15.10 (Wily Werewolf)
 10 | # Ubuntu 16.04 LTS (Xenial Xerus)
 11 | 
 12 | # language: instructs travis what compilers && environment to set up in build matrix
 13 | language: cpp
 14 | 
 15 | # sudo: false instructs travis to build our project in a docker VM (faster)
 16 | # Can not yet install fglrx packages with 'false'
 17 | sudo: required # false
 18 | dist: trusty
 19 | 
 20 | # os: expands the build matrix to include multiple os's
 21 | # disable linux, as we get sporadic failures on building boost, needs investigation
 22 | os:
 23 |   - linux
 24 |   - osx
 25 | 
 26 | # compiler: expands the build matrix to include multiple compilers (per os)
 27 | compiler:
 28 |   - gcc
 29 |   - clang
 30 | 
 31 | addons:
 32 |   # apt: is disabled on osx builds
 33 |   # apt: needed by docker framework to install project dependencies without
 34 |   # sudo.  Apt uses published Ubuntu PPAs from https://launchpad.net/
 35 |   # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json
 36 |   apt:
 37 |     sources:
 38 |       # ubuntu-toolchain-r-test contains newer versions of gcc to install
 39 |       # - ubuntu-toolchain-r-test
 40 |       # llvm-toolchain-precise-3.6 contains newer versions of clang to install
 41 |       # - llvm-toolchain-precise-3.6
 42 |       # kubuntu-backports contains newer versions of cmake to install
 43 |       - kubuntu-backports
 44 |       # boost-latest contains boost v1.55
 45 |       - boost-latest
 46 |     packages:
 47 |       # g++-4.8 is minimum version considered to be the first good c++11 gnu compiler
 48 |       # - g++-4.8
 49 |       # - clang-3.6
 50 |       # We require v2.8.12 minimum
 51 |       - cmake
 52 |       # I'm finding problems between pre-compiled versions of boost ublas, with gtest
 53 |       # stl_algobase.h: error: no matching function for call to swap()
 54 |       - libboost-program-options1.55-dev
 55 |       # - libboost-serialization1.55-dev
 56 |       # - libboost-filesystem1.55-dev
 57 |       # - libboost-system1.55-dev
 58 |       # - libboost-regex1.55-dev
 59 |       # The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater
 60 | #      - opencl-headers
 61 |       # Uncomment one of the following when fglrx modules are added to the apt whitelist
 62 | #      - fglrx
 63 | #      - fglrx=2:8.960-0ubuntu1
 64 | #      - fglrx=2:13.350.1-0ubuntu0.0.1
 65 | 
 66 | # env: specifies additional global variables to define per row in build matrix
 67 | env:
 68 |   global:
 69 |     - CLFFT_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
 70 |     - OPENCL_REGISTRY=https://www.khronos.org/registry/cl
 71 |     - OPENCL_ROOT=${TRAVIS_BUILD_DIR}/bin/opencl
 72 | 
 73 | # The following filters our build matrix; we are interested in linux-gcc & osx-clang
 74 | matrix:
 75 |   exclude:
 76 |     - os: linux
 77 |       compiler: clang
 78 |     - os: osx
 79 |       compiler: gcc
 80 | 
 81 | before_install:
 82 |   # Remove the following linux clause when fglrx can be installed with sudo: false
 83 |   - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
 84 |       sudo apt-get update -qq &&
 85 |       sudo apt-get install -qq libboost-all-dev;
 86 |       export BUILD_BOOST="OFF";
 87 |     fi
 88 |   - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
 89 |       export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
 90 |     fi
 91 |   - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
 92 |       brew update;
 93 |       brew outdated boost || brew upgrade boost;
 94 |       brew outdated cmake || brew upgrade cmake;
 95 |     fi
 96 | # - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi
 97 |   - cmake --version;
 98 |   - ${CC} --version;
 99 |   - ${CXX} --version;
100 | 
101 | install:
102 |   # The following linux logic is necessary because of Travis's move to the GCE platform, which does not
103 |   # currently contain packages for fglrx: https://github.com/travis-ci/travis-ci/issues/5221
104 |   # We build our own linkable .so file
105 |   - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
106 |       mkdir -p ${OPENCL_ROOT};
107 |       pushd ${OPENCL_ROOT};
108 |       travis_retry git clone --depth 1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git;
109 |       mv ./OpenCL-ICD-Loader/* .;
110 |       travis_retry git clone --depth 1 https://github.com/KhronosGroup/OpenCL-Headers.git inc/CL;
111 |       pushd inc/CL;
112 |       travis_retry wget -w 1 -np -nd -nv -A h,hpp ${OPENCL_REGISTRY}/api/2.1/cl.hpp;
113 |       popd;
114 |       mkdir -p lib;
115 |       pushd lib;
116 |       cmake -G "Unix Makefiles" ..;
117 |       make;
118 |       cp ./bin/libOpenCL.so .;
119 |       popd;
120 |       pushd inc/CL;
121 |       travis_retry git fetch origin opencl12:opencl12;
122 |       git checkout opencl12;
123 |       popd;
124 |       mv inc/ include/;
125 |       popd;
126 |     fi
127 | 
128 |   # osx image does not contain cl.hpp file; download from Khronos
129 |   # - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
130 |   #     pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/;
131 |   #     sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp;
132 |   #     popd;
133 |   #   fi
134 | 
135 | # Use before_script: to run configure steps
136 | before_script:
137 |   - mkdir -p ${CLFFT_ROOT}
138 |   - pushd ${CLFFT_ROOT}
139 |   - cmake -DCMAKE_BUILD_TYPE=Release -DBoost_NO_SYSTEM_PATHS=OFF -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
140 | 
141 | # use script: to execute build steps
142 | script:
143 |   - make package
144 | 
145 | deploy:
146 |   provider: releases
147 |   prerelease: true
148 |   draft: true
149 |   skip_cleanup: true
150 |   api_key:
151 |     secure: "Bx51QUNYPnJlkJS7D97huRxYzs26kOZst76S0uTmBWp8ZU7gAm6mY79pFbXW9mkL2r6EPX5l5p+pPOwo1kmN/eB3jicamA07oLcI0ZQw4XLxszVEiOLMNXSLFYnsee0RkVX4fu453XXOFDoQupN/JKHUtp4VReJHIiWgQPnvF3Xu7kBLGecKr9IrWV23ig+7z3oiTEO+MTIR/z9mmHnzdAf4K0Nh+9BdtI2QVuVbpRZxPRqLIakMHoYw18h2SdY44wYK+sC+AroU9QWCr5t26GJDzWdu0nrv05ChQqaC128z8+hs+jaPbx39ByBH3BxD05FrKtCO3W0O6VycHewFitrMeMCkXizDn3XzXRkw8bM8OpqFPW/++f5DqxgN+Yh9eIb2vY1QiBjaQ6VIvd/9egVILw6/cxVXlym9iNVwuO2ZG0COKMHoLP/2ZlvPbuoXYgEtZYqqUnvBHVu1SxzrABxR+Cp44McbuB2EFLHYxjA7msF/h+yilfE5e4FdD91MpjR26ASmP3rMx9xUe6h28rGgei3RXNNeF0vnMO65qwfImuUbQ3/on+KW6LlMKFptqq3Twp7wcBdh433bvsrwCW0BDP0eanWuImpS0z3reqcTJFrl0rzplpQmJAW38dcSWTI7MvlDKcMHLJMNy+2or7mIGCj1m9o9Yr6INC7W2Rg="
152 |   file: ${CLFFT_ROOT}/*.tar.gz
153 |   file_glob: true
154 |   on:
155 |     all_branches: true
156 |     tags: true
157 | 


--------------------------------------------------------------------------------
/src/library/lock.h:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright 2013 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  * http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  * ************************************************************************/
 16 | 
 17 | 
 18 | #pragma once
 19 | #if !defined( CLFFT_lock_H )
 20 | #define CLFFT_lock_H
 21 | 
 22 | #if defined( _WIN32 )
 23 | 	#include <windows.h>
 24 | #else
 25 | 	#include <pthread.h>
 26 | #endif
 27 | 
 28 | #include "private.h"
 29 | 
 30 | #if defined( _WIN32 )
 31 | 
 32 | //	lockRAII provides an abstraction for the concept of a mutex; it wraps all  mutex functions in generic methods
 33 | //	On windows, the mutex is implemented as a CRITICAL_SECTION, as this is the fastest intraprocess mutex
 34 | //	available.
 35 | //	The template argument 'debugPrint' activates debugging information, but if not active the compiler optimizes
 36 | //	the print statements out
 37 | template< bool debugPrint >
 38 | class lockRAII
 39 | {
 40 | 	CRITICAL_SECTION cs;
 41 | 	tstring			csName;
 42 | 	tstringstream	tstream;
 43 | 
 44 | 	//	Does not make sense to create a copy of a lock object; private method
 45 | 	lockRAII( const lockRAII& rhs ): csName( rhs.csName )
 46 | 	{
 47 | 		tstream << std::hex << std::showbase;
 48 | 		::InitializeCriticalSection( &cs );
 49 | 	}
 50 | 
 51 | 	public:
 52 | 		lockRAII( )
 53 | 		{
 54 | 			tstream << std::hex << std::showbase;
 55 | 			::InitializeCriticalSection( &cs );
 56 | 		}
 57 | 
 58 | 		lockRAII( const tstring& name ): csName( name )
 59 | 		{
 60 | 			tstream << std::hex << std::showbase;
 61 | 			::InitializeCriticalSection( &cs );
 62 | 		}
 63 | 
 64 | 		~lockRAII( )
 65 | 		{
 66 | 			::DeleteCriticalSection( &cs );
 67 | 		}
 68 | 
 69 | 		tstring& getName( )
 70 | 		{
 71 | 			return csName;
 72 | 		}
 73 | 
 74 | 		void setName( const tstring& name )
 75 | 		{
 76 | 			csName	= name;
 77 | 		}
 78 | 
 79 | 		void enter( )
 80 | 		{
 81 | 			if( debugPrint )
 82 | 			{
 83 | 				tstream.str( _T( "" ) );
 84 | 				tstream << _T( "Attempting CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl;
 85 | 				tout << tstream.str( );
 86 | 			}
 87 | 
 88 | 			::EnterCriticalSection( &cs );
 89 | 
 90 | 			if( debugPrint )
 91 | 			{
 92 | 				tstream.str( _T( "" ) );
 93 | 				tstream << _T( "Acquired CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl;
 94 | 				tstream << _T( "\tOwningThread( " ) << cs.OwningThread << _T( " )" ) << std::endl;
 95 | 				tstream << _T( "\tLockcount( " ) << cs.LockCount << _T( " )" ) << std::endl;
 96 | 				tstream << _T( "\tRecursionCount( " ) << cs.RecursionCount << _T( " )" ) << std::endl;
 97 | 				tout << tstream.str( );
 98 | 			}
 99 | 		}
100 | 
101 | 		void leave( )
102 | 		{
103 | 			if( debugPrint )
104 | 			{
105 | 				tstream.str( _T( "" ) );
106 | 				tstream << _T( "Releasing CRITICAL_SECTION( " ) << csName << _T( " )" ) << std::endl;
107 | 				tstream << _T( "\tOwningThread( " ) << cs.OwningThread << _T( " )" ) << std::endl;
108 | 				tstream << _T( "\tLockcount( " ) << cs.LockCount << _T( " )" ) << std::endl;
109 | 				tstream << _T( "\tRecursionCount( " ) << cs.RecursionCount << _T( " )" ) << std::endl << std::endl;
110 | 				tout << tstream.str( );
111 | 			}
112 | 
113 | 			::LeaveCriticalSection( &cs );
114 | 		}
115 | };
116 | 
117 | #else
118 | //	lockRAII provides an abstraction for the concept of a mutex; it wraps all  mutex functions in generic methods
119 | //	Linux implementation not done yet
120 | //	The template argument 'debugPrint' activates debugging information, but if not active the compiler optimizes
121 | //	the print statements out
122 | template< bool debugPrint >
123 | class lockRAII
124 | {
125 | 	pthread_mutex_t	mutex;
126 | 	pthread_mutexattr_t mAttr;
127 | 	tstring			mutexName;
128 | 	tstringstream	tstream;
129 | 
130 | 	//	Does not make sense to create a copy of a lock object; private method
131 | 	lockRAII( const lockRAII& rhs ): mutexName( rhs.mutexName )
132 | 	{
133 | 		tstream << std::hex << std::showbase;
134 | 	}
135 | 
136 | 	public:
137 | 		lockRAII( )
138 | 		{
139 | 			tstream << std::hex << std::showbase;
140 | 			pthread_mutexattr_init( &mAttr );
141 | 			pthread_mutexattr_settype( &mAttr, PTHREAD_MUTEX_RECURSIVE );
142 | 			pthread_mutex_init( &mutex, &mAttr );
143 | 		}
144 | 
145 | 		lockRAII( const tstring& name ): mutexName( name )
146 | 		{
147 | 			tstream << std::hex << std::showbase;
148 | 			pthread_mutexattr_init( &mAttr );
149 | 			pthread_mutexattr_settype( &mAttr, PTHREAD_MUTEX_RECURSIVE );
150 | 			pthread_mutex_init( &mutex, &mAttr );
151 | 		}
152 | 
153 | 		~lockRAII( )
154 | 		{
155 | 			pthread_mutex_destroy( &mutex );
156 | 			pthread_mutexattr_destroy( &mAttr );
157 | 		}
158 | 
159 | 		tstring& getName( )
160 | 		{
161 | 			return mutexName;
162 | 		}
163 | 
164 | 		void setName( const tstring& name )
165 | 		{
166 | 			mutexName	= name;
167 | 		}
168 | 
169 | 		void enter( )
170 | 		{
171 | 			if( debugPrint )
172 | 			{
173 | 				tstream.str( _T( "" ) );
174 | 				tstream << _T( "Attempting pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl;
175 | 				tout << tstream.str( );
176 | 			}
177 | 
178 | 			::pthread_mutex_lock( &mutex );
179 | 
180 | 			if( debugPrint )
181 | 			{
182 | 				tstream.str( _T( "" ) );
183 | 				tstream << _T( "Acquired pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl;
184 | 				//tstream << _T( "\tOwningThread( " ) << mutex.OwningThread << _T( " )" ) << std::endl;
185 | 				//tstream << _T( "\tLockcount( " ) << mutex.LockCount << _T( " )" ) << std::endl;
186 | 				//tstream << _T( "\tRecursionCount( " ) << mutex.RecursionCount << _T( " )" ) << std::endl;
187 | 				tout << tstream.str( );
188 | 			}
189 | 		}
190 | 
191 | 		void leave( )
192 | 		{
193 | 			if( debugPrint )
194 | 			{
195 | 				tstream.str( _T( "" ) );
196 | 				tstream << _T( "Releasing pthread_mutex_t( " ) << mutexName << _T( " )" ) << std::endl;
197 | 				//tstream << _T( "\tOwningThread( " ) << mutex.OwningThread << _T( " )" ) << std::endl;
198 | 				//tstream << _T( "\tLockcount( " ) << mutex.LockCount << _T( " )" ) << std::endl;
199 | 				//tstream << _T( "\tRecursionCount( " ) << mutex.RecursionCount << _T( " )" ) << std::endl << std::endl;
200 | 				tout << tstream.str( );
201 | 			}
202 | 
203 | 			::pthread_mutex_unlock( &mutex );
204 | 		}
205 | };
206 | #endif
207 | 
208 | //	Class used to make sure that we enter and leave critical sections in pairs
209 | //	The template logic logs our CRITICAL_SECTION actions; if the template parameter is false,
210 | //	the branch is constant and the compiler will optimize the branch out
211 | template< bool debugPrint >
212 | class scopedLock
213 | {
214 | 	lockRAII< debugPrint >* sLock;
215 | 	tstring			sLockName;
216 | 	tstringstream	tstream;
217 | 
218 | 	public:
219 | 		scopedLock( lockRAII< debugPrint >& lock, const tstring& name ): sLock( &lock ), sLockName( name )
220 | 		{
221 | 			if( debugPrint )
222 | 			{
223 | 				tstream.str( _T( "" ) );
224 | 				tstream << _T( "Entering scopedLock( " ) << sLockName << _T( " )" ) << std::endl << std::endl;
225 | 				tout << tstream.str( );
226 | 			}
227 | 
228 | 			sLock->enter( );
229 | 		}
230 | 
231 | 		~scopedLock( )
232 | 		{
233 | 			sLock->leave( );
234 | 
235 | 			if( debugPrint )
236 | 			{
237 | 				tstream.str( _T( "" ) );
238 | 				tstream << _T( "Left scopedLock( " ) << sLockName << _T( " )" ) << std::endl << std::endl;
239 | 				tout << tstream.str( );
240 | 			}
241 | 		}
242 | };
243 | 
244 | //	Convenience macro to enable/disable debugging print statements
245 | #define lockRAII lockRAII< false >
246 | #define scopedLock scopedLock< false >
247 | 
248 | #endif	// CLFFT_lock_H
249 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## Build Status
  2 | | Build branch | master | develop |
  3 | |-----|-----|-----|
  4 | | GCC/Clang x64 | [![Build Status](https://travis-ci.org/clMathLibraries/clFFT.svg?branch=master)](https://travis-ci.org/clMathLibraries/clFFT/branches) | [![Build Status](https://travis-ci.org/clMathLibraries/clFFT.svg?branch=develop)](https://travis-ci.org/clMathLibraries/clFFT/branches) |
  5 | | Visual Studio x64 |  |[![Build status](https://ci.appveyor.com/api/projects/status/facii32v72y98opv/branch/develop?svg=true)](https://ci.appveyor.com/project/kknox/clfft-whc3m/branch/develop) |
  6 | 
  7 | clFFT
  8 | =====
  9 | clFFT is a software library containing FFT functions written
 10 | in OpenCL. In addition to GPU devices, the library also supports
 11 | running on CPU devices to facilitate debugging and heterogeneous
 12 | programming.
 13 | 
 14 | Pre-built binaries are available [here][binary_release].
 15 | 
 16 | ## What's New
 17 | 
 18 | -   Support for transform sizes that are powers of 11 and 13
 19 | -   Support for 1D large size transforms with no extra memory allocation
 20 |     requirement with environment flag CLFFT_REQUEST_LIB_NOMEMALLOC=1
 21 |     for complex FFTs of powers of 2,3,5,10 sizes
 22 | 
 23 | 
 24 | ## Note
 25 | 
 26 | -   clFFT requires platform/runtime that supports OpenCL 1.2
 27 | 
 28 | ## Introduction to clFFT
 29 | 
 30 | The FFT is an implementation of the Discrete Fourier Transform (DFT)
 31 | that makes use of symmetries in the DFT definition to reduce the
 32 | mathematical intensity required from O(N^2) to O(N log2(N)) when the
 33 | sequence length N is the product of small prime factors. Currently,
 34 | there is no standard API for FFT routines. Hardware vendors usually
 35 | provide a set of high-performance FFTs optimized for their systems: no
 36 | two vendors employ the same interfaces for their FFT routines. clFFT
 37 | provides a set of FFT routines that are optimized for AMD graphics
 38 | processors, but also are functional across CPU and other compute
 39 | devices.
 40 | 
 41 | The clFFT library is an open source OpenCL library implementation of
 42 | discrete Fast Fourier Transforms. The library:
 43 | 
 44 | -   provides a fast and accurate platform for calculating discrete FFTs.
 45 | 
 46 | -   works on CPU or GPU backends.
 47 | 
 48 | -   supports in-place or out-of-place transforms.
 49 | 
 50 | -   supports 1D, 2D, and 3D transforms with a batch size that can be
 51 |     greater than 1.
 52 | 
 53 | -   supports planar (real and complex components in separate arrays) and
 54 |     interleaved (real and complex components as a pair contiguous in
 55 |     memory) formats.
 56 | 
 57 | -   supports dimension lengths that can be any combination of powers of
 58 |     2, 3, 5, 7, 11 and 13.
 59 | 
 60 | -   supports single and double precision floating point formats.
 61 | 
 62 | ## clFFT library user documentation
 63 | 
 64 | [Library and API documentation][] for developers is available online as
 65 | a GitHub Pages website.
 66 | 
 67 | ### Google Groups
 68 | 
 69 | Two mailing lists exist for the clMath projects:
 70 | 
 71 | -   [clmath@googlegroups.com][] - group whose focus is to answer
 72 |     questions on using the library or reporting issues
 73 | 
 74 | -   [clmath-developers@googlegroups.com][] - group whose focus is for
 75 |     developers interested in contributing to the library code
 76 | 
 77 | ### API semantic versioning
 78 | Good software is typically the result of the loop of feedback and iteration; software 
 79 | interfaces no less so. clFFT follows the [semantic versioning] guidelines. The version
 80 | number used is of the form MAJOR.MINOR.PATCH.
 81 | 
 82 | ## clFFT Wiki
 83 | 
 84 | The [project wiki][clmath@googlegroups.com] contains helpful
 85 | documentation, including a [build
 86 | primer][clmath-developers@googlegroups.com]
 87 | 
 88 | ## Contributing code
 89 | 
 90 | Please refer to and read the [Contributing][] document for guidelines on
 91 | how to contribute code to this open source project. The code in the
 92 | /master branch is considered to be stable, and all pull-requests must
 93 | be made against the /develop branch.
 94 | 
 95 | ## License
 96 | 
 97 | The source for clFFT is licensed under the [Apache License, Version
 98 | 2.0][]
 99 | 
100 | ## Example
101 | 
102 | The following simple example shows how to use clFFT to compute a simple 1D
103 | forward transform
104 | ```c
105 | #include <stdlib.h>
106 | 
107 | /* No need to explicitly include the OpenCL headers */
108 | #include <clFFT.h>
109 | 
110 | int main( void )
111 | {
112 |     cl_int err;
113 |     cl_platform_id platform = 0;
114 |     cl_device_id device = 0;
115 |     cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
116 |     cl_context ctx = 0;
117 |     cl_command_queue queue = 0;
118 |     cl_mem bufX;
119 | 	float *X;
120 |     cl_event event = NULL;
121 |     int ret = 0;
122 | 	size_t N = 16;
123 | 
124 | 	/* FFT library related declarations */
125 | 	clfftPlanHandle planHandle;
126 | 	clfftDim dim = CLFFT_1D;
127 | 	size_t clLengths[1] = {N};
128 | 
129 |     /* Setup OpenCL environment. */
130 |     err = clGetPlatformIDs( 1, &platform, NULL );
131 |     err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL );
132 | 
133 |     props[1] = (cl_context_properties)platform;
134 |     ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
135 |     queue = clCreateCommandQueue( ctx, device, 0, &err );
136 | 
137 |     /* Setup clFFT. */
138 | 	clfftSetupData fftSetup;
139 | 	err = clfftInitSetupData(&fftSetup);
140 | 	err = clfftSetup(&fftSetup);
141 | 
142 | 	/* Allocate host & initialize data. */
143 | 	/* Only allocation shown for simplicity. */
144 | 	X = (float *)malloc(N * 2 * sizeof(*X));
145 | 
146 |     /* Prepare OpenCL memory objects and place data inside them. */
147 |     bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err );
148 | 
149 |     err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0,
150 | 	N * 2 * sizeof( *X ), X, 0, NULL, NULL );
151 | 
152 | 	/* Create a default plan for a complex FFT. */
153 | 	err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
154 | 
155 | 	/* Set plan parameters. */
156 | 	err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
157 | 	err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
158 | 	err = clfftSetResultLocation(planHandle, CLFFT_INPLACE);
159 | 
160 |     /* Bake the plan. */
161 | 	err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);
162 | 
163 | 	/* Execute the plan. */
164 | 	err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);
165 | 
166 | 	/* Wait for calculations to be finished. */
167 | 	err = clFinish(queue);
168 | 
169 | 	/* Fetch results of calculations. */
170 | 	err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL );
171 | 
172 |     /* Release OpenCL memory objects. */
173 |     clReleaseMemObject( bufX );
174 | 
175 | 	free(X);
176 | 
177 | 	/* Release the plan. */
178 | 	err = clfftDestroyPlan( &planHandle );
179 | 
180 |     /* Release clFFT library. */
181 |     clfftTeardown( );
182 | 
183 |     /* Release OpenCL working objects. */
184 |     clReleaseCommandQueue( queue );
185 |     clReleaseContext( ctx );
186 | 
187 |     return ret;
188 | }
189 | ```
190 | 
191 | ## Build dependencies
192 | 
193 | ### Library for Windows   
194 | To develop the clFFT library code on a Windows operating system, ensure that the following packages are installed on your system:
195 | 
196 | -   Windows® 7/8.1
197 | 
198 | -   Visual Studio 2012 or later
199 | 
200 | -   Latest CMake
201 | 
202 | -   An OpenCL SDK, such as APP SDK 3.0
203 | 
204 | ### Library for Linux
205 | To develop the clFFT library code on a Linux operating system, ensure that the following packages are installed on your system:
206 | -   GCC 4.6 and onwards
207 | 
208 | -   Latest CMake
209 | 
210 | -   An OpenCL SDK, such as APP SDK 3.0
211 | 
212 | ### Library for Mac OSX
213 | To develop the clFFT library code on Mac OS X, it is recommended to generate Unix makefiles with cmake.
214 | 
215 | ### Test infrastructure
216 | To test the developed clFFT library code, ensure that the following packages are installed on your system:
217 | 
218 | -   Googletest v1.6
219 | 
220 | -   Latest FFTW
221 | 
222 | -   Latest Boost
223 | 
224 | ### Performance infrastructure
225 | To measure the performance of the clFFT library code, ensure that Python is installed on your system.
226 | 
227 |   [Library and API documentation]: http://clmathlibraries.github.io/clFFT/
228 |   [clmath@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki
229 |   [clmath-developers@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki/Build
230 |   [Contributing]: CONTRIBUTING.md
231 |   [Apache License, Version 2.0]: http://www.apache.org/licenses/LICENSE-2.0
232 |   [binary_release]: https://github.com/clMathLibraries/clFFT/releases
233 |   [semantic versioning]: http://semver.org/


--------------------------------------------------------------------------------