├── .appveyor.yml ├── .github ├── FUNDING.yml └── workflows │ ├── build_and_test.yml │ └── release.yml ├── .gitignore ├── CHANGELOG ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── clblast.pc.in ├── cmake ├── Modules │ ├── FindCBLAS.cmake │ ├── FindMKL.cmake │ ├── FindOpenCL.cmake │ ├── FindclBLAS.cmake │ └── FindcuBLAS.cmake ├── c_flag_overrides.cmake └── cxx_flag_overrides.cmake ├── doc ├── api.md ├── benchmarking.md ├── bindings.md ├── details_conv.md ├── details_gemm.md ├── faq.md ├── glossary.md ├── installation.md ├── routines.md ├── testing.md └── tuning.md ├── include ├── clblast.h ├── clblast_c.h ├── clblast_cuda.h ├── clblast_half.h └── clblast_netlib_c.h ├── samples ├── cache.c ├── daxpy_cuda.cpp ├── dgemv.c ├── dtrsm.cpp ├── haxpy.c ├── samax.c ├── sasum.c ├── sgemm.c ├── sgemm.cpp ├── sgemm_batched.cpp ├── sgemm_cuda.cpp ├── sgemm_netlib.c └── tuning_api.cpp ├── scripts ├── benchmark │ ├── benchmark.py │ ├── benchmark_all.py │ ├── plot.py │ ├── settings.py │ └── utils.py ├── database │ ├── database.py │ └── database │ │ ├── __init__.py │ │ ├── bests.py │ │ ├── clblast.py │ │ ├── db.py │ │ ├── defaults.py │ │ └── io.py └── generator │ ├── generator.py │ └── generator │ ├── __init__.py │ ├── convert.py │ ├── cpp.py │ ├── datatype.py │ ├── doc.py │ ├── pyclblast.py │ └── routine.py ├── src ├── api_common.cpp ├── cache.cpp ├── cache.hpp ├── clblast.cpp ├── clblast_c.cpp ├── clblast_cuda.cpp ├── clblast_netlib_c.cpp ├── clpp11.hpp ├── cupp11.hpp ├── cxpp11_common.hpp ├── database │ ├── apple_cpu_fallback.hpp │ ├── database.cpp │ ├── database.hpp │ ├── database_structure.hpp │ └── kernels │ │ ├── copy │ │ ├── copy.cpp │ │ ├── copy.hpp │ │ ├── copy_16.hpp │ │ ├── copy_32.hpp │ │ ├── copy_3232.hpp │ │ ├── copy_64.hpp │ │ └── copy_6464.hpp │ │ ├── gemm_routine │ │ ├── gemm_routine.cpp │ │ ├── gemm_routine.hpp │ │ ├── gemm_routine_16.hpp │ │ ├── gemm_routine_32.hpp │ │ ├── gemm_routine_3232.hpp │ │ ├── gemm_routine_64.hpp │ │ └── 
gemm_routine_6464.hpp │ │ ├── invert │ │ ├── invert.cpp │ │ ├── invert.hpp │ │ ├── invert_16.hpp │ │ ├── invert_32.hpp │ │ ├── invert_3232.hpp │ │ ├── invert_64.hpp │ │ └── invert_6464.hpp │ │ ├── pad │ │ ├── pad.cpp │ │ ├── pad.hpp │ │ ├── pad_16.hpp │ │ ├── pad_32.hpp │ │ ├── pad_3232.hpp │ │ ├── pad_64.hpp │ │ └── pad_6464.hpp │ │ ├── padtranspose │ │ ├── padtranspose.cpp │ │ ├── padtranspose.hpp │ │ ├── padtranspose_16.hpp │ │ ├── padtranspose_32.hpp │ │ ├── padtranspose_3232.hpp │ │ ├── padtranspose_64.hpp │ │ └── padtranspose_6464.hpp │ │ ├── transpose │ │ ├── transpose.cpp │ │ ├── transpose.hpp │ │ ├── transpose_16.hpp │ │ ├── transpose_32.hpp │ │ ├── transpose_3232.hpp │ │ ├── transpose_64.hpp │ │ └── transpose_6464.hpp │ │ ├── trsv_routine │ │ ├── trsv_routine.cpp │ │ ├── trsv_routine.hpp │ │ ├── trsv_routine_16.hpp │ │ ├── trsv_routine_32.hpp │ │ ├── trsv_routine_3232.hpp │ │ ├── trsv_routine_64.hpp │ │ └── trsv_routine_6464.hpp │ │ ├── xaxpy │ │ ├── xaxpy.cpp │ │ ├── xaxpy.hpp │ │ ├── xaxpy_16.hpp │ │ ├── xaxpy_32.hpp │ │ ├── xaxpy_3232.hpp │ │ ├── xaxpy_64.hpp │ │ └── xaxpy_6464.hpp │ │ ├── xconvgemm │ │ ├── xconvgemm.cpp │ │ ├── xconvgemm.hpp │ │ ├── xconvgemm_16.hpp │ │ ├── xconvgemm_32.hpp │ │ ├── xconvgemm_3232.hpp │ │ ├── xconvgemm_64.hpp │ │ └── xconvgemm_6464.hpp │ │ ├── xdot │ │ ├── xdot.cpp │ │ ├── xdot.hpp │ │ ├── xdot_16.hpp │ │ ├── xdot_32.hpp │ │ ├── xdot_3232.hpp │ │ ├── xdot_64.hpp │ │ └── xdot_6464.hpp │ │ ├── xgemm │ │ ├── xgemm.cpp │ │ ├── xgemm.hpp │ │ ├── xgemm_16.hpp │ │ ├── xgemm_32.hpp │ │ ├── xgemm_3232.hpp │ │ ├── xgemm_64.hpp │ │ └── xgemm_6464.hpp │ │ ├── xgemm_direct │ │ ├── xgemm_direct.cpp │ │ ├── xgemm_direct.hpp │ │ ├── xgemm_direct_16.hpp │ │ ├── xgemm_direct_32.hpp │ │ ├── xgemm_direct_3232.hpp │ │ ├── xgemm_direct_64.hpp │ │ └── xgemm_direct_6464.hpp │ │ ├── xgemv │ │ ├── xgemv.cpp │ │ ├── xgemv.hpp │ │ ├── xgemv_16.hpp │ │ ├── xgemv_32.hpp │ │ ├── xgemv_3232.hpp │ │ ├── xgemv_64.hpp │ │ └── xgemv_6464.hpp │ │ ├── 
xgemv_fast │ │ ├── xgemv_fast.cpp │ │ ├── xgemv_fast.hpp │ │ ├── xgemv_fast_16.hpp │ │ ├── xgemv_fast_32.hpp │ │ ├── xgemv_fast_3232.hpp │ │ ├── xgemv_fast_64.hpp │ │ └── xgemv_fast_6464.hpp │ │ ├── xgemv_fast_rot │ │ ├── xgemv_fast_rot.cpp │ │ ├── xgemv_fast_rot.hpp │ │ ├── xgemv_fast_rot_16.hpp │ │ ├── xgemv_fast_rot_32.hpp │ │ ├── xgemv_fast_rot_3232.hpp │ │ ├── xgemv_fast_rot_64.hpp │ │ └── xgemv_fast_rot_6464.hpp │ │ └── xger │ │ ├── xger.cpp │ │ ├── xger.hpp │ │ ├── xger_16.hpp │ │ ├── xger_32.hpp │ │ ├── xger_3232.hpp │ │ ├── xger_64.hpp │ │ └── xger_6464.hpp ├── kernel_preprocessor.cpp ├── kernel_preprocessor.hpp ├── kernels │ ├── common.opencl │ ├── level1 │ │ ├── level1.opencl │ │ ├── xamax.opencl │ │ ├── xasum.opencl │ │ ├── xaxpy.opencl │ │ ├── xcopy.opencl │ │ ├── xdot.opencl │ │ ├── xhad.opencl │ │ ├── xnrm2.opencl │ │ ├── xscal.opencl │ │ └── xswap.opencl │ ├── level2 │ │ ├── level2.opencl │ │ ├── xgemv.opencl │ │ ├── xgemv_fast.opencl │ │ ├── xger.opencl │ │ ├── xher.opencl │ │ ├── xher2.opencl │ │ └── xtrsv.opencl │ ├── level3 │ │ ├── convert_hermitian.opencl │ │ ├── convert_symmetric.opencl │ │ ├── convert_triangular.opencl │ │ ├── copy_fast.opencl │ │ ├── copy_pad.opencl │ │ ├── invert_diagonal_blocks_part1.opencl │ │ ├── invert_diagonal_blocks_part2.opencl │ │ ├── level3.opencl │ │ ├── transpose_fast.opencl │ │ ├── transpose_pad.opencl │ │ ├── xgemm_batched.opencl │ │ ├── xgemm_direct_batched.opencl │ │ ├── xgemm_direct_part1.opencl │ │ ├── xgemm_direct_part2.opencl │ │ ├── xgemm_direct_part3.opencl │ │ ├── xgemm_part1.opencl │ │ ├── xgemm_part2.opencl │ │ ├── xgemm_part3.opencl │ │ └── xgemm_part4.opencl │ ├── levelx │ │ ├── col2im.opencl │ │ ├── im2col.opencl │ │ ├── xconvgemm_part1.opencl │ │ └── xconvgemm_part2.opencl │ └── opencl_to_cuda.h ├── pyclblast │ ├── CMakeLists.txt │ ├── MANIFEST.in │ ├── README.md │ ├── pyproject.toml │ ├── samples │ │ ├── haxpy.py │ │ ├── override_parameters.py │ │ ├── saxpy.py │ │ ├── saxpybatched.py │ │ ├── 
sgemm.py │ │ └── sgemv.py │ ├── src │ │ └── pyclblast.pyx │ └── test │ │ ├── __init__.py │ │ └── test_pyclblast.py ├── routine.cpp ├── routine.hpp ├── routines │ ├── common.cpp │ ├── common.hpp │ ├── level1 │ │ ├── xamax.cpp │ │ ├── xamax.hpp │ │ ├── xamin.hpp │ │ ├── xasum.cpp │ │ ├── xasum.hpp │ │ ├── xaxpy.cpp │ │ ├── xaxpy.hpp │ │ ├── xcopy.cpp │ │ ├── xcopy.hpp │ │ ├── xdot.cpp │ │ ├── xdot.hpp │ │ ├── xdotc.cpp │ │ ├── xdotc.hpp │ │ ├── xdotu.cpp │ │ ├── xdotu.hpp │ │ ├── xmax.hpp │ │ ├── xmin.hpp │ │ ├── xnrm2.cpp │ │ ├── xnrm2.hpp │ │ ├── xscal.cpp │ │ ├── xscal.hpp │ │ ├── xsum.hpp │ │ ├── xswap.cpp │ │ └── xswap.hpp │ ├── level2 │ │ ├── xgbmv.cpp │ │ ├── xgbmv.hpp │ │ ├── xgemv.cpp │ │ ├── xgemv.hpp │ │ ├── xger.cpp │ │ ├── xger.hpp │ │ ├── xgerc.cpp │ │ ├── xgerc.hpp │ │ ├── xgeru.cpp │ │ ├── xgeru.hpp │ │ ├── xhbmv.cpp │ │ ├── xhbmv.hpp │ │ ├── xhemv.cpp │ │ ├── xhemv.hpp │ │ ├── xher.cpp │ │ ├── xher.hpp │ │ ├── xher2.cpp │ │ ├── xher2.hpp │ │ ├── xhpmv.cpp │ │ ├── xhpmv.hpp │ │ ├── xhpr.cpp │ │ ├── xhpr.hpp │ │ ├── xhpr2.cpp │ │ ├── xhpr2.hpp │ │ ├── xsbmv.cpp │ │ ├── xsbmv.hpp │ │ ├── xspmv.cpp │ │ ├── xspmv.hpp │ │ ├── xspr.cpp │ │ ├── xspr.hpp │ │ ├── xspr2.cpp │ │ ├── xspr2.hpp │ │ ├── xsymv.cpp │ │ ├── xsymv.hpp │ │ ├── xsyr.cpp │ │ ├── xsyr.hpp │ │ ├── xsyr2.cpp │ │ ├── xsyr2.hpp │ │ ├── xtbmv.cpp │ │ ├── xtbmv.hpp │ │ ├── xtpmv.cpp │ │ ├── xtpmv.hpp │ │ ├── xtrmv.cpp │ │ ├── xtrmv.hpp │ │ ├── xtrsv.cpp │ │ └── xtrsv.hpp │ ├── level3 │ │ ├── xgemm.cpp │ │ ├── xgemm.hpp │ │ ├── xhemm.cpp │ │ ├── xhemm.hpp │ │ ├── xher2k.cpp │ │ ├── xher2k.hpp │ │ ├── xherk.cpp │ │ ├── xherk.hpp │ │ ├── xsymm.cpp │ │ ├── xsymm.hpp │ │ ├── xsyr2k.cpp │ │ ├── xsyr2k.hpp │ │ ├── xsyrk.cpp │ │ ├── xsyrk.hpp │ │ ├── xtrmm.cpp │ │ ├── xtrmm.hpp │ │ ├── xtrsm.cpp │ │ └── xtrsm.hpp │ ├── levelx │ │ ├── xaxpybatched.cpp │ │ ├── xaxpybatched.hpp │ │ ├── xcol2im.cpp │ │ ├── xcol2im.hpp │ │ ├── xconvgemm.cpp │ │ ├── xconvgemm.hpp │ │ ├── xgemmbatched.cpp │ │ ├── 
xgemmbatched.hpp │ │ ├── xgemmstridedbatched.cpp │ │ ├── xgemmstridedbatched.hpp │ │ ├── xhad.cpp │ │ ├── xhad.hpp │ │ ├── xim2col.cpp │ │ ├── xim2col.hpp │ │ ├── xinvert.cpp │ │ ├── xinvert.hpp │ │ ├── xomatcopy.cpp │ │ └── xomatcopy.hpp │ └── routines.hpp ├── tuning │ ├── configurations.cpp │ ├── configurations.hpp │ ├── kernels │ │ ├── copy_fast.cpp │ │ ├── copy_fast.hpp │ │ ├── copy_pad.cpp │ │ ├── copy_pad.hpp │ │ ├── invert.cpp │ │ ├── invert.hpp │ │ ├── transpose_fast.cpp │ │ ├── transpose_fast.hpp │ │ ├── transpose_pad.cpp │ │ ├── transpose_pad.hpp │ │ ├── xaxpy.cpp │ │ ├── xaxpy.hpp │ │ ├── xconvgemm.cpp │ │ ├── xconvgemm.hpp │ │ ├── xdot.cpp │ │ ├── xdot.hpp │ │ ├── xgemm.cpp │ │ ├── xgemm.hpp │ │ ├── xgemm_direct.cpp │ │ ├── xgemm_direct.hpp │ │ ├── xgemv.cpp │ │ ├── xgemv.hpp │ │ ├── xger.cpp │ │ └── xger.hpp │ ├── routines │ │ ├── routine_tuner.hpp │ │ ├── xgemm.cpp │ │ └── xtrsv.cpp │ ├── tuning.cpp │ ├── tuning.hpp │ └── tuning_api.cpp └── utilities │ ├── android.hpp │ ├── buffer_test.hpp │ ├── clblast_exceptions.cpp │ ├── clblast_exceptions.hpp │ ├── compile.cpp │ ├── compile.hpp │ ├── device_mapping.hpp │ ├── msvc.hpp │ ├── timing.cpp │ ├── timing.hpp │ ├── utilities.cpp │ └── utilities.hpp ├── test ├── correctness │ ├── misc │ │ ├── override_parameters.cpp │ │ ├── preprocessor.cpp │ │ └── retrieve_parameters.cpp │ ├── routines │ │ ├── level1 │ │ │ ├── xamax.cpp │ │ │ ├── xasum.cpp │ │ │ ├── xaxpy.cpp │ │ │ ├── xcopy.cpp │ │ │ ├── xdot.cpp │ │ │ ├── xdotc.cpp │ │ │ ├── xdotu.cpp │ │ │ ├── xnrm2.cpp │ │ │ ├── xrot.cpp │ │ │ ├── xrotg.cpp │ │ │ ├── xrotm.cpp │ │ │ ├── xrotmg.cpp │ │ │ ├── xscal.cpp │ │ │ └── xswap.cpp │ │ ├── level2 │ │ │ ├── xgbmv.cpp │ │ │ ├── xgemv.cpp │ │ │ ├── xger.cpp │ │ │ ├── xgerc.cpp │ │ │ ├── xgeru.cpp │ │ │ ├── xhbmv.cpp │ │ │ ├── xhemv.cpp │ │ │ ├── xher.cpp │ │ │ ├── xher2.cpp │ │ │ ├── xhpmv.cpp │ │ │ ├── xhpr.cpp │ │ │ ├── xhpr2.cpp │ │ │ ├── xsbmv.cpp │ │ │ ├── xspmv.cpp │ │ │ ├── xspr.cpp │ │ │ ├── xspr2.cpp │ │ │ 
├── xsymv.cpp │ │ │ ├── xsyr.cpp │ │ │ ├── xsyr2.cpp │ │ │ ├── xtbmv.cpp │ │ │ ├── xtbsv.cpp │ │ │ ├── xtpmv.cpp │ │ │ ├── xtpsv.cpp │ │ │ ├── xtrmv.cpp │ │ │ └── xtrsv.cpp │ │ ├── level3 │ │ │ ├── xgemm.cpp │ │ │ ├── xhemm.cpp │ │ │ ├── xher2k.cpp │ │ │ ├── xherk.cpp │ │ │ ├── xsymm.cpp │ │ │ ├── xsyr2k.cpp │ │ │ ├── xsyrk.cpp │ │ │ ├── xtrmm.cpp │ │ │ └── xtrsm.cpp │ │ └── levelx │ │ │ ├── xaxpybatched.cpp │ │ │ ├── xcol2im.cpp │ │ │ ├── xconvgemm.cpp │ │ │ ├── xgemmbatched.cpp │ │ │ ├── xgemmstridedbatched.cpp │ │ │ ├── xhad.cpp │ │ │ ├── xim2col.cpp │ │ │ ├── xinvert.cpp │ │ │ └── xomatcopy.cpp │ ├── testblas.cpp │ ├── testblas.hpp │ ├── tester.cpp │ └── tester.hpp ├── diagnostics.cpp ├── performance │ ├── client.cpp │ ├── client.hpp │ └── routines │ │ ├── level1 │ │ ├── xamax.cpp │ │ ├── xasum.cpp │ │ ├── xaxpy.cpp │ │ ├── xcopy.cpp │ │ ├── xdot.cpp │ │ ├── xdotc.cpp │ │ ├── xdotu.cpp │ │ ├── xnrm2.cpp │ │ ├── xrot.cpp │ │ ├── xrotg.cpp │ │ ├── xrotm.cpp │ │ ├── xrotmg.cpp │ │ ├── xscal.cpp │ │ └── xswap.cpp │ │ ├── level2 │ │ ├── xgbmv.cpp │ │ ├── xgemv.cpp │ │ ├── xger.cpp │ │ ├── xgerc.cpp │ │ ├── xgeru.cpp │ │ ├── xhbmv.cpp │ │ ├── xhemv.cpp │ │ ├── xher.cpp │ │ ├── xher2.cpp │ │ ├── xhpmv.cpp │ │ ├── xhpr.cpp │ │ ├── xhpr2.cpp │ │ ├── xsbmv.cpp │ │ ├── xspmv.cpp │ │ ├── xspr.cpp │ │ ├── xspr2.cpp │ │ ├── xsymv.cpp │ │ ├── xsyr.cpp │ │ ├── xsyr2.cpp │ │ ├── xtbmv.cpp │ │ ├── xtbsv.cpp │ │ ├── xtpmv.cpp │ │ ├── xtpsv.cpp │ │ ├── xtrmv.cpp │ │ └── xtrsv.cpp │ │ ├── level3 │ │ ├── xgemm.cpp │ │ ├── xhemm.cpp │ │ ├── xher2k.cpp │ │ ├── xherk.cpp │ │ ├── xsymm.cpp │ │ ├── xsyr2k.cpp │ │ ├── xsyrk.cpp │ │ ├── xtrmm.cpp │ │ └── xtrsm.cpp │ │ └── levelx │ │ ├── xaxpybatched.cpp │ │ ├── xcol2im.cpp │ │ ├── xconvgemm.cpp │ │ ├── xgemmbatched.cpp │ │ ├── xgemmstridedbatched.cpp │ │ ├── xhad.cpp │ │ ├── xim2col.cpp │ │ ├── xinvert.cpp │ │ └── xomatcopy.cpp ├── routines │ ├── common.hpp │ ├── level1 │ │ ├── xamax.hpp │ │ ├── xasum.hpp │ │ ├── xaxpy.hpp │ │ ├── 
xcopy.hpp │ │ ├── xdot.hpp │ │ ├── xdotc.hpp │ │ ├── xdotu.hpp │ │ ├── xnrm2.hpp │ │ ├── xscal.hpp │ │ └── xswap.hpp │ ├── level2 │ │ ├── xgbmv.hpp │ │ ├── xgemv.hpp │ │ ├── xger.hpp │ │ ├── xgerc.hpp │ │ ├── xgeru.hpp │ │ ├── xhbmv.hpp │ │ ├── xhemv.hpp │ │ ├── xher.hpp │ │ ├── xher2.hpp │ │ ├── xhpmv.hpp │ │ ├── xhpr.hpp │ │ ├── xhpr2.hpp │ │ ├── xsbmv.hpp │ │ ├── xspmv.hpp │ │ ├── xspr.hpp │ │ ├── xspr2.hpp │ │ ├── xsymv.hpp │ │ ├── xsyr.hpp │ │ ├── xsyr2.hpp │ │ ├── xtbmv.hpp │ │ ├── xtpmv.hpp │ │ ├── xtrmv.hpp │ │ └── xtrsv.hpp │ ├── level3 │ │ ├── xgemm.hpp │ │ ├── xhemm.hpp │ │ ├── xher2k.hpp │ │ ├── xherk.hpp │ │ ├── xsymm.hpp │ │ ├── xsyr2k.hpp │ │ ├── xsyrk.hpp │ │ ├── xtrmm.hpp │ │ ├── xtrsm.hpp │ │ └── xtrsm_data.hpp │ └── levelx │ │ ├── xaxpybatched.hpp │ │ ├── xcol2im.hpp │ │ ├── xconvgemm.hpp │ │ ├── xgemmbatched.hpp │ │ ├── xgemmstridedbatched.hpp │ │ ├── xhad.hpp │ │ ├── xim2col.hpp │ │ ├── xinvert.hpp │ │ └── xomatcopy.hpp ├── test_utilities.cpp ├── test_utilities.hpp ├── wrapper_cblas.hpp ├── wrapper_clblas.hpp ├── wrapper_cublas.hpp └── wrapper_cuda.hpp └── vcpkg.json /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: CNugteren 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | stash 3 | .* 4 | *.pyc 5 | database.json 6 | database_best.json 7 | cl.hpp 8 | opencl.hpp 9 | src/pyclblast/dist 10 | *.egg-info -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | CLBlast: Contributing guidelines 3 | ================ 4 | 5 | For information about the CLBlast library, see the [README](README.md) file instead. 
6 | 7 | Tuning results 8 | ------------- 9 | 10 | A [dedicated GitHub issue](https://github.com/CNugteren/CLBlast/issues/1) is available to post new tuning results. If you compiled with the tuners (see the [README](README.md) for instructions), ran one of the tuners on your device (or all perhaps?), and feel that these results should be included in the next release of CLBlast, please post them there. You can do this by attaching the JSON files to the issue (archived in a .ZIP file). 11 | 12 | 13 | Code improvements and additions 14 | ------------- 15 | 16 | Pull requests are welcome as long as they: 17 | 18 | * Contain unit test additions or modifications 19 | * Follow the CLBlast coding style, which is loosely based on the [Google C++ style guide](https://google.github.io/styleguide/cppguide.html) and the Effective C++ books by Scott Meyers. We use a tab-size of 2 spaces and a max-width of 100 characters. 20 | * Are made against the `master` branch. 21 | -------------------------------------------------------------------------------- /clblast.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ 4 | libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@ 5 | 6 | Name: CLBlast 7 | Description: CLBlast is a modern, lightweight, performant and tunable OpenCL BLAS library written in C++11 8 | Version: @clblast_VERSION_MAJOR@.@clblast_VERSION_MINOR@.@clblast_VERSION_PATCH@ 9 | Libs: -L${libdir} -lclblast 10 | Cflags: -I${includedir} 11 | -------------------------------------------------------------------------------- /cmake/c_flag_overrides.cmake: -------------------------------------------------------------------------------- 1 | # Overriding the CMake flags to use static runtime libraries 2 | # See http://www.cmake.org/Wiki/CMake_FAQ#How_can_I_build_my_MSVC_application_with_a_static_runtime.3F 3 | if(MSVC) 4 | 
set(CMAKE_C_FLAGS_DEBUG_INIT "/D_DEBUG /MTd /Zi /Ob0 /Od /RTC1") 5 | set(CMAKE_C_FLAGS_MINSIZEREL_INIT "/MT /O1 /Ob1 /D NDEBUG") 6 | set(CMAKE_C_FLAGS_RELEASE_INIT "/MT /O2 /Ob2 /D NDEBUG") 7 | set(CMAKE_C_FLAGS_RELWITHDEBINFO_INIT "/MT /Zi /O2 /Ob1 /D NDEBUG") 8 | endif() 9 | 10 | -------------------------------------------------------------------------------- /cmake/cxx_flag_overrides.cmake: -------------------------------------------------------------------------------- 1 | # Overriding the CMake flags to use static runtime libraries 2 | # See http://www.cmake.org/Wiki/CMake_FAQ#How_can_I_build_my_MSVC_application_with_a_static_runtime.3F 3 | if(MSVC) 4 | set(CMAKE_CXX_FLAGS_DEBUG_INIT "/D_DEBUG /MTd /Zi /Ob0 /Od /RTC1") 5 | set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "/MT /O1 /Ob1 /D NDEBUG") 6 | set(CMAKE_CXX_FLAGS_RELEASE_INIT "/MT /O2 /Ob2 /D NDEBUG") 7 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "/MT /Zi /O2 /Ob1 /D NDEBUG") 8 | endif() 9 | 10 | -------------------------------------------------------------------------------- /doc/glossary.md: -------------------------------------------------------------------------------- 1 | CLBlast: Glossary 2 | ================ 3 | 4 | This document describes some commonly used terms in CLBlast documentation and code. For other information about CLBlast, see the [main README](../README.md). 5 | 6 | * __BLAS__: The set of 'Basic Linear Algebra Subprograms'. 7 | * __Netlib BLAS__: The official BLAS API definition, with __CBLAS__ providing the C headers. 8 | * __OpenCL__: The Open Computing Language, a Khronos standard for heterogeneous and parallel computing, e.g. on GPUs. 9 | * __kernel__: An OpenCL parallel program that runs on the target device. 10 | * __clBLAS__: Another OpenCL BLAS library, maintained by AMD. 11 | * __cuBLAS__: The main CUDA BLAS library, maintained by NVIDIA. 12 | * __GEMM__: The 'GEneral Matrix Multiplication' routine. 
13 | * __Direct GEMM__: Computing GEMM using a single generic kernel which handles all cases (e.g. all kinds of matrix sizes). 14 | * __Indirect GEMM__: Computing GEMM using multiple kernels: the main GEMM kernel and a few pre-processing and post-processing kernels. The main kernel makes several assumptions (e.g. sizes need to be multiples of 32), which the other kernels make sure are satisfied. The main kernel is often faster than the generic kernel of the direct approach, but the cost of pre-processing and post-processing kernels can sometimes be high for small sizes or particular devices. 15 | -------------------------------------------------------------------------------- /scripts/database/database/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CNugteren/CLBlast/e2ee100d7b7a3fb1cb60eb541d787c07f137171b/scripts/database/database/__init__.py -------------------------------------------------------------------------------- /scripts/generator/generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CNugteren/CLBlast/e2ee100d7b7a3fb1cb60eb541d787c07f137171b/scripts/generator/generator/__init__.py -------------------------------------------------------------------------------- /src/database/kernels/copy/copy.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Copy' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/copy/copy.hpp" 11 | #include "database/kernels/copy/copy_16.hpp" 12 | #include "database/kernels/copy/copy_32.hpp" 13 | #include "database/kernels/copy/copy_3232.hpp" 14 | #include "database/kernels/copy/copy_64.hpp" 15 | #include "database/kernels/copy/copy_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/copy/copy.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Copy' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry CopyHalf; 16 | extern const DatabaseEntry CopySingle; 17 | extern const DatabaseEntry CopyComplexSingle; 18 | extern const DatabaseEntry CopyDouble; 19 | extern const DatabaseEntry CopyComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/gemm_routine/gemm_routine.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Gemm_Routine' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/gemm_routine/gemm_routine.hpp" 11 | #include "database/kernels/gemm_routine/gemm_routine_16.hpp" 12 | #include "database/kernels/gemm_routine/gemm_routine_32.hpp" 13 | #include "database/kernels/gemm_routine/gemm_routine_3232.hpp" 14 | #include "database/kernels/gemm_routine/gemm_routine_64.hpp" 15 | #include "database/kernels/gemm_routine/gemm_routine_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/gemm_routine/gemm_routine.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Gemm_Routine' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry GemmRoutineHalf; 16 | extern const DatabaseEntry GemmRoutineSingle; 17 | extern const DatabaseEntry GemmRoutineComplexSingle; 18 | extern const DatabaseEntry GemmRoutineDouble; 19 | extern const DatabaseEntry GemmRoutineComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/invert/invert.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Invert' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/invert/invert.hpp" 11 | #include "database/kernels/invert/invert_16.hpp" 12 | #include "database/kernels/invert/invert_32.hpp" 13 | #include "database/kernels/invert/invert_3232.hpp" 14 | #include "database/kernels/invert/invert_64.hpp" 15 | #include "database/kernels/invert/invert_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/invert/invert.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. 
The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Invert' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry InvertHalf; 16 | extern const DatabaseEntry InvertSingle; 17 | extern const DatabaseEntry InvertComplexSingle; 18 | extern const DatabaseEntry InvertDouble; 19 | extern const DatabaseEntry InvertComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/pad/pad.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Pad' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/pad/pad.hpp" 11 | #include "database/kernels/pad/pad_16.hpp" 12 | #include "database/kernels/pad/pad_32.hpp" 13 | #include "database/kernels/pad/pad_3232.hpp" 14 | #include "database/kernels/pad/pad_64.hpp" 15 | #include "database/kernels/pad/pad_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/pad/pad.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Pad' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry PadHalf; 16 | extern const DatabaseEntry PadSingle; 17 | extern const DatabaseEntry PadComplexSingle; 18 | extern const DatabaseEntry PadDouble; 19 | extern const DatabaseEntry PadComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/padtranspose/padtranspose.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/padtranspose/padtranspose.hpp" 11 | #include "database/kernels/padtranspose/padtranspose_16.hpp" 12 | #include "database/kernels/padtranspose/padtranspose_32.hpp" 13 | #include "database/kernels/padtranspose/padtranspose_3232.hpp" 14 | #include "database/kernels/padtranspose/padtranspose_64.hpp" 15 | #include "database/kernels/padtranspose/padtranspose_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/padtranspose/padtranspose.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry PadtransposeHalf; 16 | extern const DatabaseEntry PadtransposeSingle; 17 | extern const DatabaseEntry PadtransposeComplexSingle; 18 | extern const DatabaseEntry PadtransposeDouble; 19 | extern const DatabaseEntry PadtransposeComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/transpose/transpose.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Transpose' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/transpose/transpose.hpp" 11 | #include "database/kernels/transpose/transpose_16.hpp" 12 | #include "database/kernels/transpose/transpose_32.hpp" 13 | #include "database/kernels/transpose/transpose_3232.hpp" 14 | #include "database/kernels/transpose/transpose_64.hpp" 15 | #include "database/kernels/transpose/transpose_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/transpose/transpose.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Transpose' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry TransposeHalf; 16 | extern const DatabaseEntry TransposeSingle; 17 | extern const DatabaseEntry TransposeComplexSingle; 18 | extern const DatabaseEntry TransposeDouble; 19 | extern const DatabaseEntry TransposeComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/trsv_routine/trsv_routine.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Trsv_Routine' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/trsv_routine/trsv_routine.hpp" 11 | #include "database/kernels/trsv_routine/trsv_routine_16.hpp" 12 | #include "database/kernels/trsv_routine/trsv_routine_32.hpp" 13 | #include "database/kernels/trsv_routine/trsv_routine_3232.hpp" 14 | #include "database/kernels/trsv_routine/trsv_routine_64.hpp" 15 | #include "database/kernels/trsv_routine/trsv_routine_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/trsv_routine/trsv_routine.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Trsv_Routine' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry TrsvRoutineHalf; 16 | extern const DatabaseEntry TrsvRoutineSingle; 17 | extern const DatabaseEntry TrsvRoutineComplexSingle; 18 | extern const DatabaseEntry TrsvRoutineDouble; 19 | extern const DatabaseEntry TrsvRoutineComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/trsv_routine/trsv_routine_16.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Trsv_Routine16' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | namespace clblast { 11 | namespace database { 12 | 13 | const DatabaseEntry TrsvRoutineHalf = { 14 | "TrsvRoutine", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, { 15 | { // Default 16 | kDeviceTypeAll, "default", { 17 | { "default", { 18 | { kDeviceNameDefault , Params{ 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, 19 | } }, 20 | } 21 | }, 22 | } 23 | }; 24 | 25 | } // namespace database 26 | } // namespace clblast 27 | -------------------------------------------------------------------------------- /src/database/kernels/xaxpy/xaxpy.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xaxpy/xaxpy.hpp" 11 | #include "database/kernels/xaxpy/xaxpy_16.hpp" 12 | #include "database/kernels/xaxpy/xaxpy_32.hpp" 13 | #include "database/kernels/xaxpy/xaxpy_3232.hpp" 14 | #include "database/kernels/xaxpy/xaxpy_64.hpp" 15 | #include "database/kernels/xaxpy/xaxpy_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xaxpy/xaxpy.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XaxpyHalf; 16 | extern const DatabaseEntry XaxpySingle; 17 | extern const DatabaseEntry XaxpyComplexSingle; 18 | extern const DatabaseEntry XaxpyDouble; 19 | extern const DatabaseEntry XaxpyComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/xconvgemm/xconvgemm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xconvgemm' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xconvgemm/xconvgemm.hpp" 11 | #include "database/kernels/xconvgemm/xconvgemm_16.hpp" 12 | #include "database/kernels/xconvgemm/xconvgemm_32.hpp" 13 | #include "database/kernels/xconvgemm/xconvgemm_3232.hpp" 14 | #include "database/kernels/xconvgemm/xconvgemm_64.hpp" 15 | #include "database/kernels/xconvgemm/xconvgemm_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xconvgemm/xconvgemm.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xconvgemm' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XconvgemmHalf; 16 | extern const DatabaseEntry XconvgemmSingle; 17 | extern const DatabaseEntry XconvgemmComplexSingle; 18 | extern const DatabaseEntry XconvgemmDouble; 19 | extern const DatabaseEntry XconvgemmComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/xconvgemm/xconvgemm_3232.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xconvgemm3232' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | namespace clblast { 11 | namespace database { 12 | 13 | const DatabaseEntry XconvgemmComplexSingle = { 14 | "Xconvgemm", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { 15 | { // Default 16 | kDeviceTypeAll, "default", { 17 | { "default", { 18 | { kDeviceNameDefault , Params{ 1, 8, 16, 8, 8, 0, 0, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, 19 | } }, 20 | } 21 | }, 22 | } 23 | }; 24 | 25 | } // namespace database 26 | } // namespace clblast 27 | -------------------------------------------------------------------------------- /src/database/kernels/xconvgemm/xconvgemm_6464.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xconvgemm6464' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | namespace clblast { 11 | namespace database { 12 | 13 | const DatabaseEntry XconvgemmComplexDouble = { 14 | "Xconvgemm", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { 15 | { // Default 16 | kDeviceTypeAll, "default", { 17 | { "default", { 18 | { kDeviceNameDefault , Params{ 1, 8, 16, 8, 8, 0, 0, 1, 1, 16, 0, 0, 0, 0, 0, 0 } }, 19 | } }, 20 | } 21 | }, 22 | } 23 | }; 24 | 25 | } // namespace database 26 | } // namespace clblast 27 | -------------------------------------------------------------------------------- /src/database/kernels/xdot/xdot.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xdot' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xdot/xdot.hpp" 11 | #include "database/kernels/xdot/xdot_16.hpp" 12 | #include "database/kernels/xdot/xdot_32.hpp" 13 | #include "database/kernels/xdot/xdot_3232.hpp" 14 | #include "database/kernels/xdot/xdot_64.hpp" 15 | #include "database/kernels/xdot/xdot_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xdot/xdot.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xdot' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XdotHalf; 16 | extern const DatabaseEntry XdotSingle; 17 | extern const DatabaseEntry XdotComplexSingle; 18 | extern const DatabaseEntry XdotDouble; 19 | extern const DatabaseEntry XdotComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/xgemm/xgemm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xgemm/xgemm.hpp" 11 | #include "database/kernels/xgemm/xgemm_16.hpp" 12 | #include "database/kernels/xgemm/xgemm_32.hpp" 13 | #include "database/kernels/xgemm/xgemm_3232.hpp" 14 | #include "database/kernels/xgemm/xgemm_64.hpp" 15 | #include "database/kernels/xgemm/xgemm_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xgemm/xgemm.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XgemmHalf; 16 | extern const DatabaseEntry XgemmSingle; 17 | extern const DatabaseEntry XgemmComplexSingle; 18 | extern const DatabaseEntry XgemmDouble; 19 | extern const DatabaseEntry XgemmComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/xgemm_direct/xgemm_direct.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xgemm_direct/xgemm_direct.hpp" 11 | #include "database/kernels/xgemm_direct/xgemm_direct_16.hpp" 12 | #include "database/kernels/xgemm_direct/xgemm_direct_32.hpp" 13 | #include "database/kernels/xgemm_direct/xgemm_direct_3232.hpp" 14 | #include "database/kernels/xgemm_direct/xgemm_direct_64.hpp" 15 | #include "database/kernels/xgemm_direct/xgemm_direct_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xgemm_direct/xgemm_direct.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XgemmDirectHalf; 16 | extern const DatabaseEntry XgemmDirectSingle; 17 | extern const DatabaseEntry XgemmDirectComplexSingle; 18 | extern const DatabaseEntry XgemmDirectDouble; 19 | extern const DatabaseEntry XgemmDirectComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/xgemv/xgemv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xgemv/xgemv.hpp" 11 | #include "database/kernels/xgemv/xgemv_16.hpp" 12 | #include "database/kernels/xgemv/xgemv_32.hpp" 13 | #include "database/kernels/xgemv/xgemv_3232.hpp" 14 | #include "database/kernels/xgemv/xgemv_64.hpp" 15 | #include "database/kernels/xgemv/xgemv_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xgemv/xgemv.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XgemvHalf; 16 | extern const DatabaseEntry XgemvSingle; 17 | extern const DatabaseEntry XgemvComplexSingle; 18 | extern const DatabaseEntry XgemvDouble; 19 | extern const DatabaseEntry XgemvComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/xgemv_fast/xgemv_fast.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xgemv_fast/xgemv_fast.hpp" 11 | #include "database/kernels/xgemv_fast/xgemv_fast_16.hpp" 12 | #include "database/kernels/xgemv_fast/xgemv_fast_32.hpp" 13 | #include "database/kernels/xgemv_fast/xgemv_fast_3232.hpp" 14 | #include "database/kernels/xgemv_fast/xgemv_fast_64.hpp" 15 | #include "database/kernels/xgemv_fast/xgemv_fast_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xgemv_fast/xgemv_fast.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XgemvFastHalf; 16 | extern const DatabaseEntry XgemvFastSingle; 17 | extern const DatabaseEntry XgemvFastComplexSingle; 18 | extern const DatabaseEntry XgemvFastDouble; 19 | extern const DatabaseEntry XgemvFastComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp" 11 | #include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp" 12 | #include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp" 13 | #include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp" 14 | #include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp" 15 | #include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels. 
7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XgemvFastRotHalf; 16 | extern const DatabaseEntry XgemvFastRotSingle; 17 | extern const DatabaseEntry XgemvFastRotComplexSingle; 18 | extern const DatabaseEntry XgemvFastRotDouble; 19 | extern const DatabaseEntry XgemvFastRotComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/database/kernels/xger/xger.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xger' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/kernels/xger/xger.hpp" 11 | #include "database/kernels/xger/xger_16.hpp" 12 | #include "database/kernels/xger/xger_32.hpp" 13 | #include "database/kernels/xger/xger_3232.hpp" 14 | #include "database/kernels/xger/xger_64.hpp" 15 | #include "database/kernels/xger/xger_6464.hpp" 16 | -------------------------------------------------------------------------------- /src/database/kernels/xger/xger.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
It 4 | // is auto-generated by the 'scripts/database/database.py' Python script. 5 | // 6 | // This file populates the database with best-found tuning parameters for the 'Xger' kernels. 7 | // 8 | // ================================================================================================= 9 | 10 | #include "database/database_structure.hpp" 11 | 12 | namespace clblast { 13 | namespace database { 14 | 15 | extern const DatabaseEntry XgerHalf; 16 | extern const DatabaseEntry XgerSingle; 17 | extern const DatabaseEntry XgerComplexSingle; 18 | extern const DatabaseEntry XgerDouble; 19 | extern const DatabaseEntry XgerComplexDouble; 20 | 21 | } // namespace database 22 | } // namespace clblast 23 | -------------------------------------------------------------------------------- /src/kernel_preprocessor.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file contains a simple pre-processor for the OpenCL kernels. This pre-processor is used 11 | // in cases where the vendor's OpenCL compiler falls short in loop unrolling and array-to-register 12 | // promotion. This pre-processor is specific for the CLBlast code making many assumptions. 
13 | // 14 | // ================================================================================================= 15 | 16 | #ifndef CLBLAST_KERNEL_PREPROCESSOR_H_ 17 | #define CLBLAST_KERNEL_PREPROCESSOR_H_ 18 | 19 | #include <string> 20 | 21 | #include "utilities/utilities.hpp" 22 | 23 | namespace clblast { 24 | // ================================================================================================= 25 | 26 | std::string PreprocessKernelSource(const std::string& kernel_source); 27 | 28 | // ================================================================================================= 29 | } // namespace clblast 30 | 31 | // CLBLAST_KERNEL_PREPROCESSOR_H_ 32 | #endif 33 | -------------------------------------------------------------------------------- /src/pyclblast/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md src/*.pyx 2 | include samples/*.py 3 | include CMakeLists.txt 4 | -------------------------------------------------------------------------------- /src/pyclblast/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["scikit-build-core", "cython", "numpy"] 3 | build-backend = "scikit_build_core.build" 4 | 5 | [project] 6 | name = "pyclblast" 7 | version = "1.4.0" 8 | description = "Python bindings for CLBlast, the tuned OpenCL BLAS library" 9 | authors = [ 10 | {name = "Cedric Nugteren", email = "web@cedricnugteren.nl"} 11 | ] 12 | license = {text = "Apache Software License"} 13 | readme = "README.md" 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Intended Audience :: Developers", 17 | "Topic :: Software Development :: Libraries", 18 | "License :: OSI Approved :: Apache Software License", 19 | "Programming Language :: Python :: 2", 20 | "Programming Language :: Python :: 3", 21 | ] 22 | keywords = ["OpenCL", "BLAS", "CLBlast", "GEMM", "matrix-multiplication"] 23 | dependencies = [ 24 | 
"numpy", 25 | "pyopencl" 26 | ] 27 | 28 | [project.urls] 29 | Homepage = "https://github.com/CNugteren/CLBlast/blob/master/src/pyclblast" 30 | 31 | [tool.setuptools.packages.find] 32 | where = ["src"] 33 | -------------------------------------------------------------------------------- /src/pyclblast/samples/haxpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 4 | # This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. 5 | # 6 | # Author(s): 7 | # Cedric Nugteren 8 | 9 | import numpy as np 10 | import pyopencl as cl 11 | from pyopencl.array import Array 12 | import pyclblast 13 | 14 | # Settings for this sample 15 | dtype = 'float16' 16 | alpha = 1.5 17 | alpha_fp16 = pyclblast.float32_to_float16(alpha) 18 | n = 4 19 | 20 | print("# Setting up OpenCL") 21 | ctx = cl.create_some_context() 22 | queue = cl.CommandQueue(ctx) 23 | 24 | print("# Setting up Numpy arrays") 25 | x = np.linspace(1.0, n, num=n).astype(dtype=dtype) 26 | y = np.linspace(1.0, n / 2, num=n).astype(dtype=dtype) 27 | 28 | print("# Setting up OpenCL arrays") 29 | clx = Array(queue, x.shape, x.dtype) 30 | cly = Array(queue, y.shape, y.dtype) 31 | clx.set(x) 32 | cly.set(y) 33 | 34 | print("# Example level-1 operation: AXPY") 35 | pyclblast.axpy(queue, n, clx, cly, alpha=alpha_fp16) 36 | queue.finish() 37 | print("# Result for vector y: %s" % cly.get()) 38 | print("# Expected result: %s" % (alpha * x + y)) 39 | -------------------------------------------------------------------------------- /src/pyclblast/samples/override_parameters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
4 | # This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. 5 | # 6 | # Author(s): 7 | # Cedric Nugteren 8 | 9 | import numpy as np 10 | import pyopencl as cl 11 | from pyopencl.array import Array 12 | import pyclblast 13 | from datetime import datetime 14 | 15 | if __name__ == "__main__": 16 | 17 | # Set up pyopencl: 18 | ctx = cl.create_some_context() 19 | queue = cl.CommandQueue(ctx) 20 | 21 | # Set up a basic sgemm example: 22 | m, n, k = 2, 3, 4 23 | a = np.random.rand(m, k).astype(dtype=np.float32) 24 | b = np.random.rand(k, n).astype(dtype=np.float32) 25 | c = np.empty((m, n), np.float32) 26 | cla = Array(queue, a.shape, a.dtype) 27 | clb = Array(queue, b.shape, b.dtype) 28 | clc = Array(queue, c.shape, c.dtype) 29 | cla.set(a) 30 | clb.set(b) 31 | clc.set(c) 32 | 33 | # Perform sgemm on these matrices, overriding the CLBlast parameters. In this example, we'll 34 | # just change the 'MWG' parameter a couple of times: 35 | params = { "KWG": 32, "KWI": 2, "MDIMA": 8, "MDIMC": 8, "MWG": 64, "NDIMB": 8, "NDIMC": 8, 36 | "NWG": 64, "SA": 0, "SB": 0, "STRM": 0, "STRN": 0, "VWM": 4, "VWN": 1 } 37 | for mwg in (32, 64, 256): 38 | print("Running sgemm tuned with MWG = %d" % mwg) 39 | params["MWG"] = mwg 40 | pyclblast.override_parameters(ctx.devices[0], 'Xgemm', 32, params) 41 | pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n) 42 | assert np.allclose(clc.get(), a.dot(b)), "uh-oh, xgemm isn't behaving correctly" 43 | -------------------------------------------------------------------------------- /src/pyclblast/samples/saxpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 4 | # This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. 
5 | # 6 | # Author(s): 7 | # Cedric Nugteren 8 | 9 | import numpy as np 10 | import pyopencl as cl 11 | from pyopencl.array import Array 12 | import pyclblast 13 | 14 | # Settings for this sample 15 | dtype = 'float32' 16 | alpha = 1.5 17 | n = 4 18 | 19 | print("# Setting up OpenCL") 20 | ctx = cl.create_some_context() 21 | queue = cl.CommandQueue(ctx) 22 | 23 | print("# Setting up Numpy arrays") 24 | x = np.random.rand(n).astype(dtype=dtype) 25 | y = np.random.rand(n).astype(dtype=dtype) 26 | 27 | print("# Setting up OpenCL arrays") 28 | clx = Array(queue, x.shape, x.dtype) 29 | cly = Array(queue, y.shape, y.dtype) 30 | clx.set(x) 31 | cly.set(y) 32 | 33 | print("# Example level-1 operation: AXPY") 34 | pyclblast.axpy(queue, n, clx, cly, alpha=alpha) 35 | queue.finish() 36 | print("# Result for vector y: %s" % cly.get()) 37 | print("# Expected result: %s" % (alpha * x + y)) 38 | -------------------------------------------------------------------------------- /src/pyclblast/samples/saxpybatched.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 4 | # This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. 
5 | # 6 | # Author(s): 7 | # Cedric Nugteren 8 | 9 | import numpy as np 10 | import pyopencl as cl 11 | from pyopencl.array import Array 12 | import pyclblast 13 | 14 | # Settings for this sample: 15 | batch_count = 2 16 | dtype = 'float32' 17 | alphas = [1.5, 1.0] 18 | n = 4 19 | 20 | print("# Setting up OpenCL") 21 | ctx = cl.create_some_context() 22 | queue = cl.CommandQueue(ctx) 23 | 24 | print("# Setting up Numpy arrays") 25 | x = np.random.rand(n * batch_count).astype(dtype=dtype) 26 | y = np.random.rand(n * batch_count).astype(dtype=dtype) 27 | 28 | print("# Batch offsets: next after each other") 29 | x_offsets = [0, n] 30 | y_offsets = [0, n] 31 | 32 | print("# Setting up OpenCL arrays") 33 | clx = Array(queue, x.shape, x.dtype) 34 | cly = Array(queue, y.shape, y.dtype) 35 | clx.set(x) 36 | cly.set(y) 37 | 38 | print("# Example level-1 batched operation: AXPY-batched") 39 | assert len(alphas) == len(x_offsets) == len(y_offsets) == batch_count 40 | pyclblast.axpyBatched(queue, n, clx, cly, alphas, x_offsets, y_offsets) 41 | queue.finish() 42 | 43 | print("# Full result for vector y: %s" % str(cly.get())) 44 | for i in range(batch_count): 45 | result = alphas[i] * x[x_offsets[i]:x_offsets[i] + n] + y[y_offsets[i]:y_offsets[i] + n] 46 | print("# Expected result batch #%d: %s" % (i, str(result))) 47 | -------------------------------------------------------------------------------- /src/pyclblast/samples/sgemm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 4 | # This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. 
5 | # 6 | # Author(s): 7 | # Cedric Nugteren 8 | 9 | import numpy as np 10 | import pyopencl as cl 11 | from pyopencl.array import Array 12 | import pyclblast 13 | 14 | # Settings for this sample 15 | dtype = 'float32' 16 | 17 | print("# Setting up OpenCL") 18 | ctx = cl.create_some_context() 19 | queue = cl.CommandQueue(ctx) 20 | 21 | print("# Setting up Numpy arrays") 22 | m, n, k = 2, 3, 4 23 | a = np.random.rand(m, k).astype(dtype=dtype) 24 | b = np.random.rand(k, n).astype(dtype=dtype) 25 | c = np.random.rand(m, n).astype(dtype=dtype) 26 | 27 | print("# Setting up OpenCL arrays") 28 | cla = Array(queue, a.shape, a.dtype) 29 | clb = Array(queue, b.shape, b.dtype) 30 | clc = Array(queue, c.shape, c.dtype) 31 | cla.set(a) 32 | clb.set(b) 33 | clc.set(c) 34 | 35 | print("# Example level-3 operation: GEMM") 36 | pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n) 37 | queue.finish() 38 | print("# Matrix C result: %s" % clc.get()) 39 | print("# Expected result: %s" % (np.dot(a, b))) 40 | -------------------------------------------------------------------------------- /src/pyclblast/samples/sgemv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 4 | # This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line. 
5 | # 6 | # Author(s): 7 | # Cedric Nugteren 8 | 9 | import numpy as np 10 | import pyopencl as cl 11 | from pyopencl.array import Array 12 | import pyclblast 13 | 14 | # Settings for this sample 15 | dtype = 'float32' 16 | m, n = 4, 3 17 | alpha = 1.0 18 | beta = 0.0 19 | 20 | print("# Setting up OpenCL") 21 | ctx = cl.create_some_context() 22 | queue = cl.CommandQueue(ctx) 23 | 24 | print("# Setting up Numpy arrays") 25 | a = np.random.rand(m, n).astype(dtype=dtype) 26 | x = np.random.rand(n).astype(dtype=dtype) 27 | y = np.random.rand(m).astype(dtype=dtype) 28 | 29 | print("# Setting up OpenCL arrays") 30 | cla = Array(queue, a.shape, a.dtype) 31 | clx = Array(queue, x.shape, x.dtype) 32 | cly = Array(queue, y.shape, y.dtype) 33 | cla.set(a) 34 | clx.set(x) 35 | cly.set(y) 36 | 37 | print("# Example level-2 operation: GEMV") 38 | pyclblast.gemv(queue, m, n, cla, clx, cly, a_ld=n, alpha=alpha, beta=beta) 39 | queue.finish() 40 | print("# Result for vector y: %s" % cly.get()) 41 | print("# Expected result: %s" % (alpha * np.dot(a, x) + beta * y)) 42 | -------------------------------------------------------------------------------- /src/pyclblast/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CNugteren/CLBlast/e2ee100d7b7a3fb1cb60eb541d787c07f137171b/src/pyclblast/test/__init__.py -------------------------------------------------------------------------------- /src/routines/level1/xamax.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xamax routine. The precision is implemented using a template argument. 11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XAMAX_H_ 15 | #define CLBLAST_ROUTINES_XAMAX_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xamax: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xamax(Queue &queue, EventPointer event, const std::string &name = "AMAX"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoAmax(const size_t n, 32 | const Buffer &imax_buffer, const size_t imax_offset, 33 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc); 34 | }; 35 | 36 | // ================================================================================================= 37 | } // namespace clblast 38 | 39 | // CLBLAST_ROUTINES_XAMAX_H_ 40 | #endif 41 | -------------------------------------------------------------------------------- /src/routines/level1/xasum.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xasum routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XASUM_H_ 15 | #define CLBLAST_ROUTINES_XASUM_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xasum: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xasum(Queue &queue, EventPointer event, const std::string &name = "ASUM"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoAsum(const size_t n, 32 | const Buffer &asum_buffer, const size_t asum_offset, 33 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc); 34 | }; 35 | 36 | // ================================================================================================= 37 | } // namespace clblast 38 | 39 | // CLBLAST_ROUTINES_XASUM_H_ 40 | #endif 41 | -------------------------------------------------------------------------------- /src/routines/level1/xaxpy.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xaxpy routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XAXPY_H_ 15 | #define CLBLAST_ROUTINES_XAXPY_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xaxpy: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xaxpy(Queue &queue, EventPointer event, const std::string &name = "AXPY"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoAxpy(const size_t n, const T alpha, 32 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 33 | const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); 34 | }; 35 | 36 | // ================================================================================================= 37 | } // namespace clblast 38 | 39 | // CLBLAST_ROUTINES_XAXPY_H_ 40 | #endif 41 | -------------------------------------------------------------------------------- /src/routines/level1/xcopy.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xcopy routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XCOPY_H_ 15 | #define CLBLAST_ROUTINES_XCOPY_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xcopy: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xcopy(Queue &queue, EventPointer event, const std::string &name = "COPY"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoCopy(const size_t n, 32 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 33 | const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); 34 | }; 35 | 36 | // ================================================================================================= 37 | } // namespace clblast 38 | 39 | // CLBLAST_ROUTINES_XCOPY_H_ 40 | #endif 41 | -------------------------------------------------------------------------------- /src/routines/level1/xdot.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xdot routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XDOT_H_ 15 | #define CLBLAST_ROUTINES_XDOT_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xdot: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xdot(Queue &queue, EventPointer event, const std::string &name = "DOT"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoDot(const size_t n, 32 | const Buffer &dot_buffer, const size_t dot_offset, 33 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 34 | const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, 35 | const bool do_conjugate = false); 36 | }; 37 | 38 | // ================================================================================================= 39 | } // namespace clblast 40 | 41 | // CLBLAST_ROUTINES_XDOT_H_ 42 | #endif 43 | -------------------------------------------------------------------------------- /src/routines/level1/xdotc.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xdotc routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XDOTC_H_ 15 | #define CLBLAST_ROUTINES_XDOTC_H_ 16 | 17 | #include "routines/level1/xdot.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xdotc: public Xdot { 25 | public: 26 | 27 | // Uses the regular Xdot routine 28 | using Xdot::DoDot; 29 | 30 | // Constructor 31 | Xdotc(Queue &queue, EventPointer event, const std::string &name = "DOTC"); 32 | 33 | // Templated-precision implementation of the routine 34 | void DoDotc(const size_t n, 35 | const Buffer &dot_buffer, const size_t dot_offset, 36 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 37 | const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); 38 | }; 39 | 40 | // ================================================================================================= 41 | } // namespace clblast 42 | 43 | // CLBLAST_ROUTINES_XDOTC_H_ 44 | #endif 45 | -------------------------------------------------------------------------------- /src/routines/level1/xdotu.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xdotu routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XDOTU_H_ 15 | #define CLBLAST_ROUTINES_XDOTU_H_ 16 | 17 | #include "routines/level1/xdot.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xdotu: public Xdot { 25 | public: 26 | 27 | // Uses the regular Xdot routine 28 | using Xdot::DoDot; 29 | 30 | // Constructor 31 | Xdotu(Queue &queue, EventPointer event, const std::string &name = "DOTU"); 32 | 33 | // Templated-precision implementation of the routine 34 | void DoDotu(const size_t n, 35 | const Buffer &dot_buffer, const size_t dot_offset, 36 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 37 | const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); 38 | }; 39 | 40 | // ================================================================================================= 41 | } // namespace clblast 42 | 43 | // CLBLAST_ROUTINES_XDOTU_H_ 44 | #endif 45 | -------------------------------------------------------------------------------- /src/routines/level1/xnrm2.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xnrm2 routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XNRM2_H_ 15 | #define CLBLAST_ROUTINES_XNRM2_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xnrm2: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xnrm2(Queue &queue, EventPointer event, const std::string &name = "NRM2"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoNrm2(const size_t n, 32 | const Buffer &nrm2_buffer, const size_t nrm2_offset, 33 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc); 34 | }; 35 | 36 | // ================================================================================================= 37 | } // namespace clblast 38 | 39 | // CLBLAST_ROUTINES_XNRM2_H_ 40 | #endif 41 | -------------------------------------------------------------------------------- /src/routines/level1/xscal.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xscal routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XSCAL_H_ 15 | #define CLBLAST_ROUTINES_XSCAL_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xscal: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xscal(Queue &queue, EventPointer event, const std::string &name = "SCAL"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoScal(const size_t n, const T alpha, 32 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc); 33 | }; 34 | 35 | // ================================================================================================= 36 | } // namespace clblast 37 | 38 | // CLBLAST_ROUTINES_XSCAL_H_ 39 | #endif 40 | -------------------------------------------------------------------------------- /src/routines/level1/xswap.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xswap routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XSWAP_H_ 15 | #define CLBLAST_ROUTINES_XSWAP_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xswap: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xswap(Queue &queue, EventPointer event, const std::string &name = "SWAP"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoSwap(const size_t n, 32 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 33 | const Buffer &y_buffer, const size_t y_offset, const size_t y_inc); 34 | }; 35 | 36 | // ================================================================================================= 37 | } // namespace clblast 38 | 39 | // CLBLAST_ROUTINES_XSWAP_H_ 40 | #endif 41 | -------------------------------------------------------------------------------- /src/routines/level2/xger.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xger routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XGER_H_ 15 | #define CLBLAST_ROUTINES_XGER_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xger: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xger(Queue &queue, EventPointer event, const std::string &name = "GER"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoGer(const Layout layout, 32 | const size_t m, const size_t n, 33 | const T alpha, 34 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 35 | const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, 36 | const Buffer &a_buffer, const size_t a_offset, const size_t a_ld); 37 | }; 38 | 39 | // ================================================================================================= 40 | } // namespace clblast 41 | 42 | // CLBLAST_ROUTINES_XGER_H_ 43 | #endif 44 | -------------------------------------------------------------------------------- /src/routines/level2/xhpr.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xhpr routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XHPR_H_ 15 | #define CLBLAST_ROUTINES_XHPR_H_ 16 | 17 | #include "routines/level2/xher.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xhpr: public Xher { 25 | public: 26 | 27 | // Uses the regular Xher routine 28 | using Xher::DoHer; 29 | 30 | // Constructor 31 | Xhpr(Queue &queue, EventPointer event, const std::string &name = "HPR"); 32 | 33 | // Templated-precision implementation of the routine 34 | void DoHpr(const Layout layout, const Triangle triangle, 35 | const size_t n, 36 | const U alpha, 37 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 38 | const Buffer &ap_buffer, const size_t ap_offset); 39 | }; 40 | 41 | // ================================================================================================= 42 | } // namespace clblast 43 | 44 | // CLBLAST_ROUTINES_XHPR_H_ 45 | #endif 46 | -------------------------------------------------------------------------------- /src/routines/level2/xspr.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xspr routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XSPR_H_ 15 | #define CLBLAST_ROUTINES_XSPR_H_ 16 | 17 | #include "routines/level2/xher.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xspr: public Xher { 25 | public: 26 | 27 | // Uses the regular Xher routine 28 | using Xher::DoHer; 29 | 30 | // Constructor 31 | Xspr(Queue &queue, EventPointer event, const std::string &name = "SPR"); 32 | 33 | // Templated-precision implementation of the routine 34 | void DoSpr(const Layout layout, const Triangle triangle, 35 | const size_t n, 36 | const T alpha, 37 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 38 | const Buffer &ap_buffer, const size_t ap_offset); 39 | }; 40 | 41 | // ================================================================================================= 42 | } // namespace clblast 43 | 44 | // CLBLAST_ROUTINES_XSPR_H_ 45 | #endif 46 | -------------------------------------------------------------------------------- /src/routines/levelx/xhad.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xhad routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XHAD_H_ 15 | #define CLBLAST_ROUTINES_XHAD_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template 24 | class Xhad: public Routine { 25 | public: 26 | 27 | // Constructor 28 | Xhad(Queue &queue, EventPointer event, const std::string &name = "HAD"); 29 | 30 | // Templated-precision implementation of the routine 31 | void DoHad(const size_t n, const T alpha, 32 | const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 33 | const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, const T beta, 34 | const Buffer &z_buffer, const size_t z_offset, const size_t z_inc); 35 | }; 36 | 37 | // ================================================================================================= 38 | } // namespace clblast 39 | 40 | // CLBLAST_ROUTINES_XHAD_H_ 41 | #endif 42 | -------------------------------------------------------------------------------- /src/routines/levelx/xinvert.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file contains all the common code to perform (partial) matrix inverting. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XINVERT_H_ 15 | #define CLBLAST_ROUTINES_XINVERT_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | template 23 | class Xinvert: public Routine { 24 | public: 25 | 26 | // Constructor 27 | Xinvert(Queue &queue, EventPointer event, const std::string &name = "INVERT"); 28 | 29 | // Inverts diagonal square blocks of a matrix 30 | void InvertMatrixDiagonalBlocks(const Layout layout, const Triangle triangle, const Diagonal diag, 31 | const size_t n, const size_t block_size, 32 | const Buffer &src, const size_t offset, const size_t ld_src, 33 | Buffer &dest); 34 | }; 35 | 36 | // ================================================================================================= 37 | } // namespace clblast 38 | 39 | // CLBLAST_ROUTINES_XINVERT_H_ 40 | #endif 41 | -------------------------------------------------------------------------------- /src/routines/levelx/xomatcopy.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file implements the Xomatcopy routine. The precision is implemented using a template argument. 
11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_ROUTINES_XOMATCOPY_H_ 15 | #define CLBLAST_ROUTINES_XOMATCOPY_H_ 16 | 17 | #include "routine.hpp" 18 | 19 | namespace clblast { 20 | // ================================================================================================= 21 | 22 | // See comment at top of file for a description of the class 23 | template <typename T> 24 | class Xomatcopy: public Routine { 25 | public: 26 | 27 | // Constructor (the routine name defaults to "OMATCOPY") 28 | Xomatcopy(Queue &queue, EventPointer event, const std::string &name = "OMATCOPY"); 29 | 30 | // Templated-precision implementation of the routine (T: type of alpha and the buffers) 31 | void DoOmatcopy(const Layout layout, const Transpose a_transpose, 32 | const size_t m, const size_t n, const T alpha, 33 | const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, 34 | const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld); 35 | }; 36 | 37 | // ================================================================================================= 38 | } // namespace clblast 39 | 40 | // CLBLAST_ROUTINES_XOMATCOPY_H_ 41 | #endif 42 | -------------------------------------------------------------------------------- /src/utilities/compile.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file contains the CLBlast way to compile a kernel from source, used for the library and for 11 | // the auto-tuners.
12 | // 13 | // ================================================================================================= 14 | 15 | #ifndef CLBLAST_UTILITIES_COMPILE_H_ 16 | #define CLBLAST_UTILITIES_COMPILE_H_ 17 | 18 | #include <string> 19 | #include <vector> 20 | 21 | #include "utilities/utilities.hpp" 22 | 23 | namespace clblast { 24 | // ================================================================================================= 25 | 26 | // Compiles a program from source code ('silent' defaults to false) 27 | std::shared_ptr<Program> CompileFromSource( 28 | const std::string &source_string, const Precision precision, 29 | const std::string &routine_name, 30 | const Device& device, const Context& context, 31 | std::vector<std::string>& options, 32 | const size_t run_preprocessor, // 0: platform dependent, 1: always, 2: never 33 | const bool silent = false); 34 | 35 | // ================================================================================================= 36 | } // namespace clblast 37 | 38 | // CLBLAST_UTILITIES_COMPILE_H_ 39 | #endif 40 | -------------------------------------------------------------------------------- /src/utilities/msvc.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file provides macros and definitions to make compilation work on Microsoft Visual Studio, 11 | // in particular for versions older than 2015 with limited C++11 support.
12 | // MSVC++ 14.0 _MSC_VER == 1900 (Visual Studio 2015) 13 | // MSVC++ 12.0 _MSC_VER == 1800 (Visual Studio 2013) 14 | // MSVC++ 11.0 _MSC_VER == 1700 (Visual Studio 2012) 15 | // MSVC++ 10.0 _MSC_VER == 1600 (Visual Studio 2010) 16 | // MSVC++ 9.0 _MSC_VER == 1500 (Visual Studio 2008) 17 | // 18 | // ================================================================================================= 19 | 20 | #ifndef CLBLAST_MSVC_HPP_ 21 | #define CLBLAST_MSVC_HPP_ 22 | 23 | namespace clblast { 24 | // ================================================================================================= 25 | #ifdef _MSC_VER 26 | 27 | // No support for constexpr prior to Visual Studio 2015 (_MSC_VER < 1900): replace it by const. This 28 | // only works with constants, not with constexpr functions (unused in this project). Macros are not scoped by namespaces, so the replacement is not limited to clblast. 29 | #if _MSC_VER < 1900 30 | #define constexpr const 31 | #endif 32 | 33 | // _MSC_VER 34 | #endif 35 | // ================================================================================================= 36 | } // namespace clblast 37 | 38 | // CLBLAST_MSVC_HPP_ 39 | #endif 40 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xamax.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xamax.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXamax<float>, float, float>(argc, argv, false, "iSAMAX"); 19 | errors += clblast::RunTests<clblast::TestXamax<double>, double, double>(argc, argv, true, "iDAMAX"); 20 | errors += clblast::RunTests<clblast::TestXamax<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "iCAMAX"); 21 | errors += clblast::RunTests<clblast::TestXamax<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "iZAMAX"); 22 | errors += clblast::RunTests<clblast::TestXamax<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "iHAMAX"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xasum.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xasum.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXasum<float>, float, float>(argc, argv, false, "SASUM"); 19 | errors += clblast::RunTests<clblast::TestXasum<double>, double, double>(argc, argv, true, "DASUM"); 20 | errors += clblast::RunTests<clblast::TestXasum<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "ScASUM"); 21 | errors += clblast::RunTests<clblast::TestXasum<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "DzASUM"); 22 | errors += clblast::RunTests<clblast::TestXasum<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HASUM"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xaxpy.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xaxpy.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXaxpy<float>, float, float>(argc, argv, false, "SAXPY"); 19 | errors += clblast::RunTests<clblast::TestXaxpy<double>, double, double>(argc, argv, true, "DAXPY"); 20 | errors += clblast::RunTests<clblast::TestXaxpy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CAXPY"); 21 | errors += clblast::RunTests<clblast::TestXaxpy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPY"); 22 | errors += clblast::RunTests<clblast::TestXaxpy<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HAXPY"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xcopy.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xcopy.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXcopy<float>, float, float>(argc, argv, false, "SCOPY"); 19 | errors += clblast::RunTests<clblast::TestXcopy<double>, double, double>(argc, argv, true, "DCOPY"); 20 | errors += clblast::RunTests<clblast::TestXcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CCOPY"); 21 | errors += clblast::RunTests<clblast::TestXcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZCOPY"); 22 | errors += clblast::RunTests<clblast::TestXcopy<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HCOPY"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xdot.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xdot.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXdot<float>, float, float>(argc, argv, false, "SDOT"); 19 | errors += clblast::RunTests<clblast::TestXdot<double>, double, double>(argc, argv, true, "DDOT"); 20 | errors += clblast::RunTests<clblast::TestXdot<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HDOT"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xdotc.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xdotc.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXdotc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CDOTC"); 19 | errors += clblast::RunTests<clblast::TestXdotc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTC"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xdotu.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xdotu.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXdotu<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CDOTU"); 19 | errors += clblast::RunTests<clblast::TestXdotu<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTU"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xnrm2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xnrm2.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXnrm2<float>, float, float>(argc, argv, false, "SNRM2"); 19 | errors += clblast::RunTests<clblast::TestXnrm2<double>, double, double>(argc, argv, true, "DNRM2"); 20 | errors += clblast::RunTests<clblast::TestXnrm2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "ScNRM2"); 21 | errors += clblast::RunTests<clblast::TestXnrm2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "DzNRM2"); 22 | errors += clblast::RunTests<clblast::TestXnrm2<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HNRM2"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xrot.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xrot.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXrot<float>, float, float>(argc, argv, false, "SROT"); 19 | errors += clblast::RunTests<clblast::TestXrot<double>, double, double>(argc, argv, true, "DROT"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xrotg.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xrotg.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXrotg<float>, float, float>(argc, argv, false, "SROTG"); 19 | errors += clblast::RunTests<clblast::TestXrotg<double>, double, double>(argc, argv, true, "DROTG"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xrotm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xrotm.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXrotm<float>, float, float>(argc, argv, false, "SROTM"); 19 | errors += clblast::RunTests<clblast::TestXrotm<double>, double, double>(argc, argv, true, "DROTM"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xrotmg.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xrotmg.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXrotmg<float>, float, float>(argc, argv, false, "SROTMG"); 19 | errors += clblast::RunTests<clblast::TestXrotmg<double>, double, double>(argc, argv, true, "DROTMG"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xscal.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xscal.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXscal<float>, float, float>(argc, argv, false, "SSCAL"); 19 | errors += clblast::RunTests<clblast::TestXscal<double>, double, double>(argc, argv, true, "DSCAL"); 20 | errors += clblast::RunTests<clblast::TestXscal<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSCAL"); 21 | errors += clblast::RunTests<clblast::TestXscal<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSCAL"); 22 | errors += clblast::RunTests<clblast::TestXscal<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HSCAL"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level1/xswap.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level1/xswap.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXswap<float>, float, float>(argc, argv, false, "SSWAP"); 19 | errors += clblast::RunTests<clblast::TestXswap<double>, double, double>(argc, argv, true, "DSWAP"); 20 | errors += clblast::RunTests<clblast::TestXswap<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSWAP"); 21 | errors += clblast::RunTests<clblast::TestXswap<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSWAP"); 22 | errors += clblast::RunTests<clblast::TestXswap<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HSWAP"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xgbmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xgbmv.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXgbmv<float>, float, float>(argc, argv, false, "SGBMV"); 19 | errors += clblast::RunTests<clblast::TestXgbmv<double>, double, double>(argc, argv, true, "DGBMV"); 20 | errors += clblast::RunTests<clblast::TestXgbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGBMV"); 21 | errors += clblast::RunTests<clblast::TestXgbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGBMV"); 22 | errors += clblast::RunTests<clblast::TestXgbmv<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HGBMV"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xgemv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xgemv.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXgemv<float>, float, float>(argc, argv, false, "SGEMV"); 19 | errors += clblast::RunTests<clblast::TestXgemv<double>, double, double>(argc, argv, true, "DGEMV"); 20 | errors += clblast::RunTests<clblast::TestXgemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMV"); 21 | errors += clblast::RunTests<clblast::TestXgemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMV"); 22 | errors += clblast::RunTests<clblast::TestXgemv<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HGEMV"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xger.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xger.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXger<float>, float, float>(argc, argv, false, "SGER"); 19 | errors += clblast::RunTests<clblast::TestXger<double>, double, double>(argc, argv, true, "DGER"); 20 | errors += clblast::RunTests<clblast::TestXger<clblast::half>, clblast::half, clblast::half>(argc, argv, true, "HGER"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xgerc.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xgerc.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXgerc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CGERC"); 19 | errors += clblast::RunTests<clblast::TestXgerc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGERC"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xgeru.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xgeru.hpp" 14 | 15 | // Main function (not within the clblast namespace); exits with 1 if any test reported errors 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests<clblast::TestXgeru<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CGERU"); 19 | errors += clblast::RunTests<clblast::TestXgeru<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGERU"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xhbmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xhbmv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHBMV"); 19 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHBMV"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xhemv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xhemv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHEMV"); 19 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMV"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xher.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xher.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, float>(argc, argv, false, "CHER"); 19 | errors += clblast::RunTests, clblast::double2, double>(argc, argv, true, "ZHER"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xher2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xher2.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHER2"); 19 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHER2"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xhpmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xhpmv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHPMV"); 19 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHPMV"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xhpr.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xhpr.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, float>(argc, argv, false, "CHPR"); 19 | errors += clblast::RunTests, clblast::double2, double>(argc, argv, true, "ZHPR"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xhpr2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xhpr2.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHPR2"); 19 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHPR2"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xsbmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xsbmv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSBMV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSBMV"); 20 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSBMV"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xspmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xspmv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSPMV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSPMV"); 20 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSPMV"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xspr.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xspr.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSPR"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSPR"); 20 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSPR"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xspr2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xspr2.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSPR2"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSPR2"); 20 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSPR2"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xsymv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xsymv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSYMV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSYMV"); 20 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSYMV"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xsyr.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xsyr.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSYR"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSYR"); 20 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSYR"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xsyr2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xsyr2.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSYR2"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSYR2"); 20 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSYR2"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xtbmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xtbmv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "STBMV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DTBMV"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTBMV"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTBMV"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HTBMV"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xtbsv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xtbsv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "STBSV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DTBSV"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTBSV"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTBSV"); 22 | if (errors > 0) { return 1; } else { return 0; } 23 | } 24 | 25 | // ================================================================================================= 26 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xtpmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xtpmv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "STPMV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DTPMV"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTPMV"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTPMV"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HTPMV"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xtpsv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xtpsv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "STPSV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DTPSV"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTPSV"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTPSV"); 22 | if (errors > 0) { return 1; } else { return 0; } 23 | } 24 | 25 | // ================================================================================================= 26 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xtrmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xtrmv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "STRMV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DTRMV"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTRMV"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMV"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HTRMV"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level2/xtrsv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level2/xtrsv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "STRSV"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DTRSV"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTRSV"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSV"); 22 | if (errors > 0) { return 1; } else { return 0; } 23 | } 24 | 25 | // ================================================================================================= 26 | -------------------------------------------------------------------------------- /test/correctness/routines/level3/xhemm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level3/xhemm.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHEMM"); 19 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMM"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level3/xher2k.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level3/xher2k.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, float>(argc, argv, false, "CHER2K"); 19 | errors += clblast::RunTests, clblast::double2, double>(argc, argv, true, "ZHER2K"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level3/xherk.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level3/xherk.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, clblast::float2, float>(argc, argv, false, "CHERK"); 19 | errors += clblast::RunTests, clblast::double2, double>(argc, argv, true, "ZHERK"); 20 | if (errors > 0) { return 1; } else { return 0; } 21 | } 22 | 23 | // ================================================================================================= 24 | -------------------------------------------------------------------------------- /test/correctness/routines/level3/xsymm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level3/xsymm.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSYMM"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSYMM"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CSYMM"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZSYMM"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSYMM"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level3/xsyr2k.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level3/xsyr2k.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SSYR2K, DSYR2K, CSYR2K, ZSYR2K, HSYR2K), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSYR2K"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSYR2K"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CSYR2K"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZSYR2K"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSYR2K"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level3/xsyrk.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level3/xsyrk.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SSYRK, DSYRK, CSYRK, ZSYRK, HSYRK), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SSYRK"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DSYRK"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CSYRK"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZSYRK"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HSYRK"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level3/xtrmm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level3/xtrmm.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (STRMM, DTRMM, CTRMM, ZTRMM, HTRMM), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "STRMM"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DTRMM"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTRMM"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMM"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HTRMM"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/level3/xtrsm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/level3/xtrsm.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (STRSM, DTRSM, CTRSM, ZTRSM; no half-precision call is made here), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "STRSM"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DTRSM"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTRSM"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSM"); 22 | if (errors > 0) { return 1; } else { return 0; } 23 | } 24 | 25 | // ================================================================================================= 26 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xaxpybatched.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xaxpybatched.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SAXPYBATCHED, DAXPYBATCHED, CAXPYBATCHED, ZAXPYBATCHED, HAXPYBATCHED), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SAXPYBATCHED"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DAXPYBATCHED"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CAXPYBATCHED"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPYBATCHED"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HAXPYBATCHED"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xcol2im.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xcol2im.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SCOL2IM, DCOL2IM, CCOL2IM, ZCOL2IM, HCOL2IM), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SCOL2IM"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DCOL2IM"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CCOL2IM"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZCOL2IM"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HCOL2IM"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xconvgemm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xconvgemm.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SCONVGEMM, DCONVGEMM, HCONVGEMM; no complex-precision calls are made here), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SCONVGEMM"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DCONVGEMM"); 20 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HCONVGEMM"); 21 | if (errors > 0) { return 1; } else { return 0; } 22 | } 23 | 24 | // ================================================================================================= 25 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xgemmbatched.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xgemmbatched.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SGEMMBATCHED, DGEMMBATCHED, CGEMMBATCHED, ZGEMMBATCHED, HGEMMBATCHED), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SGEMMBATCHED"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMMBATCHED"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMMBATCHED"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMMBATCHED"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HGEMMBATCHED"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xgemmstridedbatched.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xgemmstridedbatched.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SGEMMSTRIDEDBATCHED, DGEMMSTRIDEDBATCHED, CGEMMSTRIDEDBATCHED, ZGEMMSTRIDEDBATCHED, HGEMMSTRIDEDBATCHED), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SGEMMSTRIDEDBATCHED"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMMSTRIDEDBATCHED"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMMSTRIDEDBATCHED"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMMSTRIDEDBATCHED"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HGEMMSTRIDEDBATCHED"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xhad.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xhad.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SHAD, DHAD, CHAD, ZHAD, HHAD), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SHAD"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DHAD"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CHAD"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHAD"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HHAD"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xim2col.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xim2col.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SIM2COL, DIM2COL, CIM2COL, ZIM2COL, HIM2COL), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SIM2COL"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DIM2COL"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CIM2COL"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZIM2COL"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HIM2COL"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xinvert.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xinvert.hpp" 14 | 15 | // Shortcuts to the clblast namespace 16 | using float2 = clblast::float2; 17 | using double2 = clblast::double2; 18 | 19 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SINVERT, DINVERT, CINVERT, ZINVERT, HINVERT), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): 'half' is used unqualified in the last call although only float2/double2 shortcuts are declared above - presumably it is provided by an included header; verify. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 20 | int main(int argc, char *argv[]) { 21 | auto errors = size_t{0}; 22 | errors += clblast::RunTests, float, float>(argc, argv, false, "SINVERT"); 23 | errors += clblast::RunTests, double, double>(argc, argv, true, "DINVERT"); 24 | errors += clblast::RunTests, float2, float2>(argc, argv, true, "CINVERT"); 25 | errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZINVERT"); 26 | errors += clblast::RunTests, half, half>(argc, argv, true, "HINVERT"); 27 | if (errors > 0) { return 1; } else { return 0; } 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/correctness/routines/levelx/xomatcopy.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/correctness/testblas.hpp" 13 | #include "test/routines/levelx/xomatcopy.hpp" 14 | 15 | // Main function (not within the clblast namespace): calls clblast::RunTests once per routine name below (SOMATCOPY, DOMATCOPY, COMATCOPY, ZOMATCOPY, HOMATCOPY), adds each returned value to 'errors', and exits with 1 if the total is non-zero, 0 otherwise. Only the first call passes 'false' as third argument; what that flag means is defined by RunTests, not visible here. NOTE(review): each call reads 'RunTests, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | auto errors = size_t{0}; 18 | errors += clblast::RunTests, float, float>(argc, argv, false, "SOMATCOPY"); 19 | errors += clblast::RunTests, double, double>(argc, argv, true, "DOMATCOPY"); 20 | errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "COMATCOPY"); 21 | errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZOMATCOPY"); 22 | errors += clblast::RunTests, clblast::half, clblast::half>(argc, argv, true, "HOMATCOPY"); 23 | if (errors > 0) { return 1; } else { return 0; } 24 | } 25 | 26 | // ================================================================================================= 27 | -------------------------------------------------------------------------------- /test/performance/routines/level1/xdot.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level1/xdot.hpp" 14 | 15 | // Main function (not within the clblast namespace): retrieves the command-line arguments, picks a precision via clblast::GetPrecision (kSingle is passed as second argument, presumably the fallback - TODO confirm) and runs the matching clblast::RunClient for kHalf, kSingle or kDouble; kComplexSingle and kComplexDouble throw std::runtime_error, which main does not catch. Returns 0 once the client call returns. NOTE(review): each call reads 'RunClient, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level1/xdotc.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level1/xdotc.hpp" 14 | 15 | // Main function (not within the clblast namespace): retrieves the command-line arguments, picks a precision via clblast::GetPrecision (kComplexSingle is passed as second argument, presumably the fallback - TODO confirm) and runs the matching clblast::RunClient for kComplexSingle or kComplexDouble; kHalf, kSingle and kDouble throw std::runtime_error, which main does not catch. Returns 0 once the client call returns. NOTE(review): each call reads 'RunClient, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level1/xdotu.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level1/xdotu.hpp" 14 | 15 | // Main function (not within the clblast namespace): retrieves the command-line arguments, picks a precision via clblast::GetPrecision (kComplexSingle is passed as second argument, presumably the fallback - TODO confirm) and runs the matching clblast::RunClient for kComplexSingle or kComplexDouble; kHalf, kSingle and kDouble throw std::runtime_error, which main does not catch. Returns 0 once the client call returns. NOTE(review): each call reads 'RunClient, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level1/xrot.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level1/xrot.hpp" 14 | 15 | // Main function (not within the clblast namespace): retrieves the command-line arguments, picks a precision via clblast::GetPrecision (kSingle is passed as second argument, presumably the fallback - TODO confirm) and runs the matching clblast::RunClient for kSingle or kDouble; kHalf, kComplexSingle and kComplexDouble throw std::runtime_error, which main does not catch. Returns 0 once the client call returns. NOTE(review): each call reads 'RunClient, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: 21 | clblast::RunClient, float, float>(argc, argv); break; 22 | case clblast::Precision::kDouble: 23 | clblast::RunClient, double, double>(argc, argv); break; 24 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 25 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level1/xrotg.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level1/xrotg.hpp" 14 | 15 | // Main function (not within the clblast namespace): retrieves the command-line arguments, picks a precision via clblast::GetPrecision (kSingle is passed as second argument, presumably the fallback - TODO confirm) and runs the matching clblast::RunClient for kSingle or kDouble; kHalf, kComplexSingle and kComplexDouble throw std::runtime_error, which main does not catch. Returns 0 once the client call returns. NOTE(review): each call reads 'RunClient, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: 21 | clblast::RunClient, float, float>(argc, argv); break; 22 | case clblast::Precision::kDouble: 23 | clblast::RunClient, double, double>(argc, argv); break; 24 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 25 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level1/xrotm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level1/xrotm.hpp" 14 | 15 | // Main function (not within the clblast namespace): retrieves the command-line arguments, picks a precision via clblast::GetPrecision (kSingle is passed as second argument, presumably the fallback - TODO confirm) and runs the matching clblast::RunClient for kSingle or kDouble; kHalf, kComplexSingle and kComplexDouble throw std::runtime_error, which main does not catch. Returns 0 once the client call returns. NOTE(review): each call reads 'RunClient, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: 21 | clblast::RunClient, float, float>(argc, argv); break; 22 | case clblast::Precision::kDouble: 23 | clblast::RunClient, double, double>(argc, argv); break; 24 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 25 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level1/xrotmg.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level1/xrotmg.hpp" 14 | 15 | // Main function (not within the clblast namespace): retrieves the command-line arguments, picks a precision via clblast::GetPrecision (kSingle is passed as second argument, presumably the fallback - TODO confirm) and runs the matching clblast::RunClient for kSingle or kDouble; kHalf, kComplexSingle and kComplexDouble throw std::runtime_error, which main does not catch. Returns 0 once the client call returns. NOTE(review): each call reads 'RunClient, ...>' with no opening '<', so the first template argument looks lost in this copy - confirm against the original file. 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: 21 | clblast::RunClient, float, float>(argc, argv); break; 22 | case clblast::Precision::kDouble: 23 | clblast::RunClient, double, double>(argc, argv); break; 24 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 25 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xger.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xger.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXger<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXger<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXger<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xgerc.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xgerc.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXgerc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXgerc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xgeru.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xgeru.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXgeru<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXgeru<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xhbmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xhbmv.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXhbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXhbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xhemv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xhemv.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXhemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXhemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xher.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xher.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXher<clblast::float2,float>, clblast::float2, float>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXher<clblast::double2,double>, clblast::double2, double>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xher2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xher2.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXher2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXher2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xhpmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xhpmv.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXhpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXhpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xhpr.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xhpr.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXhpr<clblast::float2,float>, clblast::float2, float>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXhpr<clblast::double2,double>, clblast::double2, double>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xhpr2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xhpr2.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXhpr2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXhpr2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xsbmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xsbmv.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXsbmv<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXsbmv<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXsbmv<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xspmv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xspmv.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXspmv<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXspmv<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXspmv<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xspr.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xspr.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXspr<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXspr<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXspr<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xspr2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xspr2.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXspr2<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXspr2<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXspr2<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xsymv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xsymv.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXsymv<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXsymv<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXsymv<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xsyr.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xsyr.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXsyr<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXsyr<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXsyr<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xsyr2.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xsyr2.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXsyr2<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXsyr2<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXsyr2<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xtbsv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xtbsv.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: 21 | clblast::RunClient<clblast::TestXtbsv<float>, float, float>(argc, argv); break; 22 | case clblast::Precision::kDouble: 23 | clblast::RunClient<clblast::TestXtbsv<double>, double, double>(argc, argv); break; 24 | case clblast::Precision::kComplexSingle: 25 | clblast::RunClient<clblast::TestXtbsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 26 | case clblast::Precision::kComplexDouble: 27 | clblast::RunClient<clblast::TestXtbsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 28 | } 29 | return 0; 30 | } 31 | 32 | // ================================================================================================= 33 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xtpsv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xtpsv.hpp" 14 | 15 | // Main function (not within the clblast namespace): dispatches on the requested precision 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: 21 | clblast::RunClient<clblast::TestXtpsv<float>, float, float>(argc, argv); break; 22 | case clblast::Precision::kDouble: 23 | clblast::RunClient<clblast::TestXtpsv<double>, double, double>(argc, argv); break; 24 | case clblast::Precision::kComplexSingle: 25 | clblast::RunClient<clblast::TestXtpsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 26 | case clblast::Precision::kComplexDouble: 27 | clblast::RunClient<clblast::TestXtpsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 28 | } 29 | return 0; 30 | } 31 | 32 | // ================================================================================================= 33 | -------------------------------------------------------------------------------- /test/performance/routines/level2/xtrsv.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line.
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level2/xtrsv.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: 21 | clblast::RunClient<clblast::TestXtrsv<float>, float, float>(argc, argv); break; 22 | case clblast::Precision::kDouble: 23 | clblast::RunClient<clblast::TestXtrsv<double>, double, double>(argc, argv); break; 24 | case clblast::Precision::kComplexSingle: 25 | clblast::RunClient<clblast::TestXtrsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 26 | case clblast::Precision::kComplexDouble: 27 | clblast::RunClient<clblast::TestXtrsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 28 | } 29 | return 0; 30 | } 31 | 32 | // ================================================================================================= 33 | -------------------------------------------------------------------------------- /test/performance/routines/level3/xhemm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level3/xhemm.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXhemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXhemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level3/xher2k.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level3/xher2k.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXher2k<clblast::float2,float>, clblast::float2, float>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXher2k<clblast::double2,double>, clblast::double2, double>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level3/xherk.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level3/xherk.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kComplexSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); 21 | case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); 22 | case clblast::Precision::kComplexSingle: 23 | clblast::RunClient<clblast::TestXherk<clblast::float2,float>, clblast::float2, float>(argc, argv); break; 24 | case clblast::Precision::kComplexDouble: 25 | clblast::RunClient<clblast::TestXherk<clblast::double2,double>, clblast::double2, double>(argc, argv); break; 26 | } 27 | return 0; 28 | } 29 | 30 | // ================================================================================================= 31 | -------------------------------------------------------------------------------- /test/performance/routines/level3/xtrsm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/level3/xtrsm.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); 20 | case clblast::Precision::kSingle: 21 | clblast::RunClient<clblast::TestXtrsm<float>, float, float>(argc, argv); break; 22 | case clblast::Precision::kDouble: 23 | clblast::RunClient<clblast::TestXtrsm<double>, double, double>(argc, argv); break; 24 | case clblast::Precision::kComplexSingle: 25 | clblast::RunClient<clblast::TestXtrsm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break; 26 | case clblast::Precision::kComplexDouble: 27 | clblast::RunClient<clblast::TestXtrsm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break; 28 | } 29 | return 0; 30 | } 31 | 32 | // ================================================================================================= 33 | -------------------------------------------------------------------------------- /test/performance/routines/levelx/xconvgemm.cpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // ================================================================================================= 11 | 12 | #include "test/performance/client.hpp" 13 | #include "test/routines/levelx/xconvgemm.hpp" 14 | 15 | // Main function (not within the clblast namespace) 16 | int main(int argc, char *argv[]) { 17 | const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); 18 | switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { 19 | case clblast::Precision::kHalf: 20 | clblast::RunClient<clblast::TestXconvgemm<clblast::half>, clblast::half, clblast::half>(argc, argv); break; 21 | case clblast::Precision::kSingle: 22 | clblast::RunClient<clblast::TestXconvgemm<float>, float, float>(argc, argv); break; 23 | case clblast::Precision::kDouble: 24 | clblast::RunClient<clblast::TestXconvgemm<double>, double, double>(argc, argv); break; 25 | case clblast::Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); 26 | case clblast::Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); 27 | } 28 | return 0; 29 | } 30 | 31 | // ================================================================================================= 32 | -------------------------------------------------------------------------------- /test/routines/common.hpp: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 
6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file contains all the common includes for the clients and tests 11 | // 12 | // ================================================================================================= 13 | 14 | #ifndef CLBLAST_TEST_ROUTINES_COMMON_H_ 15 | #define CLBLAST_TEST_ROUTINES_COMMON_H_ 16 | 17 | #include 18 | #include 19 | 20 | #include "utilities/utilities.hpp" 21 | #include "test/test_utilities.hpp" 22 | 23 | #ifdef CLBLAST_REF_CLBLAS 24 | #include "test/wrapper_clblas.hpp" 25 | #endif 26 | #ifdef CLBLAST_REF_CBLAS 27 | #include "test/wrapper_cblas.hpp" 28 | #endif 29 | #include "test/wrapper_cuda.hpp" 30 | #ifdef CLBLAST_REF_CUBLAS 31 | #include "test/wrapper_cublas.hpp" 32 | #endif 33 | 34 | // ================================================================================================= 35 | 36 | // CLBLAST_TEST_ROUTINES_COMMON_H_ 37 | #endif 38 | -------------------------------------------------------------------------------- /vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ "opencl" ], 3 | "builtin-baseline": "752799690be680e3b16ceb8e5a4db2f683af6ae3", 4 | "overrides": [ 5 | { "name": "opencl", "version": "v2024.05.08" } 6 | ] 7 | } --------------------------------------------------------------------------------