├── .gitattributes ├── .gitignore ├── .travis.yml ├── CHANGELOG ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── appveyor.yml ├── doc ├── README-BinaryCacheOnDisk.txt ├── README-FunctorConcepts.txt ├── README-HowToIntroduceFunctors.txt ├── README-TransformASolverIntoAFunctor.txt ├── clBLAS.doxy └── performance │ ├── clBLAS_2.6.0 │ ├── S9150 │ │ ├── README.txt │ │ ├── cgemmNT_S9150_14.50.2_2.6.0_8.csv │ │ ├── dgemmNT_S9150_14.50.2_2.6.0_8.csv │ │ ├── dgemm_32.csv │ │ ├── dgemm_96.csv │ │ ├── dtrsm_192.csv │ │ ├── generate_graphs.sh │ │ ├── peak_dp.csv │ │ ├── peak_sp.csv │ │ ├── sgemmNT_S9150_14.50.2_2.6.0_8.csv │ │ ├── sgemm_32.csv │ │ ├── zgemmNT_S9150_14.50.2_2.6.0_8.csv │ │ ├── zgemm_32.csv │ │ └── zgemm_64.csv │ └── W9100 │ │ ├── README.txt │ │ ├── clblas_sgemmNT_w9100_14502.csv │ │ ├── dgemm_32.csv │ │ ├── dgemm_96.csv │ │ ├── dtrsm_w9100_14502.csv │ │ ├── peak_dp.csv │ │ ├── peak_sp.csv │ │ ├── zgemm_32.csv │ │ └── zgemm_64.csv │ ├── clBLAS_2.7.1 │ ├── S9150 │ │ ├── cgemmNT_S9150_14.50.2_2.7.1_8.csv │ │ ├── dgemmNT_S9150_14.50.2_2.7.1_8.csv │ │ ├── sgemmNT_S9150_14.50.2_2.7.1_8.csv │ │ └── zgemmNT_S9150_14.50.2_2.7.1_8.csv │ └── W9100 │ │ ├── clblas271_w9100_dtrsm_col_left_lower_unit_14502.csv │ │ ├── clblas271_w9100_dtrsm_col_left_upper_unit_14502.csv │ │ ├── clblas271_w9100_dtrsm_col_right_lower_unit_14502.csv │ │ └── clblas271_w9100_dtrsm_col_right_upper_unit_14502.csv │ ├── clBLAS_2.9.0 │ └── FIJINANO │ │ ├── clblas290_fijinano_cgemm_col_nt_1520.csv │ │ ├── clblas290_fijinano_dgemm_col_nt_1520.csv │ │ ├── clblas290_fijinano_sgemm_col_nt_1520.csv │ │ └── clblas290_fijinano_zgemm_col_nt_1520.csv │ ├── cuBLAS_7.0 │ └── Tesla_K40 │ │ ├── README.txt │ │ ├── dgemm.csv │ │ ├── dtrsm.csv │ │ ├── peak_dp.csv │ │ ├── peak_sp.csv │ │ ├── sgemm.csv │ │ └── zgemm.csv │ └── cuBLAS_7.5 │ └── Tesla_K40 │ ├── cublas75_k40_dtrsm_col_left_lower_unit.csv │ ├── cublas75_k40_dtrsm_col_left_upper_unit.csv │ ├── cublas75_k40_dtrsm_col_right_lower_unit.csv │ ├── 
cublas75_k40_dtrsm_col_right_upper_unit.csv │ ├── cublas_cgemm_8.csv │ ├── cublas_dgemm_8.csv │ ├── cublas_sgemm_8.csv │ ├── cublas_zgemm_8.csv │ ├── peak_dp.csv │ └── peak_sp.csv └── src ├── CMakeLists.txt ├── FindNetlib.cmake ├── FindOpenCL.cmake ├── clAmdBlas.h ├── clAmdBlas.version.h ├── clBLAS-complex.h ├── clBLAS.def ├── clBLAS.h ├── clBLAS.version.h.in ├── clBLASConfig.cmake.in ├── clBLASConfigVersion.cmake.in ├── client ├── CMakeLists.txt ├── clGemm.h ├── clfunc_common.hpp ├── clfunc_xgemm.hpp ├── clfunc_xgemv.hpp ├── clfunc_xger.hpp ├── clfunc_xgerc.hpp ├── clfunc_xgeru.hpp ├── clfunc_xhemm.hpp ├── clfunc_xhemv.hpp ├── clfunc_xher.hpp ├── clfunc_xher2.hpp ├── clfunc_xher2k.hpp ├── clfunc_xherk.hpp ├── clfunc_xsymm.hpp ├── clfunc_xsymv.hpp ├── clfunc_xsyr.hpp ├── clfunc_xsyr2.hpp ├── clfunc_xsyr2k.hpp ├── clfunc_xsyrk.hpp ├── clfunc_xtrmm.hpp ├── clfunc_xtrmv.hpp ├── clfunc_xtrsm.hpp ├── clfunc_xtrsv.hpp ├── client.cpp ├── ctimer.h ├── makefile ├── statisticalTimer.cpp ├── statisticalTimer.h ├── stdafx.cpp ├── stdafx.h ├── targetver.h ├── testPerfWrapper.cpp ├── timer.cpp └── timer.hpp ├── flags_public.txt ├── include ├── binary_lookup.h ├── clblas_stddef.h ├── clkern.h ├── cltypes.h ├── dblock_kgen.h ├── defbool.h ├── devinfo.h ├── dis_warning.h ├── granulation.h ├── kern_cache.h ├── kernel_extra.h ├── kerngen.h ├── list.h ├── md5sum.h ├── mempat.h ├── msvc.h ├── mutex.h ├── rwlock.h ├── solver.h └── trace_malloc.h ├── library ├── CMakeLists.txt ├── OCLBinaryGenerator.cmake ├── bingen.cmake ├── blas │ ├── AutoGemm │ │ ├── .gitignore │ │ ├── AutoGemm.py │ │ ├── AutoGemmParameters.py │ │ ├── AutoGemmTeardown.h │ │ ├── AutoGemmTools │ │ │ ├── AutoGemmPreCompileKernels.cpp │ │ │ ├── AutoGemmUtil.h │ │ │ ├── ProfileAutoGemm.cpp │ │ │ └── TestAutoGemm.cpp │ │ ├── Common.py │ │ ├── Includes.py │ │ ├── KernelOpenCL.py │ │ ├── KernelParameters.py │ │ ├── KernelSelection.py │ │ ├── KernelsToPreCompile.py │ │ ├── README.txt │ │ └── UserGemmKernelSources │ │ │ ├── 
UserGemmClKernels.cc │ │ │ ├── UserGemmClKernels.h │ │ │ ├── UserGemmKernelSourceIncludes.cpp │ │ │ ├── UserGemmKernelSourceIncludes.h │ │ │ ├── create_user_gemm_cl_kernels.py │ │ │ ├── dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp │ │ │ ├── dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp │ │ │ ├── dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp │ │ │ ├── dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp │ │ │ ├── dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp │ │ │ ├── dgemm_Col_TN_B1_MX048_NX048_KX08_src.cpp │ │ │ ├── sgemm_Col_NN_B0_MX032_NX032_KX16_src.cpp │ │ │ ├── sgemm_Col_NN_B0_MX064_NX064_KX16_src.cpp │ │ │ ├── sgemm_Col_NN_B0_MX096_NX096_KX16_src.cpp │ │ │ ├── sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp │ │ │ ├── sgemm_Col_NN_B1_MX032_NX032_KX16_src.cpp │ │ │ ├── sgemm_Col_NN_B1_MX064_NX064_KX16_src.cpp │ │ │ ├── sgemm_Col_NN_B1_MX096_NX096_KX16_src.cpp │ │ │ ├── sgemm_Col_NT_B0_MX032_NX032_KX16_src.cpp │ │ │ ├── sgemm_Col_NT_B0_MX064_NX064_KX16_src.cpp │ │ │ ├── sgemm_Col_NT_B0_MX096_NX096_KX16_src.cpp │ │ │ ├── sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp │ │ │ ├── sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_src.cpp │ │ │ ├── sgemm_Col_NT_B1_MX032_NX032_KX16_src.cpp │ │ │ ├── sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_src.cpp │ │ │ ├── sgemm_Col_NT_B1_MX064_NX032_KX16_COL_src.cpp │ │ │ ├── sgemm_Col_NT_B1_MX064_NX064_KX16_src.cpp │ │ │ ├── sgemm_Col_NT_B1_MX096_NX096_KX16_src.cpp │ │ │ ├── sgemm_Col_NT_B1_MX128_NX128_KX16_src.cpp │ │ │ ├── sgemm_Col_TN_B0_MX032_NX032_KX16_src.cpp │ │ │ ├── sgemm_Col_TN_B0_MX064_NX064_KX16_src.cpp │ │ │ ├── sgemm_Col_TN_B0_MX096_NX096_KX16_src.cpp │ │ │ ├── sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp │ │ │ ├── sgemm_Col_TN_B1_MX032_NX032_KX16_src.cpp │ │ │ ├── sgemm_Col_TN_B1_MX064_NX064_KX16_src.cpp │ │ │ └── sgemm_Col_TN_B1_MX096_NX096_KX16_src.cpp │ ├── fill.cc │ ├── functor │ │ ├── bonaire.cc │ │ ├── functor.cc │ │ ├── functor_fill.cc │ │ ├── functor_selector.cc │ │ ├── functor_xgemm.cc │ │ ├── functor_xscal.cc │ │ ├── functor_xscal_generic.cc │ │ 
├── functor_xtrsm.cc │ │ ├── gcn_dgemm.cc │ │ ├── gcn_dgemmCommon.cc │ │ ├── gcn_dgemmSmallMatrices.cc │ │ ├── gcn_sgemm.cc │ │ ├── gcn_sgemmSmallMatrices.cc │ │ ├── gcn_zgemm.cc │ │ ├── gpu_dtrsm.cc │ │ ├── gpu_dtrsm192.cc │ │ ├── hawaii.cc │ │ ├── hawaii_dgemmChannelConflict.cc │ │ ├── hawaii_dgemmSplitKernel.cc │ │ ├── hawaii_sgemmBig1024Kernel.cc │ │ ├── hawaii_sgemmBranchKernel.cc │ │ ├── hawaii_sgemmSplit64_32.cc │ │ ├── hawaii_sgemmSplitKernel.cc │ │ ├── include │ │ │ ├── BinaryBuild.h │ │ │ ├── atomic_counter.h │ │ │ ├── bonaire.h │ │ │ ├── functor.h │ │ │ ├── functor_fill.h │ │ │ ├── functor_hawaii_dgemm_NT_MN48.h │ │ │ ├── functor_selector.h │ │ │ ├── functor_utils.h │ │ │ ├── functor_xgemm.h │ │ │ ├── functor_xscal.h │ │ │ ├── functor_xscal_generic.h │ │ │ ├── functor_xtrsm.h │ │ │ ├── gcn_dgemm.h │ │ │ ├── gcn_dgemmCommon.h │ │ │ ├── gcn_dgemmSmallMatrices.h │ │ │ ├── gcn_sgemm.h │ │ │ ├── gcn_sgemmSmallMatrices.h │ │ │ ├── gcn_zgemm.h │ │ │ ├── gpu_dtrsm.h │ │ │ ├── gpu_dtrsm192.h │ │ │ ├── hawaii.h │ │ │ ├── hawaii_dgemmChannelConflict.h │ │ │ ├── hawaii_dgemmSplitKernel.h │ │ │ ├── hawaii_sgemmBig1024Kernel.h │ │ │ ├── hawaii_sgemmBranchKernel.h │ │ │ ├── hawaii_sgemmSplit64_32.h │ │ │ ├── hawaii_sgemmSplitKernel.h │ │ │ └── tahiti.h │ │ └── tahiti.cc │ ├── generic │ │ ├── binary_lookup.cc │ │ ├── blas_funcs.c │ │ ├── common.c │ │ ├── common2.cc │ │ ├── events.c │ │ ├── functor_cache.cc │ │ ├── kdump.c │ │ ├── kernel_extra.c │ │ ├── matrix_dims.c │ │ ├── matrix_props.c │ │ ├── problem_iter.c │ │ ├── problem_iter.h │ │ ├── solution_assert.c │ │ ├── solution_assert.h │ │ ├── solution_seq.c │ │ └── solution_seq_make.c │ ├── gens │ │ ├── asum.cpp │ │ ├── axpy_reg.cpp │ │ ├── blas_kgen.c │ │ ├── blas_kgen.h │ │ ├── blas_subgroup.c │ │ ├── blas_subgroup.h │ │ ├── clTemplates │ │ │ ├── asum.cl │ │ │ ├── axpy.cl │ │ │ ├── copy.cl │ │ │ ├── dgemm_NT_MN48.cl │ │ │ ├── dgemm_gcn_SmallMatrices.cl │ │ │ ├── dgemm_hawai.cl │ │ │ ├── dgemm_hawaiiChannelConfilct.cl 
│ │ │ ├── dgemm_hawaiiSplitKernel.cl │ │ │ ├── dot.cl │ │ │ ├── dtrsm_gpu.cl │ │ │ ├── dtrsm_gpu192.cl │ │ │ ├── gbmv.cl │ │ │ ├── gemm.cl │ │ │ ├── gemm_helper.cl │ │ │ ├── ger.cl │ │ │ ├── her.cl │ │ │ ├── her2.cl │ │ │ ├── iamax.cl │ │ │ ├── nrm2.cl │ │ │ ├── reduction.cl │ │ │ ├── rotg.cl │ │ │ ├── rotm.cl │ │ │ ├── rotmg.cl │ │ │ ├── scal.cl │ │ │ ├── sgemm_gcn.cl │ │ │ ├── sgemm_gcn_SmallMatrices.cl │ │ │ ├── sgemm_gcn_bigMatrices.cl │ │ │ ├── sgemm_hawaiiSplit64_32.cl │ │ │ ├── sgemm_hawaiiSplitKernel.cl │ │ │ ├── swap.cl │ │ │ ├── symm.cl │ │ │ ├── symm_helper.cl │ │ │ ├── syr.cl │ │ │ ├── syr2.cl │ │ │ ├── syr2_her2.cl │ │ │ ├── syr_her.cl │ │ │ ├── trmv.cl │ │ │ ├── trsv.cl │ │ │ ├── trsv_gemv.cl │ │ │ └── zgemm_gcn.cl │ │ ├── copy_reg.cpp │ │ ├── decomposition.c │ │ ├── dot.cpp │ │ ├── fetch.c │ │ ├── fetch.h │ │ ├── gbmv.cpp │ │ ├── gemm.c │ │ ├── gemm_cached.cpp │ │ ├── gemm_tail_cached.cpp │ │ ├── gemv.c │ │ ├── gen_helper.c │ │ ├── gen_helper.h │ │ ├── gen_init.c │ │ ├── ger_lds.cpp │ │ ├── her2_lds.cpp │ │ ├── her_lds.cpp │ │ ├── iamax.cpp │ │ ├── init.h │ │ ├── kprintf.cpp │ │ ├── legacy │ │ │ ├── blas_kgen_legacy.c │ │ │ ├── blas_kgen_legacy.h │ │ │ ├── blkmul.c │ │ │ ├── gemm_img.c │ │ │ ├── gemm_lds.c │ │ │ ├── gen_helper_legacy.c │ │ │ ├── gen_helper_legacy.h │ │ │ ├── tests │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── t_blkmul.c │ │ │ ├── trmm_img.c │ │ │ ├── trmm_lds.c │ │ │ ├── trsm_cached_lds.c │ │ │ ├── trsm_img.c │ │ │ ├── trsm_kgen_legacy.c │ │ │ ├── trsm_kgen_legacy.h │ │ │ ├── trsm_lds.c │ │ │ ├── trxm_common_legacy.c │ │ │ └── trxm_common_legacy.h │ │ ├── nrm2.cpp │ │ ├── reduction.cpp │ │ ├── rotg_reg.cpp │ │ ├── rotm_reg.cpp │ │ ├── rotmg_reg.cpp │ │ ├── scal_reg.cpp │ │ ├── swap_reg.cpp │ │ ├── symm_cached.cpp │ │ ├── symv.c │ │ ├── syr2_lds.cpp │ │ ├── syr_lds.cpp │ │ ├── syrxk.c │ │ ├── tests │ │ │ ├── CMakeLists.txt │ │ │ └── t_tilemul.c │ │ ├── tile.c │ │ ├── tile.h │ │ ├── tile_iter.c │ │ ├── tile_iter.h │ │ ├── tilemul.c │ │ ├── 
trmm.c │ │ ├── trmv_reg.cpp │ │ ├── trsm.c │ │ ├── trsm_kgen.c │ │ ├── trsm_kgen.h │ │ ├── trsv_gemv.cpp │ │ ├── trsv_trtri.cpp │ │ ├── trxm_common.c │ │ ├── trxm_common.h │ │ ├── tuned_numbers.c │ │ ├── tuned_numbers.h │ │ ├── xxmv_common.c │ │ └── xxmv_common.h │ ├── impl.c │ ├── include │ │ ├── blas_funcs.h │ │ ├── blas_mempat.h │ │ ├── clblas-internal.h │ │ ├── events.h │ │ ├── kprintf.hpp │ │ ├── matrix_dims.h │ │ ├── matrix_props.h │ │ ├── solution_seq.h │ │ └── xgemm.h │ ├── init.c │ ├── ixamax.c │ ├── matrix.c │ ├── scimage.c │ ├── specialCases │ │ ├── GemmSpecialCases.cpp │ │ └── include │ │ │ └── GemmSpecialCases.h │ ├── trtri │ │ ├── TrtriClKernels.h │ │ ├── TrtriKernelSourceIncludes.cpp │ │ ├── TrtriKernelSourceIncludes.h │ │ ├── diag_dtrtri_lower_128_16.cpp │ │ ├── diag_dtrtri_upper_128_16.cpp │ │ ├── diag_dtrtri_upper_192_12.cpp │ │ ├── triple_dgemm_update_128_16_PART1_L.cpp │ │ ├── triple_dgemm_update_128_16_PART2_L.cpp │ │ ├── triple_dgemm_update_128_16_R.cpp │ │ ├── triple_dgemm_update_128_32_PART1_L.cpp │ │ ├── triple_dgemm_update_128_32_PART1_R.cpp │ │ ├── triple_dgemm_update_128_32_PART2_L.cpp │ │ ├── triple_dgemm_update_128_32_PART2_R.cpp │ │ ├── triple_dgemm_update_128_64_PART1_L.cpp │ │ ├── triple_dgemm_update_128_64_PART1_R.cpp │ │ ├── triple_dgemm_update_128_64_PART2_L.cpp │ │ ├── triple_dgemm_update_128_64_PART2_R.cpp │ │ ├── triple_dgemm_update_128_ABOVE64_PART1_L.cpp │ │ ├── triple_dgemm_update_128_ABOVE64_PART1_R.cpp │ │ ├── triple_dgemm_update_128_ABOVE64_PART2_L.cpp │ │ ├── triple_dgemm_update_128_ABOVE64_PART2_R.cpp │ │ ├── triple_dgemm_update_128_ABOVE64_PART3_L.cpp │ │ ├── triple_dgemm_update_128_ABOVE64_PART3_R.cpp │ │ ├── triple_dgemm_update_192_12_R.cpp │ │ ├── triple_dgemm_update_192_24_PART1_R.cpp │ │ ├── triple_dgemm_update_192_24_PART2_R.cpp │ │ ├── triple_dgemm_update_192_48_PART1_R.cpp │ │ ├── triple_dgemm_update_192_48_PART2_R.cpp │ │ ├── triple_dgemm_update_192_96_PART1_R.cpp │ │ └── 
triple_dgemm_update_192_96_PART2_R.cpp │ ├── xasum.c │ ├── xaxpy.c │ ├── xcopy.c │ ├── xdot.c │ ├── xgbmv.c │ ├── xgemm.cc │ ├── xgemm2.c │ ├── xgemv.c │ ├── xger.c │ ├── xhemm.c │ ├── xhemv.c │ ├── xher.c │ ├── xher2.c │ ├── xher2k.c │ ├── xherk.c │ ├── xhpmv.c │ ├── xnrm2.c │ ├── xrot.c │ ├── xrotg.c │ ├── xrotm.c │ ├── xrotmg.c │ ├── xscal.c │ ├── xscal.cc │ ├── xshbmv.c │ ├── xspmv.c │ ├── xswap.c │ ├── xsymm.c │ ├── xsymv.c │ ├── xsyr.c │ ├── xsyr2.c │ ├── xsyr2k.c │ ├── xsyrk.c │ ├── xtbmv.c │ ├── xtbsv.c │ ├── xtrmm.c │ ├── xtrmv.c │ ├── xtrsm.cc │ └── xtrsv.c ├── clBLAS.pc.in ├── common │ ├── clkern.c │ ├── devinfo-cache.c │ ├── devinfo.c │ ├── gens │ │ └── dblock_kgen.c │ ├── kern_cache.c │ ├── kerngen_core.c │ ├── kgen_basic.c │ ├── kgen_guard.c │ ├── kgen_loop_helper.c │ ├── list.c │ ├── md5sum.c │ ├── misc.c │ ├── mutex.c │ ├── rwlock.c │ ├── tests │ │ ├── CMakeLists.txt │ │ ├── t_dblock_kgen.c │ │ └── t_gens_cache.c │ └── trace_malloc.c └── tools │ ├── OCLBinaryGenerator │ ├── CMakeLists.txt │ └── OCLBinaryGenerator.cpp │ ├── bingen │ ├── CMakeLists.txt │ └── bingen.cpp │ ├── ktest │ ├── CMakeLists.txt │ ├── config-cmdline.cpp │ ├── config.cpp │ ├── config.h │ ├── ktest-common.h │ ├── ktest-patterns.h │ ├── ktest.cpp │ ├── ktest.h │ ├── main.cpp │ ├── naive │ │ └── naive_blas.cpp │ ├── scripts │ │ └── verify_ktest.bash │ ├── step-dump.cpp │ ├── step.cpp │ ├── step.h │ ├── steps │ │ ├── gemm.cpp │ │ ├── gemm.h │ │ ├── gemv.cpp │ │ ├── gemv.h │ │ ├── symv.cpp │ │ ├── symv.h │ │ ├── syr2k.cpp │ │ ├── syr2k.h │ │ ├── syrk.cpp │ │ ├── syrk.h │ │ ├── trmm.cpp │ │ ├── trmm.h │ │ ├── trsm.cpp │ │ └── trsm.h │ ├── var.cpp │ └── var.h │ ├── tplgen │ ├── CMakeLists.txt │ └── tplgen.cpp │ └── tune │ ├── CMakeLists.txt │ ├── dimension.c │ ├── fileio.c │ ├── fileio.h │ ├── storage_data.c │ ├── storage_data.h │ ├── storage_init.c │ ├── storage_io.c │ ├── subdim.c │ ├── subdim.h │ ├── toolslib.c │ ├── toolslib.h │ ├── tune.c │ └── tune.h ├── samples ├── 
CMakeLists.pack ├── CMakeLists.txt ├── clBlasVersion.c ├── example_chbmv.c ├── example_chemm.cpp ├── example_cher.c ├── example_cher2k.c ├── example_cherk.cpp ├── example_chpmv.c ├── example_chpr.c ├── example_csscal.c ├── example_ctrsm.c ├── example_dtrmv.c ├── example_isamax.c ├── example_sasum.c ├── example_saxpy.c ├── example_scopy.c ├── example_sdot.c ├── example_sgbmv.c ├── example_sgemm.c ├── example_sgemv.c ├── example_sger.c ├── example_snrm2.c ├── example_srot.c ├── example_srotg.c ├── example_srotm.c ├── example_srotmg.c ├── example_ssbmv.c ├── example_sscal.c ├── example_sspmv.c ├── example_sspr.c ├── example_sspr2.c ├── example_sswap.c ├── example_ssymm.c ├── example_ssymv.c ├── example_ssyr.c ├── example_ssyr2.c ├── example_ssyr2k.c ├── example_ssyrk.c ├── example_stbmv.c ├── example_stbsv.c ├── example_stpmv.c ├── example_stpsv.c ├── example_strmm.c ├── example_strmv.c ├── example_strsm.c ├── example_strsm.cpp ├── example_strsv.c ├── example_zhemv.cpp ├── example_zher2.c └── example_zhpr2.c ├── scripts └── perf │ ├── CMakeLists.txt │ ├── blasPerformanceTesting.py │ ├── errorHandler.py │ ├── measurePerformance.py │ ├── performanceUtility.py │ └── plotPerformance.py ├── targetver.h ├── tests ├── BasicRoutines.cpp ├── BlasBase.cpp ├── CMakeLists.txt ├── blas-cblas.c ├── blas-wrapper.cpp ├── blas.c ├── clBLAS-wrapper.cpp ├── cmdline.c ├── common.cpp ├── copyTestDependencies.cmake.in ├── correctness │ ├── BlasBase-corr.cpp │ ├── blas-lapack.c │ ├── blas-lapack.h │ ├── corr-asum.cpp │ ├── corr-axpy.cpp │ ├── corr-copy.cpp │ ├── corr-dot.cpp │ ├── corr-dotc.cpp │ ├── corr-gbmv.cpp │ ├── corr-gemm.cpp │ ├── corr-gemm2.cpp │ ├── corr-gemv.cpp │ ├── corr-ger.cpp │ ├── corr-gerc.cpp │ ├── corr-hbmv.cpp │ ├── corr-hemm.cpp │ ├── corr-hemv.cpp │ ├── corr-her.cpp │ ├── corr-her2.cpp │ ├── corr-her2k.cpp │ ├── corr-herk.cpp │ ├── corr-hpmv.cpp │ ├── corr-hpr.cpp │ ├── corr-hpr2.cpp │ ├── corr-iamax.cpp │ ├── corr-nrm2.cpp │ ├── corr-rot.cpp │ ├── corr-rotg.cpp │ 
├── corr-rotm.cpp │ ├── corr-rotmg.cpp │ ├── corr-sbmv.cpp │ ├── corr-scal.cpp │ ├── corr-spmv.cpp │ ├── corr-spr.cpp │ ├── corr-spr2.cpp │ ├── corr-swap.cpp │ ├── corr-symm.cpp │ ├── corr-symv.cpp │ ├── corr-syr.cpp │ ├── corr-syr2.cpp │ ├── corr-syr2k.cpp │ ├── corr-syrk.cpp │ ├── corr-tbmv.cpp │ ├── corr-tbsv.cpp │ ├── corr-tpmv.cpp │ ├── corr-tpsv.cpp │ ├── corr-trmm.cpp │ ├── corr-trmv.cpp │ ├── corr-trsm.cpp │ ├── corr-trsv.cpp │ ├── delta.h │ ├── tcase-filter.cpp │ ├── tcase-filter.h │ ├── test-correctness.cpp │ ├── trsm-delta.h │ └── trsv-delta.h ├── functional │ ├── BlasBase-func.cpp │ ├── func-error.cpp │ ├── func-event.cpp │ ├── func-images.cpp │ ├── func-queue.cpp │ ├── func-thread.cpp │ ├── func.h │ └── test-functional.cpp ├── gtest.cmake ├── include │ ├── BlasBase.h │ ├── ExtraTestSizes.h │ ├── asum.h │ ├── axpy.h │ ├── blas-cblas.h │ ├── blas-internal.h │ ├── blas-math.h │ ├── blas-random.h │ ├── blas-wrapper.h │ ├── clBLAS-wrapper.h │ ├── cmdline.h │ ├── common.h │ ├── copy.h │ ├── dot.h │ ├── dotc.h │ ├── gbmv.h │ ├── gemm-2.h │ ├── gemm.h │ ├── gemv.h │ ├── ger.h │ ├── gerc.h │ ├── hbmv.h │ ├── hemm.h │ ├── hemv.h │ ├── her.h │ ├── her2.h │ ├── her2k.h │ ├── herk.h │ ├── hpmv.h │ ├── hpr.h │ ├── hpr2.h │ ├── iamax.h │ ├── matrix.h │ ├── nrm2.h │ ├── rot.h │ ├── rotg.h │ ├── rotm.h │ ├── rotmg.h │ ├── sbmv.h │ ├── scal.h │ ├── spmv.h │ ├── spr.h │ ├── spr2.h │ ├── swap.h │ ├── symm.h │ ├── symv.h │ ├── syr.h │ ├── syr2.h │ ├── syr2k.h │ ├── syrk.h │ ├── tbmv.h │ ├── tbsv.h │ ├── test-limits.h │ ├── testDG.h │ ├── timer.h │ ├── tpmv.h │ ├── tpsv.h │ ├── trmm.h │ ├── trmv.h │ ├── trsm.h │ └── trsv.h ├── performance │ ├── BlasBase-perf.cpp │ ├── PerformanceRecorder.cpp │ ├── PerformanceRecorder.h │ ├── PerformanceTest.cpp │ ├── PerformanceTest.h │ ├── TrxmPerformanceTest.cpp │ ├── perf-asum.cpp │ ├── perf-axpy.cpp │ ├── perf-copy.cpp │ ├── perf-dot.cpp │ ├── perf-dotc.cpp │ ├── perf-gbmv.cpp │ ├── perf-gemm.cpp │ ├── perf-gemm2.cpp │ ├── perf-gemv.cpp 
│ ├── perf-ger.cpp │ ├── perf-gerc.cpp │ ├── perf-hbmv.cpp │ ├── perf-hemm.cpp │ ├── perf-hemv.cpp │ ├── perf-her.cpp │ ├── perf-her2.cpp │ ├── perf-her2k.cpp │ ├── perf-herk.cpp │ ├── perf-hpmv.cpp │ ├── perf-hpr.cpp │ ├── perf-hpr2.cpp │ ├── perf-iamax.cpp │ ├── perf-nrm2.cpp │ ├── perf-rot.cpp │ ├── perf-rotg.cpp │ ├── perf-rotm.cpp │ ├── perf-rotmg.cpp │ ├── perf-sbmv.cpp │ ├── perf-scal.cpp │ ├── perf-spmv.cpp │ ├── perf-spr.cpp │ ├── perf-spr2.cpp │ ├── perf-swap.cpp │ ├── perf-symm.cpp │ ├── perf-symv.cpp │ ├── perf-syr.cpp │ ├── perf-syr2.cpp │ ├── perf-syr2k.cpp │ ├── perf-syrk.cpp │ ├── perf-tbmv.cpp │ ├── perf-tbsv.cpp │ ├── perf-tpmv.cpp │ ├── perf-tpsv.cpp │ ├── perf-trmm.cpp │ ├── perf-trmv.cpp │ ├── perf-trsm.cpp │ ├── perf-trsv.cpp │ └── test-performance.cpp └── timer.c └── wrappers └── python ├── README.txt ├── pyclBLAS.pxd ├── pyclBLAS.pyx └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Compiled Dynamic libraries 8 | *.so 9 | *.dylib 10 | *.dll 11 | 12 | # Compiled Static libraries 13 | *.lai 14 | *.la 15 | *.a 16 | *.lib 17 | 18 | # Generated kernel template files 19 | *.clT 20 
| 21 | # flags.txt file 22 | *flags.txt 23 | 24 | # vim temp files 25 | .*.swp 26 | 27 | # python compiled files 28 | *.pyc 29 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clMathLibraries/clBLAS/cf9113982fdfc994297d372785ce76eb80911af2/CHANGELOG -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | AMD clBLAS 2 | Copyright 2013 Advanced Micro Devices, Inc. 3 | 4 | This product includes software developed at 5 | Advanced Micro Devices, Inc. (http://www.amd.com). 6 | -------------------------------------------------------------------------------- /doc/README-BinaryCacheOnDisk.txt: -------------------------------------------------------------------------------- 1 | S. Chauveau 2 | CAPS Entreprise 3 | clBLAS Project 4 | ------------------------------ 5 | April 30,2014 6 | 7 | 8 | The implementation of a binary cache for CL programs can be found in 9 | files src/include/binary_lookup.h and src/library/blas/generic/binary_lookup.cc 10 | 11 | The cache is currently disabled by default. It can be enabled by 12 | setting the environment variable 'CLBLAS_CACHE_PATH' to the directory 13 | containing the cache entries. 14 | 15 | In the code itself, accesses to the cache are controlled by the 16 | BinaryLookup class. A typical cache query looks as follows: 17 | 18 | (1) Create a local instance of BinaryLookup 19 | 20 | (2) Specify the additional characteristics (i.e. variants) of the 21 | requested program. That information combined with the program name 22 | and the OpenCL context and device shall form a unique signature 23 | for the binary program. 
24 | 25 | (3) Perform the effective search by calling the 'found' method 26 | 27 | (4a) If the search was successful then cl_program can be retrieved 28 | by a call to the 'getProgram' method 29 | 30 | (4b) If the search was not successful then a cl_program 31 | must be created and populated in the cache by a call 32 | to the 'setProgram' method. 33 | 34 | (5) Destroy the BinaryLookup local instance. 35 | 36 | 37 | So in practice a typical query shall look as follows: 38 | 39 | cl_program program ; 40 | 41 | // The program name is part of the signature and shall be unique 42 | const char * program_name = "... my unique program name ... " ; 43 | 44 | BinaryLookup bl(context, device, program_name); 45 | 46 | // Specify some additional information used to build a 47 | // unique signature for that cache entry 48 | 49 | bl.variantInt( vectorSize ); 50 | bl.variantInt( hasBorder ); 51 | ... 52 | 53 | // Perform the query 54 | if ( bl.found() ) 55 | { 56 | // Success! use the cl_program retrieved from the cache 57 | program = bl.getProgram(); 58 | } 59 | else 60 | { 61 | // Failure! we need to build the program 62 | program = build_my_program(context,device,vectorSize,...) 
; 63 | // and inform the lookup object of the program 64 | bl.setProgram(program); 65 | // and finally populate the cache 66 | bl.populateCache(); 67 | } 68 | 69 | // The BinaryLookup shall now be destroyed 70 | -------------------------------------------------------------------------------- /doc/performance/clBLAS_2.6.0/S9150/README.txt: -------------------------------------------------------------------------------- 1 | ################################ 2 | # # 3 | # Benchmarking Methodology # 4 | # # 5 | ################################ 6 | 7 | ############ 8 | # Hardware # 9 | ############ 10 | S9150 11 | 12 | ############ 13 | # Software # 14 | ############ 15 | CentOS 6.6 16 | clBLAS 2.6.0 17 | driver 14.502 18 | 19 | ############ 20 | # Settings # 21 | ############ 22 | gpu clocks: set to max level using proprietary tool though public alternatives exist 23 | clBLAS: 24 | m=n=k=lda=ldb=ldc (for simplicity) 25 | alpha=beta=1 26 | gemms were column-major, op(A,B)=N,T 27 | 28 | ############ 29 | # Sampling # 30 | ############ 31 | For each data point, we took 10 samples. Each sample consists of 10 gemm calls with a wait afterward. Outlying samples beyond 1 standard deviation were removed (rarely if ever did this actually need to happen). Before running the 10 samples, one warm-up sample was executed (but not included in the statistics). 
32 | 33 | GFlop/s was calculated as 34 | (2*m*n*k flops) / (host time for 10 kernels / 10) // real data 35 | (8*m*n*k flops) / (host time for 10 kernels / 10) // complex data 36 | -------------------------------------------------------------------------------- /doc/performance/clBLAS_2.6.0/W9100/README.txt: -------------------------------------------------------------------------------- 1 | ################################ 2 | # # 3 | # Benchmarking Methodology # 4 | # # 5 | ################################ 6 | 7 | ############ 8 | # Hardware # 9 | ############ 10 | W9100 11 | 12 | ############ 13 | # Software # 14 | ############ 15 | CentOS 6.6 16 | clBLAS 2.6.0 17 | driver 14.502 18 | 19 | ############ 20 | # Settings # 21 | ############ 22 | gpu clocks: set to max level using proprietary tool though public alternatives exist 23 | clBLAS: 24 | m=n=k=lda=ldb=ldc (for simplicity) 25 | alpha=beta=1 26 | gemms were column-major, op(A,B)=N,T 27 | 28 | ############ 29 | # Sampling # 30 | ############ 31 | For each data point, we took 10 samples. Each sample consists of 10 gemm calls with a wait afterward. Outlying samples beyond 1 standard deviation were removed (rarely if ever did this actually need to happen). Before running the 10 samples, one warm-up sample was executed (but not included in the statistics). 
32 | 33 | GFlop/s was calculated as 34 | (2*m*n*k flops) / (host time for 10 kernels / 10) // real data 35 | (8*m*n*k flops) / (host time for 10 kernels / 10) // complex data 36 | -------------------------------------------------------------------------------- /doc/performance/cuBLAS_7.0/Tesla_K40/README.txt: -------------------------------------------------------------------------------- 1 | ################################ 2 | # # 3 | # Benchmarking Methodology # 4 | # # 5 | ################################ 6 | 7 | ############ 8 | # Hardware # 9 | ############ 10 | Tesla K40 11 | 12 | ############ 13 | # Software # 14 | ############ 15 | openSUSE 13.2 16 | cuBLAS 7.0 17 | driver 346.47 18 | 19 | ############ 20 | # Settings # 21 | ############ 22 | gpu clocks: set to boost level using nvidia-smi 23 | cuBLAS: 24 | m=n=k=lda=ldb=ldc (for simplicity) 25 | alpha=beta=1 26 | gemms were column-major, op(A,B)=N,T 27 | 28 | ############ 29 | # Sampling # 30 | ############ 31 | For each data point, we took 10 samples. Each sample consists of 10 gemm calls with a wait afterward. Outlying samples beyond 1 standard deviation were removed (rarely if ever did this actually need to happen). Before running the 10 samples, one warm-up sample was executed (but not included in the statistics). 32 | 33 | GFlop/s was calculated as 34 | (2*m*n*k flops) / (host time for 10 kernels / 10) // real data 35 | (8*m*n*k flops) / (host time for 10 kernels / 10) // complex data 36 | -------------------------------------------------------------------------------- /src/clAmdBlas.version.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | /* the configured version and settings for clblas 19 | */ 20 | #define clAmdBlasVersionMajor 2 21 | #define clAmdBlasVersionMinor 0 22 | #define clAmdBlasVersionPatch 0 23 | -------------------------------------------------------------------------------- /src/clBLAS-complex.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef CLBLAS_COMPLEX_H_ 19 | #define CLBLAS_COMPLEX_H_ 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | typedef cl_float2 FloatComplex; 26 | typedef cl_double2 DoubleComplex; 27 | 28 | static __inline FloatComplex 29 | floatComplex(float real, float imag) 30 | { 31 | FloatComplex z; 32 | z.s[0] = real; 33 | z.s[1] = imag; 34 | return z; 35 | } 36 | 37 | static __inline DoubleComplex 38 | doubleComplex(double real, double imag) 39 | { 40 | DoubleComplex z; 41 | z.s[0] = real; 42 | z.s[1] = imag; 43 | return z; 44 | } 45 | 46 | #define CREAL(v) ((v).s[0]) 47 | #define CIMAG(v) ((v).s[1]) 48 | 49 | #ifdef __cplusplus 50 | } /* extern "C" { */ 51 | #endif 52 | 53 | #endif /* CLBLAS_COMPLEX_H_ */ 54 | -------------------------------------------------------------------------------- /src/clBLAS.version.h.in: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | /* the configured version and settings for clblas 19 | */ 20 | #define clblasVersionMajor @clBLAS_VERSION_MAJOR@ 21 | #define clblasVersionMinor @clBLAS_VERSION_MINOR@ 22 | #define clblasVersionPatch @clBLAS_VERSION_PATCH@ 23 | -------------------------------------------------------------------------------- /src/clBLASConfig.cmake.in: -------------------------------------------------------------------------------- 1 | include(${CMAKE_CURRENT_LIST_DIR}/clBLASTargets.cmake) 2 | get_filename_component(CLBLAS_INCLUDE_DIRS ${CMAKE_CURRENT_LIST_DIR}/@reldir@/include ABSOLUTE) 3 | set(CLBLAS_LIBRARIES clBLAS) 4 | -------------------------------------------------------------------------------- /src/clBLASConfigVersion.cmake.in: -------------------------------------------------------------------------------- 1 | # This is a basic version file for the Config-mode of find_package(). 2 | # It is used by write_basic_package_version_file() as input file for configure_file() 3 | # to create a version-file which can be installed along a config.cmake file. 4 | # 5 | # The created file sets PACKAGE_VERSION_EXACT if the current version string and 6 | # the requested version string are exactly the same and it sets 7 | # PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version, 8 | # but only if the requested major version is the same as the current one. 9 | # The variable CLBLAS_VERSION must be set before calling configure_file(). 
10 | 11 | 12 | set(PACKAGE_VERSION "@CLBLAS_VERSION@") 13 | 14 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}" ) 15 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 16 | else() 17 | 18 | if("@CLBLAS_VERSION@" MATCHES "^([0-9]+)\\.") 19 | set(CLBLAS_VERSION_MAJOR "${CMAKE_MATCH_1}") 20 | else() 21 | set(CLBLAS_VERSION_MAJOR "@CLBLAS_VERSION@") 22 | endif() 23 | 24 | if("${PACKAGE_FIND_VERSION_MAJOR}" STREQUAL "${CLBLAS_VERSION_MAJOR}") 25 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 26 | else() 27 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 28 | endif() 29 | 30 | if( "${PACKAGE_FIND_VERSION}" STREQUAL "${PACKAGE_VERSION}") 31 | set(PACKAGE_VERSION_EXACT TRUE) 32 | endif() 33 | endif() 34 | 35 | 36 | # if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: 37 | if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "@CMAKE_SIZEOF_VOID_P@" STREQUAL "") 38 | return() 39 | endif() 40 | 41 | # check that the installed version has the same 32/64bit-ness as the one which is currently searching: 42 | if(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "@CMAKE_SIZEOF_VOID_P@") 43 | math(EXPR installedBits "@CMAKE_SIZEOF_VOID_P@ * 8") 44 | set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") 45 | set(PACKAGE_VERSION_UNSUITABLE TRUE) 46 | endif() 47 | -------------------------------------------------------------------------------- /src/client/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | set(CLIENT_SRC client.cpp stdafx.cpp statisticalTimer.cpp) 18 | set(CLIENT_HEADER 19 | stdafx.h 20 | targetver.h 21 | statisticalTimer.h 22 | clfunc_common.hpp 23 | clfunc_xgemm.hpp 24 | clfunc_xgemv.hpp 25 | clfunc_xsymv.hpp 26 | clfunc_xtrmm.hpp 27 | clfunc_xtrsm.hpp 28 | clfunc_xsyrk.hpp 29 | clfunc_xsyr2k.hpp 30 | clfunc_xhemm.hpp 31 | clfunc_xsymm.hpp 32 | clfunc_xherk.hpp 33 | clfunc_xher2k.hpp) 34 | 35 | set(WRAPPER_SRC testPerfWrapper.cpp) 36 | 37 | add_definitions(-D_CRT_SECURE_NO_WARNINGS) 38 | 39 | # Having problems on build server, compiling gtest headers with -pedantic; disabling detection of long long 40 | # http://code.google.com/p/googletest/issues/detail?id=334 41 | if( CMAKE_COMPILER_IS_GNUCXX ) 42 | add_definitions( -Wno-long-long ) 43 | endif( ) 44 | 45 | include_directories( 46 | ${Boost_INCLUDE_DIRS} 47 | ${OPENCL_INCLUDE_DIRS} 48 | ${clBLAS_SOURCE_DIR} 49 | ${clBLAS_SOURCE_DIR}/include 50 | ${clBLAS_SOURCE_DIR}/tests/include 51 | ${Netlib_INCLUDE_DIRS} 52 | .) 
53 | 54 | add_executable(client ${CLIENT_SRC} ${CLIENT_HEADER}) 55 | target_link_libraries(client ${Netlib_LIBRARIES} ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} clBLAS) 56 | set_target_properties( client PROPERTIES 57 | RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" 58 | OUTPUT_NAME clBLAS-client ) 59 | 60 | add_executable(testPerfWrapper ${WRAPPER_SRC}) 61 | target_link_libraries(testPerfWrapper ${Boost_LIBRARIES}) 62 | set_target_properties( testPerfWrapper PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) 63 | 64 | # CPack configuration; include the executable into the package 65 | install( TARGETS client testPerfWrapper 66 | RUNTIME DESTINATION bin${SUFFIX_BIN} 67 | LIBRARY DESTINATION lib${SUFFIX_LIB} 68 | ARCHIVE DESTINATION lib${SUFFIX_LIB}/import 69 | ) 70 | -------------------------------------------------------------------------------- /src/client/ctimer.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #ifndef C_TIMER_HXX__ 18 | #define C_TIMER_HXX__ 19 | 20 | #if defined(__cplusplus) 21 | typedef class timer *Timer; 22 | #else 23 | typedef struct timer *Timer; 24 | #endif 25 | 26 | #if defined(__cplusplus) 27 | extern "C" { 28 | #endif 29 | 30 | extern Timer CreateTimer(); 31 | extern void DeleteTimer(Timer timer); 32 | extern double GetTime(Timer timer); 33 | extern void PauseTimer(Timer timer); 34 | extern void RestartTimer(Timer timer); 35 | extern void ResetTimer(Timer timer); 36 | extern void ResetDelayTimer(Timer timer, double delay_time); 37 | 38 | #if defined(__cplusplus) 39 | } 40 | #endif 41 | 42 | #endif // ifndef C_TIMER_HXX__ 43 | -------------------------------------------------------------------------------- /src/client/makefile: -------------------------------------------------------------------------------- 1 | SHELL = /bin/bash 2 | CXX = g++ 3 | CXXFLAGS = -O3 -fomit-frame-pointer -finline-functions -I../include -I../tests/include 4 | LIBS = -lclblas -lOpenCL -lboost_program_options -lrt 5 | .PHONY: clean 6 | 7 | %.o:%.cpp 8 | ${CXX} ${CXXFLAGS} $< -c 9 | 10 | clblas_client: clblas_client.o statisticalTimer.o timer.o 11 | ${CXX} ${CXXFLAGS} $^ ${LIBS} -o $@ 12 | 13 | clean: 14 | rm -rf *.o 15 | -------------------------------------------------------------------------------- /src/client/stdafx.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | // stdafx.cpp : source file that includes just the standard includes 19 | // clAmdFft.pch will be the pre-compiled header 20 | // stdafx.obj will contain the pre-compiled type information 21 | 22 | #include "stdafx.h" 23 | 24 | // TODO: reference any additional headers you need in STDAFX.H 25 | // and not in this file 26 | -------------------------------------------------------------------------------- /src/client/stdafx.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | // stdafx.h : include file for standard system include files, 19 | // or project specific include files that are used frequently, but 20 | // are changed infrequently 21 | // 22 | 23 | #pragma once 24 | 25 | #include "targetver.h" 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #if defined( _WIN32 ) 34 | #define NOMINMAX 35 | #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers 36 | 37 | #include 38 | #include 39 | #endif 40 | 41 | -------------------------------------------------------------------------------- /src/client/targetver.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #pragma once 19 | 20 | // Including SDKDDKVer.h defines the highest available Windows platform. 21 | 22 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and 23 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 
24 | 25 | #if defined( _WIN32 ) 26 | #include 27 | #endif 28 | -------------------------------------------------------------------------------- /src/client/timer.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #include "ctimer.h" 18 | #include "timer.hpp" 19 | 20 | timer:: 21 | timer() : elapsed_time_(0.0) 22 | { 23 | init_time_ = get_walltime(); 24 | } 25 | 26 | timer:: 27 | ~timer() 28 | { 29 | } 30 | 31 | double 32 | timer:: 33 | get() 34 | { 35 | return elapsed_time_ + get_walltime() - init_time_; 36 | } 37 | 38 | void 39 | timer:: 40 | pause() 41 | { 42 | elapsed_time_ = get(); 43 | } 44 | 45 | void 46 | timer:: 47 | restart() 48 | { 49 | init_time_ = get_walltime(); 50 | } 51 | 52 | void 53 | timer:: 54 | reset() 55 | { 56 | elapsed_time_ = 0.0; 57 | init_time_ = get_walltime(); 58 | } 59 | 60 | void 61 | timer:: 62 | reset_delay(double delay_time) 63 | { 64 | reset(); 65 | elapsed_time_ = delay_time; 66 | } 67 | 68 | Timer CreateTimer() 69 | { 70 | Timer local_timer = new timer(); 71 | return local_timer; 72 | } 73 | 74 | void DeleteTimer(Timer timer) 75 | { 76 | delete timer; 77 | } 78 | 79 | double GetTime(Timer timer) 
80 | { 81 | return timer->get(); 82 | } 83 | 84 | void ResetTimer(Timer timer) 85 | { 86 | timer->reset(); 87 | } 88 | 89 | void RestartTimer(Timer timer) 90 | { 91 | timer->restart(); 92 | } 93 | 94 | void PauseTimer(Timer timer) 95 | { 96 | timer->pause(); 97 | } 98 | 99 | void ResetDelayTimer(Timer timer, double delay_time) 100 | { 101 | timer->reset_delay(delay_time); 102 | } 103 | 104 | -------------------------------------------------------------------------------- /src/client/timer.hpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #ifndef TIMER_HXX__ 18 | #define TIMER_HXX__ 19 | 20 | #include <time.h> 21 | 22 | class timer 23 | { 24 | public: 25 | double get(); 26 | void pause(); 27 | void restart(); 28 | void reset(); 29 | void reset_delay(double delay_time); 30 | /* Current CLOCK_REALTIME reading converted to seconds (tv_sec plus tv_nsec * 1e-9). */ 31 | private: 32 | inline double get_walltime() 33 | { 34 | struct timespec ts; 35 | 36 | clock_gettime(CLOCK_REALTIME, &ts); 37 | return static_cast<double>(ts.tv_sec) + 38 | static_cast<double>(ts.tv_nsec) * 1.0e-9; 39 | } 40 | 41 | private: 42 | double init_time_; 43 | double elapsed_time_; 44 | 45 | public: 46 | timer(); 47 | ~timer(); 48 | }; // class timer 49 | 50 | #endif // ifndef TIMER_HXX__ 51 | -------------------------------------------------------------------------------- /src/flags_public.txt: -------------------------------------------------------------------------------- 1 | TAHITI_OCL " "; 2 | HAWAII1_OCL " "; 3 | HAWAII2_OCL "-cl-std=CL2.0"; 4 | BONAIRE_OCL "-cl-std=CL2.0"; 5 | -------------------------------------------------------------------------------- /src/include/cltypes.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef CLTYPES_H_ 19 | #define CLTYPES_H_ 20 | 21 | #include 22 | 23 | #if defined(__APPLE__) || defined(__MACOSX) 24 | #include 25 | #else 26 | #include 27 | #endif 28 | 29 | /** 30 | * @internal 31 | * @defgroup DTYPES Data types 32 | */ 33 | /*@{*/ 34 | 35 | /** 36 | * @brief OpenCL type identifiers 37 | */ 38 | typedef enum DataType { 39 | TYPE_FLOAT, /**< single float precision type */ 40 | TYPE_DOUBLE, /**< double float precision type */ 41 | TYPE_COMPLEX_FLOAT, /**< single float precision complex type */ 42 | TYPE_COMPLEX_DOUBLE, /**< double float precision complex type */ 43 | TYPE_UNSIGNED_INT /**< Unsigned int, for output buffer for iAMAX routine */ 44 | } DataType; 45 | 46 | /*@}*/ 47 | 48 | enum { 49 | FLOAT4_VECLEN = sizeof(cl_float4) / sizeof(cl_float) 50 | }; 51 | 52 | /* 53 | * return size of a BLAS related data type 54 | */ 55 | #ifdef __cplusplus 56 | extern "C" 57 | #endif 58 | unsigned int 59 | dtypeSize(DataType type); 60 | 61 | /* 62 | * width of the matrix (block) in float4 words 63 | */ 64 | size_t 65 | fl4RowWidth(size_t width, size_t typeSize); 66 | 67 | static __inline bool 68 | isDoubleBasedType(DataType dtype) 69 | { 70 | return (dtype == TYPE_DOUBLE || dtype == TYPE_COMPLEX_DOUBLE); 71 | } 72 | 73 | static __inline bool 74 | isComplexType(DataType dtype) 75 | { 76 | return (dtype == TYPE_COMPLEX_FLOAT || dtype == TYPE_COMPLEX_DOUBLE); 77 | } 78 | 79 | #endif /* CLTYPES_H_ */ 80 | -------------------------------------------------------------------------------- /src/include/defbool.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef DEFBOOL_H_ 19 | #define DEFBOOL_H_ 20 | 21 | #if defined(__powerpc64__) && defined(__ALTIVEC__) 22 | #include "altivec.h" 23 | #undef bool 24 | #endif 25 | 26 | #if defined(_MSC_VER) && _MSC_VER <= 1700 27 | 28 | /* 29 | FIX for windows compilation 30 | #if !defined(__cplusplus) 31 | 32 | typedef int _Bool; 33 | #define bool _Bool 34 | enum { 35 | false, 36 | true 37 | }; 38 | #endif 39 | */ 40 | 41 | #define __bool_true_false_are_defined 1 42 | 43 | #ifndef __cplusplus 44 | 45 | #define bool _Bool 46 | #if __STDC_VERSION__ < 199901L && __GNUC__ < 3 47 | #define false 0 48 | #define true 1 49 | 50 | typedef int _Bool; 51 | #endif 52 | 53 | #endif /* !__cplusplus */ 54 | 55 | 56 | #else /* defined(_MSC_VER) && _MSC_VER <= 1700 */ 57 | 58 | #include 59 | 60 | #endif /* defined(_MSC_VER) && _MSC_VER <= 1700 */ 61 | 62 | #endif /* DEFBOOL_H_ */ 63 | -------------------------------------------------------------------------------- /src/include/dis_warning.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef DIS_WARNING_H_ 19 | #define DIS_WARNING_H_ 20 | 21 | #if _MSC_VER 22 | 23 | #pragma warning (disable:4204) 24 | #pragma warning (disable:4127) 25 | #define MAY_ALIAS 26 | 27 | #else /* _MSC_VER */ 28 | 29 | #define MAY_ALIAS __attribute__((__may_alias__)) 30 | 31 | #endif 32 | 33 | 34 | /* 35 | * Set of macro to mute gcc when we don't need in using some 36 | * function arguments 37 | */ 38 | 39 | #define DUMMY_ARG_USAGE(arg) \ 40 | do { \ 41 | (void)arg; \ 42 | } while (0) 43 | 44 | #define DUMMY_ARGS_USAGE_2(arg1, arg2) \ 45 | do { \ 46 | (void)arg1; \ 47 | (void)arg2; \ 48 | } while (0) 49 | 50 | #define DUMMY_ARGS_USAGE_3(arg1, arg2, arg3) \ 51 | do { \ 52 | (void)arg1; \ 53 | (void)arg2; \ 54 | (void)arg3; \ 55 | } while(0) \ 56 | 57 | #define DUMMY_ARGS_USAGE_4(arg1, arg2, arg3, arg4) \ 58 | do { \ 59 | (void)arg1; \ 60 | (void)arg2; \ 61 | (void)arg3; \ 62 | (void)arg4; \ 63 | } while(0) \ 64 | 65 | #endif /* DIS_WARNING_H_ */ 66 | -------------------------------------------------------------------------------- /src/include/md5sum.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. 3 | * MD5 Message-Digest Algorithm (RFC 1321). 
4 | * 5 | * Homepage: 6 | * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 7 | * 8 | * Author: 9 | * Alexander Peslyak, better known as Solar Designer 10 | * 11 | * This software was written by Alexander Peslyak in 2001. No copyright is 12 | * claimed, and the software is hereby placed in the public domain. 13 | * In case this attempt to disclaim copyright and place the software in the 14 | * public domain is deemed null and void, then the software is 15 | * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the 16 | * general public under the following terms: 17 | * 18 | * Redistribution and use in source and binary forms, with or without 19 | * modification, are permitted. 20 | * 21 | * There's ABSOLUTELY NO WARRANTY, express or implied. 22 | * 23 | * See md5.c for more information. 24 | */ 25 | 26 | #ifdef HAVE_OPENSSL 27 | #include 28 | #elif !defined(_MD5_H) 29 | #define _MD5_H 30 | 31 | /* Any 32-bit or wider unsigned integer data type will do */ 32 | typedef unsigned int MD5_u32plus; 33 | 34 | typedef struct { 35 | MD5_u32plus lo, hi; 36 | MD5_u32plus a, b, c, d; 37 | unsigned char buffer[64]; 38 | MD5_u32plus block[16]; 39 | } MD5_CTX; 40 | 41 | extern void MD5_Init(MD5_CTX *ctx); 42 | extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size); 43 | extern void MD5_Final(unsigned char *result, MD5_CTX *ctx); 44 | 45 | char * md5sum (const void * data, unsigned long size); 46 | 47 | 48 | 49 | #endif 50 | 51 | -------------------------------------------------------------------------------- /src/include/mempat.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | /* 19 | * Memory usage pattern related definitions 20 | */ 21 | 22 | #ifndef MEMPAT_H_ 23 | #define MEMPAT_H_ 24 | 25 | #include 26 | 27 | enum { 28 | MAX_MEMORY_PATTERNS = 16 29 | }; 30 | 31 | /** 32 | * @internal 33 | * @brief Memory level identifiers 34 | * 35 | * @ingroup SOLVERIF 36 | */ 37 | typedef enum CLMemLevel { 38 | CLMEM_LEVEL_LDS = 0x01, /**< Local data storage */ 39 | CLMEM_LEVEL_L1 = 0x02, /**< L1 cache */ 40 | CLMEM_LEVEL_L2 = 0x04 /**< L2 cache */ 41 | } CLMemLevel; 42 | 43 | /** 44 | * @internal 45 | * @brief Memory type identifiers 46 | * 47 | * @ingroup SOLVERIF 48 | */ 49 | typedef enum CLMemType { 50 | CLMEM_GLOBAL_MEMORY, 51 | CLMEM_LOCAL_MEMORY, 52 | CLMEM_IMAGE, 53 | // FIXME: it's for backward compatibility, remove after blkmul deprecation 54 | CLMEM_BUFFER = CLMEM_LOCAL_MEMORY 55 | } CLMemType; 56 | 57 | // memory levels set 58 | typedef unsigned int meml_set_t; 59 | 60 | /* 61 | * FIXME: deprecate cuLevel and thLevel 62 | */ 63 | 64 | /** 65 | * @internal 66 | * @brief Solver memory pattern description structure 67 | * 68 | * The structure describes memory usage features and is used 69 | * by the frontend when choosing a solving strategy and decomposition 70 | * block sizes 71 | * 72 | * @ingroup SOLVERIF 73 | */ 74 | typedef struct MemoryPattern { 75 | const char *name; /**< Pattern's name */ 76 | unsigned int nrLevels; /**< Decomposition levels number */ 77 | /** Level a problem is decomposed among compute 
units at */ 78 | int cuLevel; 79 | /** Level a problem is decomposed among threads within single compute unit */ 80 | int thLevel; 81 | SolverOps *sops; /**< Solver operations */ 82 | /** extra information specific for the application field */ 83 | void *extra; 84 | } MemoryPattern; 85 | 86 | #endif /* MEMPAT_H_ */ 87 | -------------------------------------------------------------------------------- /src/include/msvc.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | /* 19 | * Declarations not supported in visual studio 20 | * by default 21 | */ 22 | 23 | #ifndef MSVC_H_ 24 | #define MSVC_H_ 25 | 26 | #ifndef EOVERFLOW 27 | #define EOVERFLOW 1000 28 | #endif /* EOVERFLOW */ 29 | 30 | #if ( _MSC_VER < 1900 ) 31 | #define snprintf _snprintf 32 | #endif 33 | /* NOTE(review): long is 32 bits wide on 64-bit Windows; confirm this width is intended for ssize_t. 
*/ 34 | typedef long ssize_t; 35 | 36 | #endif /* MSVC_H_ */ 37 | -------------------------------------------------------------------------------- /src/include/mutex.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef MUTEX_H_ 19 | #define MUTEX_H_ 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | typedef void* mutex_t; 26 | 27 | mutex_t* mutexInit(void); 28 | int mutexDestroy(mutex_t *mutex); 29 | int mutexLock(mutex_t *mutex); 30 | int mutexUnlock(mutex_t *mutex); 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | 36 | #endif /* MUTEX_H_ */ 37 | -------------------------------------------------------------------------------- /src/include/trace_malloc.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | /* 19 | * Define simple functionality to track memory leaks in order to separate 20 | * library leaks from leaks in the other components and to take info in 21 | * a human friendly format 22 | */ 23 | 24 | #ifndef TRACE_MALLOC_H_ 25 | #define TRACE_MALLOC_H_ 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | #if defined(TRACE_MALLOC) 32 | /* calloc's two arguments are multiplied in the expansion, so each one is parenthesized. */ 33 | #define malloc(size) debugMalloc(size, __FILE__, __LINE__) 34 | #define calloc(nmemb, size) debugCalloc((size) * (nmemb), __FILE__, __LINE__) 35 | #define realloc(ptr, size) debugRealloc(ptr, size, __FILE__, __LINE__) 36 | #define free(ptr) debugFree(ptr) 37 | 38 | void initMallocTrace(void); 39 | void *debugMalloc(size_t size, const char *file, int line); 40 | void *debugCalloc(size_t size, const char *file, int line); 41 | void *debugRealloc(void *ptr, size_t size, const char *file, int line); 42 | void debugFree(void *ptr); 43 | void printMallocStatistics(void); 44 | void printMemLeaksInfo(void); 45 | void releaseMallocTrace(void); 46 | 47 | #else /* TRACE_MALLOC */ 48 | 49 | static __inline void initMallocTrace(void) 50 | { 51 | /* do nothing */ 52 | } 53 | 54 | static __inline void printMallocStatistics(void) 55 | { 56 | /* do nothing */ 57 | } 58 | 59 | static __inline void printMemLeaksInfo(void) 60 | { 61 | /* do nothing */ 62 | } 63 | 64 | static __inline void releaseMallocTrace(void) 65 | { 66 | /* do nothing */ 67 | } 68 | 69 | #endif /* !TRACE_MALLOC */ 70 | 71 | #ifdef __cplusplus 72 | } /* extern "C" { */ 73 | #endif 74 | 75 | #endif /* TRACE_MALLOC_H_ */ 76 | -------------------------------------------------------------------------------- /src/library/blas/AutoGemm/.gitignore: -------------------------------------------------------------------------------- 1 | *.cl 2 | *.swp 3 | *.txt 4 | *.pyc 5 | -------------------------------------------------------------------------------- 
/src/library/blas/AutoGemm/AutoGemm.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # AutoGemm 3 | # - Automatically generate gemm kernels based on tile parameters 4 | # - This script generates the following to ease integration into clBLAS: 5 | # - generate all the kernel files 6 | # - kernel selection logic 7 | # - include files for kernel strings 8 | # 9 | # TODO Now 10 | # - offline compilation 11 | # TODO Future 12 | # - fuse together unroll=8 and unroll=1 in same kernel ? 13 | # functionally works fine, but lowers performance by ~10% 14 | ################################################################################ 15 | 16 | import os 17 | import sys 18 | import argparse 19 | import getopt 20 | 21 | import Common 22 | import Includes 23 | import KernelSelection 24 | import AutoGemmParameters 25 | import KernelOpenCL 26 | 27 | 28 | ################################################################################ 29 | # Main 30 | ################################################################################ 31 | if __name__ == "__main__": 32 | # parse arguments 33 | ap = argparse.ArgumentParser(description="AutoGemm") 34 | ap.add_argument("--output-path", dest="output" ) 35 | ap.add_argument("--opencl-compiler-version", dest="clCompilerVersion", action="store", choices=["1.1", "1.2", "2.0" ]) 36 | ap.add_argument("--architecture", dest="architecture", action="store", choices=["Hawaii", "Fiji" ]) 37 | args = ap.parse_args() 38 | if args.output: 39 | Common.setOutputPath(args.output) 40 | else: 41 | print("AutoGemm.py: Warning: No output path specified; default is working directory.") 42 | 43 | print("AutoGemm.py: using OpenCL " + args.clCompilerVersion + " compiler") 44 | Common.setClCompilerVersion(args.clCompilerVersion) 45 | AutoGemmParameters.setArchitecture(args.architecture) 46 | 47 | KernelOpenCL.writeOpenCLKernels() 48 | 
KernelSelection.writeKernelSelection() 49 | Includes.writeIncludes() 50 | -------------------------------------------------------------------------------- /src/library/blas/AutoGemm/AutoGemmTeardown.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __cplusplus 4 | extern "C" { /* give the declaration below C linkage when included from C++ */ 5 | #endif 6 | void initAutoGemmClKernels(void); 7 | #ifdef __cplusplus 8 | } 9 | #endif 10 | 11 | -------------------------------------------------------------------------------- /src/library/blas/AutoGemm/Common.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Auto-Gemm 3 | ################################################################################ 4 | 5 | outputPath = "" 6 | clCompilerVersion = "2.0" 7 | 8 | def setClCompilerVersion(version): 9 | global clCompilerVersion 10 | clCompilerVersion = version 11 | 12 | def getClCompilerVersion(): 13 | global clCompilerVersion 14 | return clCompilerVersion 15 | 16 | def setOutputPath(path): 17 | global outputPath 18 | outputPath = path + "/" 19 | 20 | def getOutputPath(): 21 | global outputPath 22 | return outputPath 23 | 24 | def getRelativeKernelSourcePath(): 25 | return "AutoGemmKernelSources/" 26 | 27 | def getRelativeKernelBinaryPath(): 28 | return "AutoGemmKernelBinaries/" 29 | 30 | def getRelativeIncludePath(): 31 | return "AutoGemmIncludes/" 32 | 33 | def getKernelSourcePath(): 34 | return getOutputPath() + getRelativeKernelSourcePath() 35 | 36 | def getKernelBinaryPath(): 37 | return getOutputPath() + getRelativeKernelBinaryPath() 38 | 39 | def getIncludePath(): 40 | return getOutputPath() + getRelativeIncludePath() 41 | 42 | def getAutoGemmHeader(): 43 | return ( 44 | "/*******************************************************************************\n" 45 | " * This file was auto-generated using the AutoGemm.py python script.\n" 46 | " * DO 
NOT modify this file! Instead, make changes to scripts in\n" 47 | " * clBLAS/src/library/blas/AutoGemm/ then re-generate files\n" 48 | " * (otherwise local changes will be lost after re-generation).\n" 49 | " ******************************************************************************/\n\n" 50 | ) 51 | 52 | hostDataChar = { "s":"s", "d":"d", "c":"c", "z":"z" } 53 | hostDataType = { "s":"float", "d":"double", "c":"float2", "z":"double2" } 54 | openclDataType = { "s":"float", "d":"double", "c":"float2", "z":"double2" } 55 | 56 | precisionInt = { "s":0, "d":1, "c":2, "z":3 } 57 | orderInt = { "clblasRowMajor":0, "clblasColumnMajor":1 } 58 | transposeInt = { "N":0, "T":1, "C":2 } 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/library/blas/AutoGemm/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clMathLibraries/clBLAS/cf9113982fdfc994297d372785ce76eb80911af2/src/library/blas/AutoGemm/README.txt -------------------------------------------------------------------------------- /src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.cc: -------------------------------------------------------------------------------- 1 | // GENERATED using create_user_gemm_cl_kernels.py 2 | 3 | #if defined( __APPLE__ ) || defined( __MACOSX ) 4 | #include 5 | #else 6 | #include 7 | #endif 8 | 9 | cl_kernel sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel = NULL; 10 | cl_kernel sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel = NULL; 11 | cl_kernel sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel = NULL; 12 | cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel = NULL; 13 | cl_kernel sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL; 14 | cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL; 15 | cl_kernel sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL; 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | void 
initUserGemmClKernels(void); 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | void initUserGemmClKernels(void) { 26 | if(sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel != NULL) { 27 | clReleaseKernel(sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel); 28 | sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel = NULL; 29 | } 30 | if(sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel != NULL) { 31 | clReleaseKernel(sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel); 32 | sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel = NULL; 33 | } 34 | if(sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel != NULL) { 35 | clReleaseKernel(sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel); 36 | sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel = NULL; 37 | } 38 | if(sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel != NULL) { 39 | clReleaseKernel(sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel); 40 | sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel = NULL; 41 | } 42 | if(sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel != NULL) { 43 | clReleaseKernel(sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel); 44 | sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL; 45 | } 46 | if(sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel != NULL) { 47 | clReleaseKernel(sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel); 48 | sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL; 49 | } 50 | if(sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel != NULL) { 51 | clReleaseKernel(sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel); 52 | sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef USERGEMM_CL_KERNELS_H 3 | #define USERGEMM_CL_KERNELS_H 4 | 5 | #if defined( __APPLE__ ) || defined( __MACOSX ) 6 | #include 7 | #else 8 | #include 9 | #endif 10 | 11 | extern cl_kernel 
sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel; 12 | 13 | extern cl_kernel sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel; 14 | extern cl_kernel sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel; 15 | extern cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel; 16 | 17 | extern cl_kernel sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel; 18 | extern cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel; 19 | extern cl_kernel sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel; 20 | 21 | static const int user_kernel_count = 7; 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | void initUserGemmClKernels(void); 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/library/blas/AutoGemm/UserGemmKernelSources/create_user_gemm_cl_kernels.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run from same directory as this file is in 3 | Generates UserGemmClKernels.cc 4 | 5 | Would be nice to use Jinja2 for this, but using print for now, for consistency 6 | """ 7 | 8 | kernelNames = [] 9 | # lets just read the kernel names from UserGemmClKernels.h: 10 | ifile = open('UserGemmClKernels.h', 'r') 11 | contents = ifile.read() 12 | for line in contents.split('\n'): 13 | if line.find('cl_kernel') < 0: 14 | continue 15 | kernelName = line.split()[2].split(';')[0] # probably not terribly un-fragile, but works for now 16 | kernelNames.append(kernelName) 17 | ifile.close() 18 | 19 | ofile = open('UserGemmClKernels.cc', 'w') 20 | 21 | ofile.write('// GENERATED using create_user_gemm_cl_kernels.py\n') 22 | ofile.write('\n') 23 | 24 | ofile.write('#if defined( __APPLE__ ) || defined( __MACOSX )\n') 25 | ofile.write('#include \n') 26 | ofile.write('#else\n') 27 | ofile.write('#include \n') 28 | ofile.write('#endif\n') 29 | ofile.write('\n') 30 | 31 | for kernelName in kernelNames: 32 | ofile.write('cl_kernel %s = NULL;\n' % 
kernelName) 33 | ofile.write('\n') 34 | 35 | ofile.write('void initUserGemmClKernels(void) {\n') 36 | 37 | for kernelName in kernelNames: 38 | ofile.write(' if(%s != NULL) {\n' % kernelName) 39 | ofile.write(' clReleaseKernel(%s);\n' % kernelName) 40 | ofile.write(' %s = NULL;\n' % kernelName) 41 | ofile.write(' }\n') 42 | 43 | ofile.write('}\n') 44 | ofile.close() 45 | -------------------------------------------------------------------------------- /src/library/blas/functor/include/BinaryBuild.h: -------------------------------------------------------------------------------- 1 | #ifndef _BINARY_BUILD_ 2 | #define _BINARY_BUILD_ 3 | 4 | //#include "CL\opencl.h" 5 | //manage if we use cl binaries or cl source code 6 | //#define BUILD_KERNEL_FROM_STRING 1 7 | 8 | //find if we use in 32 or 64 bits ISA 9 | //extern /*char * _64Bits;*/cl_uint _64Bits; 10 | #endif //_BINARY_BUILD_ -------------------------------------------------------------------------------- /src/library/blas/functor/include/bonaire.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2014 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef _CLBLAS_FUNCTION_SELECTOR_BONAIRE_ 19 | #define _CLBLAS_FUNCTION_SELECTOR_BONAIRE_ 20 | 21 | #include 22 | //#include 23 | 24 | class FunctorSelectorBonaire : public clblasFunctorSelector 25 | { 26 | private: 27 | FunctorSelectorBonaire(); 28 | 29 | static FunctorSelectorBonaire instance; 30 | 31 | public: 32 | 33 | // we don't want to provide any DP algorithm as DP is slow on bonaire 34 | //virtual clblasDgemmFunctor * select_dgemm_specific(clblasDgemmFunctor::Args & args); 35 | virtual clblasSgemmFunctor * select_sgemm_specific(clblasSgemmFunctor::Args & args); 36 | // virtual clblasDtrsmFunctor * select_dtrsm_specific(clblasDtrsmFunctor::Args & args); 37 | 38 | }; 39 | 40 | 41 | #endif // _CLBLAS_FUNCTION_SELECTOR_BONAIRE_ 42 | -------------------------------------------------------------------------------- /src/library/blas/functor/include/gcn_dgemm.h: -------------------------------------------------------------------------------- 1 | #ifndef CLBLASDGEMMFUNCTORGCN 2 | #define CLBLASDGEMMFUNCTORGCN 3 | #include 4 | 5 | class clblasDgemmFunctorGCN : public clblasDgemmFunctor 6 | { 7 | public: 8 | 9 | // 10 | // A structure that describes a kernel variant. 11 | // 12 | // It is important that all instances of those structures shall 13 | // be const and static because their addresses are used as keys 14 | // in the internal functor cache. 15 | // 16 | // Also, they shall all have a unique kernel name. 
17 | // 18 | struct Variant 19 | { 20 | const char * kernel_name ; 21 | const char * source ; // the kernel source (shall be unique) 22 | const char * build_options; 23 | const char * bin ; 24 | size_t bin_size ; 25 | clblasTranspose transA ; // 26 | clblasTranspose transB ; // 27 | unsigned divN ; // Required divisor of N (use 1 when N can be of any value) 28 | unsigned divM ; // Required divisor of M (use 1 when M can be of any value) 29 | unsigned divK ; // Required divisor of K (use 1 when K can be of any value) 30 | size_t ls[2] ; // Local size (the work-group size) 31 | size_t bwi[2] ; // Block size work-item: Number of elements calculated by each work items 32 | // So basically each kernel is computing a block of 33 | // (ls[0]*bwi[0]) x (ls[1]*bwi[1]) 34 | // elements of C. 35 | std::string mult; 36 | 37 | } ; 38 | 39 | private: // Constructor & Destructor 40 | 41 | //clblasDgemmFunctorGCN(Args & args, const Variant * variant, cl_int & err) ; 42 | 43 | public: 44 | 45 | // Provide a suitable clblasDgemmFunctorGCN for the specified args 46 | // or NULL if none 47 | //static clblasDgemmFunctorGCN * provide(clblasDgemmFunctor::Args & args, const char* DevName) ; 48 | 49 | public: // inherited member from clblasDgemmFunctor 50 | 51 | virtual clblasStatus execute(Args &args) ; 52 | 53 | protected: 54 | 55 | cl_program m_program ; 56 | const Variant * m_variant ; // Pointer to a 'const static' object describing the kernel variant. 
57 | } ; 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /src/library/blas/functor/include/gcn_dgemmCommon.h: -------------------------------------------------------------------------------- 1 | #ifndef DGEMMMGCNCOMMON 2 | #define DGEMMMGCNCOMMON 3 | 4 | #include "gcn_dgemm.h" 5 | 6 | 7 | class clBlasGCNdgemmCommonFunctor : public clblasDgemmFunctorGCN 8 | { 9 | 10 | private: // Constructor & Destructor 11 | 12 | clBlasGCNdgemmCommonFunctor(Args & args, const Variant * variant, cl_int & err) ; 13 | 14 | public: 15 | 16 | // Provide a suitable hawaii_dgemmChannelConflict for the specified args 17 | // or NULL if none 18 | static clBlasGCNdgemmCommonFunctor * provide(clblasDgemmFunctor::Args & args, const char* DevName) ; 19 | 20 | }; 21 | 22 | #endif -------------------------------------------------------------------------------- /src/library/blas/functor/include/gcn_dgemmSmallMatrices.h: -------------------------------------------------------------------------------- 1 | #ifndef GCN_DGEMMMSMALLMATRICES 2 | #define GCN_DGEMMMSMALLMATRICES 3 | 4 | #include "gcn_dgemm.h" 5 | 6 | 7 | class clBlasGCNDgemmSmallMatricesFunctor : public clblasDgemmFunctorGCN 8 | { 9 | public: 10 | 11 | 12 | 13 | private: // Constructor & Destructor 14 | 15 | clBlasGCNDgemmSmallMatricesFunctor(Args & args, const Variant * variant, cl_int & err) ; 16 | //cl_int KernelsLaunch(cl_command_queue queue, cl_kernel Kernel[4], Args &args); 17 | 18 | public: 19 | 20 | // Provide a suitable hawaii_dgemmChannelConflict for the specified args 21 | // or NULL if none 22 | static clBlasGCNDgemmSmallMatricesFunctor * provide(clblasDgemmFunctor::Args & args, const char* DevName) ; 23 | virtual clblasStatus execute(Args &args) ; 24 | 25 | }; 26 | 27 | #endif -------------------------------------------------------------------------------- /src/library/blas/functor/include/gcn_sgemm.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CLBLASSGEMMFUNCTORGCN 2 | #define CLBLASSGEMMFUNCTORGCN 3 | #include 4 | 5 | class clblasSgemmFunctorGCN : public clblasSgemmFunctor 6 | { 7 | public: 8 | 9 | // 10 | // A structure that describes a kernel variant. 11 | // 12 | // It is important that all instances of those structures shall 13 | // be const and static because their addresses are used as keys 14 | // in the internal functor cache. 15 | // 16 | // Also, they shall all have a unique kernel name. 17 | // 18 | struct Variant 19 | { 20 | const char * kernel_name ; 21 | const char * source ; // the kernel source (shall be unique) 22 | const char * build_options; 23 | const char * bin ; 24 | size_t bin_size ; 25 | clblasTranspose transA ; // 26 | clblasTranspose transB ; // 27 | unsigned divN ; // Required divisor of N (use 1 when N can be of any value) 28 | unsigned divM ; // Required divisor of M (use 1 when M can be of any value) 29 | unsigned divK ; // Required divisor of K (use 1 when K can be of any value) 30 | size_t ls[2] ; // Local size (the work-group size) 31 | size_t bwi[2] ; // Block size work-item: Number of elements calculated by each work items 32 | // So basically each kernel is computing a block of 33 | // (ls[0]*bwi[0]) x (ls[1]*bwi[1]) 34 | // elements of C. 
35 | std::string mult; 36 | } ; 37 | 38 | private: // Constructor & Destructor 39 | 40 | clblasSgemmFunctorGCN(Args & args, const Variant * variant, cl_int & err) ; 41 | 42 | public: 43 | 44 | // Provide a suitable clblasSgemmFunctorGCN for the specified args 45 | // or NULL if none 46 | static clblasSgemmFunctorGCN * provide(clblasSgemmFunctor::Args & args, const char* DevName) ; 47 | 48 | public: // inherited member from clblasSgemmFunctor 49 | 50 | virtual clblasStatus execute(Args &args) ; 51 | 52 | protected: 53 | //we need a default constructor as we derive this class, 54 | //but we can't use the specific constructor as the arguments won't be the same (variant!!!). 55 | //Maybe it is worth revisiting this class to have something cleaner 56 | clblasSgemmFunctorGCN() : m_program(0), m_variant(0) {} // start from null instead of indeterminate values 57 | cl_program m_program ; 58 | protected: 59 | const Variant * m_variant ; // Pointer to a 'const static' object describing the kernel variant. 60 | } ; 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /src/library/blas/functor/include/gcn_sgemmSmallMatrices.h: -------------------------------------------------------------------------------- 1 | #ifndef GCN_SGEMMMSMALLMATRICES 2 | #define GCN_SGEMMMSMALLMATRICES 3 | 4 | #include "gcn_sgemm.h" 5 | 6 | 7 | class clBlasGCNSgemmSmallMatricesFunctor : public clblasSgemmFunctorGCN 8 | { 9 | public: 10 | 11 | 12 | 13 | private: // Constructor & Destructor 14 | 15 | clBlasGCNSgemmSmallMatricesFunctor(Args & args, const Variant * variant, cl_int & err) ; 16 | //cl_int KernelsLaunch(cl_command_queue queue, cl_kernel Kernel[4], Args &args); 17 | 18 | public: 19 | 20 | // Provide a suitable clBlasGCNSgemmSmallMatricesFunctor for the specified args 21 | // or NULL if none 22 | static clBlasGCNSgemmSmallMatricesFunctor * provide(clblasSgemmFunctor::Args & args, const char* DevName) ; 23 | virtual clblasStatus execute(Args &args) ; 24 | 25 | }; 26 | 27 | #endif 
-------------------------------------------------------------------------------- /src/library/blas/functor/include/gcn_zgemm.h: -------------------------------------------------------------------------------- 1 | #ifndef CLBLASZGEMMFUNCTORGCN 2 | #define CLBLASZGEMMFUNCTORGCN 3 | #include 4 | 5 | class clblasZgemmFunctorGCN : public clblasZgemmFunctor 6 | { 7 | public: 8 | 9 | // 10 | // A structure that describes a kernel variant. 11 | // 12 | // It is important that all instances of those structures shall 13 | // be const and static because their addresses are used as keys 14 | // in the internal functor cache. 15 | // 16 | // Also, they shall all have a unique kernel name. 17 | // 18 | struct Variant 19 | { 20 | const char * kernel_name ; 21 | const char * source ; // the kernel source (shall be unique) 22 | const char * build_options; 23 | const char * bin ; 24 | size_t bin_size ; 25 | clblasTranspose transA ; // 26 | clblasTranspose transB ; // 27 | unsigned divN ; // Required divisor of N (use 1 when N can be of any value) 28 | unsigned divM ; // Required divisor of M (use 1 when M can be of any value) 29 | unsigned divK ; // Required divisor of K (use 1 when K can be of any value) 30 | size_t ls[2] ; // Local size (the work-group size) 31 | size_t bwi[2] ; // Block size work-item: Number of elements calculated by each work items 32 | // So basically each kernel is computing a block of 33 | // (ls[0]*bwi[0]) x (ls[1]*bwi[1]) 34 | // elements of C. 
35 | std::string mult; 36 | } ; 37 | 38 | private: // Constructor & Destructor 39 | 40 | clblasZgemmFunctorGCN(Args & args, const Variant * variant, cl_int & err) ; 41 | 42 | public: 43 | 44 | // Provide a suitable clblasZgemmFunctorGCN for the specified args 45 | // or NULL if none 46 | static clblasZgemmFunctorGCN * provide(clblasZgemmFunctor::Args & args, const char* DevName) ; 47 | 48 | public: // inherited member from clblasZgemmFunctor 49 | 50 | virtual clblasStatus execute(Args &args) ; 51 | 52 | protected: 53 | //we need a default constructor as we derive this class, 54 | //but we can't use the specific constructor as the arguments won't be the same (variant!!!). 55 | //Maybe it is worth revisiting this class to have something cleaner 56 | clblasZgemmFunctorGCN() : m_program(0), m_variant(0) {} // start from null instead of indeterminate values 57 | cl_program m_program ; 58 | protected: 59 | const Variant * m_variant ; // Pointer to a 'const static' object describing the kernel variant. 60 | } ; 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /src/library/blas/functor/include/gpu_dtrsm.h: -------------------------------------------------------------------------------- 1 | #ifndef _CLBLAS_DTRSM_FUNCTOR_GPU_H_ 2 | #define _CLBLAS_DTRSM_FUNCTOR_GPU_H_ 3 | 4 | class clblasDtrsmFunctorGpu : public clblasDtrsmFunctor 5 | { 6 | public: 7 | 8 | 9 | private: // Constructor & Destructor 10 | 11 | clblasDtrsmFunctorGpu(Args & args, cl_int & err, const char* DevName, cl_uint _64BitsUse) ; 12 | 13 | public: 14 | 15 | // Provide a suitable clblasDtrsmFunctorGpu for the specified args 16 | // or NULL if none 17 | static clblasDtrsmFunctorGpu * provide(clblasDtrsmFunctor::Args & args, const char* DevName) ; 18 | 19 | public: // inherited member from clblasDtrsmFunctor 20 | 21 | virtual clblasStatus execute(Args &args) ; 22 | 23 | private: 24 | 25 | cl_program m_program ; 26 | } ; 27 | 28 | #endif 29 | --------------------------------------------------------------------------------
/src/library/blas/functor/include/gpu_dtrsm192.h: -------------------------------------------------------------------------------- 1 | #ifndef _CLBLAS_DTRSM192_FUNCTOR_GPU_H_ 2 | #define _CLBLAS_DTRSM192_FUNCTOR_GPU_H_ 3 | 4 | class clblasDtrsm192FunctorGpu : public clblasDtrsmFunctor 5 | { 6 | public: 7 | 8 | 9 | private: // Constructor & Destructor 10 | 11 | clblasDtrsm192FunctorGpu(Args & args, cl_int & err, const char* DevName, cl_uint _64BitsUse) ; 12 | 13 | public: 14 | 15 | // Provide a suitable clblasDtrsmFunctorTahiti for the specified args 16 | // or NULL if none 17 | static clblasDtrsm192FunctorGpu * provide(clblasDtrsmFunctor::Args & args, const char* DevName) ; 18 | 19 | public: // inherited member from clblasDtrsmFunctor 20 | 21 | virtual clblasStatus execute(Args &args) ; 22 | 23 | private: 24 | 25 | cl_program m_program ; 26 | } ; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/library/blas/functor/include/hawaii.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2014 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef _CLBLAS_FUNCTION_SELECTOR_HAWAII_ 19 | #define _CLBLAS_FUNCTION_SELECTOR_HAWAII_ 20 | 21 | #include 22 | //#include 23 | 24 | class FunctorSelectorHawaii : public clblasFunctorSelector 25 | { 26 | private: 27 | FunctorSelectorHawaii(); 28 | 29 | static FunctorSelectorHawaii instance; 30 | 31 | public: 32 | // Provide a dgemmFunctor usable only if N is a multiple of blocksize 33 | // and incx==1 34 | virtual clblasDgemmFunctor * select_dgemm_specific(clblasDgemmFunctor::Args & args); 35 | virtual clblasSgemmFunctor * select_sgemm_specific(clblasSgemmFunctor::Args & args); 36 | virtual clblasZgemmFunctor * select_zgemm_specific(clblasZgemmFunctor::Args & args); 37 | virtual clblasDtrsmFunctor * select_dtrsm_specific(clblasDtrsmFunctor::Args & args); 38 | 39 | }; 40 | 41 | 42 | #endif // _CLBLAS_FUNCTION_SELECTOR_HAWAII_ 43 | -------------------------------------------------------------------------------- /src/library/blas/functor/include/hawaii_dgemmChannelConflict.h: -------------------------------------------------------------------------------- 1 | #ifndef HAWAII_DGEMMMCHANNELCONFLICT 2 | #define HAWAII_DGEMMMCHANNELCONFLICT 3 | 4 | #include "gcn_dgemm.h" 5 | 6 | 7 | class clBlashawaiiDgemmChannelConflictFunctor : public clblasDgemmFunctorGCN 8 | { 9 | 10 | private: // Constructor & Destructor 11 | 12 | clBlashawaiiDgemmChannelConflictFunctor(Args & args, const Variant * variant, cl_int & err) ; 13 | 14 | public: 15 | 16 | // Provide a suitable hawaii_dgemmChannelConflict for the specified args 17 | // or NULL if none 18 | static clBlashawaiiDgemmChannelConflictFunctor * provide(clblasDgemmFunctor::Args & args) ; 19 | 20 | }; 21 | 22 | #endif -------------------------------------------------------------------------------- /src/library/blas/functor/include/hawaii_dgemmSplitKernel.h: -------------------------------------------------------------------------------- 
1 | #ifndef HAWAII_DGEMMMSPLITKERNEL 2 | #define HAWAII_DGEMMMSPLITKERNEL 3 | 4 | #include "gcn_dgemm.h" 5 | 6 | 7 | class clBlashawaiiDgemmSplitKernelFunctor : public clblasDgemmFunctorGCN 8 | { 9 | public: 10 | struct Variant 11 | { 12 | const char * variantName; 13 | const char * kernel_name[4] ; //order is main, row, column, single 14 | const char * source ; // the kernel source (shall be unique) 15 | const char * build_options; 16 | const char * bin ; 17 | size_t bin_size ; 18 | clblasTranspose transA ; // 19 | clblasTranspose transB ; // 20 | unsigned divK ; // Required divisor of N (use 1 when N can be of any value) 21 | size_t ls[2] ; // Local size (the work-group size) 22 | size_t bwi[2] ; // Block size work-item: Number of elements calculated by each work items 23 | // So basically each kernel is computing a block of 24 | // (ls[0]*bwi[0]) x (ls[1]*bwi[1]) 25 | // elements of C. 26 | std::string mult; 27 | 28 | } ; 29 | 30 | 31 | private: // Constructor & Destructor 32 | 33 | clBlashawaiiDgemmSplitKernelFunctor(Args & args, const Variant * variant, cl_int & err) ; 34 | cl_int KernelsLaunch(cl_command_queue queue, cl_kernel Kernel[4], Args &args); 35 | const Variant * m_variantSplit ; // Pointer to a 'const static' object describing the kernel variant. 
36 | 37 | public: 38 | 39 | // Provide a suitable hawaii_dgemmChannelConflict for the specified args 40 | // or NULL if none 41 | static clBlashawaiiDgemmSplitKernelFunctor * provide(clblasDgemmFunctor::Args & args) ; 42 | virtual clblasStatus execute(Args &args) ; 43 | 44 | }; 45 | 46 | #endif -------------------------------------------------------------------------------- /src/library/blas/functor/include/hawaii_sgemmBig1024Kernel.h: -------------------------------------------------------------------------------- 1 | /* 2 | Handles lda=ldb=4096, 5120, 7168, 8192 3 | lda=ldb=6144 should be handled by a special case in hawaii_sgemmSplitKernel 4 | */ 5 | #ifndef HAWAII_SGEMMBIG1024KERNEL 6 | #define HAWAII_SGEMMBIG1024KERNEL 7 | 8 | #include "gcn_sgemm.h" 9 | 10 | 11 | class clBlashawaiiSgemmBig1024KernelFunctor : public clblasSgemmFunctorGCN 12 | { 13 | public: 14 | struct Variant 15 | { 16 | const char * variantName; 17 | const char * kernel_name[1] ; //just one kernel here 18 | const char * source ; // the kernel source (shall be unique) 19 | const char * build_options; 20 | const char * bin ; 21 | size_t bin_size ; 22 | clblasTranspose transA ; // 23 | clblasTranspose transB ; // 24 | unsigned divK ; // Required divisor of N (use 1 when N can be of any value) 25 | size_t ls[2] ; // Local size (the work-group size) 26 | size_t bwi[2] ; // Block size work-item: Number of elements calculated by each work items 27 | // So basically each kernel is computing a block of 28 | // (ls[0]*bwi[0]) x (ls[1]*bwi[1]) 29 | // elements of C. 30 | std::string mult; 31 | 32 | } ; 33 | 34 | 35 | private: // Constructor & Destructor 36 | 37 | clBlashawaiiSgemmBig1024KernelFunctor(Args & args, const Variant * variant, cl_int & err); 38 | cl_int KernelsLaunch(cl_command_queue queue, cl_kernel Kernel[1], Args &args); 39 | const Variant * m_variantBig1024 ; // Pointer to a 'const static' object describing the kernel variant. 
40 | 41 | public: 42 | 43 | static clBlashawaiiSgemmBig1024KernelFunctor * provide(clblasSgemmFunctor::Args & args, char* DevName); 44 | virtual clblasStatus execute(Args &args) ; 45 | 46 | }; 47 | 48 | #endif -------------------------------------------------------------------------------- /src/library/blas/functor/include/hawaii_sgemmBranchKernel.h: -------------------------------------------------------------------------------- 1 | /* 2 | Handles non multiples of 16, 32, 48, 64, 94 SGEMM in one kernel 3 | Only non multiples of 32 (NT) is implemented right now. 4 | */ 5 | #ifndef HAWAII_SGEMMBRANCHKERNEL 6 | #define HAWAII_SGEMMBRANCHKERNEL 7 | 8 | #include "gcn_sgemm.h" 9 | 10 | 11 | class clBlashawaiiSgemmBranchKernelFunctor : public clblasSgemmFunctorGCN 12 | { 13 | public: 14 | struct Variant 15 | { 16 | const char * variantName; 17 | const char * kernel_name[1] ; //just one kernel here 18 | const char * source ; // the kernel source (shall be unique) 19 | const char * build_options; 20 | const char * bin ; 21 | size_t bin_size ; 22 | clblasTranspose transA ; // 23 | clblasTranspose transB ; // 24 | unsigned divK ; // Required divisor of N (use 1 when N can be of any value) 25 | size_t ls[2] ; // Local size (the work-group size) 26 | size_t bwi[2] ; // Block size work-item: Number of elements calculated by each work items 27 | // So basically each kernel is computing a block of 28 | // (ls[0]*bwi[0]) x (ls[1]*bwi[1]) 29 | // elements of C. 30 | std::string mult; 31 | 32 | } ; 33 | 34 | 35 | private: // Constructor & Destructor 36 | 37 | clBlashawaiiSgemmBranchKernelFunctor(Args & args, const Variant * variant, cl_int & err); 38 | cl_int KernelsLaunch(cl_command_queue queue, cl_kernel Kernel[1], Args &args); 39 | const Variant * m_variantBranch ; // Pointer to a 'const static' object describing the kernel variant. 
40 | 41 | public: 42 | 43 | // Provide a suitable hawaii_sgemmChannelConflict for the specified args 44 | // or NULL if none 45 | static clBlashawaiiSgemmBranchKernelFunctor * provide(clblasSgemmFunctor::Args & args, char* DevName) ; 46 | virtual clblasStatus execute(Args &args) ; 47 | 48 | }; 49 | 50 | #endif -------------------------------------------------------------------------------- /src/library/blas/functor/include/hawaii_sgemmSplit64_32.h: -------------------------------------------------------------------------------- 1 | #ifndef HAWAII_SGEMMMSPLIT64_32 2 | #define HAWAII_SGEMMMSPLIT64_32 3 | 4 | #include "gcn_sgemm.h" 5 | 6 | 7 | class clBlashawaiiSgemmSplit64_32Functor : public clblasSgemmFunctorGCN 8 | { 9 | public: 10 | struct Variant 11 | { 12 | const char * variantName; 13 | const char * kernel_name[4] ; //order is main, row, column, single 14 | const char * source ; // the kernel source (shall be unique) 15 | const char * build_options; 16 | const char * bin ; 17 | size_t bin_size ; 18 | clblasTranspose transA ; // 19 | clblasTranspose transB ; // 20 | unsigned divK ; // Required divisor of N (use 1 when N can be of any value) 21 | size_t ls[2] ; // Local size (the work-group size) 22 | size_t bwi[2] ; // Block size work-item: Number of elements calculated by each work items 23 | // So basically each kernel is computing a block of 24 | // (ls[0]*bwi[0]) x (ls[1]*bwi[1]) 25 | // elements of C. 26 | std::string mult; 27 | 28 | } ; 29 | 30 | 31 | private: // Constructor & Destructor 32 | 33 | clBlashawaiiSgemmSplit64_32Functor(Args & args, const Variant * variant, cl_int & err); 34 | cl_int KernelsLaunch(cl_command_queue queue, cl_kernel Kernel[4], Args &args); 35 | const Variant * m_variantSplit ; // Pointer to a 'const static' object describing the kernel variant. 
36 | 37 | public: 38 | 39 | // Provide a suitable hawaii_sgemmChannelConflict for the specified args 40 | // or NULL if none 41 | static clBlashawaiiSgemmSplit64_32Functor * provide(clblasSgemmFunctor::Args & args, char* DevName); 42 | virtual clblasStatus execute(Args &args) ; 43 | 44 | }; 45 | 46 | #endif -------------------------------------------------------------------------------- /src/library/blas/functor/include/hawaii_sgemmSplitKernel.h: -------------------------------------------------------------------------------- 1 | #ifndef HAWAII_SGEMMMSPLITKERNEL 2 | #define HAWAII_SGEMMMSPLITKERNEL 3 | 4 | #include "gcn_sgemm.h" 5 | 6 | 7 | class clBlashawaiiSgemmSplitKernelFunctor : public clblasSgemmFunctorGCN 8 | { 9 | public: 10 | struct Variant 11 | { 12 | const char * variantName; 13 | const char * kernel_name[4] ; //order is main, row, column, single 14 | const char * source ; // the kernel source (shall be unique) 15 | const char * build_options; 16 | const char * bin ; 17 | size_t bin_size ; 18 | clblasTranspose transA ; // 19 | clblasTranspose transB ; // 20 | unsigned divK ; // Required divisor of N (use 1 when N can be of any value) 21 | size_t ls[2] ; // Local size (the work-group size) 22 | size_t bwi[2] ; // Block size work-item: Number of elements calculated by each work items 23 | // So basically each kernel is computing a block of 24 | // (ls[0]*bwi[0]) x (ls[1]*bwi[1]) 25 | // elements of C. 26 | std::string mult; 27 | 28 | } ; 29 | 30 | 31 | private: // Constructor & Destructor 32 | 33 | clBlashawaiiSgemmSplitKernelFunctor(Args & args, const Variant * variant, cl_int & err) ; 34 | cl_int KernelsLaunch(cl_command_queue queue, cl_kernel Kernel[4], Args &args); 35 | const Variant * m_variantSplit ; // Pointer to a 'const static' object describing the kernel variant. 
36 | 37 | public: 38 | 39 | // Provide a suitable hawaii_sgemmChannelConflict for the specified args 40 | // or NULL if none 41 | static clBlashawaiiSgemmSplitKernelFunctor * provide(clblasSgemmFunctor::Args & args, char* DevName) ; 42 | virtual clblasStatus execute(Args &args) ; 43 | 44 | }; 45 | 46 | #endif -------------------------------------------------------------------------------- /src/library/blas/functor/include/tahiti.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2014 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef _CLBLAS_FUNCTION_SELECTOR_TAHITI_ 19 | #define _CLBLAS_FUNCTION_SELECTOR_TAHITI_ 20 | 21 | #include 22 | //#include 23 | 24 | class FunctorSelectorTahiti : public clblasFunctorSelector 25 | { 26 | private: 27 | FunctorSelectorTahiti(); 28 | 29 | static FunctorSelectorTahiti instance; 30 | 31 | public: 32 | // Provide a dgemmFunctor usable only if N is a multiple of blocksize 33 | // and incx==1 34 | virtual clblasDgemmFunctor * select_dgemm_specific(clblasDgemmFunctor::Args & args); 35 | virtual clblasDtrsmFunctor * select_dtrsm_specific(clblasDtrsmFunctor::Args & args); 36 | virtual clblasSgemmFunctor * select_sgemm_specific(clblasSgemmFunctor::Args & args); 37 | 38 | }; 39 | 40 | 41 | #endif // _CLBLAS_FUNCTION_SELECTOR_TAHITI_ 42 | -------------------------------------------------------------------------------- /src/library/blas/generic/blas_funcs.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | 20 | int 21 | funcBlasLevel(BlasFunctionID funcID) 22 | { 23 | switch(funcID) 24 | { 25 | case CLBLAS_SWAP: 26 | case CLBLAS_SCAL: 27 | case CLBLAS_COPY: 28 | case CLBLAS_AXPY: 29 | case CLBLAS_DOT: 30 | case CLBLAS_REDUCTION_EPILOGUE: 31 | case CLBLAS_ROTG: 32 | case CLBLAS_ROTMG: 33 | case CLBLAS_ROT: 34 | case CLBLAS_ROTM: 35 | case CLBLAS_iAMAX: 36 | case CLBLAS_NRM2: 37 | case CLBLAS_ASUM: 38 | return 1; 39 | 40 | case CLBLAS_GEMV: 41 | case CLBLAS_SYMV: 42 | case CLBLAS_TRMV: 43 | case CLBLAS_TRSV: 44 | case CLBLAS_TRSV_GEMV: 45 | case CLBLAS_HEMV: 46 | case CLBLAS_SYR: 47 | case CLBLAS_SYR2: 48 | case CLBLAS_GER: 49 | case CLBLAS_HER: 50 | case CLBLAS_HER2: 51 | case CLBLAS_TPMV: 52 | case CLBLAS_SPMV: 53 | case CLBLAS_HPMV: 54 | case CLBLAS_TPSV: 55 | case CLBLAS_SPR: 56 | case CLBLAS_SPR2: 57 | case CLBLAS_HPR: 58 | case CLBLAS_HPR2: 59 | case CLBLAS_GBMV: 60 | case CLBLAS_TBMV: 61 | case CLBLAS_SBMV: 62 | case CLBLAS_HBMV: 63 | case CLBLAS_TBSV: 64 | return 2; 65 | 66 | default: return 3; 67 | } 68 | } 69 | 70 | bool 71 | funcHasBeta(BlasFunctionID funcID) 72 | { 73 | return !funcHasTriangMatrix(funcID); 74 | } 75 | 76 | bool 77 | funcHasTriangMatrix(BlasFunctionID funcID) 78 | { 79 | bool ret = false; 80 | 81 | switch (funcID) { 82 | // go through 83 | case CLBLAS_TRMM: 84 | case CLBLAS_TRSM: 85 | case CLBLAS_TRMV: 86 | case CLBLAS_HEMV: 87 | case CLBLAS_TRSV: 88 | ret = true; 89 | break; 90 | default: 91 | /* do nothing */ 92 | break; 93 | } 94 | 95 | return ret; 96 | } 97 | -------------------------------------------------------------------------------- /src/library/blas/generic/events.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | static const size_t ALLOCATION_STEP = 100; 25 | 26 | static mutex_t *lock = NULL; 27 | static cl_event *decomposeEvents = NULL; 28 | static size_t numDecomposeEvents = 0; 29 | static size_t maxDecomposeEvents = 0; 30 | 31 | void 32 | decomposeEventsSetup(void) 33 | { 34 | lock = mutexInit(); 35 | } 36 | 37 | void 38 | decomposeEventsTeardown(void) 39 | { 40 | mutexLock(lock); 41 | 42 | if (decomposeEvents != NULL) { 43 | free(decomposeEvents); 44 | } 45 | 46 | decomposeEvents = NULL; 47 | numDecomposeEvents = 0; 48 | maxDecomposeEvents = 0; 49 | 50 | mutexDestroy(lock); 51 | lock = NULL; 52 | } 53 | 54 | cl_event* 55 | decomposeEventsAlloc(void) 56 | { 57 | cl_event* e; 58 | 59 | mutexLock(lock); 60 | 61 | if (numDecomposeEvents == maxDecomposeEvents) { 62 | e = realloc(decomposeEvents, 63 | (maxDecomposeEvents + ALLOCATION_STEP) * sizeof(cl_event)); 64 | if (e == NULL) { 65 | mutexUnlock(lock); 66 | return NULL; 67 | } 68 | decomposeEvents = e; 69 | maxDecomposeEvents += ALLOCATION_STEP; 70 | } 71 | e = &(decomposeEvents[numDecomposeEvents++]); 72 | 73 | mutexUnlock(lock); 74 | return e; 75 | } 76 | -------------------------------------------------------------------------------- /src/library/blas/generic/kernel_extra.c: 
-------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | 20 | #include "kernel_extra.h" 21 | 22 | int 23 | clblasKernelExtraCmp(const void *extra, const void *extraKey) 24 | { 25 | return memcmp(extra, extraKey, sizeof(CLBLASKernExtra)); 26 | } 27 | 28 | -------------------------------------------------------------------------------- /src/library/blas/generic/problem_iter.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef PROBLEM_ITERATOR_H_ 19 | #define PROBLEM_ITERATOR_H_ 20 | 21 | #include 22 | 23 | #include "clblas-internal.h" 24 | #include "blas_funcs.h" 25 | 26 | // Problem iterator to scatter solving, for passing over matrix A 27 | 28 | typedef struct ProblemIterator { 29 | MatrixRole mrole; 30 | size_t pos; 31 | size_t prevPos; 32 | size_t size; 33 | size_t globPitch; 34 | BlasFunctionID funcID; 35 | clblasUplo uplo; 36 | clblasSide side; 37 | DataType dtype; 38 | size_t maxPanels; 39 | size_t maxBlocks; 40 | size_t bpitch; 41 | size_t bheight; 42 | } ProblemIterator; 43 | 44 | /* 45 | * @maxBlocks: maximal number of blocks to iterate with; 46 | * There is as little as 1 iteration if it is 47 | * set to 0. 48 | */ 49 | void VISIBILITY_HIDDEN 50 | initProblemIterator( 51 | ProblemIterator *iter, 52 | BlasFunctionID funcID, 53 | MatrixRole mrole, 54 | CLBlasKargs *kargs, 55 | size_t maxPanels, 56 | size_t maxBlocks, 57 | SubproblemDim *topDim); 58 | 59 | void VISIBILITY_HIDDEN 60 | iteratorReset(ProblemIterator *iter); 61 | 62 | bool VISIBILITY_HIDDEN 63 | isIterBackward(ProblemIterator *iter); 64 | 65 | /* 66 | * Iterate in some dimension based on maximal blocks info; 67 | * Iteration for the 'SDIM_BWIDTH' component is prohibited. 
68 | * Returns 1 when achieve the end position 69 | */ 70 | int VISIBILITY_HIDDEN 71 | iterateProblem(ProblemIterator *iter); 72 | 73 | size_t VISIBILITY_HIDDEN 74 | iterLastOffset(ProblemIterator *iter); 75 | 76 | #endif /* PROBLEM_ITERATOR_H_ */ 77 | -------------------------------------------------------------------------------- /src/library/blas/generic/solution_assert.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef SOLUTION_ASSERT_H_ 19 | #define SOLUTION_ASSERT_H_ 20 | 21 | #include "solution_seq.h" 22 | 23 | #ifdef ASSERT_GRANULATION 24 | 25 | void 26 | assertGranulation( 27 | SubproblemDim *dims, 28 | unsigned int nrDims, 29 | PGranularity *pgran, 30 | unsigned int thLevel); 31 | 32 | #else // ASSERT_GRANULATION 33 | 34 | // stub, do nothing 35 | #define assertGranulation(dims, nrDims, pgran, thLevel) 36 | 37 | #endif // !ASSERT_GRANULATION 38 | 39 | #ifdef ASSERT_IMAGE_STEPS 40 | 41 | void 42 | assertImageSubstep( 43 | SolutionStep *wholeStep, 44 | SolutionStep *substep, 45 | ListHead *doneSubsteps); 46 | 47 | void 48 | assertImageStep(SolutionStep *wholeStep, ListHead *doneSubsteps); 49 | 50 | void 51 | releaseImageAssertion(ListHead *doneSubsteps); 52 | 53 | #else /* ASSERT_IMAGE_STEPS */ 54 | 55 | // stubs 56 | 57 | #define assertImageSubstep(wholeStep, substep, doneSubsteps) 58 | #define assertImageStep(wholeStep, doneSubsteps) 59 | #define releaseImageAssertion(doneSubsteps) 60 | 61 | #endif /* !ASSERT_IMAGE_STEPS */ 62 | 63 | #endif /* SOLUTION_ASSERT_H_ */ 64 | -------------------------------------------------------------------------------- /src/library/blas/gens/blas_subgroup.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #ifndef SUBGROUP_H 18 | #define SUBGROUP_H 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | #include 29 | #include "blas_kgen.h" 30 | 31 | #include "tile.h" 32 | #include "fetch.h" 33 | 34 | typedef int 35 | (*UpresProcPtr)( struct KgenContext*, 36 | BlasFunctionID, 37 | const BlasGenSettings *, 38 | UpdateResultFlags, 39 | const char *, 40 | const char *, 41 | const char *); 42 | 43 | /** 44 | */ 45 | typedef struct SubgVarNames { 46 | 47 | const char* subgCoord; // 2-vector of subgroup ID by X and Y 48 | const char* itemId; // 2-vector of subgroup item id/subgroupID 49 | } SubgVarNames; 50 | 51 | /** 52 | */ 53 | int 54 | mergeUpdateResult( struct KgenContext* pCtx, 55 | BlasFunctionID funcID, 56 | struct BlasGenSettings* pGSet, 57 | SubgVarNames* pSubgVNames, 58 | UpdateResultFlags upResFlags, 59 | UpresProcPtr upresProcPtr ); 60 | 61 | /** 62 | */ 63 | int 64 | subgGetDefaultDecomp( 65 | PGranularity *pgran, 66 | SubproblemDim *subdims, 67 | void* pArgs ); 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /src/library/blas/gens/clTemplates/asum.cl: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | static const char *asum_kernel = " 18 | #ifdef DOUBLE_PRECISION 19 | #ifdef cl_khr_fp64 20 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 21 | #else 22 | #pragma OPENCL EXTENSION cl_amd_fp64 : enable 23 | #endif 24 | #endif 25 | 26 | __kernel void %PREFIXasum_kernel( __global %TYPE *_X, __global %PTYPE *scratchBuff, uint N, uint offx, int incx) 27 | { 28 | __global %TYPE *X = _X + offx; 29 | %TYPE asum = (%TYPE) 0.0; 30 | 31 | #ifdef INCX_NEGATIVE 32 | if( get_global_id(0) == 0 ) { 33 | scratchBuff[0] = (%PTYPE)0.0; 34 | } 35 | return; 36 | #endif 37 | 38 | 39 | int gOffset; 40 | for( gOffset=(get_global_id(0) * %V); (gOffset + %V - 1) 22 | #include 23 | #include 24 | 25 | #include "../blas_kgen.h" 26 | 27 | typedef struct CopyImgFuncs { 28 | char localToImage[2][FUNC_NAME_MAXLEN]; 29 | char globalToImage[2][FUNC_NAME_MAXLEN]; 30 | char globalToLocalTransposed[2][FUNC_NAME_MAXLEN]; 31 | char globalToLocalTransposedGeneric[2][FUNC_NAME_MAXLEN]; 32 | char globalToLocal[2][FUNC_NAME_MAXLEN]; 33 | char globalToLocalGeneric[2][FUNC_NAME_MAXLEN]; 34 | char zeroBlock[2][FUNC_NAME_MAXLEN]; 35 | } CopyImgFuncs; 36 | 37 | int 38 | generateImageCopyFuncs( 39 | CopyImgFuncs *copyFuncs, 40 | struct KgenContext *ctx, 41 | BlasFunctionID funcID, 42 | const BlasGenSettings *gset); 43 | 44 | int 45 | generateResultUpdateOld( 46 | struct KgenContext *ctx, 47 | BlasFunctionID funcID, 48 | const BlasGenSettings *gset, 49 | const char *optFuncName, 50 | 
const char *genericFuncName); 51 | 52 | int 53 | genResultUpdateWithFlagsOld( 54 | struct KgenContext *ctx, 55 | BlasFunctionID funcID, 56 | const BlasGenSettings *gset, 57 | UpdateResultFlags flags, 58 | const char *optFuncName, 59 | const char *genericFuncName, 60 | const char *cachedName); 61 | 62 | int generateUpresFuncs( 63 | struct KgenContext *ctx, 64 | BlasFunctionID funcID, 65 | const BlasGenSettings *gset, 66 | char optFuncName[FUNC_NAME_MAXLEN], 67 | char genericFuncName[FUNC_NAME_MAXLEN]); 68 | 69 | int 70 | genUpresFuncsWithFlags( 71 | struct KgenContext *ctx, 72 | const BlasGenSettings *gset, 73 | UpdateResultFlags flags, 74 | char optFuncName[FUNC_NAME_MAXLEN], 75 | char genericFuncName[FUNC_NAME_MAXLEN]); 76 | 77 | #endif /* GEN_HELPER_LEGACY_H_ */ 78 | -------------------------------------------------------------------------------- /src/library/blas/gens/legacy/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
# ########################################################################


# Source files compiled into the t_blkmul executable.
set(SRC_BLKMUL
    ../blkmul.c
    ${clBLAS_SOURCE_DIR}/library/common/kerngen_core.c
    ${clBLAS_SOURCE_DIR}/library/common/kgen_basic.c
    ${clBLAS_SOURCE_DIR}/library/common/kgen_loop_helper.c
    ${clBLAS_SOURCE_DIR}/library/common/misc.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/blas_kgen.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/tile.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/tile_iter.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/legacy/blas_kgen_legacy.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/gen_helper.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/legacy/gen_helper_legacy.c
    ${clBLAS_SOURCE_DIR}/library/blas/generic/blas_funcs.c
    ${clBLAS_SOURCE_DIR}/library/blas/generic/matrix_dims.c
    ${clBLAS_SOURCE_DIR}/library/blas/generic/matrix_props.c
    ${clBLAS_SOURCE_DIR}/library/common/gens/dblock_kgen.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/tilemul.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/fetch.c
    ${clBLAS_SOURCE_DIR}/library/common/kgen_guard.c
    ${clBLAS_SOURCE_DIR}/library/common/list.c
    ${clBLAS_SOURCE_DIR}/library/common/mutex.c
    ${clBLAS_SOURCE_DIR}/library/common/trace_malloc.c
    t_blkmul.c
)

# Header search paths: OpenCL plus the clBLAS source and include trees.
include_directories(
    ${OPENCL_INCLUDE_DIRS}
    ${clBLAS_SOURCE_DIR}
    ${clBLAS_SOURCE_DIR}/include
    ${clBLAS_SOURCE_DIR}/library/blas/include
    ${clBLAS_SOURCE_DIR}/library/blas/gens
)

# Build the executable, link it against OpenCL and place it in the staging
# directory of the build tree.
add_executable(t_blkmul ${SRC_BLKMUL})
target_link_libraries(t_blkmul ${OPENCL_LIBRARIES})
set_target_properties( t_blkmul PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )

# CPack configuration; include the executable into the package
install( TARGETS t_blkmul
    RUNTIME DESTINATION bin${SUFFIX_BIN}
    LIBRARY DESTINATION lib${SUFFIX_LIB}
    ARCHIVE DESTINATION lib${SUFFIX_LIB}/import
    )

-------------------------------------------------------------------------------- /src/library/blas/gens/legacy/trsm_kgen_legacy.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef TRSM_KGEN_LEGACY_H_ 19 | #define TRSM_KGEN_LEGACY_H_ 20 | 21 | void 22 | genUpdateIntermTrsmResult( 23 | struct KgenContext *ctx, 24 | const BlasGenSettings *gset, 25 | const char *optFuncName, 26 | const char *genericFuncName, 27 | bool withMhitCond); 28 | 29 | void 30 | genHeapTrsmResultToLDS( 31 | struct KgenContext *ctx, 32 | const BlasGenSettings *gset, 33 | const char *funcName, 34 | const char *dstName); 35 | 36 | void 37 | genInvertingBlockFunc( 38 | struct KgenContext *ctx, 39 | size_t pitch, 40 | DataType dtype, 41 | KernelExtraFlags kflags); 42 | 43 | #endif /* TRSM_KGEN_LEGACY_H_ */ 44 | -------------------------------------------------------------------------------- /src/library/blas/gens/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ########################################################################


# Source files compiled into the t_tilemul executable.
set(SRC_TILEMUL
    ../tilemul.c
    ../fetch.c
    ${clBLAS_SOURCE_DIR}/library/common/kerngen_core.c
    ${clBLAS_SOURCE_DIR}/library/common/kgen_basic.c
    ${clBLAS_SOURCE_DIR}/library/common/kgen_loop_helper.c
    ${clBLAS_SOURCE_DIR}/library/common/misc.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/blas_kgen.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/tile.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/tile_iter.c
    ${clBLAS_SOURCE_DIR}/library/blas/gens/gen_helper.c
    ${clBLAS_SOURCE_DIR}/library/blas/generic/blas_funcs.c
    ${clBLAS_SOURCE_DIR}/library/blas/generic/matrix_dims.c
    ${clBLAS_SOURCE_DIR}/library/blas/generic/matrix_props.c
    ${clBLAS_SOURCE_DIR}/library/common/gens/dblock_kgen.c
    ${clBLAS_SOURCE_DIR}/library/common/kgen_guard.c
    ${clBLAS_SOURCE_DIR}/library/common/list.c
    ${clBLAS_SOURCE_DIR}/library/common/mutex.c
    ${clBLAS_SOURCE_DIR}/library/common/trace_malloc.c
    t_tilemul.c
)

# Header search paths: OpenCL plus the clBLAS source and include trees.
include_directories(
    ${OPENCL_INCLUDE_DIRS}
    ${clBLAS_SOURCE_DIR}
    ${clBLAS_SOURCE_DIR}/include
    ${clBLAS_SOURCE_DIR}/library/blas/include
    ${clBLAS_SOURCE_DIR}/library/blas/gens
)

# Build the executable, link it against OpenCL and place it in the staging
# directory of the build tree.
add_executable(t_tilemul ${SRC_TILEMUL})
target_link_libraries(t_tilemul ${OPENCL_LIBRARIES})
set_target_properties( t_tilemul PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )

# CPack configuration; include the executable into the package
install( TARGETS t_tilemul
    RUNTIME DESTINATION bin${SUFFIX_BIN}
    LIBRARY DESTINATION lib${SUFFIX_LIB}
    ARCHIVE DESTINATION lib${SUFFIX_LIB}/import
    )

--------------------------------------------------------------------------------
/src/library/blas/gens/tile_iter.h:
--------------------------------------------------------------------------------
/* ************************************************************************
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | #ifndef TILE_ITER_H 18 | #define TILE_ITER_H 19 | 20 | #include "blas_kgen.h" 21 | 22 | typedef enum TileIterFlags { 23 | // iterate in the backward direction along logical rows 24 | TILE_ITER_BACKWARD_ROWS = 0x01, 25 | // iterate in the backward direction along logical columns 26 | TILE_ITER_BACKWARD_COLS = 0x02 27 | } TileIterFlags; 28 | 29 | typedef enum PhyIterFlags { 30 | PHY_ITER_BACKWARD_LINES = 0x01, 31 | PHY_ITER_BACKWARD_VECS = 0x02, 32 | } PhyIterFlags; 33 | 34 | typedef struct PhysTileIterator { 35 | int row; // logical tile row 36 | int col; // logical tile column 37 | 38 | int phyIterFlags; 39 | int isLogRowMaj; 40 | 41 | int vecLen; 42 | 43 | int line; // physical line 44 | int vec; // vector in physical line 45 | 46 | int nrLines; // physical line number 47 | int nrVecs; // physical vec number 48 | 49 | } PhysTileIterator; 50 | 51 | //----------------------------------------------------------------------------- 52 | 53 | int 54 | iterInit(PhysTileIterator *iter, 55 | const Tile *tile, 56 | int vecLen, 57 | unsigned int tileIterFlags); 58 | 59 | int 60 | iterIterate(PhysTileIterator *iter); 61 | 62 | /* 63 | * Check if the entire tile has been iterated. Return true if the iterator is 64 | * at the next element beyond the last. 65 | */ 66 | int 67 | iterIsEnd(const PhysTileIterator *iter); 68 | 69 | int 70 | iterSeek( PhysTileIterator *iter, 71 | int row, 72 | int col ); 73 | 74 | int 75 | iterSeekPhys( PhysTileIterator *iter, 76 | int line, 77 | int vec ); 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /src/library/blas/gens/trsm_kgen.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | #include "trsm_kgen.h" 20 | 21 | void 22 | genComplexMathOperators( 23 | struct KgenContext *ctx, 24 | DataType dtype) 25 | { 26 | const char *ctype; 27 | char tmp[1024]; 28 | 29 | ctype = dtypeBuiltinType(dtype); 30 | sprintf(tmp, "%s\ndiv(%s u, %s v)\n", ctype, ctype, ctype); 31 | kgenDeclareFunction(ctx, tmp); 32 | kgenBeginFuncBody(ctx); 33 | sprintf(tmp, "return (%s)((u.x * v.x + u.y * v.y) / " 34 | "(v.x * v.x + v.y * v.y)," 35 | "(u.y * v.x - u.x * v.y) / " 36 | "(v.x * v.x + v.y * v.y));\n", ctype); 37 | kgenAddStmt(ctx, tmp); 38 | kgenEndFuncBody(ctx); 39 | kgenAddBlankLine(ctx); 40 | 41 | sprintf(tmp, "%s\nmul(%s u, %s v)\n", ctype, ctype, ctype); 42 | kgenDeclareFunction(ctx, tmp); 43 | kgenBeginFuncBody(ctx); 44 | sprintf(tmp, "return (%s)(u.x * v.x - u.y * v.y, u.x * v.y + u.y * v.x);\n", 45 | ctype); 46 | kgenAddStmt(ctx, tmp); 47 | kgenEndFuncBody(ctx); 48 | kgenAddBlankLine(ctx); 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/library/blas/gens/trsm_kgen.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef TRSM_KGEN_H_ 19 | #define TRSM_KGEN_H_ 20 | 21 | #include "blas_kgen.h" 22 | 23 | void 24 | genComplexMathOperators( 25 | struct KgenContext *ctx, 26 | DataType dtype); 27 | 28 | #endif /* TRSM_KGEN_H_ */ 29 | -------------------------------------------------------------------------------- /src/library/blas/gens/tuned_numbers.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef _TUNED_NUMBERS_ 19 | #define _TUNED_NUMBERS_ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | typedef struct blockSizes 31 | { 32 | unsigned char TY; // Not more than 32 33 | unsigned char TX; 34 | unsigned char ITEMY:7; // Not more than 8 35 | unsigned char ITEMX:7; 36 | unsigned char useBarrier:1; 37 | } blockSizes; 38 | 39 | blockSizes bestBlockSizeForDevice( SolutionStep *step ); 40 | 41 | #ifdef __cplusplus 42 | } /* extern "C" { */ 43 | #endif 44 | 45 | #endif // _TUNED_NUMBERS_ 46 | -------------------------------------------------------------------------------- /src/library/blas/gens/xxmv_common.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef XXMV_COMMON_H_ 19 | #define XXMV_COMMON_H_ 20 | 21 | #include "blas_kgen.h" 22 | #include "gen_helper.h" 23 | 24 | /* Fetch part of vector x into tile b */ 25 | void 26 | genFetchX( 27 | struct KgenContext *ctx, 28 | Tile *tile, 29 | unsigned int vecLen, 30 | DataType dtype, 31 | const KernelVarNames *varNames, 32 | TileMulFlags tflags, 33 | KernelExtraFlags kflags); 34 | 35 | void 36 | setResultPos( 37 | struct KgenContext *ctx, 38 | KernelExtraFlags kflags, 39 | const char *axVar); 40 | 41 | void 42 | updateResultVectorTiled( 43 | struct KgenContext *ctx, 44 | KernelExtraFlags kflags, 45 | unsigned int vecLen, 46 | Tile *tile); 47 | 48 | void 49 | genIncPointers( 50 | struct KgenContext *ctx, 51 | KernelExtraFlags kflags); 52 | 53 | void 54 | genStoreLocalResult( 55 | struct KgenContext *ctx, 56 | Tile *tile, 57 | const char *lid); 58 | 59 | void 60 | genAddLocalResult( 61 | struct KgenContext *ctx, 62 | Tile *tile, 63 | const char *lid, 64 | unsigned int cLocal, 65 | unsigned int bStep); 66 | 67 | /* Store partial result to private result buffer */ 68 | void 69 | genMergeResults( 70 | struct KgenContext *ctx, 71 | Tile *result, 72 | Tile *source); 73 | 74 | #endif /* XXMV_COMMON_H_ */ 75 | -------------------------------------------------------------------------------- /src/library/blas/include/blas_funcs.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | /* 19 | * BLAS function identifiers and properties 20 | */ 21 | 22 | #ifndef BLASFUNCS_H_ 23 | #define BLASFUNCS_H_ 24 | 25 | #include 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | typedef enum BlasFunctionID { 32 | CLBLAS_GEMV, 33 | CLBLAS_SYMV, 34 | CLBLAS_GEMM, 35 | CLBLAS_TRMM, 36 | CLBLAS_TRSM, 37 | CLBLAS_SYRK, 38 | CLBLAS_SYR2K, 39 | CLBLAS_TRMV, 40 | CLBLAS_HEMV, 41 | CLBLAS_TRSV, 42 | CLBLAS_TRSV_GEMV, // Kludge needed because the current "gemv" doesn't support complex types 43 | CLBLAS_SYMM, 44 | CLBLAS_SYMM_DIAGONAL, 45 | CLBLAS_HEMM_DIAGONAL, 46 | CLBLAS_GEMM2, 47 | CLBLAS_GEMM_TAIL, 48 | CLBLAS_SYR, 49 | CLBLAS_SYR2, 50 | CLBLAS_GER, 51 | CLBLAS_HER, 52 | CLBLAS_HER2, 53 | CLBLAS_HEMM, 54 | CLBLAS_HERK, 55 | CLBLAS_TPMV, 56 | CLBLAS_SPMV, 57 | CLBLAS_HPMV, 58 | CLBLAS_TPSV, 59 | CLBLAS_SPR, 60 | CLBLAS_SPR2, 61 | CLBLAS_HPR, 62 | CLBLAS_HPR2, 63 | CLBLAS_GBMV, 64 | CLBLAS_TBMV, 65 | CLBLAS_SBMV, 66 | CLBLAS_HBMV, 67 | CLBLAS_TBSV, 68 | CLBLAS_SWAP, 69 | CLBLAS_SCAL, 70 | CLBLAS_COPY, 71 | CLBLAS_AXPY, 72 | CLBLAS_DOT, 73 | CLBLAS_REDUCTION_EPILOGUE, 74 | CLBLAS_ROTG, 75 | CLBLAS_ROTMG, 76 | CLBLAS_ROT, 77 | CLBLAS_ROTM, 78 | CLBLAS_iAMAX, 79 | CLBLAS_NRM2, 80 | CLBLAS_ASUM, 81 | CLBLAS_TRANSPOSE, 82 | 83 | /* !
Must be the last */ 84 | BLAS_FUNCTIONS_NUMBER 85 | } BlasFunctionID; 86 | 87 | int funcBlasLevel(BlasFunctionID funcID); 88 | bool funcHasBeta(BlasFunctionID funcID); 89 | bool funcHasTriangMatrix(BlasFunctionID funcID); 90 | 91 | #ifdef __cplusplus 92 | } 93 | #endif 94 | 95 | #endif /* BLASFUNCS_H_ */ 96 | -------------------------------------------------------------------------------- /src/library/blas/include/events.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | /* 19 | * Events used during SolutionStep decomposition internally. 20 | */ 21 | 22 | #ifndef EVENTS_H_ 23 | #define EVENTS_H_ 24 | 25 | void decomposeEventsSetup(void); 26 | void decomposeEventsTeardown(void); 27 | cl_event* decomposeEventsAlloc(void); 28 | 29 | #endif /* EVENTS_H_ */ 30 | -------------------------------------------------------------------------------- /src/library/blas/include/matrix_dims.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef MATRIX_DIMS_H_ 19 | #define MATRIX_DIMS_H_ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | void 31 | swapDimXY(SubproblemDim *dim); 32 | 33 | size_t 34 | matrBlockPitch( 35 | const SubproblemDim *dim, 36 | MatrixRole mrole, 37 | DataType dtype, 38 | clblasSide side); 39 | 40 | cl_ulong 41 | matrBlockSize( 42 | SubproblemDim *dim, 43 | MatrixRole mrole, 44 | DataType dtype, 45 | clblasSide side); 46 | 47 | size_t 48 | matrBlockHeight( 49 | SubproblemDim *dim, 50 | MatrixRole mrole, 51 | clblasSide side); 52 | 53 | /* 54 | * Transform the respective kernel arguments to problem dimensions. 55 | * If 'offset' is set to true, then it transforms the starting offsets 56 | * to process matrices from; otherwise it transforms matrix sizes. 57 | * It ignores the 'bwidth' field in offset mode. 58 | */ 59 | void 60 | kargsToProbDims( 61 | SubproblemDim *probDim, 62 | BlasFunctionID funcID, 63 | const CLBlasKargs *kargs, 64 | bool offset); 65 | 66 | /* 67 | * Transform problem dimensions to respective kernel arguments.
68 | * In the offset mode it ignores 'offsetK' and always sets it to 0. 69 | */ 70 | void 71 | probDimsToKargs( 72 | CLBlasKargs *kargs, 73 | BlasFunctionID funcID, 74 | SubproblemDim *blasDim, 75 | bool offset); 76 | 77 | #ifdef __cplusplus 78 | } 79 | #endif 80 | 81 | #endif /* MATRIX_DIMS_H_ */ 82 | -------------------------------------------------------------------------------- /src/library/blas/include/matrix_props.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef MATRIX_PROPS_H_ 19 | #define MATRIX_PROPS_H_ 20 | 21 | #include 22 | 23 | #include "clblas-internal.h" 24 | #include "blas_funcs.h" 25 | #include "matrix_props.h" 26 | /* NOTE(review): the "matrix_props.h" include above appears to be this header including itself; the MATRIX_PROPS_H_ guard makes it a no-op. */ 27 | typedef enum MatrixRole { 28 | MATRIX_A, 29 | MATRIX_B, 30 | MATRIX_C, 31 | MATRIX_ROLES_NUMBER 32 | } MatrixRole; 33 | 34 | /* 35 | * Functions to deal with kernel extra flags 36 | */ 37 | 38 | // Whether the matrix should be conjugated 39 | bool 40 | isMatrixConj(KernelExtraFlags flags, MatrixRole mrole); 41 | 42 | /* 43 | * Whether the matrix is accessed in column-major order 44 | */ 45 | bool 46 | isMatrixAccessColMaj( 47 | BlasFunctionID funcID, 48 | KernelExtraFlags flags, 49 | MatrixRole mrole); 50 | 51 | /* 52 | * Triangularity type at the physical layout with account 53 | * of solution element indices the largest part makes 54 | * a contribution to. That means a right-side, non-transposed, 55 | * upper diagonal matrix is considered as the lower triangular 56 | * since the largest part makes a contribution to solution elements 57 | * with the highest index. 58 | */ 59 | static __inline bool 60 | isMatrixUpper(KernelExtraFlags kflags); 61 | /* Returns the XOR of three conditions: KEXTRA_UPPER_TRIANG set, KEXTRA_TRANS_A set, KEXTRA_SIDE_RIGHT set (true when an odd number of them hold). */ 62 | static __inline bool 63 | isMatrixUpper(KernelExtraFlags kflags) 64 | { 65 | return (((kflags & KEXTRA_UPPER_TRIANG) != 0) ^ 66 | ((kflags & KEXTRA_TRANS_A) != 0) ^ 67 | ((kflags & KEXTRA_SIDE_RIGHT) != 0)); 68 | } 69 | 70 | #endif /* MATRIX_PROPS_H_ */ 71 | -------------------------------------------------------------------------------- /src/library/blas/include/xgemm.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | //some help functions 18 | 19 | #ifndef CLBLAS_XGEMM_H 20 | #define CLBLAS_XGEMM_H 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | void makeGemmKernel( 27 | cl_kernel *clKernel, 28 | cl_command_queue clQueue, 29 | const char *kernelSource, 30 | const char *sourceBuildOptions, 31 | const unsigned char **kernelBinary, 32 | size_t *kernelBinarySize, 33 | const char *binaryBuildOptions); 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | #endif -------------------------------------------------------------------------------- /src/library/blas/specialCases/include/GemmSpecialCases.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2015 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #ifndef CLBLAS_GEMM_SPECIAL_CASES_H 18 | #define CLBLAS_GEMM_SPECIAL_CASES_H 19 | 20 | #include 21 | #include 22 | #include 23 | /* GEMM special-case entry point, templated on the scalar type Precision used for alpha and beta. NOTE(review): specialCaseHandled is an output reference; presumably it reports whether a special-case path was taken - confirm against the implementation. */ 24 | template <typename Precision> 25 | clblasStatus 26 | GemmSpecialCases(clblasOrder order, 27 | clblasTranspose transA, 28 | clblasTranspose transB, 29 | cl_uint M, cl_uint N, cl_uint K, 30 | Precision alpha, 31 | cl_mem A, cl_uint offA, cl_uint lda, 32 | cl_mem B, cl_uint offB, cl_uint ldb, 33 | Precision beta, 34 | cl_mem C, cl_uint offC, cl_uint ldc, 35 | cl_uint numCommandQueues, 36 | cl_command_queue *commandQueues, 37 | cl_uint numEventsInWaitList, 38 | const cl_event *eventWaitList, 39 | cl_event *events, 40 | bool &specialCaseHandled); 41 | 42 | #endif -------------------------------------------------------------------------------- /src/library/blas/trtri/TrtriClKernels.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TRTRI_CL_KERNELS_H 3 | #define TRTRI_CL_KERNELS_H 4 | #if defined( __APPLE__ ) || defined ( __MACOS ) 5 | #include 6 | #else 7 | #include "CL/cl.h" 8 | #endif 9 | 10 | /*mod 192 dtrsm*/ 11 | static cl_kernel diag_dtrtri_upper_192_12_clKernel = NULL; 12 | static cl_kernel triple_dgemm_update_192_12_R_clKernel = NULL; 13 | static cl_kernel triple_dgemm_update_192_24_PART1_R_clKernel = NULL; 14 | static cl_kernel triple_dgemm_update_192_24_PART2_R_clKernel = NULL; 15 | static cl_kernel triple_dgemm_update_192_48_PART1_R_clKernel = NULL; 16 | static cl_kernel triple_dgemm_update_192_48_PART2_R_clKernel = NULL; 17 | static cl_kernel triple_dgemm_update_192_96_PART1_R_clKernel = NULL; 18 | static cl_kernel triple_dgemm_update_192_96_PART2_R_clKernel = NULL; 19 | 20 | /*mod 128 dtrsm*/ 21 | /*upper*/ 22 | static cl_kernel diag_dtrtri_upper_128_16_clKernel = NULL; 23 | static cl_kernel triple_dgemm_update_128_16_R_clKernel = NULL; 24 | static cl_kernel triple_dgemm_update_128_32_PART1_R_clKernel = NULL;
25 | static cl_kernel triple_dgemm_update_128_32_PART2_R_clKernel = NULL; 26 | static cl_kernel triple_dgemm_update_128_64_PART1_R_clKernel = NULL; 27 | static cl_kernel triple_dgemm_update_128_64_PART2_R_clKernel = NULL; 28 | static cl_kernel triple_dgemm_update_128_ABOVE64_PART1_R_clKernel = NULL; 29 | static cl_kernel triple_dgemm_update_128_ABOVE64_PART2_R_clKernel = NULL; 30 | static cl_kernel triple_dgemm_update_128_ABOVE64_PART3_R_clKernel = NULL; 31 | 32 | /*lower*/ 33 | static cl_kernel diag_dtrtri_lower_128_16_clKernel = NULL; 34 | static cl_kernel triple_dgemm_update_128_16_PART1_L_clKernel = NULL; 35 | static cl_kernel triple_dgemm_update_128_16_PART2_L_clKernel = NULL; 36 | static cl_kernel triple_dgemm_update_128_32_PART1_L_clKernel = NULL; 37 | static cl_kernel triple_dgemm_update_128_32_PART2_L_clKernel = NULL; 38 | static cl_kernel triple_dgemm_update_128_64_PART1_L_clKernel = NULL; 39 | static cl_kernel triple_dgemm_update_128_64_PART2_L_clKernel = NULL; 40 | static cl_kernel triple_dgemm_update_128_ABOVE64_PART1_L_clKernel = NULL; 41 | static cl_kernel triple_dgemm_update_128_ABOVE64_PART2_L_clKernel = NULL; 42 | static cl_kernel triple_dgemm_update_128_ABOVE64_PART3_L_clKernel = NULL; 43 | 44 | #endif -------------------------------------------------------------------------------- /src/library/clBLAS.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix}/bin@SUFFIX_BIN@ 3 | includedir=${prefix}/include 4 | libdir=${prefix}/lib@SUFFIX_LIB@ 5 | 6 | Name: clBLAS 7 | Description: Open source OpenCL BLAS library 8 | Version: @clBLAS_VERSION@ 9 | URL: https://github.com/clMathLibraries/clBLAS 10 | 11 | Cflags: -I${includedir} 12 | Libs: -L${libdir} -lclBLAS 13 | -------------------------------------------------------------------------------- /src/library/common/misc.c: -------------------------------------------------------------------------------- 1 | /* 
************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | /* Size in bytes of one element of the given data type; an unrecognized type yields (unsigned int)(size_t)-1. */ 20 | unsigned int 21 | dtypeSize(DataType type) 22 | { 23 | size_t ret; 24 | 25 | switch (type) { 26 | case TYPE_FLOAT: 27 | ret = sizeof(cl_float); 28 | break; 29 | case TYPE_DOUBLE: 30 | ret = sizeof(cl_double); 31 | break; 32 | case TYPE_COMPLEX_FLOAT: 33 | ret = sizeof(cl_float2); 34 | break; 35 | case TYPE_COMPLEX_DOUBLE: 36 | ret = sizeof(cl_double2); 37 | break; 38 | case TYPE_UNSIGNED_INT:// For iAMAX 39 | ret = sizeof(cl_uint); 40 | break; 41 | default: 42 | ret = (size_t)-1; 43 | break; 44 | } 45 | 46 | return (unsigned int)ret; 47 | } 48 | /* Number of cl_float4 vectors needed to hold 'width' elements of 'typeSize' bytes each, rounded up. NOTE(review): sizeof(cl_float4) / typeSize is used as a divisor, so typeSize must be non-zero and no larger than sizeof(cl_float4). */ 49 | size_t 50 | fl4RowWidth(size_t width, size_t typeSize) 51 | { 52 | size_t s; 53 | /* Whole vectors first; one more is added below if a remainder is left. */ 54 | s = width / (sizeof(cl_float4) / typeSize); 55 | if (s * (sizeof(cl_float4) / typeSize) != width) { 56 | s++; 57 | } 58 | 59 | return s; 60 | } 61 | 62 | -------------------------------------------------------------------------------- /src/library/common/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | set(SRC_COMMON 18 | ../list.c 19 | ../clkern.c 20 | ../kern_cache.c 21 | ../kerngen_core.c 22 | ../kgen_basic.c 23 | ../kgen_loop_helper.c 24 | ../kgen_guard.c 25 | ../misc.c 26 | ../gens/dblock_kgen.c 27 | ../devinfo.c 28 | ../devinfo-cache.c 29 | ../mutex.c 30 | ../trace_malloc.c 31 | ) 32 | 33 | set(SRC_DBLOCK_KGEN 34 | ${SRC_COMMON} 35 | t_dblock_kgen.c 36 | ) 37 | 38 | set(SRC_GENS_CACHE 39 | ${SRC_COMMON} 40 | t_gens_cache.c 41 | ) 42 | 43 | include_directories(${OPENCL_INCLUDE_DIRS} ${clBLAS_SOURCE_DIR} ${clBLAS_SOURCE_DIR}/include ${clBLAS_SOURCE_DIR}/src/blas/include) 44 | 45 | add_executable(t_dblock_kgen ${SRC_DBLOCK_KGEN}) 46 | target_link_libraries(t_dblock_kgen ${OPENCL_LIBRARIES} ${MATH_LIBRARY}) 47 | set_target_properties( t_dblock_kgen PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) 48 | 49 | add_executable(t_gens_cache ${SRC_GENS_CACHE}) 50 | target_link_libraries(t_gens_cache ${OPENCL_LIBRARIES} ${MATH_LIBRARY}) 51 | set_target_properties( t_gens_cache PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) 52 | 53 | # CPack configuration; include the executable into the package 54 | install( TARGETS t_dblock_kgen t_gens_cache 55 | RUNTIME DESTINATION bin${SUFFIX_BIN} 56 | LIBRARY DESTINATION lib${SUFFIX_LIB} 57 | ARCHIVE DESTINATION 
lib${SUFFIX_LIB}/import 58 | ) 59 | -------------------------------------------------------------------------------- /src/library/tools/OCLBinaryGenerator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | cmake_minimum_required(VERSION 2.6) 18 | project(OCLBinaryGenerator C CXX) 19 | ADD_DEFINITIONS(/D_CRT_SECURE_NO_WARNINGS) 20 | ADD_EXECUTABLE(OCLBinaryGenerator OCLBinaryGenerator.cpp) 21 | target_link_libraries(OCLBinaryGenerator ${OPENCL_LIBRARIES}) 22 | include_directories(${OPENCL_INCLUDE_DIRS}) 23 | 24 | set_target_properties( OCLBinaryGenerator PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/staging" ) 25 | 26 | if ( MSVC ) 27 | set_target_properties( OCLBinaryGenerator PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_CURRENT_BINARY_DIR}/staging" ) 28 | set_target_properties( OCLBinaryGenerator PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_CURRENT_BINARY_DIR}/staging" ) 29 | endif( ) 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/library/tools/bingen/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | cmake_minimum_required(VERSION 2.6) 18 | project(bingen C CXX) 19 | ADD_DEFINITIONS(/D_CRT_SECURE_NO_WARNINGS) 20 | ADD_EXECUTABLE(bingen bingen.cpp) 21 | target_link_libraries(bingen ${OPENCL_LIBRARIES}) 22 | include_directories(${OPENCL_INCLUDE_DIRS}) 23 | 24 | set_target_properties( bingen PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/staging" ) 25 | 26 | if ( MSVC ) 27 | set_target_properties( bingen PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_CURRENT_BINARY_DIR}/staging" ) 28 | set_target_properties( bingen PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_CURRENT_BINARY_DIR}/staging" ) 29 | endif( ) 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/library/tools/ktest/ktest-common.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef KTEST_COMMON_H_ 19 | #define KTEST_COMMON_H_ 20 | 21 | namespace clMath { 22 | 23 | typedef enum KTestMatrixGenerator { 24 | RANDOM_MATRIX, 25 | UNIT_MATRIX, 26 | SAWTOOTH_MATRIX, 27 | 28 | N_MATRIX_GENERATORS 29 | } KTestMatrixGenerator; 30 | } 31 | 32 | #endif /* KTEST_COMMON_H_ */ 33 | -------------------------------------------------------------------------------- /src/library/tools/ktest/steps/gemm.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef KTEST_GEMM_H__ 19 | #define KTEST_GEMM_H__ 20 | 21 | #include "../step.h" 22 | 23 | namespace clMath { 24 | 25 | class GemmStep : public Step { 26 | public: 27 | GemmStep(cl_device_id device); 28 | GemmStep(ListNode *node); 29 | 30 | virtual void fixLD(); 31 | virtual void declareVars(Step *masterStep); 32 | }; 33 | 34 | } // namespace clMath 35 | 36 | #endif // KTEST_GEMM_H__ 37 | -------------------------------------------------------------------------------- /src/library/tools/ktest/steps/gemv.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef KTEST_GEMV_H__ 19 | #define KTEST_GEMV_H__ 20 | 21 | #include "../step.h" 22 | 23 | namespace clMath { 24 | 25 | class GemvStep : public Step { 26 | public: 27 | GemvStep(cl_device_id device); 28 | GemvStep(ListNode *node); 29 | 30 | virtual void fixLD(); 31 | virtual void declareVars(Step *masterStep); 32 | }; 33 | 34 | } // namespace clMath 35 | 36 | #endif // KTEST_GEMV_H__ 37 | -------------------------------------------------------------------------------- /src/library/tools/ktest/steps/symv.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef KTEST_SYMV_H__ 19 | #define KTEST_SYMV_H__ 20 | 21 | #include "../step.h" 22 | 23 | namespace clMath { 24 | 25 | class SymvStep : public Step { 26 | public: 27 | SymvStep(cl_device_id device); 28 | SymvStep(ListNode *node); 29 | 30 | virtual void fixLD(); 31 | virtual void declareVars(Step *masterStep); 32 | }; 33 | 34 | } // namespace clMath 35 | 36 | #endif // KTEST_SYMV_H__ 37 | -------------------------------------------------------------------------------- /src/library/tools/ktest/steps/syr2k.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef KTEST_SYR2K_H__ 19 | #define KTEST_SYR2K_H__ 20 | 21 | #include "../step.h" 22 | 23 | namespace clMath { 24 | /* ktest step class for SYR2K (per the header name). Derives from Step, can be constructed from an OpenCL device id or from a ListNode, and declares virtual fixLD() and declareVars(); their behavior is defined outside this header. */ 25 | class Syr2kStep : public Step { 26 | public: 27 | Syr2kStep(cl_device_id device); 28 | Syr2kStep(ListNode *node); 29 | 30 | virtual void fixLD(); 31 | virtual void declareVars(Step *masterStep); 32 | }; 33 | 34 | } // namespace clMath 35 | 36 | #endif // KTEST_SYR2K_H__ 37 | -------------------------------------------------------------------------------- /src/library/tools/ktest/steps/syrk.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef KTEST_SYRK_H__ 19 | #define KTEST_SYRK_H__ 20 | 21 | #include "../step.h" 22 | 23 | namespace clMath { 24 | /* ktest step class for SYRK (per the header name). Derives from Step, can be constructed from an OpenCL device id or from a ListNode, and declares virtual fixLD() and declareVars(); their behavior is defined outside this header. */ 25 | class SyrkStep : public Step { 26 | public: 27 | SyrkStep(cl_device_id device); 28 | SyrkStep(ListNode *node); 29 | 30 | virtual void fixLD(); 31 | virtual void declareVars(Step *masterStep); 32 | }; 33 | 34 | } // namespace clMath 35 | 36 | #endif // KTEST_SYRK_H__ 37 | -------------------------------------------------------------------------------- /src/library/tools/ktest/steps/trmm.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef KTEST_TRMM_H__ 19 | #define KTEST_TRMM_H__ 20 | 21 | #include "../step.h" 22 | 23 | namespace clMath { 24 | /* ktest step class for TRMM (per the header name). Derives from Step, can be constructed from an OpenCL device id or from a ListNode, and declares virtual fixLD() and declareVars(); their behavior is defined outside this header. */ 25 | class TrmmStep : public Step { 26 | public: 27 | TrmmStep(cl_device_id device); 28 | TrmmStep(ListNode *node); 29 | 30 | virtual void fixLD(); 31 | virtual void declareVars(Step *masterStep); 32 | }; 33 | 34 | } // namespace clMath 35 | 36 | #endif // KTEST_TRMM_H__ 37 | -------------------------------------------------------------------------------- /src/library/tools/ktest/steps/trsm.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef KTEST_TRSM_H__ 19 | #define KTEST_TRSM_H__ 20 | 21 | #include "../step.h" 22 | 23 | namespace clMath { 24 | /* ktest step class for TRSM (per the header name). Derives from Step, can be constructed from an OpenCL device id or from a ListNode, and declares virtual fixLD() and declareVars(); their behavior is defined outside this header. */ 25 | class TrsmStep : public Step { 26 | public: 27 | TrsmStep(cl_device_id device); 28 | TrsmStep(ListNode *node); 29 | 30 | virtual void fixLD(); 31 | virtual void declareVars(Step *masterStep); 32 | }; 33 | 34 | } // namespace clMath 35 | 36 | #endif // KTEST_TRSM_H__ 37 | -------------------------------------------------------------------------------- /src/library/tools/tplgen/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | 17 | cmake_minimum_required(VERSION 2.6) 18 | project(tplgen C CXX) 19 | ADD_DEFINITIONS(/D_CRT_SECURE_NO_WARNINGS) 20 | ADD_EXECUTABLE(tplgen tplgen.cpp) 21 | -------------------------------------------------------------------------------- /src/library/tools/tune/toolslib.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef TOOLSLIB_H__ 19 | #define TOOLSLIB_H__ 20 | 21 | #ifdef __APPLE__ 22 | #include 23 | #else 24 | #include 25 | #endif 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | #include 33 | 34 | // Interface for accessing saved data 35 | /* GF_* status codes; presumably the values reported by the int-returning query functions declared below -- TODO confirm against the implementation. */ 36 | #define GF_SUCCESS 0 37 | #define GF_ERROR 1 38 | #define GF_INVALID_CACHE 2 39 | #define GF_CORRUPT_FILE 3 40 | #define GF_KERNEL_NOT_FOUND 4 41 | 42 | 43 | /* 44 | * FIXME: This is a kludge for dedicated processing of the case when the matrix 45 | * leading dimension is aligned on the bank size 46 | */ 47 | #define BANK_ALIGNED_CASE_RECORD_IDX 5 48 | 49 | 50 | typedef int dimension; 51 | 52 | void 53 | initStorageCache(void); 54 | 55 | void 56 | destroyStorageCache(void); 57 | 58 | int 59 | getGranularityInfo ( 60 | TargetDevice* tdev, 61 | const char* pattName, 62 | const DataType dt, 63 | const KernelExtraFlags kflag, 64 | dimension dim, 65 | SubproblemDim* sdim, 66 | PGranularity* 67 | pgran, 68 | double* time); 69 | 70 | int 71 | getKernelInfo ( 72 | TargetDevice* tdev, 73 | const char* pattName, 74 | const DataType dt, 75 | const KernelExtraFlags kflag, 76 | dimension dim, 77 | unsigned char** bufer, 78 | size_t* sizeBufer); 79 | 80 | int getDimensionCount(TargetDevice* tdev, int func); 81 | 82 | dimension 83 | getDimensionID ( 84 |
TargetDevice* tdev, 85 | int func, 86 | size_t M, 87 | size_t N, 88 | size_t K); 89 | 90 | #endif /* TOOLSLIB_H__ */ 91 | 92 | -------------------------------------------------------------------------------- /src/library/tools/tune/tune.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef TOOLS_H__ 19 | #define TOOLS_H__ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | #include "storage_data.h" 30 | 31 | extern const char *FileID; 32 | extern const char *FileExt; 33 | extern const char *ENV_FILE_PATH; 34 | 35 | struct SubDimInfo; 36 | 37 | /* Helper declarations only; the definitions live outside this header. */ 38 | void initMask(unsigned int* mask); 39 | char* getDevName(TargetDevice* devId); 40 | void initCLDeviceInfoRec(TargetDevice* devID, DeviceInfo *devInfo); 41 | 42 | #endif /* TOOLS_H__ */ 43 | 44 | -------------------------------------------------------------------------------- /src/samples/clBlasVersion.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | /* Include CLBLAS header. It automatically includes needed OpenCL header, 23 | * so we can drop out explicit inclusion of cl.h header. 24 | */ 25 | #include 26 | 27 | /* Queries the clBLAS library version with clblasGetVersion() and prints it. Returns 0 on success, or 1 after printing the status code when the query fails. The version components are cl_uint (unsigned), so they are printed with %u. */ 28 | int 29 | main(void) 30 | { 31 | cl_uint major,minor,patch; 32 | clblasStatus err; 33 | 34 | err = clblasGetVersion(&major,&minor,&patch); 35 | if (err != clblasSuccess) { 36 | printf("clblasGetVersion() failed with %d\n", err); 37 | return 1; 38 | } 39 | printf("clblas version %u.%u.%u\n", major,minor,patch); 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /src/scripts/perf/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright 2013 Advanced Micro Devices, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ######################################################################## 16 | # Python helper scripts installed with the package: into bin${SUFFIX_BIN} on Windows, into share/clBLAS elsewhere. 17 | set(GRAPHING_SCRIPTS measurePerformance.py 18 | plotPerformance.py 19 | blasPerformanceTesting.py 20 | errorHandler.py 21 | performanceUtility.py 22 | ) 23 | 24 | if( WIN32 ) 25 | install( FILES ${GRAPHING_SCRIPTS} DESTINATION bin${SUFFIX_BIN} ) 26 | else ( ) 27 | install( FILES ${GRAPHING_SCRIPTS} DESTINATION share/clBLAS ) 28 | endif( ) 29 | -------------------------------------------------------------------------------- /src/targetver.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #pragma once 18 | 19 | // The following macros define the minimum required platform.
The minimum required platform 20 | // is the earliest version of Windows, Internet Explorer etc. that has the necessary features to run 21 | // your application. The macros work by enabling all features available on platform versions up to and 22 | // including the version specified. 23 | 24 | // Modify the following defines if you have to target a platform prior to the ones specified below. 25 | // Refer to MSDN for the latest info on corresponding values for different platforms. 26 | #ifndef _WIN32_WINNT // Specifies that the minimum required platform is Windows Vista. 27 | #define _WIN32_WINNT 0x0600 // Change this to the appropriate value to target other versions of Windows. 28 | #endif 29 | 30 | -------------------------------------------------------------------------------- /src/tests/blas-cblas.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | 20 | /* Builds a single-precision complex value from x and y. The brace initializer fills the struct in member order; presumably that is (real, imag), but the struct is declared elsewhere -- TODO confirm. */ 21 | complex 22 | compose_complex(float x, float y) 23 | { 24 | complex z = { x, y }; 25 | return z; 26 | } 27 | /* Returns the real member of z. */ 28 | float 29 | complex_real(complex z) 30 | { 31 | return z.real; 32 | } 33 | /* Returns the imag member of z. */ 34 | float 35 | complex_imag(complex z) 36 | { 37 | return z.imag; 38 | } 39 | /* Double-precision counterpart of compose_complex(); the same member-order assumption applies. */ 40 | doublecomplex 41 | compose_doublecomplex(double x, double y) 42 | { 43 | doublecomplex z = { x, y }; 44 | return z; 45 | } 46 | /* Returns the real member of z. */ 47 | double 48 | doublecomplex_real(doublecomplex z) 49 | { 50 | return z.real; 51 | } 52 | /* Returns the imag member of z. */ 53 | double 54 | doublecomplex_imag(doublecomplex z) 55 | { 56 | return z.imag; 57 | } 58 | -------------------------------------------------------------------------------- /src/tests/correctness/BlasBase-corr.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | namespace clMath { 25 | /* Scratch-image setup is disabled: the former body (two clblasAddScratchImage calls) is kept commented out, and the function unconditionally returns clblasNotImplemented. */ 26 | clblasStatus 27 | BlasBase::addScratchImages(void) 28 | { 29 | //clblasStatus status; 30 | 31 | //// Height must be less than 1024 32 | //imageA_ = clblasAddScratchImage(context_, 2048, 512, &status); 33 | //if (imageA_) { 34 | // imageB_ = clblasAddScratchImage(context_, 2048, 512, &status); 35 | //} 36 | 37 | //return status; 38 | return clblasNotImplemented; 39 | } 40 | 41 | } // namespace 42 | -------------------------------------------------------------------------------- /src/tests/correctness/delta.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | #ifndef DELTA_H_ 18 | #define DELTA_H_ 19 | 20 | #include 21 | #include 22 | 23 | // Type-dependent constants 24 | template 25 | static cl_double DELTA_0(); 26 | template<> 27 | __template_static cl_double DELTA_0() { return pow(2.0, -20); } 28 | template<> 29 | __template_static cl_double DELTA_0() { return pow(2.0, -50); } 30 | template<> 31 | __template_static cl_double DELTA_0() { return pow(2.0, -20); } 32 | template<> 33 | __template_static cl_double DELTA_0() { return pow(2.0, -50); } 34 | 35 | #endif // DELTA_H_ 36 | 37 | -------------------------------------------------------------------------------- /src/tests/correctness/tcase-filter.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | 18 | /* 19 | * Filter for skipping test cases when run time is more important than 20 | * coverage 21 | */ 22 | 23 | #ifndef TCASEFILTER_H_ 24 | #define TCASEFILTER_H_ 25 | 26 | #include 27 | /* Declared here, defined elsewhere. Presumably returns true when the case described by params may be skipped -- TODO confirm against the implementation. */ 28 | bool canCaseBeSkipped(const TestParams *params, bool isComplex); 29 | 30 | #endif /* TCASEFILTER_H_ */ 31 | -------------------------------------------------------------------------------- /src/tests/include/asum.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | using ::testing::TestWithParam; 24 | /* Parameterized gtest fixture for the ASUM tests. SetUp() unpacks the 5-element tuple (N, incx, offx, offAsum, numCommandQueues), then lets BlasBase override numCommandQueues and N when its useNumCommandQueues()/useN() flags are set; getParams() copies the values into a TestParams. NOTE(review): imageA and imageX are declared but never assigned in this header. */ 25 | class ASUM : public TestWithParam< 26 | ::std::tr1::tuple< 27 | int, // N 28 | int, // incx, should be greater than 0 29 | int, //offx 30 | int, //offa -- for offAsum 31 | int // numCommandQueues 32 | > > { 33 | public: 34 | void getParams(TestParams *params) 35 | { 36 | params->N = N; 37 | params->incx = incx; 38 | params->offBX = offx; 39 | params->offa = offAsum; 40 | params->numCommandQueues = numCommandQueues; 41 | } 42 | 43 | protected: 44 | virtual void SetUp() 45 | { 46 | //size_t lenX; 47 | 48 | N = ::std::tr1::get<0>(GetParam()); 49 | incx = ::std::tr1::get<1>(GetParam()); 50 | offx = ::std::tr1::get<2>(GetParam()); 51 | offAsum = ::std::tr1::get<3>(GetParam()); 52 | numCommandQueues = ::std::tr1::get<4>(GetParam()); 53 | 54 | base = ::clMath::BlasBase::getInstance(); 55 | 56 | useNumCommandQueues = base->useNumCommandQueues(); 57 | if (useNumCommandQueues) { 58 | numCommandQueues = base->numCommandQueues(); 59 | } 60 | 61 | if (base->useN()) { 62 | N = base->N(); 63 | } 64 | } 65 | 66 | size_t N; 67 | int incx; 68 | size_t offx, offAsum; 69 | 70 | ::clMath::BlasBase *base; 71 | cl_ulong imageA, imageX; 72 | 73 | bool useNumCommandQueues; 74 | cl_uint numCommandQueues; 75 | }; 76 | 77 | 78 | -------------------------------------------------------------------------------- /src/tests/include/axpy.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #ifndef AXPY__H_ 18 | #define AXPY__H_ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace clMath; 26 | using ::testing::TestWithParam; 27 | /* Parameterized gtest fixture for the AXPY tests. SetUp() unpacks the 7-element tuple (N, alpha, offBX, incx, offCY, incy, numCommandQueues), reads the seed from BlasBase, and lets BlasBase override numCommandQueues and N when its useNumCommandQueues()/useN() flags are set; getParams() copies the values into a TestParams. NOTE(review): useAlpha is declared but never assigned in this header. */ 28 | // Name AXPY creates problem in gTest 29 | class AXPY : public TestWithParam< 30 | 31 | ::std::tr1::tuple< 32 | int, // N 33 | ComplexLong, // alpha 34 | int, // offBX 35 | int, // incx, should not be 0 36 | int, //offCY 37 | int, //incy, should not be 0 38 | int // numCommandQueues 39 | > > { 40 | public: 41 | void getParams(TestParams *params) 42 | { 43 | params->N = N; 44 | params->alpha = paramAlpha; 45 | params->offBX = offBX; 46 | params->incx = incx; 47 | params->offCY = offCY; 48 | params->incy = incy; 49 | params->numCommandQueues = numCommandQueues; 50 | } 51 | 52 | protected: 53 | virtual void SetUp() 54 | { 55 | N = ::std::tr1::get<0>(GetParam()); 56 | paramAlpha = ::std::tr1::get<1>(GetParam()); 57 | offBX = ::std::tr1::get<2>(GetParam()); 58 | incx = ::std::tr1::get<3>(GetParam()); 59 | offCY = ::std::tr1::get<4>(GetParam()); 60 | incy = ::std::tr1::get<5>(GetParam()); 61 | numCommandQueues = ::std::tr1::get<6>(GetParam()); 62 | 63 | base = ::clMath::BlasBase::getInstance(); 64 | seed = base->seed(); 65 | 66 | useNumCommandQueues = base->useNumCommandQueues(); 67 | if (useNumCommandQueues) { 68 | numCommandQueues = base->numCommandQueues(); 69 | } 70 | 71 | if (base->useN()) { 72 | N = base->N(); 73 | } 74 | } 75 | 76 | size_t N; 77 | bool
useAlpha; 78 | ComplexLong paramAlpha; 79 | size_t offBX; 80 | int incx; 81 | size_t offCY; 82 | int incy; 83 | unsigned int seed; 84 | 85 | ::clMath::BlasBase *base; 86 | 87 | bool useNumCommandQueues; 88 | cl_uint numCommandQueues; 89 | }; 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /src/tests/include/copy.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | using ::testing::TestWithParam; 24 | /* Parameterized gtest fixture for the COPY tests. SetUp() unpacks the 6-element tuple (N, incx, incy, offx, offy, numCommandQueues), then lets BlasBase override numCommandQueues and N when its useNumCommandQueues()/useN() flags are set; getParams() copies the values into a TestParams. NOTE(review): imageA and imageX are declared but never assigned in this header. */ 25 | class COPY : public TestWithParam< 26 | ::std::tr1::tuple< 27 | int, // N 28 | int, // incx, should be greater than 0 29 | int, //incy 30 | int, //offx 31 | int, //offy 32 | int // numCommandQueues 33 | > > { 34 | public: 35 | void getParams(TestParams *params) 36 | { 37 | params->N = N; 38 | params->incx = incx; 39 | params->incy = incy; 40 | params->offBX = offx; 41 | params->offCY = offy; 42 | params->numCommandQueues = numCommandQueues; 43 | } 44 | 45 | protected: 46 | virtual void SetUp() 47 | { 48 | 49 | N = ::std::tr1::get<0>(GetParam()); 50 | incx = ::std::tr1::get<1>(GetParam()); 51 | incy = ::std::tr1::get<2>(GetParam()); 52 | offx = ::std::tr1::get<3>(GetParam()); 53 | offy = ::std::tr1::get<4>(GetParam()); 54 | numCommandQueues = ::std::tr1::get<5>(GetParam()); 55 | 56 | base = ::clMath::BlasBase::getInstance(); 57 | 58 | useNumCommandQueues = base->useNumCommandQueues(); 59 | if (useNumCommandQueues) { 60 | numCommandQueues = base->numCommandQueues(); 61 | } 62 | 63 | if (base->useN()) { 64 | N = base->N(); 65 | } 66 | } 67 | 68 | size_t N; 69 | int incx; 70 | int incy; 71 | size_t offx, offy; 72 | 73 | ::clMath::BlasBase *base; 74 | cl_ulong imageA, imageX; 75 | 76 | bool useNumCommandQueues; 77 | cl_uint numCommandQueues; 78 | }; 79 | 80 | 81 | -------------------------------------------------------------------------------- /src/tests/include/dot.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | using ::testing::TestWithParam; 24 | /* Parameterized gtest fixture for the DOT tests. SetUp() unpacks the 7-element tuple (N, incx, incy, offx, offy, offDP, numCommandQueues), then lets BlasBase override numCommandQueues and N when its useNumCommandQueues()/useN() flags are set; getParams() copies the values into a TestParams. NOTE(review): imageA and imageX are declared but never assigned in this header. */ 25 | class DOT : public TestWithParam< 26 | ::std::tr1::tuple< 27 | int, // N 28 | int, // incx, should be greater than 0 29 | int, //incy 30 | int, //offx 31 | int, //offy 32 | int, //offa -- for offDP 33 | int // numCommandQueues 34 | > > { 35 | public: 36 | void getParams(TestParams *params) 37 | { 38 | params->N = N; 39 | params->incx = incx; 40 | params->incy = incy; 41 | params->offBX = offx; 42 | params->offCY = offy; 43 | params->offa = offDP; 44 | params->numCommandQueues = numCommandQueues; 45 | } 46 | 47 | protected: 48 | virtual void SetUp() 49 | { 50 | //size_t lenX; 51 | 52 | N = ::std::tr1::get<0>(GetParam()); 53 | incx = ::std::tr1::get<1>(GetParam()); 54 | incy = ::std::tr1::get<2>(GetParam()); 55 | offx = ::std::tr1::get<3>(GetParam()); 56 | offy = ::std::tr1::get<4>(GetParam()); 57 | offDP = ::std::tr1::get<5>(GetParam()); 58 | numCommandQueues = ::std::tr1::get<6>(GetParam()); 59 | 60 | base = ::clMath::BlasBase::getInstance(); 61 | 62 | useNumCommandQueues = base->useNumCommandQueues(); 63 | if (useNumCommandQueues) { 64 | numCommandQueues = base->numCommandQueues(); 65 | } 66 | 67 | if (base->useN()) { 68 | N = base->N(); 69 | } 70 | } 71 | 72 | size_t N; 73 | int incx; 74 | int incy; 75 | size_t offx, offy, offDP; 76 | 77 | ::clMath::BlasBase *base; 78 | cl_ulong imageA, imageX; 79 | 80 | bool
useNumCommandQueues; 81 | cl_uint numCommandQueues; 82 | }; 83 | 84 | 85 | -------------------------------------------------------------------------------- /src/tests/include/dotc.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | using ::testing::TestWithParam; 24 | /* Parameterized gtest fixture for the DOTC tests; its visible members and SetUp()/getParams() logic match the DOT fixture (7-element tuple: N, incx, incy, offx, offy, offDP, numCommandQueues). NOTE(review): imageA and imageX are declared but never assigned in this header. */ 25 | class DOTC : public TestWithParam< 26 | ::std::tr1::tuple< 27 | int, // N 28 | int, // incx, should be greater than 0 29 | int, //incy 30 | int, //offx 31 | int, //offy 32 | int, //offa -- for offDP 33 | int // numCommandQueues 34 | > > { 35 | public: 36 | void getParams(TestParams *params) 37 | { 38 | params->N = N; 39 | params->incx = incx; 40 | params->incy = incy; 41 | params->offBX = offx; 42 | params->offCY = offy; 43 | params->offa = offDP; 44 | params->numCommandQueues = numCommandQueues; 45 | } 46 | 47 | protected: 48 | virtual void SetUp() 49 | { 50 | //size_t lenX; 51 | 52 | N = ::std::tr1::get<0>(GetParam()); 53 | incx = ::std::tr1::get<1>(GetParam()); 54 | incy = ::std::tr1::get<2>(GetParam()); 55 | offx = ::std::tr1::get<3>(GetParam()); 56 | offy =
::std::tr1::get<4>(GetParam()); 57 | offDP = ::std::tr1::get<5>(GetParam()); 58 | numCommandQueues = ::std::tr1::get<6>(GetParam()); 59 | 60 | base = ::clMath::BlasBase::getInstance(); 61 | 62 | useNumCommandQueues = base->useNumCommandQueues(); 63 | if (useNumCommandQueues) { 64 | numCommandQueues = base->numCommandQueues(); 65 | } 66 | 67 | if (base->useN()) { 68 | N = base->N(); 69 | } 70 | } 71 | 72 | size_t N; 73 | int incx; 74 | int incy; 75 | size_t offx, offy, offDP; 76 | 77 | ::clMath::BlasBase *base; 78 | cl_ulong imageA, imageX; 79 | 80 | bool useNumCommandQueues; 81 | cl_uint numCommandQueues; 82 | }; 83 | 84 | 85 | -------------------------------------------------------------------------------- /src/tests/include/hpmv.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef HPMV_H_ 19 | #define HPMV_H_ 20 | 21 | #define HEMV_PACKED 22 | 23 | #include 24 | 25 | #undef HEMV_PACKED 26 | 27 | #endif // HPMV_H_ -------------------------------------------------------------------------------- /src/tests/include/hpr.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | 19 | 20 | #ifndef HPR_H_ 21 | 22 | #define HPR_H_ 23 | #define HER_PACKED 24 | #include "her.h" 25 | 26 | #undef HER_PACKED 27 | 28 | #endif -------------------------------------------------------------------------------- /src/tests/include/hpr2.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #ifndef HPR2_H_ 18 | 19 | #define HPR2_H_ 20 | #define HER2_PACKED 21 | #include "her2.h" 22 | 23 | #undef HER2_PACKED 24 | 25 | #endif -------------------------------------------------------------------------------- /src/tests/include/iamax.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | using ::testing::TestWithParam; 24 | 25 | class iAMAX : public TestWithParam< 26 | ::std::tr1::tuple< 27 | int, // N 28 | int, // incx, should be greater than 0 29 | int, //offx 30 | int, //offa -- for offiAmax 31 | int // numCommandQueues 32 | > > { 33 | public: 34 | void getParams(TestParams *params) 35 | { 36 | params->N = N; 37 | params->incx = incx; 38 | params->offBX = offx; 39 | params->offa = offiAmax; 40 | params->numCommandQueues = numCommandQueues; 41 | } 42 | 43 | protected: 44 | virtual void SetUp() 45 | { 46 | N = ::std::tr1::get<0>(GetParam()); 47 | incx = ::std::tr1::get<1>(GetParam()); 48 | offx = ::std::tr1::get<2>(GetParam()); 49 | offiAmax = ::std::tr1::get<3>(GetParam()); 50 | numCommandQueues = ::std::tr1::get<4>(GetParam()); 51 | 52 | base = ::clMath::BlasBase::getInstance(); 53 | 54 | useNumCommandQueues = base->useNumCommandQueues(); 55 | if (useNumCommandQueues) { 56 | numCommandQueues = base->numCommandQueues(); 57 | } 58 | 59 | if (base->useN()) { 60 | N = base->N(); 61 | } 62 | } 63 | 64 | size_t N; 65 | int incx; 66 | size_t offx, offiAmax; 67 | 68 | ::clMath::BlasBase *base; 69 | cl_ulong imageA, imageX; 70 | 71 | bool useNumCommandQueues; 72 | cl_uint numCommandQueues; 73 | }; 74 | 75 | 76 | -------------------------------------------------------------------------------- /src/tests/include/nrm2.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | using ::testing::TestWithParam; 24 | 25 | class NRM2 : public TestWithParam< 26 | ::std::tr1::tuple< 27 | int, // N 28 | int, // incx 29 | int, // offx 30 | int, // offa -- for offNRM2 31 | int // numCommandQueues 32 | > > { 33 | public: 34 | void getParams(TestParams *params) 35 | { 36 | params->N = N; 37 | params->incx = incx; 38 | params->offBX = offx; 39 | params->offa = offNRM2; 40 | params->numCommandQueues = numCommandQueues; 41 | } 42 | 43 | protected: 44 | virtual void SetUp() 45 | { 46 | //size_t lenX; 47 | 48 | N = ::std::tr1::get<0>(GetParam()); 49 | incx = ::std::tr1::get<1>(GetParam()); 50 | offx = ::std::tr1::get<2>(GetParam()); 51 | offNRM2 = ::std::tr1::get<3>(GetParam()); 52 | numCommandQueues = ::std::tr1::get<4>(GetParam()); 53 | 54 | base = ::clMath::BlasBase::getInstance(); 55 | 56 | useNumCommandQueues = base->useNumCommandQueues(); 57 | if (useNumCommandQueues) { 58 | numCommandQueues = base->numCommandQueues(); 59 | } 60 | 61 | if (base->useN()) { 62 | N = base->N(); 63 | } 64 | } 65 | 66 | size_t N; 67 | int incx; 68 | size_t offx, offNRM2; 69 | 70 | ::clMath::BlasBase *base; 71 | cl_ulong imageA, imageX; 72 | 73 | bool useNumCommandQueues; 74 | cl_uint numCommandQueues; 75 | }; 76 | 77 | 78 | -------------------------------------------------------------------------------- /src/tests/include/rot.h: 
-------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #ifndef ROT_H_ 18 | #define ROT_H_ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace clMath; 27 | using ::testing::TestWithParam; 28 | 29 | class ROT : public TestWithParam< 30 | ::std::tr1::tuple< 31 | int, // N 32 | int, // offx 33 | int, // incx 34 | int, // offy 35 | int, // incy 36 | ComplexLong, // C 37 | ComplexLong, // S 38 | int // numCommandQueues 39 | > > 40 | { 41 | public: 42 | void getParams(TestParams *params) 43 | { 44 | params->N = N; 45 | params->offa= offa; //offx 46 | params->offb = offb; // offy 47 | params->incx = incx; 48 | params->incy = incy; 49 | params->alpha = alpha; // C 50 | params->beta = beta; //S 51 | params->numCommandQueues = numCommandQueues; 52 | } 53 | 54 | protected: 55 | virtual void SetUp() 56 | { 57 | N = ::std::tr1::get<0>(GetParam()); 58 | offa = ::std::tr1::get<1>(GetParam()); 59 | incx = ::std::tr1::get<2>(GetParam()); 60 | offb = ::std::tr1::get<3>(GetParam()); 61 | incy = ::std::tr1::get<4>(GetParam()); 62 | alpha = ::std::tr1::get<5>(GetParam()); 63 | beta = 
::std::tr1::get<6>(GetParam()); 64 | numCommandQueues = ::std::tr1::get<7>(GetParam()); 65 | 66 | base = ::clMath::BlasBase::getInstance(); 67 | 68 | useNumCommandQueues = base->useNumCommandQueues(); 69 | if (useNumCommandQueues) 70 | { 71 | numCommandQueues = base->numCommandQueues(); 72 | } 73 | } 74 | 75 | size_t N, offa, offb; 76 | int incx, incy; 77 | ComplexLong alpha; 78 | ComplexLong beta; 79 | ::clMath::BlasBase *base; 80 | 81 | bool useNumCommandQueues; 82 | cl_uint numCommandQueues; 83 | }; 84 | #endif 85 | -------------------------------------------------------------------------------- /src/tests/include/rotg.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #ifndef ROTG_H_ 18 | #define ROTG_H_ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace clMath; 27 | using ::testing::TestWithParam; 28 | 29 | class ROTG : public TestWithParam< 30 | ::std::tr1::tuple< 31 | int, //offsa 32 | int, //offsb 33 | int, //offc 34 | int, //offs 35 | int //numCommandQueues 36 | > > 37 | { 38 | public: 39 | void getParams(TestParams *params) 40 | { 41 | params->offa = offC; 42 | params->offb = offS; 43 | params->offBX = offSA; 44 | params->offCY = offSB; 45 | params->numCommandQueues = numCommandQueues; 46 | } 47 | 48 | protected: 49 | virtual void SetUp() 50 | { 51 | offSA = ::std::tr1::get<0>(GetParam()); 52 | offSB = ::std::tr1::get<1>(GetParam()); 53 | offC = ::std::tr1::get<2>(GetParam()); 54 | offS = ::std::tr1::get<3>(GetParam()); 55 | numCommandQueues = ::std::tr1::get<4>(GetParam()); 56 | 57 | base = ::clMath::BlasBase::getInstance(); 58 | 59 | useNumCommandQueues = base->useNumCommandQueues(); 60 | if (useNumCommandQueues) 61 | { 62 | numCommandQueues = base->numCommandQueues(); 63 | } 64 | } 65 | 66 | size_t offSA, offSB, offC, offS; 67 | 68 | ::clMath::BlasBase *base; 69 | 70 | bool useNumCommandQueues; 71 | cl_uint numCommandQueues; 72 | }; 73 | #endif 74 | -------------------------------------------------------------------------------- /src/tests/include/rotm.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #ifndef ROTM_H_ 18 | #define ROTM_H_ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace clMath; 27 | using ::testing::TestWithParam; 28 | 29 | class ROTM : public TestWithParam< 30 | ::std::tr1::tuple< 31 | int, // N 32 | int, // offx 33 | int, // incx 34 | int, // offy 35 | int, // incy 36 | int, // offParam 37 | ComplexLong, // SFLAG Param 38 | int // numCommandQueues 39 | > > 40 | { 41 | public: 42 | void getParams(TestParams *params) 43 | { 44 | params->N = N; 45 | params->offa= offa; // corrosponds to offx 46 | params->offb = offb; // corrosponds to offy 47 | params->offc = offc; // corrosponds to offParam 48 | params->incx = incx; 49 | params->incy = incy; 50 | params->alpha = alpha; // corrosponds to sflagparam 51 | params->numCommandQueues = numCommandQueues; 52 | } 53 | 54 | protected: 55 | virtual void SetUp() 56 | { 57 | N = ::std::tr1::get<0>(GetParam()); 58 | offa = ::std::tr1::get<1>(GetParam()); 59 | incx = ::std::tr1::get<2>(GetParam()); 60 | offb = ::std::tr1::get<3>(GetParam()); 61 | incy = ::std::tr1::get<4>(GetParam()); 62 | offc = ::std::tr1::get<5>(GetParam()); 63 | alpha = ::std::tr1::get<6>(GetParam()); 64 | numCommandQueues = ::std::tr1::get<7>(GetParam()); 65 | 66 | base = ::clMath::BlasBase::getInstance(); 67 | 68 | useNumCommandQueues = base->useNumCommandQueues(); 69 | if (useNumCommandQueues) 70 | { 71 | numCommandQueues = base->numCommandQueues(); 72 | } 73 
| } 74 | 75 | size_t N, offa, offb, offc; 76 | int incx, incy; 77 | ComplexLong alpha; 78 | ::clMath::BlasBase *base; 79 | 80 | bool useNumCommandQueues; 81 | cl_uint numCommandQueues; 82 | }; 83 | #endif 84 | -------------------------------------------------------------------------------- /src/tests/include/scal.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef SCAL_H_ 19 | #define SCAL_H_ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | using namespace clMath; 29 | using ::testing::TestWithParam; 30 | 31 | class SCAL : public TestWithParam< 32 | ::std::tr1::tuple< 33 | int, // N 34 | ComplexLong, // alpha 35 | int, // offx 36 | int, // incx 37 | int // numCommandQueues 38 | > > { 39 | public: 40 | void getParams(TestParams *params) 41 | { 42 | params->N = N; 43 | params->alpha = paramAlpha; 44 | params->offBX = offx; 45 | params->incx = incx; 46 | params->numCommandQueues = numCommandQueues; 47 | } 48 | 49 | protected: 50 | virtual void SetUp() 51 | { 52 | N = ::std::tr1::get<0>(GetParam()); 53 | paramAlpha = ::std::tr1::get<1>(GetParam()); 54 | offx = ::std::tr1::get<2>(GetParam()); 55 | incx = ::std::tr1::get<3>(GetParam()); 56 | numCommandQueues = ::std::tr1::get<4>(GetParam()); 57 | 58 | base = ::clMath::BlasBase::getInstance(); 59 | seed = base->seed(); 60 | 61 | useNumCommandQueues = base->useNumCommandQueues(); 62 | if (useNumCommandQueues) { 63 | numCommandQueues = base->numCommandQueues(); 64 | } 65 | } 66 | 67 | size_t N; 68 | unsigned int seed; 69 | size_t offx; 70 | int incx; 71 | bool useAlpha; 72 | ComplexLong paramAlpha; 73 | ::clMath::BlasBase *base; 74 | bool useNumCommandQueues; 75 | cl_uint numCommandQueues; 76 | }; 77 | 78 | #endif // SCAL_H_ 79 | -------------------------------------------------------------------------------- /src/tests/include/spr.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | #ifndef SPR_H_ 18 | #define SPR_H_ 19 | #define SYR_PACKED 20 | #include "syr.h" 21 | #undef SYR_PACKED 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/tests/include/spr2.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef SPR2_H_ 19 | 20 | #define SPR2_H_ 21 | #define SYR2_PACKED 22 | #include "syr2.h" 23 | 24 | #undef SYR2_PACKED 25 | 26 | #endif -------------------------------------------------------------------------------- /src/tests/include/swap.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #ifndef SWAP__H_ 18 | #define SWAP__H_ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace clMath; 26 | using ::testing::TestWithParam; 27 | 28 | // Name SWAP creates problem in gTest 29 | class SWAPXY : public TestWithParam< 30 | 31 | ::std::tr1::tuple< 32 | int, // N 33 | int, // offBX 34 | int, // incx, should not be 0 35 | int, //offCY 36 | int, //incy, should not be 0 37 | int // numCommandQueues 38 | > > { 39 | public: 40 | void getParams(TestParams *params) 41 | { 42 | params->N = N; 43 | params->offBX = offBX; 44 | params->incx = incx; 45 | params->offCY = offCY; 46 | params->incy = incy; 47 | params->numCommandQueues = numCommandQueues; 48 | } 49 | 50 | protected: 51 | virtual void SetUp() 52 | { 53 | N = ::std::tr1::get<0>(GetParam()); 54 | offBX = ::std::tr1::get<1>(GetParam()); 55 | incx = ::std::tr1::get<2>(GetParam()); 56 | offCY = ::std::tr1::get<3>(GetParam()); 57 | incy = ::std::tr1::get<4>(GetParam()); 58 | numCommandQueues = ::std::tr1::get<5>(GetParam()); 59 | 60 | base = ::clMath::BlasBase::getInstance(); 61 | seed = base->seed(); 62 | 63 | useNumCommandQueues = base->useNumCommandQueues(); 64 | if (useNumCommandQueues) { 65 | numCommandQueues = base->numCommandQueues(); 66 | } 67 | 68 | if (base->useN()) { 69 | N = base->N(); 70 | } 71 | } 72 | 73 | size_t N; 74 | size_t offBX; 75 | int incx; 76 | size_t offCY; 77 | int incy; 78 | unsigned int seed; 79 | 80 | ::clMath::BlasBase *base; 81 | 82 | bool useNumCommandQueues; 83 | cl_uint numCommandQueues; 84 | }; 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /src/tests/include/test-limits.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef TEST_LIMITS_H_ 19 | #define TEST_LIMITS_H_ 20 | 21 | #define FLOAT_UPPER_BOUND pow(2.0, 23) 22 | #define DOUBLE_UPPER_BOUND pow(2.0, 52) 23 | 24 | #define TRSM_FLOAT_LIMIT_A pow(2.0, 7) 25 | #define TRSM_DOUBLE_LIMIT_A pow(2.0, 5) 26 | #define TRSM_FLOAT_LIMIT_B pow(2.0, 16) 27 | #define TRSM_DOUBLE_LIMIT_B pow(2.0, 47) 28 | 29 | // Type-dependant constants 30 | template 31 | static cl_double UPPER_BOUND(); 32 | template<> 33 | __template_static cl_double UPPER_BOUND() { return FLOAT_UPPER_BOUND; } 34 | template<> 35 | __template_static cl_double UPPER_BOUND() { return DOUBLE_UPPER_BOUND;} 36 | template<> 37 | __template_static cl_double UPPER_BOUND() { return FLOAT_UPPER_BOUND; } 38 | template<> 39 | __template_static cl_double UPPER_BOUND() { return DOUBLE_UPPER_BOUND; } 40 | 41 | template 42 | static cl_double TRSM_LIMIT_A(); 43 | template<> 44 | __template_static cl_double TRSM_LIMIT_A() { return TRSM_FLOAT_LIMIT_A; } 45 | template<> 46 | __template_static cl_double TRSM_LIMIT_A() { return TRSM_DOUBLE_LIMIT_A; } 47 | template<> 48 | __template_static cl_double TRSM_LIMIT_A() { return TRSM_FLOAT_LIMIT_A; } 49 | template<> 50 | __template_static cl_double TRSM_LIMIT_A() { return TRSM_DOUBLE_LIMIT_A; } 51 | 52 | template 53 | static cl_double TRSM_LIMIT_B(); 54 | template<> 55 | 
__template_static cl_double TRSM_LIMIT_B() { return TRSM_FLOAT_LIMIT_B; } 56 | template<> 57 | __template_static cl_double TRSM_LIMIT_B() { return TRSM_DOUBLE_LIMIT_B; } 58 | template<> 59 | __template_static cl_double TRSM_LIMIT_B() { return TRSM_FLOAT_LIMIT_B; } 60 | template<> 61 | __template_static cl_double TRSM_LIMIT_B() { return TRSM_DOUBLE_LIMIT_B; } 62 | 63 | #endif /* TEST_LIMITS_H_ */ 64 | -------------------------------------------------------------------------------- /src/tests/include/testDG.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | #ifndef _TESTDG_H_ 18 | #define _TESTDG_H_ 19 | 20 | // Coming from testDG.hpp 21 | 22 | enum TRIANGLE_OPERATIONS { 23 | LTOU, 24 | UTOL, 25 | SWAP 26 | }; 27 | 28 | 29 | enum RealMatrixCreationFlags { 30 | //NO_FLAGS = 0, 31 | ROW_MAJOR_ORDER = 1, 32 | PACKED_MATRIX = 2, 33 | SYMMETRIC_MATRIX = 4, 34 | UPPER_HALF_ONLY = 8, 35 | LOWER_HALF_ONLY = 16, 36 | NO_ALIGNMENT = 32, 37 | UNIT_DIAGONAL = 64, 38 | RANDOM_INIT = 128, 39 | ZERO_DIAGONAL = 256 40 | }; 41 | 42 | #define setDiagonalUnity() setDiagonalUnityOrNonUnity(1, data, rows, cols, lda, vectorLength, creationFlags, bound) // Unity diagonal 43 | #define setDiagonalRandom() setDiagonalUnityOrNonUnity(2, data, rows, cols, lda, vectorLength, creationFlags, bound) // Random values 44 | #define setDiagonalZero() setDiagonalUnityOrNonUnity(0, data, rows, cols, lda, vectorLength, creationFlags, bound) // Zero diagonal 45 | 46 | // Column-Major is i,j replaced and RML is CMU 47 | // So CMU(i,j) will be RML(j,i) 48 | // The following is Row-Major packed 49 | #define RMLPacked(i,j) ((T*)data + ((i*(i+1))/2 + j) * vectorLength) 50 | #define RMUPacked(i,j) ((T*)data + ((i*((2* rows) + 1 - i))/2 + (j -i))* vectorLength ) 51 | 52 | #define CMUPacked(i,j) ((T*)data + ((j*(j+1))/2 + i)* vectorLength) 53 | #define CMLPacked(i,j) ((T*)data + ((j*((2*rows) + 1 - j))/2 + (i - j))* vectorLength) 54 | 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /src/tests/include/timer.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #ifndef TIMER_H_ 19 | #define TIMER_H_ 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | #if defined(_MSC_VER) 26 | 27 | typedef unsigned long long nano_time_t; 28 | #define NANOTIME_MAX (~0ULL - 1) 29 | 30 | #elif defined(__APPLE__) 31 | #include 32 | 33 | typedef uint64_t nano_time_t; 34 | #define NANOTIME_MAX (UINT64_MAX - 1) 35 | 36 | #else 37 | 38 | typedef unsigned long nano_time_t; 39 | #define NANOTIME_MAX (~0UL - 1) 40 | 41 | #endif 42 | 43 | #define NANOTIME_ERR (NANOTIME_MAX + 1) 44 | 45 | nano_time_t 46 | conv2millisec(nano_time_t t); 47 | 48 | nano_time_t 49 | conv2microsec(nano_time_t t); 50 | 51 | nano_time_t 52 | conv2nanosec(nano_time_t t); 53 | 54 | nano_time_t 55 | getCurrentTime(void); 56 | 57 | void 58 | sleepTime(nano_time_t t); 59 | 60 | #ifdef __cplusplus 61 | } /* extern "C" { */ 62 | #endif 63 | 64 | #endif /* TIMER_H_ */ 65 | -------------------------------------------------------------------------------- /src/tests/include/tpmv.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | 19 | #ifndef TPMV_H_ 20 | #define TPMV_H_ 21 | #define TRMV_PACKED 22 | #include "trmv.h" 23 | #undef TRMV_PACKED 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/tests/include/tpsv.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | 19 | #ifndef TPSV_H_ 20 | #define TPSV_H_ 21 | #define TRSV_PACKED_ 22 | #include "trsv.h" 23 | #undef TRSV_PACKED_ 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/tests/performance/PerformanceTest.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * ************************************************************************/ 16 | 17 | 18 | /* 19 | * Basic performance test case class declaration 20 | */ 21 | 22 | #ifndef PERFORMANCE_TEST_H_ 23 | #define PERFORMANCE_TEST_H_ 24 | 25 | #include 26 | #include "timer.h" 27 | #include "PerformanceRecorder.h" 28 | 29 | enum { 30 | MAX_ZMATRIX_SIZE = 3072 31 | }; 32 | 33 | namespace clMath { 34 | 35 | class PerformanceTest { 36 | public: 37 | PerformanceTest(BlasFunction function, problem_size_t prob_size) : 38 | function_(function), prob_size_(prob_size) { }; 39 | virtual ~PerformanceTest() { } 40 | 41 | /* 42 | * On runtime error returns -1; otherwise returns 1 43 | * if the CLBLAS version has been slower, otherwise returns 0 44 | * 45 | * @opFactor: scaling factor showing number of operations per each element 46 | */ 47 | int run(int opFactor); 48 | virtual int prepare(void); 49 | virtual nano_time_t etalonPerfSingle(void); 50 | virtual nano_time_t clblasPerfSingle(void); 51 | 52 | private: 53 | BlasFunction function_; 54 | problem_size_t prob_size_; 55 | }; 56 | 57 | } // namespace clMath 58 | 59 | #endif /* PERFORMANCE_TEST_H_ */ 60 | -------------------------------------------------------------------------------- /src/tests/performance/perf-trmm.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #include // srand() 19 | #include // memcpy() 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include "TrxmPerformanceTest.cpp" 29 | 30 | /* 31 | * NOTE: operation factor takes into account the same as for 32 | * gemm but also the fact that only a half of data is actually 33 | * useful 34 | */ 35 | 36 | using namespace std; 37 | using namespace clMath; 38 | 39 | 40 | // strmm performance test case 41 | TEST_P(TRMM, strmm) 42 | { 43 | TestParams params; 44 | 45 | getParams(¶ms); 46 | TrxmPerformanceTest::runInstance(FN_STRMM, ¶ms); 47 | } 48 | 49 | // dtrmm performance test case 50 | TEST_P(TRMM, dtrmm) 51 | { 52 | TestParams params; 53 | 54 | getParams(¶ms); 55 | TrxmPerformanceTest::runInstance(FN_DTRMM, ¶ms); 56 | } 57 | 58 | // ctrmm performance test case 59 | TEST_P(TRMM, ctrmm) 60 | { 61 | TestParams params; 62 | 63 | getParams(¶ms); 64 | TrxmPerformanceTest::runInstance(FN_CTRMM, ¶ms); 65 | } 66 | 67 | // ztrmm performance test case 68 | TEST_P(TRMM, ztrmm) 69 | { 70 | TestParams params; 71 | 72 | getParams(¶ms); 73 | TrxmPerformanceTest::runInstance(FN_ZTRMM, ¶ms); 74 | } 75 | -------------------------------------------------------------------------------- /src/tests/performance/perf-trsm.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright 2013 Advanced Micro Devices, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * ************************************************************************/ 16 | 17 | 18 | #include // srand() 19 | #include // memcpy() 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include "TrxmPerformanceTest.cpp" 29 | 30 | using namespace std; 31 | using namespace clMath; 32 | 33 | // strsm performance test case 34 | TEST_P(TRSM, strsm) 35 | { 36 | TestParams params; 37 | 38 | getParams(¶ms); 39 | TrxmPerformanceTest::runInstance(FN_STRSM, ¶ms); 40 | } 41 | 42 | // dtrsm performance test case 43 | TEST_P(TRSM, dtrsm) 44 | { 45 | TestParams params; 46 | 47 | getParams(¶ms); 48 | TrxmPerformanceTest::runInstance(FN_DTRSM, ¶ms); 49 | } 50 | 51 | // ctrsm performance test case 52 | TEST_P(TRSM, ctrsm) 53 | { 54 | TestParams params; 55 | 56 | getParams(¶ms); 57 | TrxmPerformanceTest::runInstance(FN_CTRSM, ¶ms); 58 | } 59 | 60 | // ztrsm performance test case 61 | TEST_P(TRSM, ztrsm) 62 | { 63 | TestParams params; 64 | 65 | getParams(¶ms); 66 | TrxmPerformanceTest::runInstance(FN_ZTRSM, ¶ms); 67 | } 68 | --------------------------------------------------------------------------------