├── .github └── workflows │ └── tests.yml ├── .gitignore ├── CMakeLists.txt ├── Changelog.txt ├── LICENSE.txt ├── Makefile ├── Makefile.external_blas ├── Makefile.rule ├── README.md ├── TODOlist.txt ├── auxiliary ├── Makefile ├── blasfeo_processor_features.c ├── blasfeo_stdlib.c ├── d_aux_common.c ├── d_aux_ext_dep.c ├── d_aux_ext_dep_common.c ├── d_aux_hp_cm.c ├── d_aux_lib4.c ├── d_aux_lib8.c ├── d_aux_ref.c ├── i_aux_ext_dep_lib.c ├── m_aux_lib.c ├── m_aux_lib44.c ├── m_aux_lib48.c ├── memory.c ├── s_aux_common.c ├── s_aux_ext_dep.c ├── s_aux_ext_dep_common.c ├── s_aux_hp_cm.c ├── s_aux_lib16.c ├── s_aux_lib4.c ├── s_aux_lib8.c ├── s_aux_ref.c ├── timing.c ├── v_aux_ext_dep_lib.c ├── x_aux_ext_dep.c ├── x_aux_ext_dep_common.c └── x_aux_ref.c ├── benchmarks ├── CMakeLists.txt ├── Makefile ├── Makefile.blas_api ├── Makefile.blasfeo_api ├── benchmark_d_blas_api.c ├── benchmark_d_blasfeo_api.c ├── benchmark_s_blas_api.c ├── benchmark_s_blasfeo_api.c ├── benchmark_x_common.h ├── cpu_freq.h.example ├── figures_benchmark_one.m ├── figures_blas_benchmark.m ├── figures_blas_comparison.m ├── figures_x_benchmark_one.m ├── x_benchmark_blas_api.c └── x_benchmark_blasfeo_api.c ├── blas_api ├── Makefile ├── archive │ ├── dgemm.c │ ├── dgetrf.c │ ├── dgetrf_np.c │ ├── dpotrf.c │ ├── dsyrk.c │ ├── dtrmm.c │ ├── dtrsm.c │ ├── sgemm.c │ └── strsm.c ├── daxpy.c ├── dcopy.c ├── ddot.c ├── dgemm_ref.c ├── dgemv_ref.c ├── dger_ref.c ├── dgesv.c ├── dgetr_ref.c ├── dgetrf_ref.c ├── dgetrs.c ├── dlaswp.c ├── dposv.c ├── dpotrf_ref.c ├── dpotrs.c ├── dsymv_ref.c ├── dsyr2k_ref.c ├── dsyrk_ref.c ├── dtrmm_ref.c ├── dtrsm_ref.c ├── dtrtrs.c ├── experimental │ ├── Makefile │ ├── dlaed0.c │ ├── dlaed1.c │ ├── dlaed3.c │ ├── dlarf.c │ ├── dlarfb.c │ ├── dlarft.c │ ├── dlatrd.c │ ├── dorm2r.c │ ├── dormqr.c │ ├── dormtr.c │ ├── dstedc.c │ ├── dsyevd.c │ ├── dsyevr.c │ ├── dsytd2.c │ └── dsytrd.c ├── saxpy.c ├── sdot.c ├── sgemm_ref.c ├── spotrf_ref.c ├── ssymv_ref.c ├── strsm_ref.c ├── 
xgemm_ref.c ├── xgemv_ref.c ├── xger_ref.c ├── xgetr_ref.c ├── xgetrf_ref.c ├── xpotrf_ref.c ├── xsymv_ref.c ├── xsyr2k_ref.c ├── xsyrk_ref.c ├── xtrmm_ref.c └── xtrsm_ref.c ├── blasfeo_hp_cm ├── Makefile ├── dgemm.c ├── dgemv.c ├── dger.c ├── dgetr.c ├── dgetrf.c ├── dpotrf.c ├── dsymv.c ├── dsyr2k.c ├── dsyrk.c ├── dtrmm.c ├── dtrsm.c ├── sgemm.c ├── spotrf.c └── strsm.c ├── blasfeo_hp_pm ├── Makefile ├── d_blas1_lib4.c ├── d_blas1_lib8.c ├── d_blas2_diag_lib.c ├── d_blas2_lib4.c ├── d_blas2_lib8.c ├── d_blas3_diag_lib4.c ├── d_blas3_diag_lib8.c ├── d_blas3_lib4.c ├── d_blas3_lib8.c ├── d_lapack_lib4.c ├── d_lapack_lib8.c ├── s_blas1_lib16.c ├── s_blas1_lib4.c ├── s_blas1_lib8.c ├── s_blas2_diag_lib.c ├── s_blas2_lib16.c ├── s_blas2_lib4.c ├── s_blas2_lib8.c ├── s_blas3_diag_lib16.c ├── s_blas3_diag_lib4.c ├── s_blas3_diag_lib8.c ├── s_blas3_lib16.c ├── s_blas3_lib4.c ├── s_blas3_lib8.c ├── s_lapack_lib16.c ├── s_lapack_lib4.c ├── s_lapack_lib8.c └── x_blas2_diag_lib.c ├── blasfeo_ref ├── Makefile ├── d_blas1_hp_cm.c ├── d_blas1_ref.c ├── d_blas2_diag_hp_cm.c ├── d_blas2_diag_ref.c ├── d_blas2_hp_cm.c ├── d_blas2_ref.c ├── d_blas3_diag_hp_cm.c ├── d_blas3_diag_ref.c ├── d_blas3_hp_cm.c ├── d_blas3_ref.c ├── d_blas3_ref_blas.c ├── d_lapack_hp_cm.c ├── d_lapack_ref.c ├── d_lapack_ref_blas.c ├── s_blas1_hp_cm.c ├── s_blas1_ref.c ├── s_blas2_diag_hp_cm.c ├── s_blas2_diag_ref.c ├── s_blas2_hp_cm.c ├── s_blas2_ref.c ├── s_blas3_diag_hp_cm.c ├── s_blas3_diag_ref.c ├── s_blas3_hp_cm.c ├── s_blas3_ref.c ├── s_blas3_ref_blas.c ├── s_lapack_hp_cm.c ├── s_lapack_ref.c ├── x_blas1_ref.c ├── x_blas2_diag_ref.c ├── x_blas2_ref.c ├── x_blas3_diag_ref.c ├── x_blas3_ref.c └── x_lapack_ref.c ├── blasfeo_target.h.in ├── blasfeo_wr ├── Makefile ├── d_blas1_lib.c ├── d_blas2_diag_lib.c ├── d_blas2_lib.c ├── d_blas3_diag_lib.c ├── d_blas3_lib.c ├── d_lapack_lib.c ├── s_blas1_lib.c ├── s_blas2_diag_lib.c ├── s_blas2_lib.c ├── s_blas3_diag_lib.c ├── s_blas3_lib.c ├── s_lapack_lib.c ├── 
x_blas1_lib.c ├── x_blas2_diag_lib.c ├── x_blas2_lib.c ├── x_blas3_diag_lib.c ├── x_blas3_lib.c └── x_lapack_lib.c ├── cmake ├── ArchitectureTests.cmake ├── TestSingleTarget.cmake ├── X64AutomaticTargetSelection.cmake ├── intrinsic_tests │ ├── intrinsic_test.c │ └── intrinsic_test.cmake └── isa_tests │ ├── TEST_AVX.S │ ├── TEST_AVX2.S │ ├── TEST_FMA.S │ ├── TEST_NEON.S │ ├── TEST_NEONv2.S │ ├── TEST_SSE3.S │ ├── TEST_VFPv3.S │ ├── TEST_VFPv4.S │ ├── isa_test.c │ └── isa_test.cmake ├── doc ├── conventions.tex ├── guide.pdf └── guide.tex ├── examples ├── CMakeLists.txt ├── Makefile ├── example_d_lq_factorization.c ├── example_d_lu_factorization.c ├── example_d_riccati_recursion.c ├── example_s_lu_factorization.c ├── example_s_riccati_recursion.c ├── example_tree_riccati_recursion.c ├── getting_started.c ├── tools.c └── tools.h ├── experimental └── giaf │ ├── AtHA.m │ └── blas │ ├── Makefile │ ├── blas │ ├── dgemm.c │ └── dpotrf.c │ ├── include │ ├── blasfeo_d_blas.h │ └── blasfeo_d_kernel.h │ ├── kernel │ ├── avx │ │ └── kernel_dgemm_4x4_lib.S │ └── avx2 │ │ ├── archive.S │ │ ├── kernel_dgemm_12x4_lib.S │ │ ├── kernel_dgemm_4x3_lib.S │ │ ├── kernel_dgemm_4x4_lib.S │ │ ├── kernel_dgemm_8x4_lib.S │ │ ├── kernel_dpack_lib4.S │ │ └── kernel_dpatr_lib4.c │ └── tests │ ├── .gitignore │ ├── benchmark.c │ ├── print_blas.m │ └── test.c ├── guidelines.md ├── include ├── blasfeo.h ├── blasfeo_align.h ├── blasfeo_block_size.h ├── blasfeo_common.h ├── blasfeo_d_aux.h ├── blasfeo_d_aux_ext_dep.h ├── blasfeo_d_aux_ext_dep_ref.h ├── blasfeo_d_aux_old.h ├── blasfeo_d_aux_ref.h ├── blasfeo_d_aux_test.h ├── blasfeo_d_blas.h ├── blasfeo_d_blas_api.h ├── blasfeo_d_blasfeo_api.h ├── blasfeo_d_blasfeo_api_ref.h ├── blasfeo_d_blasfeo_hp_api.h ├── blasfeo_d_blasfeo_ref_api.h ├── blasfeo_d_kernel.h ├── blasfeo_i_aux_ext_dep.h ├── blasfeo_m_aux.h ├── blasfeo_memory.h ├── blasfeo_naming.h ├── blasfeo_processor_features.h ├── blasfeo_s_aux.h ├── blasfeo_s_aux_ext_dep.h ├── 
blasfeo_s_aux_ext_dep_ref.h ├── blasfeo_s_aux_old.h ├── blasfeo_s_aux_ref.h ├── blasfeo_s_aux_test.h ├── blasfeo_s_blas.h ├── blasfeo_s_blas_api.h ├── blasfeo_s_blasfeo_api.h ├── blasfeo_s_blasfeo_api_ref.h ├── blasfeo_s_blasfeo_ref_api.h ├── blasfeo_s_kernel.h ├── blasfeo_stdlib.h ├── blasfeo_timing.h ├── blasfeo_v_aux_ext_dep.h ├── d_blas.h ├── d_blas_64.h ├── s_blas.h └── s_blas_64.h ├── kernel ├── Makefile ├── armv7a │ ├── Makefile │ ├── kernel_dgemm_4x4_lib.S │ ├── kernel_dgemm_4x4_lib4.S │ ├── kernel_sgemm_12x4_lib4.S │ ├── kernel_sgemm_4x4_lib.S │ ├── kernel_sgemm_4x4_lib4.S │ ├── kernel_sgemm_8x4_lib.S │ └── kernel_sgemm_8x4_lib4.S ├── armv8a │ ├── Makefile │ ├── kernel_d_aux_lib.S │ ├── kernel_dgemm_12x4_lib.S │ ├── kernel_dgemm_12x4_lib4.S │ ├── kernel_dgemm_4x4_lib.S │ ├── kernel_dgemm_4x4_lib4.S │ ├── kernel_dgemm_8x4_lib.S │ ├── kernel_dgemm_8x4_lib4.S │ ├── kernel_dgemv_4_lib.S │ ├── kernel_dgemv_4_lib4.S │ ├── kernel_dger_lib.S │ ├── kernel_dger_lib4.S │ ├── kernel_dgetr_lib.S │ ├── kernel_dgetrf_pivot_lib.c │ ├── kernel_dgetrf_pivot_lib4.c │ ├── kernel_dpack_lib4.S │ ├── kernel_sgemm_12x4_lib4.S │ ├── kernel_sgemm_16x4_lib4.S │ ├── kernel_sgemm_4x4_lib.S │ ├── kernel_sgemm_4x4_lib4.S │ ├── kernel_sgemm_8x4_lib.S │ ├── kernel_sgemm_8x4_lib4.S │ ├── kernel_sgemm_8x8_lib.S │ ├── kernel_sgemm_8x8_lib4.S │ ├── kernel_sgemv_4_lib4.S │ └── kernel_spack_lib4.S ├── avx │ ├── Makefile │ ├── archive │ │ ├── kernel_dgemm_10xX_lib4.S │ │ ├── kernel_dgemm_4x2_lib4.S │ │ ├── kernel_dgemm_6xX_lib4.S │ │ └── kernel_dgemm_8x2_lib4.S │ ├── experimental │ │ └── kernel_dpack_liba.S │ ├── issue_20 │ ├── kernel_d_aux_lib.S │ ├── kernel_dgebp_lib4.S │ ├── kernel_dgecp_lib4.c │ ├── kernel_dgemm_12x4_lib4.S │ ├── kernel_dgemm_4x4_lib.S │ ├── kernel_dgemm_4x4_lib4.S │ ├── kernel_dgemm_8x4_lib.S │ ├── kernel_dgemm_8x4_lib4.S │ ├── kernel_dgemm_diag_lib4.c │ ├── kernel_dgemv_12_lib4.S │ ├── kernel_dgemv_4_lib.S │ ├── kernel_dgemv_4_lib4.S │ ├── kernel_dgemv_8_lib4.S │ ├── 
kernel_dgeqrf_4_lib4.c │ ├── kernel_dger_lib.S │ ├── kernel_dger_lib4.S │ ├── kernel_dgetr_lib.S │ ├── kernel_dgetr_lib4.c │ ├── kernel_dgetrf_pivot_lib.c │ ├── kernel_dgetrf_pivot_lib4.c │ ├── kernel_dpack_lib4.S │ ├── kernel_dsymv_6_lib4.S │ ├── kernel_sgead_lib8.S │ ├── kernel_sgecpsc_lib8.S │ ├── kernel_sgemm_16x4_lib.S │ ├── kernel_sgemm_16x4_lib8.S │ ├── kernel_sgemm_16x8_lib8.S │ ├── kernel_sgemm_8x4_lib.S │ ├── kernel_sgemm_8x4_lib8.S │ ├── kernel_sgemm_8x8_lib.S │ ├── kernel_sgemm_8x8_lib8.S │ ├── kernel_sgemm_diag_lib8.c │ ├── kernel_sgemv_4_lib8.S │ ├── kernel_sgemv_8_lib8.S │ ├── kernel_sgetr_lib8.S │ └── kernel_spack_lib8.S ├── avx2 │ ├── Makefile │ ├── archive │ │ ├── archive.S │ │ ├── kernel_dgemm_10xX_lib4.S │ │ ├── kernel_dgemm_12x4_lib.S │ │ ├── kernel_dgemm_4x2_lib4.S │ │ ├── kernel_dgemm_4x3_lib.S │ │ ├── kernel_dgemm_4x4_lib.S │ │ ├── kernel_dgemm_6xX_lib4.S │ │ ├── kernel_dgemm_8x2_lib4.S │ │ └── kernel_dgemm_8x4_lib.S │ ├── experimental │ │ └── kernel_dgemm_12x4_lib.S │ ├── kernel_dgebp_lib4.S │ ├── kernel_dgelqf_4_lib4.S │ ├── kernel_dgelqf_4_lib4_bkp.c │ ├── kernel_dgemm_12x4_lib.S │ ├── kernel_dgemm_12x4_lib4.S │ ├── kernel_dgemm_4x4_lib.S │ ├── kernel_dgemm_4x4_lib4.S │ ├── kernel_dgemm_8x4_lib.S │ ├── kernel_dgemm_8x4_lib4.S │ ├── kernel_dgemm_8x8_lib.S │ ├── kernel_dgemm_8x8_lib4.S │ ├── kernel_dgemv_4_lib.S │ ├── kernel_dgemv_4_lib4.S │ ├── kernel_dgemv_8_lib4.S │ ├── kernel_dger_lib.S │ ├── kernel_dger_lib4.S │ ├── kernel_dgetr_lib4.c │ ├── kernel_dgetrf_pivot_lib.c │ ├── kernel_dgetrf_pivot_lib4.c │ ├── kernel_dsymv_6_lib4.S │ ├── kernel_sgemm_16x4_lib.S │ ├── kernel_sgemm_16x4_lib8.S │ ├── kernel_sgemm_16x8_lib8.S │ ├── kernel_sgemm_24x4_lib.S │ ├── kernel_sgemm_24x4_lib8.S │ ├── kernel_sgemm_8x4_lib.S │ ├── kernel_sgemm_8x4_lib8.S │ ├── kernel_sgemm_8x8_lib.S │ └── kernel_sgemm_8x8_lib8.S ├── avx512 │ ├── Makefile │ ├── kernel_dgelqf_lib8.S │ ├── kernel_dgemm_16x8_lib.S │ ├── kernel_dgemm_16x8_lib8.S │ ├── kernel_dgemm_24x8_lib.S │ 
├── kernel_dgemm_24x8_lib8.S │ ├── kernel_dgemm_8x8_lib.S │ ├── kernel_dgemm_8x8_lib8.S │ ├── kernel_dgemv_16_lib8.S │ ├── kernel_dgemv_8_lib8.S │ ├── kernel_dgeqrf_8_lib8.c │ └── kernel_dpack_lib8.S ├── avx_x86 │ ├── Makefile │ ├── kernel_dgemm_4x4_lib.S │ ├── kernel_dgemm_4x4_lib4.S │ ├── kernel_dgemv_4_lib4.S │ ├── kernel_sgemm_4x4_lib4.S │ └── kernel_sgemv_4_lib4.S ├── fma │ ├── Makefile │ ├── kernel_dgemm_4x4_lib.S │ └── kernel_dgemm_4x4_lib4.S ├── generic │ ├── Makefile │ ├── kernel_align_generic.c │ ├── kernel_d_aux_lib.c │ ├── kernel_daxpy_lib.c │ ├── kernel_ddot_lib.c │ ├── kernel_dgecp_lib4.c │ ├── kernel_dgemm_4x4_lib.c │ ├── kernel_dgemm_4x4_lib4.c │ ├── kernel_dgemm_diag_lib4.c │ ├── kernel_dgemv_4_lib.c │ ├── kernel_dgemv_4_lib4.c │ ├── kernel_dgeqrf_4_lib4.c │ ├── kernel_dger_lib.c │ ├── kernel_dger_lib4.c │ ├── kernel_dgetr_lib.c │ ├── kernel_dgetr_lib4.c │ ├── kernel_dgetrf_pivot_lib.c │ ├── kernel_dgetrf_pivot_lib4.c │ ├── kernel_dpack_buffer_lib4.c │ ├── kernel_dpack_lib4.c │ ├── kernel_dsymv_4_lib.c │ ├── kernel_dsymv_4_lib4.c │ ├── kernel_saxpy_lib.c │ ├── kernel_sdot_lib.c │ ├── kernel_sgecp_lib4.c │ ├── kernel_sgemm_4x4_lib.c │ ├── kernel_sgemm_4x4_lib4.c │ ├── kernel_sgemm_8x4_lib8.c │ ├── kernel_sgemm_diag_lib4.c │ ├── kernel_sgemv_4_lib4.c │ ├── kernel_sgetr_lib4.c │ ├── kernel_sgetrf_pivot_lib4.c │ ├── kernel_spack_lib4.c │ └── kernel_ssymv_4_lib4.c ├── sse3 │ ├── Makefile │ ├── kernel_align_x64.S │ ├── kernel_dgemm_4x4_lib.S │ ├── kernel_dgemm_4x4_lib4.S │ ├── kernel_dgemv_4_lib4.S │ ├── kernel_sgemm_4x4_lib.S │ └── kernel_sgemm_4x4_lib4.S └── sse3_x86 │ ├── Makefile │ ├── kernel_align_x86.S │ ├── kernel_dgemm_2x2_lib4.S │ ├── kernel_dgemm_4x2_lib.S │ ├── kernel_dgemm_4x2_lib4.S │ └── kernel_dgemv_4_lib4.S ├── lib └── .gitignore ├── microbenchmarks ├── Makefile └── microbenchmark_bandwidth.c ├── netlib ├── .gitignore ├── Makefile ├── Makefile.netlib_blas ├── Makefile.netlib_cblas ├── Makefile.netlib_lapack └── Makefile.netlib_lapacke ├── 
sandbox ├── .gitignore ├── Makefile ├── kernel.h ├── kernel_armv7a.S ├── kernel_armv8a.S ├── kernel_avx.S ├── kernel_avx2.S ├── kernel_avx_x86.S ├── kernel_generic.c ├── kernel_sse3.S ├── kernel_sse3_x86.S └── test_sandbox.c ├── tests ├── CMakeLists.txt ├── Makefile ├── Makefile.tpl ├── README.md ├── TODO.txt ├── classes │ ├── aux1args.c │ ├── aux2args.c │ ├── aux3args.c │ ├── blasapi_gemm.c │ ├── blasapi_getrf.c │ ├── blasapi_potrf.c │ ├── blasapi_syrk.c │ ├── blasapi_trm.c │ ├── gemm.c │ ├── geqf.c │ ├── getrf_nopivot.c │ ├── getrf_rp.c │ ├── potrf.c │ ├── potrf_mn.c │ ├── syrk.c │ └── trm.c ├── genarate_docs.py ├── routines.json ├── routines_list.md ├── routines_list.md.tpl ├── test.c ├── test_d_blas_api.c ├── test_d_blasfeo_api.c ├── test_d_common.h ├── test_s_blas_api.c ├── test_s_blasfeo_api.c ├── test_s_common.h ├── test_schema.json ├── test_valgrind.c ├── test_x.c ├── test_x_common.c ├── test_x_common.h ├── tester.py ├── testset_all.json ├── testset_ci_blas_cm_double_amd64.json ├── testset_ci_blas_cm_double_arm64.json ├── testset_ci_blas_cm_single_amd64.json ├── testset_ci_blas_cm_single_arm64.json ├── testset_ci_blas_pm_double_amd64.json ├── testset_ci_blas_pm_double_arm64.json ├── testset_ci_blas_pm_single_amd64.json ├── testset_ci_blas_pm_single_arm64.json ├── testset_ci_blasfeo_cm_double_amd64.json ├── testset_ci_blasfeo_cm_double_amd64_packalg.json ├── testset_ci_blasfeo_cm_double_arm64.json ├── testset_ci_blasfeo_cm_double_arm64_packalg.json ├── testset_ci_blasfeo_cm_single_amd64.json ├── testset_ci_blasfeo_cm_single_arm64.json ├── testset_ci_blasfeo_pm_double_amd64.json ├── testset_ci_blasfeo_pm_double_arm64.json ├── testset_ci_blasfeo_pm_single_amd64.json ├── testset_ci_blasfeo_pm_single_arm64.json ├── testset_debug.json └── testset_default.json ├── utils ├── change_dollar_immediate.sh ├── change_lic.sh ├── change_name.sh ├── new_lic_c └── new_lic_makefile └── version /.github/workflows/tests.yml: 
-------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | jobs: 10 | tests: 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: [ ubuntu-latest, ubuntu-24.04-arm, macos-13, macos-14 ] 15 | api: [ blas, blasfeo ] 16 | mf: [ cm, pm ] 17 | precision: [ single, double ] 18 | include: 19 | - os: ubuntu-latest 20 | arch: amd64 21 | - os: ubuntu-24.04-arm 22 | arch: arm64 23 | - os: macos-13 24 | arch: amd64 25 | - os: macos-14 26 | arch: arm64 27 | - api: blasfeo 28 | mf: cm 29 | precision: double 30 | packalg: _packalg 31 | 32 | runs-on: ${{ matrix.os }} 33 | 34 | steps: 35 | - uses: actions/checkout@v4 36 | 37 | - uses: conda-incubator/setup-miniconda@v3 38 | with: 39 | miniforge-version: latest 40 | channels: conda-forge 41 | activate-environment: blasfeo 42 | 43 | - name: Install dependencies 44 | shell: bash -l {0} 45 | run: | 46 | # Compiler infrastructure 47 | conda install cmake compilers make pkg-config 48 | # Dependencies 49 | conda install libblas liblapack 50 | # Python packages 51 | pip3 install jinja2 52 | 53 | - name: Run tester 54 | shell: bash -l {0} 55 | working-directory: tests 56 | run: python3 tester.py testset_ci_${{ matrix.api }}_${{ matrix.mf }}_${{ matrix.precision }}_${{ matrix.arch }}${{ matrix.packalg }}.json 57 | 58 | # debug: 59 | # strategy: 60 | # fail-fast: false 61 | 62 | # runs-on: ubuntu-24.04-arm 63 | 64 | # steps: 65 | # - uses: actions/checkout@v4 66 | 67 | # - uses: conda-incubator/setup-miniconda@v3 68 | # with: 69 | # miniforge-version: latest 70 | # channels: conda-forge 71 | # activate-environment: blasfeo 72 | 73 | # - name: Install dependencies 74 | # shell: bash -l {0} 75 | # run: | 76 | # # Compiler infrastructure 77 | # conda install cmake compilers make pkg-config 78 | # # Dependencies 79 | # conda install libblas liblapack 80 | # # Python packages 81 | # pip3 install jinja2 82 | 83 | # - name: Run 
tester 84 | # shell: bash -l {0} 85 | # working-directory: tests 86 | # run: python3 tester.py testset_debug.json 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw[a-z] 2 | *.s 3 | *.o 4 | *.out 5 | include/blasfeo_target.h 6 | octave-workspace 7 | *.local 8 | *_local.json 9 | *.aux 10 | *.toc 11 | *.log 12 | figures/ 13 | libblasfeo.a 14 | cpu_freq.h 15 | .vscode 16 | 17 | # test 18 | tests/build 19 | tests/reports 20 | tests/libblasfeo_ref.a 21 | 22 | # CMake 23 | build* 24 | Build* 25 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BLASFEO -- BLAS For Embedded Optimization. 2 | Copyright (C) 2019 by Gianluca Frison. 3 | Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. 4 | All rights reserved. 5 | 6 | The 2-Clause BSD License 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 13 | 2. Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /TODOlist.txt: -------------------------------------------------------------------------------- 1 | - syrk_potrf_ln_mn 2 | - alpha for trsm 3 | - kernels and _mn_ version of trmv 4 | - kernel dsymv dgemv_nt 4 avx 5 | - remove n from trmv 6 | - store_gen in single precision 7 | - clean target.h and create it also from cmake (see "file") 8 | 9 | 10 | 11 | - (some) octave dependencies: 12 | BLAS: 13 | * dot 14 | * gemv 15 | * gemm 16 | * syrk 17 | LAPACK: 18 | * geqrf 19 | * getrf 20 | * getri 21 | * getrs 22 | * potrf 23 | * potri 24 | * potrs 25 | * trtri 26 | * trtrs 27 | -------------------------------------------------------------------------------- /benchmarks/benchmark_x_common.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. 
* 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | /* Take GHZ_MAX from cpu_freq.h unless it is already defined when this header is reached. */ 36 | #if !defined(GHZ_MAX) 37 | #include "cpu_freq.h" 38 | #if !defined(GHZ_MAX) 39 | /* Fallback main(), compiled only when GHZ_MAX is still undefined after including cpu_freq.h: prints instructions for setting GHZ_MAX and returns 0. NOTE(review): presumably the including benchmark guards its own main() on GHZ_MAX as well - confirm. */ int main() 40 | { 41 | printf("\n\n Define GHZ_MAX=x.x in the header file cpu_freq.h,\n"); 42 | printf("or recompile this benchmarks with GHZ_MAX=x.x\n"); 43 | printf("where x.x is the max frequency of the CPU of your current machine.\n"); 44 | return 0; 45 | } 46 | #endif 47 | #endif 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /benchmarks/cpu_freq.h.example: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. 
* 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | /* 37 | #ifndef GHZ_MAX 38 | #define GHZ_MAX x.x 39 | #endif 40 | */ 41 | -------------------------------------------------------------------------------- /benchmarks/figures_benchmark_one.m: -------------------------------------------------------------------------------- 1 | % print blas 2 | 3 | benchmark_one 4 | 5 | Gflops_max = A(1)*A(2); 6 | 7 | f1 = figure(); 8 | plot(B(:,1), B(:,2), 'b'); 9 | hold on 10 | %plot(B(:,1), B(:,4), 'g'); 11 | %plot(B(:,1), B(:,6), 'r'); 12 | hold off 13 | 14 | axis([0 300 0 Gflops_max]); 15 | %legend('blas', 'blas_api', 'blasfeo_api', 'Location', 'SouthEast'); 16 | legend('blasfeo_api', 'Location', 'SouthEast'); 17 | xlabel('matrix size n') 18 | ylabel('Gflops') 19 | grid on 20 | 21 | file_name_eps = ['benchmark_one.eps']; 22 | file_name_pdf = ['benchmark_one.pdf']; 23 | print(f1, file_name_eps, '-depsc') 24 | system(['epstopdf ', file_name_eps, ' -out ', file_name_pdf]); 25 | 
system(['rm ', file_name_eps]); 26 | 27 | -------------------------------------------------------------------------------- /benchmarks/figures_x_benchmark_one.m: -------------------------------------------------------------------------------- 1 | % routine to plot figures from data in the folder 2 | 3 | %routine = 'dgemm'; 4 | routine = 'dtrmm'; 5 | 6 | routines = dir('*.m'); 7 | 8 | routines.name; 9 | 10 | num_all = length(routines); 11 | 12 | f1 = figure(); 13 | 14 | Gflops_max = 0.0; 15 | 16 | for ii=1:num_all 17 | 18 | name = routines(ii).name; 19 | tokens = strsplit(name, '_'); 20 | 21 | if (strcmp(tokens{1}, routine)) 22 | 23 | run(name); 24 | 25 | tokens2 = strsplit(tokens{2}, '.'); 26 | 27 | hold all 28 | plot(B(:,1), B(:,2), 'DisplayName', tokens2{1}); 29 | hold off 30 | 31 | if A(1)*A(2)>Gflops_max 32 | Gflops_max = A(1)*A(2); 33 | end 34 | 35 | end 36 | 37 | end 38 | 39 | axis([0 300 0 Gflops_max]); 40 | hlegend = legend(gca, 'show', 'Location', 'SouthEast'); 41 | title(routine); 42 | xlabel('matrix size n') 43 | ylabel('Gflops') 44 | grid on 45 | ytick = get(gca, 'ytick'); 46 | ytick = [ytick, Gflops_max]; 47 | set(gca, 'ytick', ytick); 48 | 49 | file_name = [routine]; 50 | file_name_eps = [file_name, '.eps']; 51 | file_name_pdf = [file_name, '.pdf']; 52 | print(f1, file_name_eps, '-depsc') 53 | system(['epstopdf ', file_name_eps, ' -out ', file_name_pdf]); 54 | system(['rm ', file_name_eps]); 55 | 56 | 57 | -------------------------------------------------------------------------------- /blas_api/dgetrf_ref.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. 
* 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | #include 41 | 42 | 43 | /* Bind the row-pivoting LU routine and the matrix struct: the blasfeo_cm_ names when both BLAS_API and MF_PANELMAJ are defined, the plain blasfeo_ names otherwise. The bitwise & behaves like && here because defined() evaluates to 0 or 1. */ 44 | #if ( defined(BLAS_API) & defined(MF_PANELMAJ) ) 45 | #define GETRF_RP blasfeo_cm_dgetrf_rp 46 | #define MAT blasfeo_cm_dmat 47 | #else 48 | #define GETRF_RP blasfeo_dgetrf_rp 49 | #define MAT blasfeo_dmat 50 | #endif 51 | #define REAL double 52 | 53 | 54 | /* Name bound to GETRF: dgetrf_ when FORTRAN_BLAS_API is defined, blasfeo_lapack_dgetrf otherwise. */ 55 | #if defined(FORTRAN_BLAS_API) 56 | #define GETRF dgetrf_ 57 | #else 58 | #define GETRF blasfeo_lapack_dgetrf 59 | #endif 60 | 61 | 62 | 63 | /* Textually include the shared source; presumably it is written in terms of the macros bound above - confirm in xgetrf_ref.c. */ 64 | #include "xgetrf_ref.c" 65 | 66 | 67 | -------------------------------------------------------------------------------- /blas_api/xgetr_ref.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. 
* 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | /* BLAS_GETR: pointer-argument wrapper around GETR. pm, pn: pointers to the two size values, dereferenced and forwarded to GETR. A, B: raw REAL arrays, each wrapped in a struct MAT descriptor. plda, pldb: pointers to the values stored in the m field of each descriptor (presumably the leading dimensions - confirm against struct MAT). Only the pA and m fields are set; any other fields of the descriptors are left uninitialised. Returns nothing. NOTE(review): BLAS_GETR, GETR, MAT and REAL are not defined in this file; presumably they are macros bound by the file that includes it, and GETR presumably writes the transpose of A into B - confirm. */ 36 | void BLAS_GETR(int *pm, int *pn, REAL *A, int *plda, REAL *B, int *pldb) 37 | { 38 | /* descriptor wrapping A */ 39 | struct MAT sA; 40 | sA.pA = A; 41 | sA.m = *plda; 42 | /* descriptor wrapping B */ 43 | struct MAT sB; 44 | sB.pA = B; 45 | sB.m = *pldb; 46 | /* both index pairs are 0, i.e. each descriptor is addressed from its first element */ 47 | GETR(*pm, *pn, &sA, 0, 0, &sB, 0, 0); 48 | 49 | return; 50 | 51 | } 52 | -------------------------------------------------------------------------------- /blasfeo_hp_pm/d_blas2_diag_lib.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. 
* 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define REAL double 44 | 45 | #define XVEC blasfeo_dvec 46 | 47 | #define HP_GEMV_D blasfeo_hp_dgemv_d 48 | 49 | #define GEMV_D blasfeo_dgemv_d 50 | 51 | 52 | 53 | #include "x_blas2_diag_lib.c" 54 | -------------------------------------------------------------------------------- /blasfeo_hp_pm/s_blas2_diag_lib.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define REAL float 44 | 45 | #define XVEC blasfeo_svec 46 | 47 | #define HP_GEMV_D blasfeo_hp_sgemv_d 48 | 49 | #define GEMV_D blasfeo_sgemv_d 50 | 51 | 52 | 53 | #include "x_blas2_diag_lib.c" 54 | 55 | -------------------------------------------------------------------------------- /blasfeo_ref/d_blas2_diag_hp_cm.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. 
Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define HP_CM 44 | 45 | 46 | 47 | #define REAL double 48 | #define XVEC blasfeo_dvec 49 | 50 | 51 | 52 | #define REF_GEMV_D blasfeo_hp_dgemv_d 53 | 54 | #define GEMV_D blasfeo_dgemv_d 55 | 56 | 57 | 58 | #include "x_blas2_diag_ref.c" 59 | 60 | 61 | -------------------------------------------------------------------------------- /blasfeo_ref/d_blas2_diag_ref.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. 
* 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define REF 44 | 45 | 46 | 47 | #define REAL double 48 | #define XVEC blasfeo_dvec 49 | 50 | 51 | 52 | #define REF_GEMV_D blasfeo_ref_dgemv_d 53 | 54 | #define GEMV_D blasfeo_dgemv_d 55 | 56 | 57 | 58 | #include "x_blas2_diag_ref.c" 59 | 60 | -------------------------------------------------------------------------------- /blasfeo_ref/s_blas2_diag_hp_cm.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define HP_CM 44 | 45 | 46 | 47 | #define REAL float 48 | #define XVEC blasfeo_svec 49 | 50 | 51 | 52 | #define REF_GEMV_D blasfeo_hp_sgemv_d 53 | 54 | #define GEMV_D blasfeo_sgemv_d 55 | 56 | 57 | 58 | #include "x_blas2_diag_ref.c" 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /blasfeo_ref/s_blas2_diag_ref.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. 
Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define REF 44 | 45 | 46 | 47 | #define REAL float 48 | #define XVEC blasfeo_svec 49 | 50 | 51 | 52 | #define REF_GEMV_D blasfeo_ref_sgemv_d 53 | 54 | #define GEMV_D blasfeo_sgemv_d 55 | 56 | 57 | 58 | #include "x_blas2_diag_ref.c" 59 | 60 | 61 | -------------------------------------------------------------------------------- /blasfeo_target.h.in: -------------------------------------------------------------------------------- 1 | #ifndef TARGET_@TARGET@ 2 | #define TARGET_@TARGET@ 3 | #endif 4 | 5 | #ifndef TARGET_@TARGET2@ 6 | #define TARGET_@TARGET2@ 7 | #endif 8 | 9 | #ifndef TARGET_NEED_FEATURE_AVX2 10 | #cmakedefine TARGET_NEED_FEATURE_AVX2 @TARGET_NEED_FEATURE_AVX2@ 11 | #endif 12 | 13 | #ifndef TARGET_NEED_FEATURE_FMA 14 | #cmakedefine TARGET_NEED_FEATURE_FMA @TARGET_NEED_FEATURE_FMA@ 15 | #endif 16 | 17 | #ifndef TARGET_NEED_FEATURE_SSE3 18 | #cmakedefine TARGET_NEED_FEATURE_SSE3 @TARGET_NEED_FEATURE_SSE3@ 19 | #endif 20 | 21 | #ifndef TARGET_NEED_FEATURE_AVX 22 | #cmakedefine TARGET_NEED_FEATURE_AVX @TARGET_NEED_FEATURE_AVX@ 23 | #endif 24 | 25 | #ifndef TARGET_NEED_FEATURE_VFPv3 26 | #cmakedefine TARGET_NEED_FEATURE_VFPv3 @TARGET_NEED_FEATURE_VFPv3@ 27 | #endif 28 | 29 | #ifndef TARGET_NEED_FEATURE_NEON 30 | #cmakedefine TARGET_NEED_FEATURE_NEON @TARGET_NEED_FEATURE_NEON@ 31 | #endif 32 | 33 | #ifndef TARGET_NEED_FEATURE_VFPv4 34 | #cmakedefine TARGET_NEED_FEATURE_VFPv4 @TARGET_NEED_FEATURE_VFPv4@ 35 | #endif 36 | 37 | #ifndef TARGET_NEED_FEATURE_NEONv2 38 | #cmakedefine TARGET_NEED_FEATURE_NEONv2 @TARGET_NEED_FEATURE_NEONv2@ 39 | #endif 40 | 41 | #ifndef LA_@LA@ 42 | #define LA_@LA@ 43 | #endif 44 | 45 | #ifndef MF_@MF@ 46 | #define MF_@MF@ 47 | #endif 48 | 49 | #ifndef EXT_DEP 50 | 
#define ON 1 51 | #define OFF 0 52 | #if @EXT_DEP@==ON 53 | #define EXT_DEP 54 | #endif 55 | #undef ON 56 | #undef OFF 57 | #endif 58 | 59 | #ifndef EXT_DEP_MALLOC 60 | #define ON 1 61 | #define OFF 0 62 | #if @EXT_DEP_MALLOC@==ON 63 | #define EXT_DEP_MALLOC 64 | #endif 65 | #undef ON 66 | #undef OFF 67 | #endif 68 | 69 | #ifndef BLAS_API 70 | #define ON 1 71 | #define OFF 0 72 | #if @BLAS_API@==ON 73 | #define BLAS_API 74 | #endif 75 | #undef ON 76 | #undef OFF 77 | #endif 78 | 79 | #ifndef FORTRAN_BLAS_API 80 | #define ON 1 81 | #define OFF 0 82 | #if @FORTRAN_BLAS_API@==ON 83 | #define FORTRAN_BLAS_API 84 | #endif 85 | #undef ON 86 | #undef OFF 87 | #endif 88 | -------------------------------------------------------------------------------- /blasfeo_wr/d_blas2_diag_lib.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. 
* 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define REAL double 44 | 45 | #define XVEC blasfeo_dvec 46 | 47 | #define HP_GEMV_D blasfeo_hp_dgemv_d 48 | 49 | #define GEMV_D blasfeo_dgemv_d 50 | 51 | 52 | 53 | #include "x_blas2_diag_lib.c" 54 | -------------------------------------------------------------------------------- /blasfeo_wr/d_blas3_diag_lib.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. 
* 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define XMATEL_A(X, Y) pA[(X)+lda*(Y)] 44 | #define XMATEL_B(X, Y) pB[(X)+ldb*(Y)] 45 | #define XMATEL_C(X, Y) pC[(X)+ldc*(Y)] 46 | #define XMATEL_D(X, Y) pD[(X)+ldd*(Y)] 47 | 48 | 49 | 50 | #define REAL double 51 | #define XMAT blasfeo_dmat 52 | #define XMATEL BLASFEO_DMATEL 53 | #define XVEC blasfeo_dvec 54 | #define XVECEL BLASFEO_DVECEL 55 | 56 | 57 | 58 | #define GEMM_R_DIAG blasfeo_dgemm_nd 59 | #define GEMM_L_DIAG blasfeo_dgemm_dn 60 | 61 | 62 | 63 | #include "x_blas3_diag_lib.c" 64 | -------------------------------------------------------------------------------- /blasfeo_wr/s_blas2_diag_lib.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS for embedded optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. 
* 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | 42 | 43 | #define REAL float 44 | 45 | #define XVEC blasfeo_svec 46 | 47 | #define HP_GEMV_D blasfeo_hp_sgemv_d 48 | 49 | #define GEMV_D blasfeo_sgemv_d 50 | 51 | 52 | 53 | #include "x_blas2_diag_lib.c" 54 | 55 | -------------------------------------------------------------------------------- /cmake/ArchitectureTests.cmake: -------------------------------------------------------------------------------- 1 | # This file contains the various architecture tests that each target 2 | # must pass when testing the ISA and intrinsics. 
3 | 4 | # For Intel Haswell, test whether the AVX2 and FMA ISAs work 5 | set(CMP_CHECK_X64_INTEL_HASWELL 6 | TEST_AVX2 7 | TEST_FMA 8 | ) 9 | 10 | # For Intel Sandy Bridge, test whether the AVX ISA works 11 | set(CMP_CHECK_X64_INTEL_SANDY_BRIDGE 12 | TEST_AVX 13 | ) 14 | 15 | # For Intel Core, test whether the SSE3 ISA works 16 | set(CMP_CHECK_X64_INTEL_CORE 17 | TEST_SSE3 18 | ) 19 | 20 | # For AMD Bulldozer, test whether the AVX and FMA ISAs work 21 | set(CMP_CHECK_X64_AMD_BULLDOZER 22 | TEST_AVX 23 | TEST_FMA 24 | ) 25 | 26 | # For the Cortex A57, test whether the VFPv4 and NEONv2 ISAs work 27 | set(CMP_CHECK_ARMV8A_ARM_CORTEX_A57 28 | TEST_VFPv4 29 | TEST_NEONv2 30 | ) 31 | 32 | # For the Cortex A53, test whether the VFPv4 and NEONv2 ISAs work 33 | set(CMP_CHECK_ARMV8A_ARM_CORTEX_A53 34 | TEST_VFPv4 35 | TEST_NEONv2 36 | ) 37 | 38 | # For the Cortex A15, test whether the VFPv3 and NEON ISAs work 39 | set(CMP_CHECK_ARMV7A_ARM_CORTEX_A15 40 | TEST_VFPv3 41 | TEST_NEON 42 | ) 43 | 44 | # For the Cortex A7, test whether the VFPv3 and NEON ISAs work 45 | set(CMP_CHECK_ARMV7A_ARM_CORTEX_A7 46 | TEST_VFPv3 47 | TEST_NEON 48 | ) 49 | 50 | # For the Cortex A9, test whether the VFPv3 and NEON ISAs work 51 | set(CMP_CHECK_ARMV7A_ARM_CORTEX_A9 52 | TEST_VFPv3 53 | TEST_NEON 54 | ) 55 | -------------------------------------------------------------------------------- /cmake/TestSingleTarget.cmake: -------------------------------------------------------------------------------- 1 | include( ${PROJECT_SOURCE_DIR}/cmake/isa_tests/isa_test.cmake ) 2 | include( ${PROJECT_SOURCE_DIR}/cmake/intrinsic_tests/intrinsic_test.cmake ) 3 | # Check that the target named by TARGET builds (and, unless cross-compiling, runs) with both the assembly test and the intrinsic test. 4 | function( TestSingleTarget ) 5 | set( TEST_TARGET ${TARGET} ) 6 | 7 | # Test the assembly (ISA) compilation and running of the target in TEST_TARGET: 8 | # a compile failure is fatal, a run failure is only reported 9 | TestISA( ${TEST_TARGET} ) 10 | 11 | if( ${CHKISA_TARGET_BUILD} ) 12 | message( STATUS "Testing target ${TEST_TARGET}: assembly compilation [success]" ) 13 | 14 | if( NOT 
${BLASFEO_CROSSCOMPILING} ) 15 | if( ${CHKISA_TARGET_RUN}) 16 | message( STATUS "Testing target ${TEST_TARGET}: assembly run [success]" ) 17 | else() 18 | message( STATUS "Testing target ${TEST_TARGET}: assembly run [failed]" ) 19 | endif() 20 | endif() 21 | 22 | else() 23 | message( STATUS "Testing target ${TEST_TARGET}: assembly compilation [failed]" ) 24 | message( "Compile output:" ) 25 | message( "${CHKISA_TARGET_OUTPUT}" ) # quoted so semicolons in the compiler output are not dropped 26 | message( FATAL_ERROR "Unable to compile with assembly for target ${TEST_TARGET}" ) 27 | endif() 28 | 29 | # Run the same check for the compiler support for intrinsics 30 | TestIntrinsics( ${TEST_TARGET} ) 31 | 32 | if( ${CHKINTRINSIC_TARGET_BUILD} ) 33 | message( STATUS "Testing target ${TEST_TARGET}: intrinsic compilation [success]" ) 34 | 35 | if( NOT ${BLASFEO_CROSSCOMPILING} ) 36 | if( ${CHKINTRINSIC_TARGET_RUN} ) 37 | message( STATUS "Testing target ${TEST_TARGET}: intrinsic run [success]" ) 38 | else() 39 | message( STATUS "Testing target ${TEST_TARGET}: intrinsic run [failed]" ) 40 | endif() 41 | endif() 42 | 43 | else() 44 | message( STATUS "Testing target ${TEST_TARGET}: intrinsic compilation [failed]" ) 45 | message( "Compile output:" ) 46 | message( "${CHKINTRINSIC_TARGET_OUTPUT}" ) # quoted so semicolons in the compiler output are not dropped 47 | message( FATAL_ERROR "Unable to compile with intrinsics for target ${TEST_TARGET}" ) 48 | endif() 49 | 50 | endfunction() 51 | -------------------------------------------------------------------------------- /cmake/X64AutomaticTargetSelection.cmake: -------------------------------------------------------------------------------- 1 | include( ${PROJECT_SOURCE_DIR}/cmake/isa_tests/isa_test.cmake ) 2 | include( ${PROJECT_SOURCE_DIR}/cmake/intrinsic_tests/intrinsic_test.cmake ) 3 | 4 | function( X64AutomaticTargetSelection ) 5 | 6 | # Iterate over each target to test the compilation and running 7 | foreach( TEST_TARGET ${X64_AUTOMATIC_TARGETS} ) 8 | # This function will test the compilation and running of the 9 | # target specified in TEST_TARGET
10 | TestISA( ${TEST_TARGET} ) 11 | 12 | set( ISA_TEST_PASS FALSE ) 13 | 14 | if( ${CHKISA_TARGET_BUILD} ) 15 | message( STATUS "Testing target ${TEST_TARGET}: assembly compilation [success]" ) 16 | 17 | if( ${CHKISA_TARGET_RUN} ) 18 | message(STATUS "Testing target ${TEST_TARGET}: assembly run [success]" ) 19 | 20 | set( ISA_TEST_PASS TRUE ) 21 | 22 | else() 23 | message( STATUS "Testing target ${TEST_TARGET}: assembly run [failed]" ) 24 | endif() 25 | 26 | else() 27 | message( STATUS "Testing target ${TEST_TARGET}: assembly compilation [failed]" ) 28 | endif() 29 | 30 | TestIntrinsics( ${TEST_TARGET} ) 31 | 32 | set( INTRINSIC_TEST_PASS FALSE ) 33 | 34 | if( ${CHKINTRINSIC_TARGET_BUILD} ) 35 | message( STATUS "Testing target ${TEST_TARGET}: intrinsic compilation [success]" ) 36 | 37 | if( ${CHKINTRINSIC_TARGET_RUN} ) 38 | message( STATUS "Testing target ${TEST_TARGET}: intrinsic run [success]" ) 39 | 40 | set( INTRINSIC_TEST_PASS TRUE ) 41 | 42 | else() 43 | message( STATUS "Testing target ${TEST_TARGET}: intrinsic run [failed]" ) 44 | endif() 45 | 46 | else() 47 | message( STATUS "Testing target ${TEST_TARGET}: intrinsic compilation [failed]" ) 48 | endif() 49 | 50 | 51 | if( ${ISA_TEST_PASS} AND ${INTRINSIC_TEST_PASS} ) 52 | # Both the assembly and the intrinsic tests built and ran: export this target as TARGET to the caller's scope and stop searching 53 | set( TARGET ${TEST_TARGET} PARENT_SCOPE ) 54 | return() 55 | endif() 56 | 57 | endforeach() 58 | # No target in X64_AUTOMATIC_TARGETS passed both tests 59 | message( FATAL_ERROR "Unable to identify a target to use. Please select one manually."
) 60 | 61 | endfunction() 62 | -------------------------------------------------------------------------------- /cmake/intrinsic_tests/intrinsic_test.c: -------------------------------------------------------------------------------- 1 | 2 | #if defined( TEST_AVX ) || defined( TEST_AVX2 ) || defined( TEST_FMA ) 3 | // Header that contains the AVX, AVX2 and FMA intrinsics 4 | #include <immintrin.h> 5 | #endif 6 | 7 | #ifdef TEST_SSE3 8 | // Header that contains the SSE3 intrinsics 9 | #include <pmmintrin.h> 10 | #endif 11 | 12 | 13 | int main() 14 | { 15 | 16 | #ifdef TEST_AVX 17 | // Test for working AVX intrinsics 18 | 19 | // This setter is AVX minimum 20 | __m256d retVal_AVX = _mm256_set_pd( 1.0f, 2.0f, 3.0f, 4.0f ); 21 | #endif 22 | 23 | #ifdef TEST_AVX2 24 | // Test for working AVX2 intrinsics 25 | 26 | // This setter is AVX minimum 27 | __m256i testVal_AVX2_1 = _mm256_set_epi32( 1, 2, 3, 4, 5, 6, 7, 8 ); 28 | __m256i testVal_AVX2_2 = _mm256_set_epi32( 2, 3, 4, 5, 6, 7, 8, 9 ); 29 | 30 | // This subtraction is AVX2 minimum 31 | __m256i retVal_AVX2 = _mm256_sub_epi32( testVal_AVX2_1, testVal_AVX2_2 ); 32 | #endif 33 | 34 | #ifdef TEST_FMA 35 | // Test for working FMA intrinsics 36 | 37 | // These broadcast setters are SSE minimum 38 | __m128 testVal_FMA_1 = _mm_set1_ps( 5.0 ); 39 | __m128 testVal_FMA_2 = _mm_set1_ps( 3.0 ); 40 | __m128 testVal_FMA_3 = _mm_set1_ps( 7.0 ); 41 | 42 | // This is FMA minimum 43 | __m128 retVal_FMA = _mm_fmadd_ps( testVal_FMA_1, testVal_FMA_2, testVal_FMA_3 ); 44 | #endif 45 | 46 | #ifdef TEST_SSE3 47 | // Test for working SSE3 intrinsics 48 | 49 | // These broadcast setters are SSE minimum 50 | __m128 testVal_SSE3_1 = _mm_set1_ps( 5.0 ); 51 | __m128 testVal_SSE3_2 = _mm_set1_ps( 3.0 ); 52 | 53 | // This adder is SSE3 minimum 54 | __m128 retVal_SSE3 = _mm_hadd_ps( testVal_SSE3_1, testVal_SSE3_2 ); 55 | #endif 56 | 57 | #ifdef TEST_VFPv4 58 | // Test for working VFPv4 intrinsics 59 | 60 | //TODO 61 | #endif 62 | 63 | #ifdef TEST_NEONv2 64 | // Test for working
NEONv2 intrinsics 65 | 66 | //TODO 67 | #endif 68 | 69 | #ifdef TEST_VFPv3 70 | // Test for working VFPv3 intrinsics 71 | 72 | // TODO 73 | #endif 74 | 75 | #ifdef TEST_NEON 76 | // Test for working NEON intrinsics 77 | 78 | 79 | //TODO 80 | #endif 81 | 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /cmake/intrinsic_tests/intrinsic_test.cmake: -------------------------------------------------------------------------------- 1 | # This function will prepare a test for a target (specified by the 2 | # variable TEST_TARGET) and execute it. This consists of compiling 3 | # a C files using the intrinsics supported by the target using the 4 | # compile flags, and then executing the result to see if it runs 5 | # correctly. 6 | # 7 | # The test file contains an exemplar intrinsic for the target, 8 | # so if they fail to run it means the specific target is not supported. 9 | # 10 | # The requested target to test is passed as the argument to the function. 
11 | # 12 | # The results of the test are stored as the variables 13 | # CHKINTRINSIC_TARGET_BUILD - True if the target built without error 14 | # CHKINTRINSIC_TARGET_RUN - True if the test ran without error 15 | function( TestIntrinsics TEST_TARGET ) 16 | 17 | # Pull in the tests each architecture needs to run 18 | include( ${PROJECT_SOURCE_DIR}/cmake/ArchitectureTests.cmake ) 19 | 20 | # The main source file to test with 21 | set(CMP_CHECK_SRCS 22 | ${PROJECT_SOURCE_DIR}/cmake/intrinsic_tests/intrinsic_test.c 23 | ) 24 | 25 | set(C_DEFS_CHK "") 26 | 27 | # Add the compile definitions 28 | foreach(CHECK ${CMP_CHECK_${TEST_TARGET}}) 29 | list( APPEND C_DEFS_CHK "-D${CHECK} " ) 30 | endforeach() 31 | 32 | string( REPLACE ";" "" C_DEFS_CHK "${C_DEFS_CHK}" ) 33 | 34 | # Populate the flags to use for the testing 35 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_TARGET_${TEST_TARGET}}") 36 | set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${ASM_FLAGS_TARGET_${TEST_TARGET}}") 37 | 38 | if(${BLASFEO_CROSSCOMPILING}) 39 | set(CHKINTRINSIC_TARGET_RUN_${TEST_TARGET} "1") 40 | 41 | # Only tell CMake to compile the files, not link them since we are doing cross-compilation 42 | if (${CMAKE_VERSION} VERSION_EQUAL "3.6.0" OR ${CMAKE_VERSION} VERSION_GREATER "3.6") 43 | set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) 44 | else() # CMake older than 3.6: a bare elseif() has no condition and would never take this branch 45 | set(CMAKE_EXE_LINKER_FLAGS_INIT "--specs=nosys.specs") 46 | endif() 47 | 48 | try_compile( CHKINTRINSIC_TARGET_BUILD_${TEST_TARGET} # Variable to save the build result to 49 | "${CMAKE_BINARY_DIR}/compilerTest/${TEST_TARGET}" # Directory to compile in 50 | SOURCES ${CMP_CHECK_SRCS} # Source to compile 51 | CMAKE_FLAGS 52 | "-DCOMPILE_DEFINITIONS=${C_DEFS_CHK}" 53 | OUTPUT_VARIABLE CHK_OUTPUT${TEST_TARGET} 54 | ) 55 | else() 56 | try_run( CHKINTRINSIC_TARGET_RUN_${TEST_TARGET} # Variable to save the run result to 57 | CHKINTRINSIC_TARGET_BUILD_${TEST_TARGET} # Variable to save the build result to 58 | "${CMAKE_BINARY_DIR}/compilerTest/${TEST_TARGET}" # 
Directory to compile in 59 | SOURCES ${CMP_CHECK_SRCS} # Source to compile 60 | CMAKE_FLAGS 61 | "-DCOMPILE_DEFINITIONS=${C_DEFS_CHK}" 62 | OUTPUT_VARIABLE CHK_OUTPUT${TEST_TARGET} 63 | ) 64 | endif() 65 | 66 | if(${CHKINTRINSIC_TARGET_BUILD_${TEST_TARGET}}) 67 | set(CHKINTRINSIC_TARGET_BUILD TRUE PARENT_SCOPE) 68 | 69 | if(${CHKINTRINSIC_TARGET_RUN_${TEST_TARGET}} STREQUAL "0") 70 | set(CHKINTRINSIC_TARGET_RUN TRUE PARENT_SCOPE) 71 | else() 72 | set(CHKINTRINSIC_TARGET_RUN FALSE PARENT_SCOPE) 73 | endif() 74 | 75 | else() 76 | set(CHKINTRINSIC_TARGET_BUILD FALSE PARENT_SCOPE) 77 | set(CHKINTRINSIC_TARGET_OUTPUT ${CHK_OUTPUT${TEST_TARGET}} PARENT_SCOPE) 78 | endif() 79 | 80 | endfunction() 81 | -------------------------------------------------------------------------------- /cmake/isa_tests/TEST_AVX.S: -------------------------------------------------------------------------------- 1 | // void test_avx(); 2 | 3 | .p2align 4,,15 4 | #if defined(OS_LINUX) 5 | .globl test_avx 6 | .type test_avx, @function 7 | test_avx: 8 | #elif defined(OS_MAC) 9 | .globl _test_avx 10 | _test_avx: 11 | #elif defined(OS_WINDOWS) 12 | .globl test_avx 13 | .def test_avx; .scl 2; .type 32; .endef 14 | test_avx: // label must match the symbol declared by .globl/.def above 15 | #endif 16 | 17 | // Execute the VZEROALL instruction to see if it works 18 | vzeroall 19 | 20 | ret 21 | 22 | 23 | #if defined(OS_LINUX) 24 | .size test_avx, .-test_avx 25 | #endif 26 | -------------------------------------------------------------------------------- /cmake/isa_tests/TEST_AVX2.S: -------------------------------------------------------------------------------- 1 | // void test_avx2(); 2 | 3 | .p2align 4,,15 4 | #if defined(OS_LINUX) 5 | .globl test_avx2 6 | .type test_avx2, @function 7 | test_avx2: 8 | #elif defined(OS_MAC) 9 | .globl _test_avx2 10 | _test_avx2: 11 | #elif defined(OS_WINDOWS) 12 | .globl test_avx2 13 | .def test_avx2; .scl 2; .type 32; .endef 14 | test_avx2: 15 | #endif 16 | 17 | // Call the VPSRAVD function to see if it works 18 | // The actual 
values being used don't matter, just call it 19 | vpsravd %xmm2, %xmm1, %xmm0 20 | 21 | ret 22 | 23 | 24 | #if defined(OS_LINUX) 25 | .size test_avx2, .-test_avx2 26 | #endif 27 | -------------------------------------------------------------------------------- /cmake/isa_tests/TEST_FMA.S: -------------------------------------------------------------------------------- 1 | // void test_fma(); 2 | 3 | .p2align 4,,15 4 | #if defined(OS_LINUX) 5 | .globl test_fma 6 | .type test_fma, @function 7 | test_fma: 8 | #elif defined(OS_MAC) 9 | .globl _test_fma 10 | _test_fma: 11 | #elif defined(OS_WINDOWS) 12 | .globl test_fma 13 | .def test_fma; .scl 2; .type 32; .endef 14 | test_fma: 15 | #endif 16 | 17 | // Call the VFMADD231PD function to see if it works 18 | // The actual values in the registers don't matter, so just pick three 19 | vfmadd231pd %xmm2, %xmm1, %xmm0 20 | 21 | ret 22 | 23 | 24 | #if defined(OS_LINUX) 25 | .size test_fma, .-test_fma 26 | #endif 27 | -------------------------------------------------------------------------------- /cmake/isa_tests/TEST_NEON.S: -------------------------------------------------------------------------------- 1 | // void test_neon() 2 | 3 | // .p2align 4,,15 4 | #if defined(OS_LINUX) 5 | .global test_neon 6 | .type test_neon, %function 7 | test_neon: 8 | #elif defined(OS_MAC) 9 | .global _test_neon 10 | _test_neon: 11 | #endif 12 | 13 | 14 | // Test for the presence of the vector absolute value instruction 15 | vabs.f32 d0, d1 16 | 17 | bx lr // return to the caller instead of running off the end of the function 18 | #if defined(OS_LINUX) 19 | .size test_neon, .-test_neon 20 | #endif 21 | -------------------------------------------------------------------------------- /cmake/isa_tests/TEST_NEONv2.S: -------------------------------------------------------------------------------- 1 | // void test_neonv2() 2 | 3 | // .p2align 4,,15 4 | #if defined(OS_LINUX) 5 | .global test_neonv2 6 | .type test_neonv2, %function 7 | test_neonv2: 8 | #elif defined(OS_MAC) 9 | .global _test_neonv2 10 | _test_neonv2: 11 | 
#endif 12 | 13 | 14 | // Test for the presence of the vector fused multiply accumulate instruction 15 | fmla v0.2d, v24.2d, v30.2d 16 | 17 | ret // return to the caller instead of running off the end of the function 18 | #if defined(OS_LINUX) 19 | .size test_neonv2, .-test_neonv2 20 | #endif 21 | -------------------------------------------------------------------------------- /cmake/isa_tests/TEST_SSE3.S: -------------------------------------------------------------------------------- 1 | // void test_sse3(); 2 | 3 | .p2align 4,,15 4 | #if defined(OS_LINUX) 5 | .globl test_sse3 6 | .type test_sse3, @function 7 | test_sse3: 8 | #elif defined(OS_MAC) 9 | .globl _test_sse3 10 | _test_sse3: 11 | #elif defined(OS_WINDOWS) 12 | .globl test_sse3 13 | .def test_sse3; .scl 2; .type 32; .endef 14 | test_sse3: 15 | #endif 16 | 17 | // Call the ADDSUBPD function to see if it works 18 | // The actual values in the registers don't matter, so just pick two 19 | addsubpd %xmm1, %xmm0 20 | 21 | ret 22 | 23 | 24 | #if defined(OS_LINUX) 25 | .size test_sse3, .-test_sse3 26 | #endif 27 | -------------------------------------------------------------------------------- /cmake/isa_tests/TEST_VFPv3.S: -------------------------------------------------------------------------------- 1 | // void test_vfpv3() 2 | 3 | // .p2align 4,,15 4 | #if defined(OS_LINUX) 5 | .global test_vfpv3 6 | .type test_vfpv3, %function 7 | test_vfpv3: 8 | #elif defined(OS_MAC) 9 | .global _test_vfpv3 10 | _test_vfpv3: 11 | #endif 12 | 13 | 14 | // Test for the presence of the move constant instruction 15 | // This is the equivalent of the UAL vmov.f32 instruction 16 | fconsts s1, #112 // 1.0 17 | 18 | bx lr // return to the caller instead of running off the end of the function 19 | #if defined(OS_LINUX) 20 | .size test_vfpv3, .-test_vfpv3 21 | #endif 22 | -------------------------------------------------------------------------------- /cmake/isa_tests/TEST_VFPv4.S: -------------------------------------------------------------------------------- 1 | // void test_vfpv4() 2 | 3 | // .p2align 4,,15 4 | #if defined(OS_LINUX) 5 | .global test_vfpv4 6 | .type 
test_vfpv4, %function 7 | test_vfpv4: 8 | #elif defined(OS_MAC) 9 | .global _test_vfpv4 10 | _test_vfpv4: 11 | #endif 12 | 13 | 14 | // Test for the presence of the fused multiply accumulate instruction 15 | fmadd d0, d1, d2, d3 16 | 17 | ret // return to the caller instead of running off the end of the function 18 | #if defined(OS_LINUX) 19 | .size test_vfpv4, .-test_vfpv4 20 | #endif 21 | -------------------------------------------------------------------------------- /cmake/isa_tests/isa_test.c: -------------------------------------------------------------------------------- 1 | 2 | void test_avx(); 3 | void test_avx2(); 4 | void test_fma(); 5 | void test_sse3(); 6 | void test_vfpv4(); 7 | void test_neonv2(); 8 | void test_vfpv3(); 9 | void test_neon(); 10 | 11 | 12 | int main() 13 | { 14 | 15 | #ifdef TEST_AVX 16 | // Test for if the AVX ISA works 17 | test_avx(); 18 | #endif 19 | 20 | #ifdef TEST_AVX2 21 | // Test for if the AVX2 ISA works 22 | test_avx2(); 23 | #endif 24 | 25 | #ifdef TEST_FMA 26 | // Test for if the FMA ISA works 27 | test_fma(); 28 | #endif 29 | 30 | #ifdef TEST_SSE3 31 | // Test for if the SSE3 ISA works 32 | test_sse3(); 33 | #endif 34 | 35 | #ifdef TEST_VFPv4 36 | // Test for if the VFPv4 ISA works 37 | test_vfpv4(); 38 | #endif 39 | 40 | #ifdef TEST_NEONv2 41 | // Test for if the NEONv2 ISA works 42 | test_neonv2(); 43 | #endif 44 | 45 | #ifdef TEST_VFPv3 46 | // Test for if the VFPv3 ISA works 47 | test_vfpv3(); 48 | #endif 49 | 50 | #ifdef TEST_NEON 51 | // Test for if the NEON ISA works 52 | test_neon(); 53 | #endif 54 | 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /cmake/isa_tests/isa_test.cmake: -------------------------------------------------------------------------------- 1 | # This function will prepare a test for a target (specified by the 2 | # variable TEST_TARGET) and execute it. 
This consists of compiling 3 | # a C file with associated assembly files using the compile flags, 4 | # and then executing the result to see if it runs correctly. 5 | # 6 | # The assembly files contain an exemplar instruction for the ISA, 7 | # so if they fail to run it means the specific ISA is not supported. 8 | # 9 | # The requested target to test is passed as the argument to the function. 10 | # 11 | # The results of the test are stored as the variables 12 | # CHKISA_TARGET_BUILD - True if the target built without error 13 | # CHKISA_TARGET_RUN - True if the test ran without error 14 | function(TestISA TEST_TARGET) 15 | 16 | # Pull in the tests each architecture needs to run 17 | include( ${PROJECT_SOURCE_DIR}/cmake/ArchitectureTests.cmake ) 18 | 19 | # The main source file to test with 20 | set(CMP_CHECK_SRCS 21 | ${PROJECT_SOURCE_DIR}/cmake/isa_tests/isa_test.c 22 | ) 23 | 24 | set(C_DEFS_CHK "") 25 | 26 | # Add the assembly test files and the compile definitions 27 | foreach(CHECK ${CMP_CHECK_${TEST_TARGET}}) 28 | list( APPEND CMP_CHECK_SRCS ${PROJECT_SOURCE_DIR}/cmake/isa_tests/${CHECK}.S ) 29 | list( APPEND C_DEFS_CHK "-D${CHECK} " ) 30 | endforeach() 31 | 32 | string( REPLACE ";" "" C_DEFS_CHK "${C_DEFS_CHK}" ) 33 | 34 | # Populate the flags to use for the testing 35 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_TARGET_${TEST_TARGET}}") 36 | set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${ASM_FLAGS_TARGET_${TEST_TARGET}}") 37 | 38 | if(${BLASFEO_CROSSCOMPILING}) 39 | set(CHKISA_TARGET_RUN_${TEST_TARGET} "1") 40 | 41 | # Only tell CMake to compile the files, not link them since we are doing cross-compilation 42 | if (${CMAKE_VERSION} VERSION_EQUAL "3.6.0" OR ${CMAKE_VERSION} VERSION_GREATER "3.6") 43 | set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) 44 | else() # CMake older than 3.6: a bare elseif() has no condition and would never take this branch 45 | set(CMAKE_EXE_LINKER_FLAGS_INIT "--specs=nosys.specs") 46 | endif() 47 | 48 | try_compile( CHKISA_TARGET_BUILD_${TEST_TARGET} # Variable to save the build result to 49 | 
"${CMAKE_BINARY_DIR}/compilerTest/${TEST_TARGET}" # Directory to compile in 50 | SOURCES ${CMP_CHECK_SRCS} # Source to compile 51 | CMAKE_FLAGS 52 | "-DCOMPILE_DEFINITIONS=${C_DEFS_CHK}" 53 | OUTPUT_VARIABLE CHK_OUTPUT_${TEST_TARGET} 54 | ) 55 | else() 56 | try_run( CHKISA_TARGET_RUN_${TEST_TARGET} # Variable to save the run result to 57 | CHKISA_TARGET_BUILD_${TEST_TARGET} # Variable to save the build result to 58 | "${CMAKE_BINARY_DIR}/compilerTest/${TEST_TARGET}" # Directory to compile in 59 | SOURCES ${CMP_CHECK_SRCS} # Source to compile 60 | CMAKE_FLAGS 61 | "-DCOMPILE_DEFINITIONS=${C_DEFS_CHK}" 62 | OUTPUT_VARIABLE CHK_OUTPUT_${TEST_TARGET} 63 | ) 64 | endif() 65 | 66 | # message(${CHK_OUTPUT${TEST_TARGET}}) 67 | 68 | if(${CHKISA_TARGET_BUILD_${TEST_TARGET}}) 69 | set(CHKISA_TARGET_BUILD TRUE PARENT_SCOPE) 70 | 71 | if(${CHKISA_TARGET_RUN_${TEST_TARGET}} STREQUAL "0") 72 | set(CHKISA_TARGET_RUN TRUE PARENT_SCOPE) 73 | else() 74 | set(CHKISA_TARGET_RUN FALSE PARENT_SCOPE) 75 | endif() 76 | 77 | else() 78 | set(CHKISA_TARGET_BUILD FALSE PARENT_SCOPE) 79 | set(CHKISA_TARGET_OUTPUT ${CHK_OUTPUT_${TEST_TARGET}} PARENT_SCOPE) 80 | endif() 81 | 82 | endfunction() 83 | -------------------------------------------------------------------------------- /doc/conventions.tex: -------------------------------------------------------------------------------- 1 | \chapter{Naming conventions} 2 | 3 | 4 | \section{Routines name} 5 | See \code{include/blasfeo\_naming.h} 6 | 7 | \section{Files name} 8 | 9 | \subsection{prefix} 10 | \begin{itemize} 11 | \item x template 12 | \item d double precision 13 | \item s single precision 14 | \item i integer 15 | \end{itemize} 16 | 17 | 18 | \subsection{postfix} 19 | \begin{itemize} 20 | \item lib8 kernel size 8 21 | \item lib4 kernel size 4 22 | \item lib0 column major 23 | \item libref reference 24 | \end{itemize} 25 | -------------------------------------------------------------------------------- /doc/guide.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/giaf/blasfeo/e33fe98accb757e7d46611ab3ad2cf65f6f96a6d/doc/guide.pdf -------------------------------------------------------------------------------- /examples/tools.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | void dgemm_nn_3l(int m, int n, int k, double *A, int lda , double *B, int ldb, double *C, int ldc); 37 | void daxpy_3l(int n, double da, double *dx, double *dy); 38 | void dscal_3l(int n, double da, double *dx); 39 | 40 | /* copies a matrix into another matrix */ 41 | void dmcopy(int row, int col, double *ptrA, int lda, double *ptrB, int ldb); 42 | 43 | /* solution of a system of linear equations */ 44 | void dgesv_3l(int n, int nrhs, double *A, int lda, int *ipiv, double *B, int ldb, int *info); 45 | 46 | /* matrix exponential */ 47 | void expm(int row, double *A); 48 | -------------------------------------------------------------------------------- /experimental/giaf/AtHA.m: -------------------------------------------------------------------------------- 1 | % matrix size 2 | n = 8; 3 | 4 | % fix rand seed 5 | rand('seed',0); 6 | 7 | % general matrix A 8 | A = rand(n,n); 9 | 10 | % symmetric matrix H 11 | H = rand(n,n); 12 | H = H+H'; 13 | %eig(H) 14 | 15 | % reference 16 | R0 = A' * H * A 17 | 18 | A 19 | H 20 | % recursive algorithm 21 | T0 = zeros(n,1); 22 | T1 = zeros(n,n); 23 | R = zeros(n,n); 24 | R(1,1) = A(1,1)' * H(1,1) * A(1,1); 25 | for ii=2:n 26 | R(ii,1:ii) = A(1:ii-1,ii)' * 
H(1:ii-1,1:ii-1) * A(1:ii-1,1:ii); 27 | R(1:ii,ii) = R(ii,1:ii)'; 28 | T0(1:ii) = A(1:ii-1,1:ii)' * H(1:ii-1,ii) + 0.5 * A(ii,1:ii)' * H(ii,ii); 29 | T1(1:ii,1:ii) = T0(1:ii) * A(ii,1:ii); 30 | R(1:ii,1:ii) += T1(1:ii,1:ii) + T1(1:ii,1:ii)'; 31 | end 32 | 33 | R 34 | R-R0 35 | -------------------------------------------------------------------------------- /experimental/giaf/blas/include/blasfeo_d_blas.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | // BLAS 3 37 | void blasfeo_dgemm(char *ta, char *tb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); 38 | -------------------------------------------------------------------------------- /experimental/giaf/blas/tests/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | -------------------------------------------------------------------------------- /experimental/giaf/blas/tests/print_blas.m: -------------------------------------------------------------------------------- 1 | % print blas 2 | 3 | blas 4 | 5 | Gflops_max = A(1)*A(2); 6 | 7 | f1 = figure(); 8 | plot(B(:,1), B(:,2), 'b'); 9 | hold on 10 | plot(B(:,1), B(:,4), 'g'); 11 | plot(B(:,1), B(:,6), 'r'); 12 | hold off 13 | 14 | axis([0 300 0 Gflops_max]); 15 | legend('blas', 'blas_pack', 'blasfeo', 'Location', 'SouthEast'); 16 | xlabel('matrix size n') 17 | ylabel('Gflops') 18 | grid on 19 | 20 | file_name = ['blas.eps']; 21 | print(f1, file_name, '-depsc') 22 | 23 | 24 | -------------------------------------------------------------------------------- /guidelines.md: 
-------------------------------------------------------------------------------- 1 | ## Installation on Android 2 | 3 | BLASFEO can successfully run on the Android OS. 4 | The procedure has been tested for the most common combination (namely Android running on the ARMv8A architecture, with the BLASFEO code cross-compiled from an x86_64 Linux host machine), but it is expected to work on other combinations too by using the OS_LINUX architecture in BLASFEO. 5 | 6 | In the tested configuration, BLASFEO was compiled with the ```aarch64-linux-android-gcc``` cross-compiler provided by the Android NDK. 7 | Once downloaded and unzipped into ```NDK_MAIN_DIR```, the NDK tools themselves can be installed in ```/opt/ndk``` by using the script ```$(NDK_MAIN_DIR)/build/tools/make-standalone-toolchain.sh``` as
8 | ```./make-standalone-toolchain.sh --arch=arm64 --install_dir=/opt/ndk``` 9 | 10 | Once the BLASFEO static library has been compiled and the executable ```example.out``` created, this can be moved to the Android device by means of the command
11 | ```adb push example.out /data/local/tmp/example.out```
12 | and executed with the command
13 | ```adb shell /data/local/tmp/example.out``` 14 | 15 | 16 | ## Performance issues 17 | 18 | The performance of BLASFEO routines can be affected by many factors, and some can have a large impact on performance. 19 | 20 | Known performance issues: 21 | - computations on __denormals__. 22 | In some computer architectures (e.g. the widespread x86_64) computations involving denormal floating point numbers are handled in microcode, and therefore can incur a very large performance penalty (10x or more). 23 | Unless computation on denormals is intended, the user should pay attention to avoid denormals in the data matrices as well as in the __memory passed to create BLASFEO matrices or vectors__ (as the padding memory is still used in internal computations, even if it is discarded and does not affect the correctness of the result). 24 | As a good practice, since denormals can be left in the memory by previous applications, it is __recommended to zero out__ the memory passed to create a BLASFEO matrix or vector in the BLASFEO API (i.e. the memory passed to the routines `blasfeo_create_dmat` and similar). 25 | For similar reasons, in the BLAS API it is recommended to have a leading dimension of the matrices that is a multiple of the minimum BLASFEO kernel size (typically equal to 4 in double precision), and to zero out the memory used for the entire array of doubles or floats (including padding). 26 | - __memory alignment__. 27 | The memory passed to create BLASFEO matrices or vectors has minimum alignment requirements which vary between architectures. 28 | Additionally, if the memory is not aligned to cache line boundaries, there may be a small performance degradation. 29 | As a good practice, it is __recommended to align to cache line boundaries__ (typically 64 bytes) the memory passed to create a BLASFEO matrix or vector in the BLASFEO API, or the memory used for the array of doubles or floats in the BLAS API. 
30 | -------------------------------------------------------------------------------- /include/blasfeo.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #include "blasfeo_processor_features.h" 37 | #include "blasfeo_target.h" 38 | #include "blasfeo_block_size.h" 39 | #include "blasfeo_stdlib.h" 40 | #include "blasfeo_common.h" 41 | #include "blasfeo_d_aux.h" 42 | #include "blasfeo_d_aux_ext_dep.h" 43 | #include "blasfeo_d_kernel.h" 44 | #include "blasfeo_d_blas.h" 45 | #include "blasfeo_s_aux.h" 46 | #include "blasfeo_s_aux_ext_dep.h" 47 | #include "blasfeo_s_kernel.h" 48 | #include "blasfeo_s_blas.h" 49 | #include "blasfeo_i_aux_ext_dep.h" 50 | #include "blasfeo_v_aux_ext_dep.h" 51 | #include "blasfeo_timing.h" 52 | #include "blasfeo_memory.h" 53 | -------------------------------------------------------------------------------- /include/blasfeo_align.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. 
* 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #ifndef BLASFEO_ALIGN_H_ 37 | #define BLASFEO_ALIGN_H_ 38 | 39 | 40 | 41 | #ifdef __cplusplus 42 | extern "C" { 43 | #endif 44 | 45 | 46 | 47 | void blasfeo_align_2MB(void *ptr, void **ptr_align); 48 | void blasfeo_align_4096_byte(void *ptr, void **ptr_align); 49 | void blasfeo_align_64_byte(void *ptr, void **ptr_align); 50 | 51 | 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif // BLASFEO_ALIGN_H_ 58 | -------------------------------------------------------------------------------- /include/blasfeo_d_blas.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. 
* 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #ifndef BLASFEO_D_BLAS_H_ 37 | #define BLASFEO_D_BLAS_H_ 38 | 39 | 40 | 41 | #include "blasfeo_d_blasfeo_api.h" 42 | #include "blasfeo_d_blas_api.h" 43 | 44 | 45 | 46 | #endif // BLASFEO_D_BLAS_H_ 47 | -------------------------------------------------------------------------------- /include/blasfeo_memory.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2020 by Gianluca Frison. * 7 | * All rights reserved. * 8 | * * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. 
Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | 37 | #ifndef BLASFEO_MEMORY_H_ 38 | #define BLASFEO_MEMORY_H_ 39 | 40 | #ifdef __cplusplus 41 | extern "C" { 42 | #endif 43 | 44 | 45 | 46 | // 47 | int blasfeo_is_init(); 48 | // 49 | void blasfeo_init(); 50 | // 51 | void blasfeo_quit(); 52 | // 53 | void *blasfeo_get_buffer(); 54 | 55 | 56 | 57 | 58 | #ifdef __cplusplus 59 | } 60 | #endif 61 | 62 | #endif // BLASFEO_MEMORY_H_ 63 | -------------------------------------------------------------------------------- /include/blasfeo_s_blas.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. 
* 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #ifndef BLASFEO_S_BLAS_H_ 37 | #define BLASFEO_S_BLAS_H_ 38 | 39 | 40 | 41 | #include "blasfeo_s_blasfeo_api.h" 42 | #include "blasfeo_s_blas_api.h" 43 | 44 | 45 | 46 | #endif // BLASFEO_S_BLAS_H_ 47 | -------------------------------------------------------------------------------- /include/blasfeo_stdlib.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. 
* 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | 36 | #ifndef BLASFEO_STDLIB_H_ 37 | #define BLASFEO_STDLIB_H_ 38 | 39 | #ifdef __cplusplus 40 | extern "C" { 41 | #endif 42 | 43 | 44 | 45 | #include 46 | 47 | #ifdef EXT_DEP_MALLOC 48 | // 49 | void blasfeo_malloc(void **ptr, size_t size); 50 | // 51 | void blasfeo_malloc_align(void **ptr, size_t size); 52 | // 53 | void blasfeo_free(void *ptr); 54 | // 55 | void blasfeo_free_align(void *ptr); 56 | #endif 57 | 58 | 59 | 60 | #ifdef __cplusplus 61 | } 62 | #endif 63 | 64 | #endif // BLASFEO_STDLIB_H_ 65 | -------------------------------------------------------------------------------- /kernel/avx/issue_20: -------------------------------------------------------------------------------- 1 | - kernel 4x2, half matrix computed 2 | 3 | -------------------------------------------------------------------------------- /kernel/fma/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | # # 3 | # This file is part of BLASFEO. # 4 | # # 5 | # BLASFEO -- BLAS for embedded optimization. # 6 | # Copyright (C) 2019 by Gianluca Frison. # 7 | # Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. # 8 | # All rights reserved. # 9 | # # 10 | # The 2-Clause BSD License # 11 | # # 12 | # Redistribution and use in source and binary forms, with or without # 13 | # modification, are permitted provided that the following conditions are met: # 14 | # # 15 | # 1. Redistributions of source code must retain the above copyright notice, this # 16 | # list of conditions and the following disclaimer. # 17 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, # 18 | # this list of conditions and the following disclaimer in the documentation # 19 | # and/or other materials provided with the distribution. # 20 | # # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # 22 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # 23 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR # 25 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # 26 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # 28 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # 30 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# 31 | # # 32 | # Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de # 33 | # # 34 | ################################################################################################### 35 | 36 | include ../../Makefile.rule 37 | 38 | 39 | ifeq ($(TARGET), X64_AMD_BULLDOZER) 40 | KERNEL_OBJS = \ 41 | kernel_dgemm_4x4_lib4.o \ 42 | 43 | endif 44 | 45 | 46 | OBJS = 47 | 48 | ifeq ($(LA), HIGH_PERFORMANCE) 49 | OBJS += $(KERNEL_OBJS) 50 | endif # LA choice 51 | 52 | ifeq ($(BLASFEO_HP_API), 1) 53 | OBJS += $(KERNEL_OBJS) 54 | endif # LA choice 55 | 56 | 57 | obj: $(OBJS) 58 | 59 | clean: 60 | rm -f *.o 61 | rm -f *.s 62 | rm -f *.gcda 63 | rm -f *.gcno 64 | rm -f *.gcov 65 | 66 | kernel_dgemm_4x4_lib4.o: kernel_dgemm_4x4_lib4.S kernel_dgemm_4x4_lib.S 67 | -------------------------------------------------------------------------------- /kernel/generic/kernel_align_generic.c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. 
// Round the address in ptr up to the next multiple of 4096 bytes and store
// the result in *ptr_align (an already aligned address is returned unchanged).
void blasfeo_align_4096_byte(void *ptr, void **ptr_align)
	{
	const uintptr_t boundary = 4096;
	uintptr_t addr = (uintptr_t) ptr;
	// bump past the boundary, then drop the remainder
	addr += boundary - 1;
	addr -= addr % boundary;
	*ptr_align = (void *) addr;
	}


// Round the address in ptr up to the next multiple of 64 bytes and store
// the result in *ptr_align (an already aligned address is returned unchanged).
void blasfeo_align_64_byte(void *ptr, void **ptr_align)
	{
	const uintptr_t boundary = 64;
	uintptr_t addr = (uintptr_t) ptr;
	// bump past the boundary, then drop the remainder
	addr += boundary - 1;
	addr -= addr % boundary;
	*ptr_align = (void *) addr;
	}
* 6 | * Copyright (C) 2020 by Gianluca Frison. * 7 | * All rights reserved. * 8 | * * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. * 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Jinja template for the per-test Makefile.
# It links test.c against the libblasfeo found in $(ABS_BINARY_PATH) and runs the
# resulting executable. One -D flag is emitted for every entry of `test_macros`.

TESTS_DIR=$(BLASFEO_PATH)/tests
ABS_BINARY_PATH=$(TESTS_DIR)/$(BINARY_DIR)

include $(BLASFEO_PATH)/Makefile.rule

LIBS =
SHARED_LIBS =

LIBS += $(ABS_BINARY_PATH)/libblasfeo.a
SHARED_LIBS += -Wl,-rpath=$(ABS_BINARY_PATH) -L $(ABS_BINARY_PATH) -lblasfeo

LIBS += -lm
SHARED_LIBS += -lm

LIBS += $(LIBS_EXTERNAL_BLAS)
SHARED_LIBS += $(SHARED_LIBS_EXTERNAL_BLAS)

{% for flag, value in test_macros.items() %}
{%- if value -%}
CFLAGS += -D{{flag | upper}}={{value}}
{% else -%}
CFLAGS += -D{{flag | upper}}
{% endif -%}
{%- endfor -%}

{% if TEST_BLAS_API in test_macros -%}
ifeq ($(EXTERNAL_BLAS), 0)
$(error No EXTERNAL_BLAS specified, install specify one reference blas implementation i.e. OPENBLAS)
endif
{%- endif %}

test.o:
	# build executable obj $(ABS_BINARY_PATH)
	$(CC) $(CFLAGS) -c $(TESTS_DIR)/test.c -o $(ABS_BINARY_PATH)/test.o
	$(CC) $(CFLAGS) $(ABS_BINARY_PATH)/test.o -o $(ABS_BINARY_PATH)/test.out $(LIBS)
	$(ABS_BINARY_PATH)/test.out
	# ~/sde/sde64 -- $(ABS_BINARY_PATH)/test.out


run: test.o

update: run
full: update_lib run
14 | 15 | ### Test definition 16 | 17 | - `test_schema.json`: 18 | Define all possible tests 19 | 20 | - `batch_run.json`: 21 | Define current test run to be executed 22 | Check validity against the schema 23 | 24 | ### Build and execution 25 | 26 | - `tester.py` : 27 | Python script to build the test recipe from the definition and call the right make 28 | command 29 | 30 | - `Makefile`: 31 | Generates the compiler command and calls the compiler 32 | 33 | ### Test C code implementation 34 | 35 | - `test_{s,d}_{aux,blas1,blas2,blas3}.c`: 36 | Compilation target interface with Makefile 37 | 38 | - `test_{s,d}_common.h`: 39 | Define precision-related macros 40 | 41 | - `test_x_common.{h,c}`: 42 | Include test C helper functions 43 | 44 | - `test_class_{gemm, ..}.c`: 45 | Define variations of test helper functions like `call_routine` 46 | tailored to the specific routine class, e.g. `gemm`; 47 | every class of routines has the same signature. 48 | 49 | - `test_x.c` : 50 | Run the actual test templated with the aforementioned configurations. 51 | 52 | 53 | ### BLASFEO 54 | 55 | - `blasfeo_libref.{a, so}`: 56 | 57 | Blasfeo library compiled with REFERENCE target with routine names aliased 58 | postponing `_ref` prefix in order to coexist with the same routines 59 | compiled with other targets, e.g. HIGH_PERFORMANCE. 60 | 61 | - `blasfeo_lib.{a, so}`: 62 | 63 | The actual code to be tested 64 | 65 | 66 | # How To 67 | 68 | Run: `python tester.py` 69 | 70 | The default behavior is to run `recipe_default.json`, 71 | which tests only the `dgemm_nn` routine for target `GENERIC`. 72 | 73 | To customize the run, follow the next two steps: 74 | - Edit the configuration file `recipe_all.json` with the desired values 75 | - Run: `python tester.py recipe_all.json` 76 | 77 | NB: Only the routines specified in `recipe_all.json` are supported for 78 | now. 
// Invoke ROUTINE and its REF counterpart with the same scalar arguments.
// The first call operates on args->sA, the second on args->rA.
void call_routines(struct RoutineArgs *args){

	// unpack args

	// routine call
	//
	// NOTE(review): dimensions are passed as (n, m) here, while print_routine and
	// print_routine_matrices in this file describe the block as m by n -- confirm
	// the intended order against the routine's prototype.
	BLASFEO(ROUTINE)(
		args->n, args->m, args->alpha, args->sA, args->ai, args->aj
		);

	// same arguments as above, only the matrix differs (rA instead of sA)
	BLASFEO(REF(ROUTINE))(
		args->n, args->m, args->alpha, args->rA, args->ai, args->aj
		);

}
-------------------------------------------------------------------------------- 1 | // CLASS_2ARGS 2 | // 3 | // blasfeo_xgecp(ni, mi, &sA, ai, aj, &sB, bi, bj); 4 | 5 | void call_routines(struct RoutineArgs *args){ 6 | // call HP and REF routine 7 | 8 | // routine call 9 | // 10 | BLASFEO(ROUTINE)( 11 | args->m, args->n, 12 | args->sA, args->ai, args->aj, 13 | args->sB, args->bi, args->bj 14 | ); 15 | 16 | BLASFEO(REF(ROUTINE))( 17 | args->n, args->m, 18 | args->rA, args->ai, args->aj, 19 | args->rB, args->bi, args->bj 20 | ); 21 | } 22 | 23 | void print_routine(struct RoutineArgs *args){ 24 | // print current class signature 25 | 26 | printf("%s ", string(ROUTINE)); 27 | printf( 28 | "B[%d:%d,%d:%d] = A[%d:%d,%d:%d]\n", 29 | args->bi, args->m, args->bj, args->n, 30 | args->ai, args->m, args->aj, args->n 31 | ); 32 | 33 | } 34 | 35 | void print_routine_matrices(struct RoutineArgs *args){ 36 | 37 | printf("\nPrint A:\n"); 38 | blasfeo_print_xmat_debug(args->m, args->n, args->sA, args->ai, args->aj, 0, 0, 0, "HP"); 39 | blasfeo_print_xmat_debug(args->m, args->n, args->rA, args->ai, args->aj, 0, 0, 0, "REF"); 40 | 41 | printf("\nPrint B:\n"); 42 | blasfeo_print_xmat_debug(args->m, args->n, args->sB, args->ai, args->aj, 0, 0, 0, "HP"); 43 | blasfeo_print_xmat_debug(args->m, args->n, args->rB, args->ai, args->aj, 0, 0, 0, "REF"); 44 | } 45 | 46 | void set_test_args(struct TestArgs *targs) 47 | { 48 | targs->ais = 5; 49 | targs->bis = 5; 50 | targs->dis = 5; 51 | targs->xjs = 2; 52 | 53 | targs->nis = 5; 54 | targs->njs = 5; 55 | targs->nks = 5; 56 | 57 | targs->alphas = 1; 58 | } 59 | -------------------------------------------------------------------------------- /tests/classes/aux3args.c: -------------------------------------------------------------------------------- 1 | // CLASS_3ARGS 2 | // 3 | // blasfeo_xgecpsc(ni, mi, alpha, &sA, ai, aj, &sB, bi, bj); 4 | // blasfeo_xgead(ni, mi, alpha, &sA, ai, aj, &sB, bi, bj); 5 | 6 | void call_routines(struct RoutineArgs 
*args){ 7 | // call HP and REF routine 8 | 9 | // routine call 10 | // 11 | BLASFEO(ROUTINE)( 12 | args->n, args->m, args->alpha, 13 | args->sA, args->ai, args->aj, 14 | args->sB, args->bi, args->bj 15 | ); 16 | 17 | BLASFEO(REF(ROUTINE))( 18 | args->n, args->m, args->alpha, 19 | args->rA, args->ai, args->aj, 20 | args->rB, args->bi, args->bj 21 | ); 22 | } 23 | 24 | void print_routine(struct RoutineArgs *args){ 25 | // print current class signature 26 | 27 | printf("%s ", string(ROUTINE)); 28 | printf( 29 | "B[%d:%d,%d:%d] = %f*A[%d:%d,%d:%d]\n", 30 | args->bi, args->m, args->bj, args->n, 31 | args->alpha, args->ai, args->m, args->aj, args->n 32 | ); 33 | 34 | } 35 | 36 | void print_routine_matrices(struct RoutineArgs *args){ 37 | 38 | printf("\nPrint A:\n"); 39 | blasfeo_print_xmat_debug(args->m, args->n, args->sA, args->ai, args->aj, 0, 0, 0, "HP"); 40 | blasfeo_print_xmat_debug(args->m, args->n, args->rA, args->ai, args->aj, 0, 0, 0, "REF"); 41 | 42 | printf("\nPrint B:\n"); 43 | blasfeo_print_xmat_debug(args->m, args->n, args->sB, args->ai, args->aj, 0, 0, 0, "HP"); 44 | blasfeo_print_xmat_debug(args->m, args->n, args->rB, args->ai, args->aj, 0, 0, 0, "REF"); 45 | } 46 | 47 | void set_test_args(struct TestArgs *targs) 48 | { 49 | targs->ais = 5; 50 | targs->bis = 5; 51 | targs->dis = 5; 52 | targs->xjs = 2; 53 | 54 | targs->nis = 5; 55 | targs->njs = 5; 56 | targs->nks = 5; 57 | 58 | targs->alphas = 1; 59 | } 60 | -------------------------------------------------------------------------------- /tests/classes/blasapi_gemm.c: -------------------------------------------------------------------------------- 1 | // CLASS_GEMM 2 | // 3 | 4 | void call_routines(struct RoutineArgs *args) 5 | { 6 | // copy input matrix C in D 7 | int ii, jj; 8 | for(jj=0; jjn; jj++) 9 | { 10 | for(ii=0; iim; ii++) 11 | { 12 | args->cD[ii+args->cD_lda*jj] = args->cC[ii+args->cC_lda*jj]; 13 | } 14 | } 15 | for(jj=0; jjn; jj++) 16 | { 17 | for(ii=0; iim; ii++) 18 | { 19 | 
args->bD[ii+args->bD_lda*jj] = args->bC[ii+args->bC_lda*jj]; 20 | } 21 | } 22 | 23 | BLASFEO_BLAS(ROUTINE)( 24 | string(TRANSA), string(TRANSB), 25 | &(args->m), &(args->n), &(args->k), &(args->alpha), 26 | args->cA, &(args->cA_lda), 27 | args->cB, &(args->cB_lda), &(args->beta), 28 | args->cD, &(args->cD_lda)); 29 | 30 | BLAS(ROUTINE)( 31 | string(TRANSA), string(TRANSB), 32 | &(args->m), &(args->n), &(args->k), &(args->alpha), 33 | args->bA, &(args->bA_lda), 34 | args->bB, &(args->bB_lda), &(args->beta), 35 | args->bD, &(args->bD_lda)); 36 | 37 | } 38 | 39 | 40 | 41 | void print_routine(struct RoutineArgs *args) 42 | { 43 | printf("blas_%s(%s, %s, %d, %d, %d, %f, A, %d, B, %d, D, %d);\n", string(ROUTINE), string(TRANSA), string(TRANSB), args->m, args->n, args->k, args->alpha, args->cA_lda, args->cB_lda, args->cD_lda); 44 | } 45 | 46 | 47 | 48 | void print_routine_matrices(struct RoutineArgs *args) 49 | { 50 | printf("\nPrint A:\n"); 51 | if(*string(TRANSA)=='n' || *string(TRANSA)=='N') 52 | { 53 | print_xmat_debug(args->m, args->k, args->cA, args->cA_lda, args->ai, args->aj, 0, 0, 0, "HP"); 54 | print_xmat_debug(args->m, args->k, args->bA, args->cA_lda, args->ai, args->aj, 0, 0, 0, "REF"); 55 | } 56 | else 57 | { 58 | print_xmat_debug(args->k, args->m, args->cA, args->cA_lda, args->ai, args->aj, 0, 0, 0, "HP"); 59 | print_xmat_debug(args->k, args->m, args->bA, args->cA_lda, args->ai, args->aj, 0, 0, 0, "REF"); 60 | } 61 | 62 | printf("\nPrint B:\n"); 63 | if(*string(TRANSB)=='n' || *string(TRANSB)=='N') 64 | { 65 | print_xmat_debug(args->k, args->n, args->cB, args->cB_lda, args->bi, args->bj, 0, 0, 0, "HP"); 66 | print_xmat_debug(args->k, args->n, args->bB, args->bB_lda, args->bi, args->bj, 0, 0, 0, "REF"); 67 | } 68 | else 69 | { 70 | print_xmat_debug(args->n, args->k, args->cB, args->cB_lda, args->bi, args->bj, 0, 0, 0, "HP"); 71 | print_xmat_debug(args->n, args->k, args->bB, args->bB_lda, args->bi, args->bj, 0, 0, 0, "REF"); 72 | } 73 | 74 | printf("\nPrint 
C:\n"); 75 | print_xmat_debug(args->m, args->n, args->cC, args->cC_lda, args->di, args->dj, 0, 0, 0, "HP"); 76 | print_xmat_debug(args->m, args->n, args->bC, args->bC_lda, args->di, args->dj, 0, 0, 0, "REF"); 77 | } 78 | 79 | 80 | 81 | void set_test_args(struct TestArgs *targs) 82 | { 83 | // targs->ais = 5; 84 | // targs->bis = 5; 85 | // targs->dis = 5; 86 | // targs->xjs = 2; 87 | 88 | targs->nis = 17; 89 | targs->njs = 8; 90 | targs->nks = 8; 91 | 92 | targs->alphas = 1; 93 | } 94 | -------------------------------------------------------------------------------- /tests/classes/blasapi_getrf.c: -------------------------------------------------------------------------------- 1 | // CLASS_GETRF_BLASAPI 2 | // 3 | void call_routines(struct RoutineArgs *args) 4 | { 5 | // copy input matrix A in D 6 | int ii, jj; 7 | for(jj=0; jjm; jj++) 8 | { 9 | for(ii=0; iim; ii++) 10 | { 11 | args->cD[ii+args->cD_lda*jj] = args->cA_po[ii+args->cA_po_lda*jj]; 12 | } 13 | } 14 | for(jj=0; jjm; jj++) 15 | { 16 | for(ii=0; iim; ii++) 17 | { 18 | args->bD[ii+args->bD_lda*jj] = args->bA_po[ii+args->bA_po_lda*jj]; 19 | } 20 | } 21 | 22 | // routine call 23 | // 24 | BLASFEO_LAPACK(ROUTINE)( 25 | &(args->m), &(args->n), 26 | args->cD, &(args->cD_lda), 27 | args->cipiv, &(args->info)); 28 | 29 | BLAS(ROUTINE)( 30 | &(args->m), &(args->n), 31 | args->bD, &(args->bD_lda), 32 | args->bipiv, &(args->info)); 33 | 34 | // D matrix is overwritten with the solution 35 | 36 | } 37 | 38 | 39 | 40 | void print_routine(struct RoutineArgs *args) 41 | { 42 | printf("blas_%s(%d, %d, D, %d, ipiv, info);\n", string(ROUTINE), args->m, args->n, args->cD_lda); 43 | } 44 | 45 | 46 | 47 | void print_routine_matrices(struct RoutineArgs *args) 48 | { 49 | printf("\nInput matrix:\n"); 50 | print_xmat_debug(args->m, args->m, args->cA, args->cA_lda, 0, 0, 0, 0, 0, "HP"); 51 | print_xmat_debug(args->m, args->m, args->bA, args->bA_lda, 0, 0, 0, 0, 0, "REF"); 52 | 53 | printf("\nRow pivot vector:\n"); 54 | int size = 
args->m < args->n ? args->m : args->n; 55 | int_print_mat(1, size, args->cipiv, 1); 56 | int_print_mat(1, size, args->bipiv, 1); 57 | } 58 | 59 | 60 | 61 | void set_test_args(struct TestArgs *targs) 62 | { 63 | targs->nis = 21; 64 | targs->njs = 21; 65 | } 66 | -------------------------------------------------------------------------------- /tests/classes/blasapi_potrf.c: -------------------------------------------------------------------------------- 1 | // CLASS_POTRF_BLASAPI 2 | // 3 | void call_routines(struct RoutineArgs *args) 4 | { 5 | // copy input matrix A in D 6 | int ii, jj; 7 | for(jj=0; jjm; jj++) 8 | { 9 | for(ii=0; iim; ii++) 10 | { 11 | args->cD[ii+args->cD_lda*jj] = args->cA_po[ii+args->cA_po_lda*jj]; 12 | } 13 | } 14 | for(jj=0; jjm; jj++) 15 | { 16 | for(ii=0; iim; ii++) 17 | { 18 | args->bD[ii+args->bD_lda*jj] = args->bA_po[ii+args->bA_po_lda*jj]; 19 | } 20 | } 21 | 22 | // routine call 23 | // 24 | BLASFEO_LAPACK(ROUTINE)( 25 | string(UPLO), &(args->m), 26 | args->cD, &(args->cD_lda), 27 | &(args->info)); 28 | 29 | BLAS(ROUTINE)( 30 | string(UPLO), &(args->m), 31 | args->bD, &(args->bD_lda), 32 | &(args->info)); 33 | 34 | // D matrix is overwritten with the solution 35 | 36 | } 37 | 38 | 39 | 40 | void print_routine(struct RoutineArgs *args) 41 | { 42 | printf("blas_%s(%s, %d, D, %d, info);\n", string(ROUTINE), string(UPLO), args->m, args->cD_lda); 43 | } 44 | 45 | 46 | 47 | void print_routine_matrices(struct RoutineArgs *args) 48 | { 49 | printf("\nInput matrix:\n"); 50 | print_xmat_debug(args->m, args->m, args->cA_po, args->cA_po_lda, 0, 0, 0, 0, 0, "HP"); 51 | print_xmat_debug(args->m, args->m, args->bA_po, args->bA_po_lda, 0, 0, 0, 0, 0, "REF"); 52 | } 53 | 54 | 55 | 56 | void set_test_args(struct TestArgs *targs) 57 | { 58 | targs->nis = 21; 59 | } 60 | -------------------------------------------------------------------------------- /tests/classes/blasapi_syrk.c: 
-------------------------------------------------------------------------------- 1 | // CLASS_SYRK 2 | // Calls BLASFEO_BLAS(ROUTINE) and BLAS(ROUTINE) on separate m x m copies (cD, bD) of B. 3 | 4 | void call_routines(struct RoutineArgs *args) 5 | { 6 | 7 | // copy input matrix B in D 8 | int ii, jj; 9 | for(jj=0; jj<args->m; jj++) 10 | { 11 | for(ii=0; ii<args->m; ii++) 12 | { 13 | args->cD[ii+args->cD_lda*jj] = args->cB[ii+args->cB_lda*jj]; 14 | } 15 | } 16 | for(jj=0; jj<args->m; jj++) 17 | { 18 | for(ii=0; ii<args->m; ii++) 19 | { 20 | args->bD[ii+args->bD_lda*jj] = args->bB[ii+args->bB_lda*jj]; 21 | } 22 | } 23 | 24 | // routine call 25 | // 26 | BLASFEO_BLAS(ROUTINE)( 27 | string(UPLO), string(TRANS), 28 | &(args->m), &(args->k), &(args->alpha), 29 | args->cA, &(args->cA_lda), 30 | &(args->beta), 31 | args->cD, &(args->cD_lda)); 32 | 33 | BLAS(ROUTINE)( 34 | string(UPLO), string(TRANS), 35 | &(args->m), &(args->k), &(args->alpha), 36 | args->bA, &(args->bA_lda), 37 | &(args->beta), 38 | args->bD, &(args->bD_lda)); 39 | 40 | // D matrix is overwritten with the solution 41 | 42 | } 43 | 44 | 45 | 46 | void print_routine(struct RoutineArgs *args) 47 | { 48 | printf("blas_%s(%s, %s, %d, %d, %f, A, %d, %f, D, %d);\n", string(ROUTINE), string(UPLO), string(TRANS), args->m, args->k, args->alpha, args->cA_lda, args->beta, args->cD_lda); 49 | } 50 | 51 | 52 | 53 | void print_routine_matrices(struct RoutineArgs *args) 54 | { 55 | printf("\nPrint A:\n"); 56 | if(*string(TRANS)=='n' || *string(TRANS)=='N') 57 | { 58 | print_xmat_debug(args->m, args->k, args->cA, args->cA_lda, 0, 0, 0, 0, 0, "HP"); 59 | print_xmat_debug(args->m, args->k, args->bA, args->bA_lda, 0, 0, 0, 0, 0, "REF"); 60 | } 61 | else 62 | { 63 | print_xmat_debug(args->k, args->m, args->cA, args->cA_lda, 0, 0, 0, 0, 0, "HP"); 64 | print_xmat_debug(args->k, args->m, args->bA, args->bA_lda, 0, 0, 0, 0, 0, "REF"); 65 | } 66 | 67 | printf("\nPrint B:\n"); 68 | print_xmat_debug(args->m, args->m, args->cB, args->cB_lda, 0, 0, 0, 0, 0, "HP"); 69 | print_xmat_debug(args->m, args->m, args->bB, args->bB_lda, 0, 0, 0, 0, 0, 
"REF"); 70 | } 71 | 72 | 73 | 74 | void set_test_args(struct TestArgs *targs) 75 | { 76 | targs->nis = 17; 77 | targs->nks = 9; 78 | 79 | targs->alphas = 1; 80 | } 81 | -------------------------------------------------------------------------------- /tests/classes/blasapi_trm.c: -------------------------------------------------------------------------------- 1 | // CLASS_GEMM 2 | // Calls BLASFEO_BLAS(ROUTINE) and BLAS(ROUTINE) on separate m x n copies (cD, bD) of B. 3 | void call_routines(struct RoutineArgs *args) 4 | { 5 | 6 | // copy input matrix B in D 7 | int ii, jj; 8 | for(jj=0; jj<args->n; jj++) 9 | { 10 | for(ii=0; ii<args->m; ii++) 11 | { 12 | args->cD[ii+args->cD_lda*jj] = args->cB[ii+args->cB_lda*jj]; 13 | } 14 | } 15 | for(jj=0; jj<args->n; jj++) 16 | { 17 | for(ii=0; ii<args->m; ii++) 18 | { 19 | args->bD[ii+args->bD_lda*jj] = args->bB[ii+args->bB_lda*jj]; 20 | } 21 | } 22 | 23 | BLASFEO_BLAS(ROUTINE)( 24 | string(SIDE), string(UPLO), string(TRANSA), string(DIAG), 25 | &(args->m), &(args->n), &(args->alpha), 26 | args->cA, &(args->cA_lda), 27 | args->cD, &(args->cD_lda)); 28 | 29 | BLAS(ROUTINE)( 30 | string(SIDE), string(UPLO), string(TRANSA), string(DIAG), 31 | &(args->m), &(args->n), &(args->alpha), 32 | args->bA, &(args->bA_lda), 33 | args->bD, &(args->bD_lda)); 34 | 35 | // D matrix is overwritten with the solution 36 | 37 | } 38 | 39 | // prints the flags in the order they are passed to the routine: SIDE, UPLO, TRANSA, DIAG 40 | void print_routine(struct RoutineArgs *args) 41 | { 42 | printf("blas_%s(%s, %s, %s, %s, %d, %d, %f, A, %d, D, %d);\n", string(ROUTINE), string(SIDE), string(UPLO), string(TRANSA), string(DIAG), args->m, args->n, args->alpha, args->cA_lda, args->cD_lda); 43 | } 44 | 45 | 46 | 47 | void print_routine_matrices(struct RoutineArgs *args) 48 | { 49 | printf("\nPrint A:\n"); 50 | if(*string(SIDE)=='l' || *string(SIDE)=='L') 51 | { 52 | print_xmat_debug(args->m, args->m, args->cA, args->cA_lda, 0, 0, 0, 0, 0, "HP"); 53 | print_xmat_debug(args->m, args->m, args->bA, args->bA_lda, 0, 0, 0, 0, 0, "REF"); 54 | } 55 | else 56 | { 57 | print_xmat_debug(args->n, args->n, args->cA, args->cA_lda, 0, 0, 0, 0, 0, "HP"); 58 | 
print_xmat_debug(args->n, args->n, args->bA, args->bA_lda, 0, 0, 0, 0, 0, "REF"); 59 | } 60 | 61 | printf("\nPrint B:\n"); 62 | print_xmat_debug(args->m, args->n, args->cB, args->cB_lda, 0, 0, 0, 0, 0, "HP"); 63 | print_xmat_debug(args->m, args->n, args->bB, args->bB_lda, 0, 0, 0, 0, 0, "REF"); 64 | } 65 | 66 | 67 | 68 | void set_test_args(struct TestArgs *targs) 69 | { 70 | targs->nis = 21; 71 | targs->njs = 21; 72 | 73 | // targs->alphas = 1; 74 | } 75 | -------------------------------------------------------------------------------- /tests/classes/geqf.c: -------------------------------------------------------------------------------- 1 | // CLASS_GEQRF/GELQF 2 | // 3 | void call_routines(struct RoutineArgs *args) 4 | { 5 | 6 | // unpack args 7 | 8 | // allocate memory for work 9 | int memsize = WORKSIZE(BLASFEO(ROUTINE))(args->m, args->n); 10 | void *mem; 11 | v_zeros_align(&mem, memsize); 12 | 13 | int ref_memsize = WORKSIZE(BLASFEO(REF(ROUTINE)))(args->m, args->n); 14 | void *ref_mem; 15 | v_zeros_align(&ref_mem, ref_memsize); 16 | 17 | 18 | // routine call 19 | // 20 | BLASFEO(ROUTINE)( 21 | args->m, args->n, 22 | args->sA_po, args->ai, args->aj, 23 | args->sD, args->di, args->dj, 24 | mem 25 | ); 26 | 27 | BLASFEO(REF(ROUTINE))( 28 | args->m, args->n, 29 | args->rA_po, args->ai, args->aj, 30 | args->rD, args->di, args->dj, 31 | ref_mem 32 | ); 33 | 34 | // free memory 35 | v_free_align(mem); 36 | v_free_align(ref_mem); 37 | 38 | } 39 | 40 | 41 | 42 | void print_routine(struct RoutineArgs *args) 43 | { 44 | // unpack args 45 | 46 | printf("%s\n", string(ROUTINE)); 47 | printf( 48 | "Solving A[%d:%d,%d:%d] = (QR) LQ[%d:%d,%d:%d]\n", 49 | args->ai, args->m, args->aj, args->n, 50 | args->di, args->m, args->dj, args->n 51 | ); 52 | 53 | } 54 | 55 | 56 | 57 | void print_routine_matrices(struct RoutineArgs *args) 58 | { 59 | printf("\nPrint A:\n"); 60 | blasfeo_print_xmat_debug(args->m, args->n, args->sA_po, args->ai, args->aj, 0, 0, 0, "HP"); 61 | 
blasfeo_print_xmat_debug(args->m, args->n, args->rA_po, args->ai, args->aj, 0, 0, 0, "REF"); 62 | 63 | printf("\nPrint LU:\n"); 64 | blasfeo_print_xmat_debug(args->m, args->n, args->sD, args->ai, args->aj, 0, 0, 0, "HP"); 65 | blasfeo_print_xmat_debug(args->m, args->n, args->rD, args->ai, args->aj, 0, 0, 0, "REF"); 66 | } 67 | 68 | 69 | 70 | void set_test_args(struct TestArgs *targs) 71 | { 72 | targs->nis = 9; 73 | targs->njs = 9; 74 | targs->nks = 9; 75 | 76 | targs->alphas = 1; 77 | } 78 | -------------------------------------------------------------------------------- /tests/classes/getrf_nopivot.c: -------------------------------------------------------------------------------- 1 | // CLASS_GETRF_NOPIVOT 2 | // Calls BLASFEO(ROUTINE) and BLASFEO(REF(ROUTINE)) with the same sizes and offsets. 3 | void call_routines(struct RoutineArgs *args){ 4 | 5 | // unpack args 6 | 7 | // routine call 8 | // 9 | BLASFEO(ROUTINE)( 10 | args->m, args->n, 11 | args->sA_po, args->ai, args->aj, 12 | args->sD, args->di, args->dj 13 | ); 14 | 15 | BLASFEO(REF(ROUTINE))( 16 | args->m, args->n, 17 | args->rA_po, args->ai, args->aj, 18 | args->rD, args->di, args->dj 19 | ); 20 | 21 | } 22 | 23 | void print_routine(struct RoutineArgs *args){ 24 | // unpack args 25 | 26 | printf("%s\n", string(ROUTINE)); 27 | printf( 28 | "Solving A[%d:%d,%d:%d] = P * LU[%d:%d,%d:%d]\n", 29 | args->ai, args->m, args->aj, args->n, 30 | args->di, args->m, args->dj, args->n 31 | ); 32 | 33 | } 34 | 35 | void print_routine_matrices(struct RoutineArgs *args) 36 | { 37 | printf("\nPrint A:\n"); 38 | blasfeo_print_xmat_debug(args->m, args->n, args->sA_po, args->ai, args->aj, 0, 0, 0, "HP"); 39 | blasfeo_print_xmat_debug(args->m, args->n, args->rA_po, args->ai, args->aj, 0, 0, 0, "REF"); 40 | 41 | printf("\nPrint LU:\n"); 42 | blasfeo_print_xmat_debug(args->m, args->n, args->sD, args->ai, args->aj, 0, 0, 0, "HP"); 43 | blasfeo_print_xmat_debug(args->m, args->n, args->rD, args->ai, args->aj, 0, 0, 0, "REF"); 44 | } 45 | 46 | 47 | void set_test_args(struct TestArgs *targs) 48 | { 49 | 
targs->nis = 9; 50 | targs->njs = 9; 51 | targs->nks = 9; 52 | 53 | targs->alphas = 1; 54 | } 55 | -------------------------------------------------------------------------------- /tests/classes/getrf_rp.c: -------------------------------------------------------------------------------- 1 | // CLASS_GETRF_ROWPIVOT 2 | // 3 | 4 | void call_routines(struct RoutineArgs *args) 5 | { 6 | // routine call 7 | // 8 | BLASFEO(ROUTINE)( 9 | args->m, args->n, 10 | args->sA_po, args->ai, args->aj, 11 | args->sD, args->di, args->dj, 12 | args->sipiv); 13 | 14 | BLASFEO(REF(ROUTINE))( 15 | args->m, args->n, 16 | args->rA_po, args->ai, args->aj, 17 | args->rD, args->di, args->dj, 18 | args->ripiv); 19 | } 20 | 21 | 22 | 23 | void print_routine(struct RoutineArgs *args) 24 | { 25 | printf("blasfeo_%s(%d, %d, A, %d, %d, D, %d, %d, ipiv);\n", string(ROUTINE), args->m, args->n, args->ai, args->aj, args->di, args->dj); 26 | } 27 | 28 | 29 | 30 | void print_routine_matrices(struct RoutineArgs *args) 31 | { 32 | printf("\nPrint A:\n"); 33 | blasfeo_print_xmat_debug(args->m, args->n, args->sA_po, args->ai, args->aj, 0, 0, 0, "HP"); 34 | blasfeo_print_xmat_debug(args->m, args->n, args->rA_po, args->ai, args->aj, 0, 0, 0, "REF"); 35 | 36 | printf("\nPrint LU:\n"); 37 | blasfeo_print_xmat_debug(args->m, args->n, args->sD, args->ai, args->aj, 0, 0, 0, "HP"); 38 | blasfeo_print_xmat_debug(args->m, args->n, args->rD, args->ai, args->aj, 0, 0, 0, "REF"); 39 | } 40 | 41 | 42 | 43 | void set_test_args(struct TestArgs *targs) 44 | { 45 | // targs->ais = 1; 46 | // targs->bis = 1; 47 | // targs->dis = 1; 48 | // targs->xjs = 5; 49 | 50 | targs->nis = 13; 51 | targs->njs = 13; 52 | } 53 | -------------------------------------------------------------------------------- /tests/classes/potrf.c: -------------------------------------------------------------------------------- 1 | // CLASS_POTRF 2 | // 3 | 4 | void call_routines(struct RoutineArgs *args) 5 | { 6 | // routine call 7 | // 8 | 
BLASFEO(ROUTINE)( 9 | args->m, 10 | args->sA_po, args->ai, args->aj, 11 | args->sD, args->di, args->dj 12 | ); 13 | 14 | BLASFEO(REF(ROUTINE))( 15 | args->m, 16 | args->rA_po, args->ai, args->aj, 17 | args->rD, args->di, args->dj 18 | ); 19 | } 20 | 21 | 22 | 23 | void print_routine(struct RoutineArgs *args) 24 | { 25 | printf("blasfeo_%s(%d, A, %d, %d, D, %d, %d);\n", string(ROUTINE), args->m, args->ai, args->aj, args->di, args->dj); 26 | } 27 | 28 | 29 | 30 | void print_routine_matrices(struct RoutineArgs *args) 31 | { 32 | printf("\nPrint A:\n"); 33 | blasfeo_print_xmat_debug(args->m, args->n, args->sA_po, args->ai, args->aj, 0, 0, 0, "HP"); 34 | blasfeo_print_xmat_debug(args->m, args->n, args->rA_po, args->ai, args->aj, 0, 0, 0, "REF"); 35 | 36 | printf("\nPrint D:\n"); 37 | blasfeo_print_xmat_debug(args->m, args->n, args->sD, args->ai, args->aj, 0, 0, 0, "HP"); 38 | blasfeo_print_xmat_debug(args->m, args->n, args->rD, args->ai, args->aj, 0, 0, 0, "REF"); 39 | } 40 | 41 | 42 | 43 | void set_test_args(struct TestArgs *targs) 44 | { 45 | targs->nis = 21; 46 | } 47 | -------------------------------------------------------------------------------- /tests/classes/potrf_mn.c: -------------------------------------------------------------------------------- 1 | // CLASS_POTRF_MN 2 | // 3 | 4 | void call_routines(struct RoutineArgs *args) 5 | { 6 | // routine call 7 | // 8 | BLASFEO(ROUTINE)( 9 | args->m, args->n, 10 | args->sA_po, args->ai, args->aj, 11 | args->sD, args->di, args->dj 12 | ); 13 | 14 | BLASFEO(REF(ROUTINE))( 15 | args->m, args->n, 16 | args->rA_po, args->ai, args->aj, 17 | args->rD, args->di, args->dj 18 | ); 19 | } 20 | 21 | 22 | 23 | void print_routine(struct RoutineArgs *args) 24 | { 25 | printf("blasfeo_%s(%d, %d, A, %d, %d, D, %d, %d);\n", string(ROUTINE), args->m, args->n, args->ai, args->aj, args->di, args->dj); 26 | } 27 | 28 | 29 | 30 | void print_routine_matrices(struct RoutineArgs *args) 31 | { 32 | printf("\nPrint A:\n"); 33 | 
blasfeo_print_xmat_debug(args->m, args->n, args->sA_po, args->ai, args->aj, 0, 0, 0, "HP"); 34 | blasfeo_print_xmat_debug(args->m, args->n, args->rA_po, args->ai, args->aj, 0, 0, 0, "REF"); 35 | 36 | printf("\nPrint D:\n"); 37 | blasfeo_print_xmat_debug(args->m, args->n, args->sD, args->ai, args->aj, 0, 0, 0, "HP"); 38 | blasfeo_print_xmat_debug(args->m, args->n, args->rD, args->ai, args->aj, 0, 0, 0, "REF"); 39 | } 40 | 41 | 42 | 43 | void set_test_args(struct TestArgs *targs) 44 | { 45 | targs->nis = 13; 46 | targs->njs = 13; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /tests/classes/syrk.c: -------------------------------------------------------------------------------- 1 | // CLASS_SYRK 2 | // 3 | void call_routines(struct RoutineArgs *args) 4 | { 5 | // routine call 6 | // 7 | BLASFEO(ROUTINE)( 8 | args->m, args->n, args->alpha, 9 | args->sA, args->ai, args->aj, 10 | args->sB, args->bi, args->bj, args->beta, 11 | args->sC, args->ci, args->cj, 12 | args->sD, args->di, args->dj); 13 | 14 | BLASFEO(REF(ROUTINE))( 15 | args->m, args->n, args->alpha, 16 | args->rA, args->ai, args->aj, 17 | args->rB, args->bi, args->bj, args->beta, 18 | args->rC, args->ci, args->cj, 19 | args->rD, args->di, args->dj); 20 | 21 | } 22 | 23 | 24 | 25 | void print_routine(struct RoutineArgs *args) 26 | { 27 | printf("blasfeo_%s(%d, %d, %f, A, %d, %d, B, %d, %d, %f, C, %d, %d, D, %d, %d);\n", string(ROUTINE), args->m, args->n, args->alpha, args->ai, args->aj, args->bi, args->bj, args->beta, args->ci, args->cj, args->di, args->dj); 28 | } 29 | 30 | 31 | 32 | void print_routine_matrices(struct RoutineArgs *args) 33 | { 34 | if(!strcmp(string(ROUTINE), "dsyrk_ln") || !strcmp(string(ROUTINE), "dsyrk_un") || !strcmp(string(ROUTINE), "ssyrk_ln") || !strcmp(string(ROUTINE), "ssyrk_un")) 35 | { 36 | printf("\nPrint A:\n"); 37 | blasfeo_print_xmat_debug(args->m, args->n, args->sA, args->ai, args->aj, 0, 0, 0, "HP"); 38 | 
blasfeo_print_xmat_debug(args->m, args->n, args->rA, args->ai, args->aj, 0, 0, 0, "REF"); 39 | 40 | printf("\nPrint B:\n"); 41 | blasfeo_print_xmat_debug(args->m, args->n, args->sB, args->bi, args->bj, 0, 0, 0, "HP"); 42 | blasfeo_print_xmat_debug(args->m, args->n, args->rB, args->bi, args->bj, 0, 0, 0, "REF"); 43 | } 44 | else 45 | { 46 | printf("\nPrint A:\n"); 47 | blasfeo_print_xmat_debug(args->n, args->m, args->sA, args->ai, args->aj, 0, 0, 0, "HP"); 48 | blasfeo_print_xmat_debug(args->n, args->m, args->rA, args->ai, args->aj, 0, 0, 0, "REF"); 49 | 50 | printf("\nPrint B:\n"); 51 | blasfeo_print_xmat_debug(args->n, args->m, args->sB, args->bi, args->bj, 0, 0, 0, "HP"); 52 | blasfeo_print_xmat_debug(args->n, args->m, args->rB, args->bi, args->bj, 0, 0, 0, "REF"); 53 | } 54 | 55 | printf("\nPrint C:\n"); 56 | blasfeo_print_xmat_debug(args->m, args->m, args->sC, args->ai, args->aj, 0, 0, 0, "HP"); 57 | blasfeo_print_xmat_debug(args->m, args->m, args->rC, args->ai, args->aj, 0, 0, 0, "REF"); 58 | 59 | printf("\nPrint D:\n"); 60 | blasfeo_print_xmat_debug(args->m, args->m, args->sD, args->ai, args->aj, 0, 0, 0, "HP"); 61 | blasfeo_print_xmat_debug(args->m, args->m, args->rD, args->ai, args->aj, 0, 0, 0, "REF"); 62 | } 63 | 64 | 65 | 66 | void set_test_args(struct TestArgs *targs) 67 | { 68 | targs->nis = 21; 69 | targs->njs = 9; 70 | } 71 | -------------------------------------------------------------------------------- /tests/classes/trm.c: -------------------------------------------------------------------------------- 1 | // CLASS_TRM 2 | // 3 | void call_routines(struct RoutineArgs *args) 4 | { 5 | 6 | // unpack args 7 | 8 | // routine call 9 | // 10 | BLASFEO(ROUTINE)( 11 | args->m, args->n, args->alpha, 12 | args->sA, args->ai, args->aj, 13 | args->sB, args->bi, args->bj, 14 | args->sD, args->di, args->dj); 15 | 16 | BLASFEO(REF(ROUTINE))( 17 | args->m, args->n, args->alpha, 18 | args->rA, args->ai, args->aj, 19 | args->rB, args->bi, args->bj, 20 | args->rD, 
args->di, args->dj); 21 | 22 | } 23 | 24 | 25 | // prints the call with the D offsets (di, dj) that call_routines passes to the routine 26 | void print_routine(struct RoutineArgs *args) 27 | { 28 | printf("blasfeo_%s(%d, %d, %f, A, %d, %d, B, %d, %d, D, %d, %d);\n", string(ROUTINE), args->m, args->n, args->alpha, args->ai, args->aj, args->bi, args->bj, args->di, args->dj); 29 | } 30 | 31 | 32 | 33 | void print_routine_matrices(struct RoutineArgs *args) 34 | { 35 | if( !strcmp(string(ROUTINE), "dtrsm_llnn") | !strcmp(string(ROUTINE), "strsm_llnn") | 36 | !strcmp(string(ROUTINE), "dtrsm_llnu") | !strcmp(string(ROUTINE), "strsm_llnu") | 37 | !strcmp(string(ROUTINE), "dtrsm_lltn") | !strcmp(string(ROUTINE), "strsm_lltn") | 38 | !strcmp(string(ROUTINE), "dtrsm_lltu") | !strcmp(string(ROUTINE), "strsm_lltu") | 39 | !strcmp(string(ROUTINE), "dtrsm_lunn") | !strcmp(string(ROUTINE), "strsm_lunn") | 40 | !strcmp(string(ROUTINE), "dtrsm_lunu") | !strcmp(string(ROUTINE), "strsm_lunu") | 41 | !strcmp(string(ROUTINE), "dtrsm_lutn") | !strcmp(string(ROUTINE), "strsm_lutn") | 42 | !strcmp(string(ROUTINE), "dtrsm_lutu") | !strcmp(string(ROUTINE), "strsm_lutu") ) 43 | { 44 | printf("\nPrint A:\n"); 45 | blasfeo_print_xmat_debug(args->n, args->n, args->sA, args->ai, args->aj, 0, 0, 0, "HP"); 46 | blasfeo_print_xmat_debug(args->n, args->n, args->rA, args->ai, args->aj, 0, 0, 0, "REF"); 47 | } 48 | else 49 | { 50 | printf("\nPrint A:\n"); 51 | blasfeo_print_xmat_debug(args->m, args->m, args->sA, args->ai, args->aj, 0, 0, 0, "HP"); 52 | blasfeo_print_xmat_debug(args->m, args->m, args->rA, args->ai, args->aj, 0, 0, 0, "REF"); 53 | } 54 | 55 | printf("\nPrint B:\n"); 56 | blasfeo_print_xmat_debug(args->m, args->n, args->sB, args->ai, args->aj, 0, 0, 0, "HP"); 57 | blasfeo_print_xmat_debug(args->m, args->n, args->rB, args->ai, args->aj, 0, 0, 0, "REF"); 58 | 59 | printf("\nPrint D:\n"); 60 | blasfeo_print_xmat_debug(args->m, args->n, args->sD, args->ai, args->aj, 0, 0, 0, "HP"); 61 | blasfeo_print_xmat_debug(args->m, args->n, args->rD, args->ai, args->aj, 0, 0, 0, "REF"); 62 
| } 63 | 64 | 65 | 66 | void set_test_args(struct TestArgs *targs) 67 | { 68 | targs->nis = 21; 69 | targs->njs = 21; 70 | // targs->nks = 20; 71 | 72 | // targs->ni0 = 10; 73 | // targs->nj0 = 10; 74 | // targs->nk0 = 10; 75 | 76 | targs->alphas = 1; 77 | } 78 | -------------------------------------------------------------------------------- /tests/genarate_docs.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | import subprocess 4 | import sys 5 | import json 6 | from jinja2 import Template 7 | 8 | class RoutinesLibrary: 9 | def __init__(self, routines="routines.json"): 10 | 11 | with open(routines) as f: 12 | self.routines = json.load(f) 13 | 14 | 15 | def generate_docs(self, output_file="routines_list.md", template="routines_list.md.tpl"): 16 | 17 | with open(template) as f: 18 | template_txt = f.read() 19 | 20 | self.preprocess() 21 | template = Template(template_txt) 22 | output = template.render(routines=self.routines) 23 | with open(output_file, "w") as f: 24 | f.write(output) 25 | print(output) 26 | 27 | def preprocess(self): 28 | for rtn_class_name, rtn_class in self.routines.items(): 29 | if rtn_class_name=="aux": self.build_subclasses("aux", skip=["mem"]) 30 | self.add_precision(rtn_class_name) 31 | 32 | def build_subclasses(self, rtn_class, skip=[]): 33 | built_class = {} 34 | for rtn_subclass_name, rtn_subclass in self.routines[rtn_class].items(): 35 | 36 | if rtn_subclass_name in skip: 37 | built_class.update(rtn_subclass) 38 | continue 39 | for rtn_name, rtn_info in rtn_subclass.items(): 40 | built_class[f"{rtn_subclass_name}{rtn_name}"] = rtn_info 41 | 42 | self.routines[rtn_class] = built_class 43 | 44 | def add_precision(self, rtn_class): 45 | built_class = {} 46 | for rtn_name, rtn_info in self.routines[rtn_class].items(): 47 | 48 | if "{PREC}" not in rtn_name and "STR" not in rtn_name: 49 | # default naming 50 | routine_name = f"blasfeo_(s|d){rtn_name}".replace("_", "\_") 51 | 
else: 52 | # templated naming 53 | routine_name="blasfeo_"+rtn_name.format( 54 | PREC="(s|d)", 55 | STRVEC="(s|d)vec", 56 | STRMAT="(s|d)mat") 57 | routine_name = routine_name.replace("_", "\_") 58 | 59 | built_class[routine_name] = rtn_info 60 | 61 | 62 | self.routines[rtn_class] = built_class 63 | 64 | 65 | if __name__ == "__main__": 66 | 67 | # generate recipes 68 | # test set to be run in the given excution of the script 69 | routines_lib = RoutinesLibrary() 70 | # print(json.dumps(cookbook.recipe, indent=4)) 71 | routines_lib.generate_docs() 72 | -------------------------------------------------------------------------------- /tests/routines_list.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Routines List" 3 | order = 2 4 | +++ 5 | 6 | 7 | List on routines implemented in BLASFEO for a reference about the namining 8 | conventions adopted see [Naming Conventions](/docs/naming) 9 | 10 | 11 | 12 | ## blas1 13 | - blasfeo\_(s|d)axpy 14 | - blasfeo\_(s|d)axpby 15 | - blasfeo\_(s|d)vecmul 16 | - blasfeo\_(s|d)vecmulacc 17 | - blasfeo\_(s|d)vecmuldot 18 | - blasfeo\_(s|d)dot 19 | - blasfeo\_(s|d)rotg 20 | - blasfeo\_(s|d)colrot 21 | - blasfeo\_(s|d)rowrot 22 | 23 | 24 | 25 | 26 | ## blas2 27 | - blasfeo\_(s|d)gemv\_n 28 | - blasfeo\_(s|d)gemv\_t 29 | - blasfeo\_(s|d)gemv\_d 30 | - blasfeo\_(s|d)gemv\_nt 31 | - blasfeo\_(s|d)symv\_l\_nt 32 | - blasfeo\_(s|d)trsv\_lnn\_mn 33 | - blasfeo\_(s|d)trsv\_ltn\_mn 34 | - blasfeo\_(s|d)trsv\_lnn 35 | - blasfeo\_(s|d)trsv\_lnu 36 | - blasfeo\_(s|d)trsv\_ltn 37 | - blasfeo\_(s|d)trsv\_unn 38 | - blasfeo\_(s|d)trsv\_utn 39 | - blasfeo\_(s|d)trsm\_unn 40 | - blasfeo\_(s|d)trsm\_utn 41 | - blasfeo\_(s|d)trsm\_lnn 42 | - blasfeo\_(s|d)trsm\_ltn 43 | 44 | 45 | 46 | 47 | ## blas3 48 | - blasfeo\_(s|d)gemm\_nn 49 | - blasfeo\_(s|d)gemm\_nt 50 | - blasfeo\_(s|d)gemm\_dn 51 | - blasfeo\_(s|d)gemm\_nd 52 | - blasfeo\_(s|d)syrk\_ln\_mn 53 | - blasfeo\_(s|d)syrk\_ln 54 | - 
blasfeo\_(s|d)trsm\_llnu 55 | - blasfeo\_(s|d)trsm\_rltu 56 | - blasfeo\_(s|d)trsm\_rltn 57 | - blasfeo\_(s|d)trsm\_lunn 58 | - blasfeo\_(s|d)trsm\_rutn 59 | - blasfeo\_(s|d)trmm\_rutn 60 | - blasfeo\_(s|d)trmm\_rlnn 61 | - blasfeo\_(s|d)dia\_left\_ib 62 | 63 | 64 | 65 | 66 | ## lapack 67 | - blasfeo\_(s|d)potrf\_l 68 | - blasfeo\_(s|d)potrf\_l\_mn 69 | - blasfeo\_syrk\_(s|d)potrf\_l\_ln 70 | - blasfeo\_(s|d)getrf\_nopivot 71 | - blasfeo\_(s|d)getrf\_rowpivot 72 | - blasfeo\_(s|d)geqrf 73 | - blasfeo\_(s|d)geqrf\_worksize 74 | - blasfeo\_(s|d)gelqf 75 | - blasfeo\_(s|d)gelqf\_worksize 76 | - blasfeo\_(s|d)gelqf\_pd 77 | - blasfeo\_(s|d)gelqf\_pd\_la 78 | - blasfeo\_(s|d)gelqf\_pd\_lla 79 | 80 | 81 | 82 | 83 | ## aux 84 | - blasfeo\_(s|d)gesc 85 | - blasfeo\_(s|d)gecp 86 | - blasfeo\_(s|d)gead 87 | - blasfeo\_(s|d)gecpsc 88 | - blasfeo\_(s|d)trcp\_l 89 | - blasfeo\_(s|d)trcpsc 90 | - blasfeo\_(s|d)trsc\_l 91 | - blasfeo\_(s|d)trtr\_l 92 | - blasfeo\_(s|d)trtr\_u 93 | - blasfeo\_(s|d)diare 94 | - blasfeo\_(s|d)diain 95 | - blasfeo\_(s|d)diain\_sp 96 | - blasfeo\_(s|d)diaex 97 | - blasfeo\_(s|d)diaex\_sp 98 | - blasfeo\_(s|d)diaad 99 | - blasfeo\_(s|d)diaad\_sp 100 | - blasfeo\_(s|d)diaadin\_sp 101 | - blasfeo\_(s|d)rowin 102 | - blasfeo\_(s|d)rowex 103 | - blasfeo\_(s|d)rowad 104 | - blasfeo\_(s|d)rowad\_sp 105 | - blasfeo\_(s|d)rowsw 106 | - blasfeo\_(s|d)rowpe 107 | - blasfeo\_(s|d)rowpei 108 | - blasfeo\_(s|d)colin 109 | - blasfeo\_(s|d)colex 110 | - blasfeo\_(s|d)colsw 111 | - blasfeo\_(s|d)colpe 112 | - blasfeo\_(s|d)colpei 113 | - blasfeo\_(s|d)vecse 114 | - blasfeo\_(s|d)veccp 115 | - blasfeo\_(s|d)vecsc 116 | - blasfeo\_(s|d)veccpsc 117 | - blasfeo\_(s|d)vecin1 118 | - blasfeo\_(s|d)vecex1 119 | - blasfeo\_(s|d)vecin\_sp 120 | - blasfeo\_(s|d)vecad 121 | - blasfeo\_(s|d)vecad\_sp 122 | - blasfeo\_(s|d)vecex\_sp 123 | - blasfeo\_(s|d)vecsw 124 | - blasfeo\_(s|d)veccl 125 | - blasfeo\_(s|d)veccl\_mask 126 | - blasfeo\_(s|d)vecze 127 | - 
blasfeo\_(s|d)vecnrm\_inf 128 | - blasfeo\_(s|d)vecpe 129 | - blasfeo\_(s|d)vecpei 130 | - blasfeo\_memsize\_(s|d)mat 131 | - blasfeo\_memsize\_diag\_(s|d)mat 132 | - blasfeo\_memsize\_(s|d)vec 133 | - blasfeo\_create\_(s|d)mat 134 | - blasfeo\_create\_(s|d)vec 135 | - blasfeo\_pack\_(s|d)mat 136 | - blasfeo\_pack\_tran\_(s|d)vec 137 | - blasfeo\_unpack\_(s|d)mat 138 | - blasfeo\_unpack\_tran\_(s|d)mat 139 | - blasfeo\_unpack\_(s|d)vec 140 | - blasfeo\_(s|d)\_cast\_mat2strmat 141 | - blasfeo\_(s|d)\_cast\_diag\_mat2strmat 142 | - blasfeo\_(s|d)\_cast\_vec2vecmat 143 | 144 | 145 | -------------------------------------------------------------------------------- /tests/routines_list.md.tpl: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Routines List" 3 | order = 2 4 | +++ 5 | 6 | 7 | List of routines implemented in BLASFEO; for a reference about the naming 8 | conventions adopted, see [Naming Conventions](/docs/naming) 9 | 10 | {% for rtn_class_name, rtn_class in routines.items() %} 11 | 12 | ## {{rtn_class_name}} 13 | {% for rtn_name, rtn_meta in rtn_class.items() %} - {{ rtn_name }} 14 | {% endfor %} 15 | 16 | {% endfor %} 17 | -------------------------------------------------------------------------------- /tests/testset_all.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 0, 4 | "silent": 1, 5 | "continue": 0 6 | }, 7 | "env_flags":{ 8 | "CFLAGS":"-Wuninitialized" 9 | }, 10 | "test_macros": 11 | { 12 | "VERBOSE":2 13 | }, 14 | "blasfeo_flags":{ 15 | "CC": "clang", 16 | "BLASFEO_REF_API": 1, 17 | "EXTERNAL_BLAS": "OPENBLAS" 18 | }, 19 | "available_blasfeo_flags":{ 20 | "SOC":["DSPACE", "BACHMANN"], 21 | "CC": ["clang"], 22 | "BLASFEO_REF_API": [1, 0], 23 | "EXTERNAL_BLAS": ["OPENBLAS"] 24 | }, 25 | "precisions":[ 26 | "d", 27 | "s" 28 | ], 29 | "targets":[ 30 | "X64_INTEL_HASWELL", 31 | "X64_INTEL_SANDY_BRIDGE", 32 | "X64_INTEL_CORE", 
33 | "X64_AMD_BULLDOZER", 34 | "X86_AMD_JAGUAR", 35 | "X86_AMD_BARCELONA", 36 | "ARMV8A_ARM_CORTEX_A57", 37 | "ARMV8A_ARM_CORTEX_A53", 38 | "ARMV7A_ARM_CORTEX_A15", 39 | "ARMV7A_ARM_CORTEX_A7", 40 | "ARMV7A_ARM_CORTEX_A9", 41 | "GENERIC" 42 | ], 43 | "apis": [ 44 | "blasfeo", 45 | "blas" 46 | ], 47 | "las":[ 48 | "HIGH_PERFORMANCE", 49 | "REFERENCE", 50 | "BLASFEO_WRAPPER" 51 | ], 52 | "routines":[ 53 | "gead", 54 | "gecp", 55 | "gesc", 56 | "gecpsc", 57 | "gemm", 58 | "gemm_nn", 59 | "gemm_nn", 60 | "gemm_nt", 61 | "gemm_tn", 62 | "gemm_tt", 63 | "syrk_ln_mn", 64 | "syrk_ln", 65 | "getrf_rowpivot", 66 | "potrf_l", 67 | "trsm_llnu", 68 | "trsm_lunn", 69 | "trsm_rltu", 70 | "trsm_rltn", 71 | "trsm_rutn", 72 | "trmm_rutn", 73 | "trmm_rlnn" 74 | ] 75 | } 76 | -------------------------------------------------------------------------------- /tests/testset_ci_blas_cm_double_amd64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blas" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "COLMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "X64_INTEL_CORE", 38 | "X64_INTEL_HASWELL", 39 | "X64_INTEL_SANDY_BRIDGE" 40 | ], 41 | "LA": [ 42 | "HIGH_PERFORMANCE" 43 | ], 44 | "routines": [ 45 | "gemm_nn", 46 | "gemm_nt", 47 | "gemm_tn", 48 | "gemm_tt", 49 | "syrk_ln", 50 | "syrk_lt", 51 | "syrk_un", 52 | "syrk_ut", 53 | "trsm_llnn", 54 | "trsm_llnu", 55 | "trsm_lltn", 56 | "trsm_lltu", 57 | "trsm_lunn", 58 | "trsm_lunu", 59 | "trsm_lutn", 60 | "trsm_lutu", 61 | "trsm_rlnn", 62 | "trsm_rlnu", 63 | "trsm_rltn", 64 
| "trsm_rltu", 65 | "trsm_runn", 66 | "trsm_runu", 67 | "trsm_rutn", 68 | "trsm_rutu", 69 | "trmm_llnn", 70 | "trmm_llnu", 71 | "trmm_lltn", 72 | "trmm_lltu", 73 | "trmm_lunn", 74 | "trmm_lunu", 75 | "trmm_lutn", 76 | "trmm_lutu", 77 | "trmm_rlnn", 78 | "trmm_rlnu", 79 | "trmm_rltn", 80 | "trmm_rltu", 81 | "trmm_runn", 82 | "trmm_runu", 83 | "trmm_rutn", 84 | "trmm_rutu", 85 | "potrf_l", 86 | "potrf_u", 87 | "getrf" 88 | ] 89 | } 90 | -------------------------------------------------------------------------------- /tests/testset_ci_blas_cm_double_arm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blas" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "COLMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "ARMV8A_ARM_CORTEX_A57", 38 | "ARMV8A_ARM_CORTEX_A53" 39 | ], 40 | "LA": [ 41 | "HIGH_PERFORMANCE" 42 | ], 43 | "routines": [ 44 | "gemm_nn", 45 | "gemm_nt", 46 | "gemm_tn", 47 | "gemm_tt", 48 | "syrk_ln", 49 | "syrk_lt", 50 | "syrk_un", 51 | "syrk_ut", 52 | "trsm_llnn", 53 | "trsm_llnu", 54 | "trsm_lltn", 55 | "trsm_lltu", 56 | "trsm_lunn", 57 | "trsm_lunu", 58 | "trsm_lutn", 59 | "trsm_lutu", 60 | "trsm_rlnn", 61 | "trsm_rlnu", 62 | "trsm_rltn", 63 | "trsm_rltu", 64 | "trsm_runn", 65 | "trsm_runu", 66 | "trsm_rutn", 67 | "trsm_rutu", 68 | "trmm_llnn", 69 | "trmm_llnu", 70 | "trmm_lltn", 71 | "trmm_lltu", 72 | "trmm_lunn", 73 | "trmm_lunu", 74 | "trmm_lutn", 75 | "trmm_lutu", 76 | "trmm_rlnn", 77 | "trmm_rlnu", 78 | "trmm_rltn", 79 | "trmm_rltu", 80 | "trmm_runn", 81 | "trmm_runu", 82 | 
"trmm_rutn", 83 | "trmm_rutu", 84 | "potrf_l", 85 | "potrf_u", 86 | "getrf" 87 | ] 88 | } 89 | 90 | 91 | -------------------------------------------------------------------------------- /tests/testset_ci_blas_cm_single_amd64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "single" 22 | ], 23 | "apis": [ 24 | "blas" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "COLMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "X64_INTEL_CORE", 38 | "X64_INTEL_HASWELL", 39 | "X64_INTEL_SANDY_BRIDGE" 40 | ], 41 | "LA": [ 42 | "HIGH_PERFORMANCE" 43 | ], 44 | "routines": [ 45 | "gemm_nn", 46 | "gemm_nt", 47 | "gemm_tn", 48 | "gemm_tt" 49 | ] 50 | } 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /tests/testset_ci_blas_cm_single_arm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "single" 22 | ], 23 | "apis": [ 24 | "blas" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "COLMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "ARMV8A_ARM_CORTEX_A57", 38 | "ARMV8A_ARM_CORTEX_A53" 39 | ], 40 | "LA": [ 41 | "HIGH_PERFORMANCE" 42 | ], 43 | "routines": [ 44 | 
"gemm_nn", 45 | "gemm_nt", 46 | "gemm_tn", 47 | "gemm_tt" 48 | ] 49 | } 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /tests/testset_ci_blas_pm_double_amd64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blas" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "X64_INTEL_CORE", 38 | "X64_INTEL_SANDY_BRIDGE", 39 | "X64_INTEL_HASWELL" 40 | ], 41 | "LA": [ 42 | "HIGH_PERFORMANCE" 43 | ], 44 | "routines": [ 45 | "gemm_nn", 46 | "gemm_nt", 47 | "gemm_tn", 48 | "gemm_tt", 49 | "syrk_ln", 50 | "syrk_lt", 51 | "syrk_un", 52 | "syrk_ut", 53 | "trsm_llnn", 54 | "trsm_llnu", 55 | "trsm_lltn", 56 | "trsm_lltu", 57 | "trsm_lunn", 58 | "trsm_lunu", 59 | "trsm_lutn", 60 | "trsm_lutu", 61 | "trsm_rlnn", 62 | "trsm_rlnu", 63 | "trsm_rltn", 64 | "trsm_rltu", 65 | "trsm_runn", 66 | "trsm_runu", 67 | "trsm_rutn", 68 | "trsm_rutu", 69 | "trmm_llnn", 70 | "trmm_llnu", 71 | "trmm_lltn", 72 | "trmm_lltu", 73 | "trmm_lunn", 74 | "trmm_lunu", 75 | "trmm_lutn", 76 | "trmm_lutu", 77 | "trmm_rlnn", 78 | "trmm_rlnu", 79 | "trmm_rltn", 80 | "trmm_rltu", 81 | "trmm_runn", 82 | "trmm_runu", 83 | "trmm_rutn", 84 | "trmm_rutu", 85 | "potrf_l", 86 | "potrf_u", 87 | "getrf" 88 | ] 89 | } 90 | 91 | -------------------------------------------------------------------------------- /tests/testset_ci_blas_pm_double_arm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 
3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blas" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "ARMV8A_ARM_CORTEX_A57", 38 | "ARMV8A_ARM_CORTEX_A53" 39 | ], 40 | "LA": [ 41 | "HIGH_PERFORMANCE" 42 | ], 43 | "routines": [ 44 | "gemm_nn", 45 | "gemm_nt", 46 | "gemm_tn", 47 | "gemm_tt", 48 | "syrk_ln", 49 | "syrk_lt", 50 | "syrk_un", 51 | "syrk_ut", 52 | "trsm_llnn", 53 | "trsm_llnu", 54 | "trsm_lltn", 55 | "trsm_lltu", 56 | "trsm_lunn", 57 | "trsm_lunu", 58 | "trsm_lutn", 59 | "trsm_lutu", 60 | "trsm_rlnn", 61 | "trsm_rlnu", 62 | "trsm_rltn", 63 | "trsm_rltu", 64 | "trsm_runn", 65 | "trsm_runu", 66 | "trsm_rutn", 67 | "trsm_rutu", 68 | "trmm_llnn", 69 | "trmm_llnu", 70 | "trmm_lltn", 71 | "trmm_lltu", 72 | "trmm_lunn", 73 | "trmm_lunu", 74 | "trmm_lutn", 75 | "trmm_lutu", 76 | "trmm_rlnn", 77 | "trmm_rlnu", 78 | "trmm_rltn", 79 | "trmm_rltu", 80 | "trmm_runn", 81 | "trmm_runu", 82 | "trmm_rutn", 83 | "trmm_rutu", 84 | "potrf_l", 85 | "potrf_u", 86 | "getrf" 87 | ] 88 | } 89 | 90 | 91 | -------------------------------------------------------------------------------- /tests/testset_ci_blas_pm_single_amd64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "single" 22 | 
], 23 | "apis": [ 24 | "blas" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "X64_INTEL_CORE", 38 | "X64_INTEL_HASWELL", 39 | "X64_INTEL_SANDY_BRIDGE" 40 | ], 41 | "LA": [ 42 | "HIGH_PERFORMANCE" 43 | ], 44 | "routines": [ 45 | "gemm_nn", 46 | "gemm_nt", 47 | "gemm_tn", 48 | "gemm_tt", 49 | "trsm_llnn", 50 | "trsm_llnu", 51 | "trsm_lltn", 52 | "trsm_lltu", 53 | "trsm_lunn", 54 | "trsm_lunu", 55 | "trsm_lutn", 56 | "trsm_lutu", 57 | "trsm_rlnn", 58 | "trsm_rlnu", 59 | "trsm_rltn", 60 | "trsm_rltu", 61 | "trsm_runn", 62 | "trsm_runu", 63 | "trsm_rutn", 64 | "trsm_rutu", 65 | "potrf_l", 66 | "potrf_u" 67 | ] 68 | } 69 | 70 | 71 | -------------------------------------------------------------------------------- /tests/testset_ci_blas_pm_single_arm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "single" 22 | ], 23 | "apis": [ 24 | "blas" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "ARMV8A_ARM_CORTEX_A57", 38 | "ARMV8A_ARM_CORTEX_A53" 39 | ], 40 | "LA": [ 41 | "HIGH_PERFORMANCE" 42 | ], 43 | "routines": [ 44 | "gemm_nn", 45 | "gemm_nt", 46 | "gemm_tn", 47 | "gemm_tt", 48 | "trsm_llnn", 49 | "trsm_llnu", 50 | "trsm_lltn", 51 | "trsm_lltu", 52 | "trsm_lunn", 53 | "trsm_lunu", 54 | "trsm_lutn", 55 | "trsm_lutu", 56 | "trsm_rlnn", 57 | "trsm_rlnu", 58 | "trsm_rltn", 59 | "trsm_rltu", 60 | "trsm_runn", 61 | "trsm_runu", 62 | "trsm_rutn", 63 | "trsm_rutu", 64 | 
"potrf_l", 65 | "potrf_u" 66 | ] 67 | } 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_cm_double_amd64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "COLMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "X64_INTEL_CORE", 38 | "X64_INTEL_HASWELL", 39 | "X64_INTEL_SANDY_BRIDGE" 40 | ], 41 | "LA": [ 42 | "HIGH_PERFORMANCE" 43 | ], 44 | "routines": [ 45 | "gemm_nn", 46 | "gemm_nt", 47 | "gemm_tn", 48 | "gemm_tt", 49 | "syrk_ln", 50 | "syrk_ln_mn", 51 | "syrk_lt", 52 | "syrk_un", 53 | "syrk_ut", 54 | "trsm_llnn", 55 | "trsm_llnu", 56 | "trsm_lltn", 57 | "trsm_lltu", 58 | "trsm_lunn", 59 | "trsm_lunu", 60 | "trsm_lutn", 61 | "trsm_lutu", 62 | "trsm_rlnn", 63 | "trsm_rlnu", 64 | "trsm_rltn", 65 | "trsm_rltu", 66 | "trsm_runn", 67 | "trsm_runu", 68 | "trsm_rutn", 69 | "trsm_rutu", 70 | "trmm_llnn", 71 | "trmm_llnu", 72 | "trmm_lltn", 73 | "trmm_lltu", 74 | "trmm_lunn", 75 | "trmm_lunu", 76 | "trmm_lutn", 77 | "trmm_lutu", 78 | "trmm_rlnn", 79 | "trmm_rlnu", 80 | "trmm_rltn", 81 | "trmm_rltu", 82 | "trmm_runn", 83 | "trmm_runu", 84 | "trmm_rutn", 85 | "trmm_rutu", 86 | "potrf_l", 87 | "potrf_l_mn", 88 | "potrf_u", 89 | "getrf_rp" 90 | ] 91 | } 92 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_cm_double_amd64_packalg.json: -------------------------------------------------------------------------------- 1 
| { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 1000 28 | ], 29 | "PACKING_ALG":[ 30 | "ALG_0", 31 | "ALG_M1", 32 | "ALG_N1", 33 | "ALG_2" 34 | ], 35 | "MF": [ 36 | "COLMAJ" 37 | ], 38 | "TARGET": [ 39 | "GENERIC", 40 | "X64_INTEL_CORE", 41 | "X64_INTEL_HASWELL", 42 | "X64_INTEL_SANDY_BRIDGE" 43 | ], 44 | "LA": [ 45 | "HIGH_PERFORMANCE" 46 | ], 47 | "routines": [ 48 | "gemm_nn", 49 | "gemm_nt", 50 | "gemm_tn", 51 | "gemm_tt" 52 | ] 53 | } 54 | 55 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_cm_double_arm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "COLMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "ARMV8A_ARM_CORTEX_A57", 38 | "ARMV8A_ARM_CORTEX_A53" 39 | ], 40 | "LA": [ 41 | "HIGH_PERFORMANCE" 42 | ], 43 | "routines": [ 44 | "gemm_nn", 45 | "gemm_nt", 46 | "gemm_tn", 47 | "gemm_tt", 48 | "syrk_ln", 49 | "syrk_ln_mn", 50 | "syrk_lt", 51 | "syrk_un", 52 | "syrk_ut", 53 | "trsm_llnn", 54 | "trsm_llnu", 55 | "trsm_lltn", 56 | "trsm_lltu", 57 | "trsm_lunn", 58 | "trsm_lunu", 59 | "trsm_lutn", 60 | 
"trsm_lutu", 61 | "trsm_rlnn", 62 | "trsm_rlnu", 63 | "trsm_rltn", 64 | "trsm_rltu", 65 | "trsm_runn", 66 | "trsm_runu", 67 | "trsm_rutn", 68 | "trsm_rutu", 69 | "trmm_llnn", 70 | "trmm_llnu", 71 | "trmm_lltn", 72 | "trmm_lltu", 73 | "trmm_lunn", 74 | "trmm_lunu", 75 | "trmm_lutn", 76 | "trmm_lutu", 77 | "trmm_rlnn", 78 | "trmm_rlnu", 79 | "trmm_rltn", 80 | "trmm_rltu", 81 | "trmm_runn", 82 | "trmm_runu", 83 | "trmm_rutn", 84 | "trmm_rutu", 85 | "potrf_l", 86 | "potrf_l_mn", 87 | "potrf_u", 88 | "getrf_rp" 89 | ] 90 | } 91 | 92 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_cm_double_arm64_packalg.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 1000 28 | ], 29 | "PACKING_ALG":[ 30 | "ALG_0", 31 | "ALG_M1", 32 | "ALG_N1", 33 | "ALG_2" 34 | ], 35 | "MF": [ 36 | "COLMAJ" 37 | ], 38 | "TARGET": [ 39 | "GENERIC", 40 | "ARMV8A_ARM_CORTEX_A57", 41 | "ARMV8A_ARM_CORTEX_A53" 42 | ], 43 | "LA": [ 44 | "HIGH_PERFORMANCE" 45 | ], 46 | "routines": [ 47 | "gemm_nn", 48 | "gemm_nt", 49 | "gemm_tn", 50 | "gemm_tt" 51 | ] 52 | } 53 | 54 | 55 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_cm_single_amd64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | 
"blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "single" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "COLMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "X64_INTEL_CORE", 38 | "X64_INTEL_HASWELL", 39 | "X64_INTEL_SANDY_BRIDGE" 40 | ], 41 | "LA": [ 42 | "HIGH_PERFORMANCE" 43 | ], 44 | "routines": [ 45 | "gemm_nn", 46 | "gemm_nt", 47 | "gemm_tn", 48 | "gemm_tt", 49 | "trsm_llnn", 50 | "trsm_llnu", 51 | "trsm_lltn", 52 | "trsm_lltu", 53 | "trsm_lunn", 54 | "trsm_lunu", 55 | "trsm_lutn", 56 | "trsm_lutu", 57 | "trsm_rlnn", 58 | "trsm_rlnu", 59 | "trsm_rltn", 60 | "trsm_rltu", 61 | "trsm_runn", 62 | "trsm_runu", 63 | "trsm_rutn", 64 | "trsm_rutu", 65 | "potrf_l", 66 | "potrf_l_mn", 67 | "potrf_u" 68 | ] 69 | } 70 | 71 | 72 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_cm_single_arm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "single" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "COLMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "ARMV8A_ARM_CORTEX_A57", 38 | "ARMV8A_ARM_CORTEX_A53" 39 | ], 40 | "LA": [ 41 | "HIGH_PERFORMANCE" 42 | ], 43 | "routines": [ 44 | "gemm_nn", 45 | "gemm_nt", 46 | "gemm_tn", 47 | "gemm_tt", 48 | "trsm_llnn", 49 | "trsm_llnu", 50 | "trsm_lltn", 51 | "trsm_lltu", 52 | "trsm_lunn", 53 | "trsm_lunu", 54 | "trsm_lutn", 55 
| "trsm_lutu", 56 | "trsm_rlnn", 57 | "trsm_rlnu", 58 | "trsm_rltn", 59 | "trsm_rltu", 60 | "trsm_runn", 61 | "trsm_runu", 62 | "trsm_rutn", 63 | "trsm_rutu", 64 | "potrf_l", 65 | "potrf_l_mn", 66 | "potrf_u" 67 | ] 68 | } 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_pm_double_amd64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "X64_INTEL_CORE", 38 | "X64_INTEL_HASWELL", 39 | "X64_INTEL_SANDY_BRIDGE" 40 | ], 41 | "LA": [ 42 | "HIGH_PERFORMANCE" 43 | ], 44 | "routines": [ 45 | "gemm_nn", 46 | "gemm_nt", 47 | "gemm_tn", 48 | "gemm_tt", 49 | "syrk_ln", 50 | "syrk_lt", 51 | "syrk_un", 52 | "syrk_ut", 53 | "trsm_llnn", 54 | "trsm_llnu", 55 | "trsm_lltn", 56 | "trsm_lltu", 57 | "trsm_lunn", 58 | "trsm_lunu", 59 | "trsm_lutn", 60 | "trsm_lutu", 61 | "trsm_rlnn", 62 | "trsm_rlnu", 63 | "trsm_rltn", 64 | "trsm_rltu", 65 | "trsm_runn", 66 | "trsm_runu", 67 | "trsm_rutn", 68 | "trsm_rutu", 69 | "trmm_llnn", 70 | "trmm_llnu", 71 | "trmm_lltn", 72 | "trmm_lltu", 73 | "trmm_lunn", 74 | "trmm_lunu", 75 | "trmm_lutn", 76 | "trmm_lutu", 77 | "trmm_rlnn", 78 | "trmm_rlnu", 79 | "trmm_rltn", 80 | "trmm_rltu", 81 | "trmm_runn", 82 | "trmm_runu", 83 | "trmm_rutn", 84 | "trmm_rutu", 85 | "potrf_l", 86 | "potrf_l_mn" 87 | ] 88 | } 89 | -------------------------------------------------------------------------------- 
/tests/testset_ci_blasfeo_pm_double_arm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "ARMV8A_ARM_CORTEX_A57", 38 | "ARMV8A_ARM_CORTEX_A53" 39 | ], 40 | "LA": [ 41 | "HIGH_PERFORMANCE" 42 | ], 43 | "routines": [ 44 | "gemm_nn", 45 | "gemm_nt", 46 | "gemm_tn", 47 | "gemm_tt", 48 | "syrk_ln", 49 | "syrk_lt", 50 | "syrk_un", 51 | "syrk_ut", 52 | "trsm_llnn", 53 | "trsm_llnu", 54 | "trsm_lltn", 55 | "trsm_lltu", 56 | "trsm_lunn", 57 | "trsm_lunu", 58 | "trsm_lutn", 59 | "trsm_lutu", 60 | "trsm_rlnn", 61 | "trsm_rlnu", 62 | "trsm_rltn", 63 | "trsm_rltu", 64 | "trsm_runn", 65 | "trsm_runu", 66 | "trsm_rutn", 67 | "trsm_rutu", 68 | "trmm_llnn", 69 | "trmm_llnu", 70 | "trmm_lltn", 71 | "trmm_lltu", 72 | "trmm_lunn", 73 | "trmm_lunu", 74 | "trmm_lutn", 75 | "trmm_lutu", 76 | "trmm_rlnn", 77 | "trmm_rlnu", 78 | "trmm_rltn", 79 | "trmm_rltu", 80 | "trmm_runn", 81 | "trmm_runu", 82 | "trmm_rutn", 83 | "trmm_rutu", 84 | "potrf_l", 85 | "potrf_l_mn" 86 | ] 87 | } 88 | 89 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_pm_single_amd64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC":"gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | 
"blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "single" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "X64_INTEL_CORE", 38 | "X64_INTEL_HASWELL", 39 | "X64_INTEL_SANDY_BRIDGE" 40 | ], 41 | "LA": [ 42 | "HIGH_PERFORMANCE" 43 | ], 44 | "routines": [ 45 | "gemm_nn", 46 | "gemm_nt", 47 | "potrf_l" 48 | ] 49 | } 50 | 51 | -------------------------------------------------------------------------------- /tests/testset_ci_blasfeo_pm_single_arm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 0, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE": 1 10 | }, 11 | "env_flags":{ 12 | "CC": "gcc", 13 | "CFLAGS": "-Wuninitialized" 14 | }, 15 | "blasfeo_flags":{ 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 0, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "single" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 500 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "GENERIC", 37 | "ARMV8A_ARM_CORTEX_A57", 38 | "ARMV8A_ARM_CORTEX_A53" 39 | ], 40 | "LA": [ 41 | "HIGH_PERFORMANCE" 42 | ], 43 | "routines": [ 44 | "gemm_nn", 45 | "gemm_nt", 46 | "potrf_l" 47 | ] 48 | } 49 | 50 | 51 | -------------------------------------------------------------------------------- /tests/testset_debug.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "options":{ 4 | "rebuild": 1, 5 | "silent": 0, 6 | "continue": 1 7 | }, 8 | "test_macros": 9 | { 10 | "VERBOSE": 3 11 | }, 12 | "env_flags":{ 13 | "CC":"gcc", 14 | "CFLAGS": "-Wuninitialized" 15 | }, 16 | "blasfeo_flags":{ 17 | "BLASFEO_REF_API": 1, 18 | "BLAS_API": 
0, 19 | "EXTERNAL_BLAS": "SYSTEM" 20 | }, 21 | "precisions": [ 22 | "double" 23 | ], 24 | "apis": [ 25 | "blasfeo" 26 | ], 27 | "K_MAX_STACK":[ 28 | 0 29 | ], 30 | "PACKING_ALG":[ 31 | "AUTO" 32 | ], 33 | "MF": [ 34 | "PANELMAJ" 35 | ], 36 | "TARGET": [ 37 | "ARMV8A_ARM_CORTEX_A53" 38 | ], 39 | "LA": [ 40 | "HIGH_PERFORMANCE" 41 | ], 42 | "routines": [ 43 | "trsm_rltn" 44 | ] 45 | } 46 | -------------------------------------------------------------------------------- /tests/testset_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | "rebuild": 1, 4 | "silent": 1, 5 | "continue": 0 6 | }, 7 | "test_macros": 8 | { 9 | "VERBOSE":2 10 | }, 11 | "env_flags":{ 12 | "CFLAGS": "-Wuninitialized" 13 | }, 14 | "blasfeo_flags":{ 15 | "CC": "gcc", 16 | "BLASFEO_REF_API": 1, 17 | "BLAS_API": 1, 18 | "EXTERNAL_BLAS": "SYSTEM" 19 | }, 20 | "precisions": [ 21 | "double" 22 | ], 23 | "apis": [ 24 | "blasfeo" 25 | ], 26 | "K_MAX_STACK":[ 27 | 0, 400 28 | ], 29 | "PACKING_ALG":[ 30 | "AUTO" 31 | ], 32 | "MF": [ 33 | "PANELMAJ" 34 | ], 35 | "TARGET": [ 36 | "X64_INTEL_HASWELL", 37 | "GENERIC" 38 | ], 39 | "LA": [ 40 | "HIGH_PERFORMANCE" 41 | ], 42 | "routines": [ 43 | "gemm_nn", 44 | "gemm_nt", 45 | "gemm_tn", 46 | "gemm_tt" 47 | ], 48 | "buffer": [ 49 | "blasfeo", 50 | "X64_INTEL_HASWELL", 51 | "X64_INTEL_SANDY_BRIDGE", 52 | "X64_INTEL_CORE", 53 | "GENERIC", 54 | "gemm_nt", 55 | "gemm_tt", 56 | "trsm_llnu", 57 | "trsm_llnn", 58 | "trsm_lunu", 59 | "trsm_lunn", 60 | "trsm_lltu", 61 | "trsm_lltn", 62 | "trsm_lutu", 63 | "trsm_lutn", 64 | "trsm_rlnu", 65 | "trsm_rlnn", 66 | "trsm_rltu", 67 | "trsm_rltn", 68 | "trsm_runu", 69 | "trsm_runn", 70 | "trsm_rutu", 71 | "trsm_rutn", 72 | "getrf", 73 | "potrf_l", 74 | "potrf_u", 75 | "gemm_nn", 76 | "gemm_tn", 77 | "gemm_nt", 78 | "gemm_tt", 79 | "trsm_llnu", 80 | "trsm_llnn", 81 | "trsm_lunu", 82 | "trsm_lunn", 83 | "trsm_lltu", 84 | "trsm_lltn", 85 | "trsm_lutu", 86 | 
"trsm_lutn", 87 | "trsm_rlnu", 88 | "trsm_rlnn", 89 | "trsm_rltu", 90 | "trsm_rltn", 91 | "trsm_runu", 92 | "trsm_runn", 93 | "trsm_rutu", 94 | "trsm_rutn", 95 | "getrf", 96 | "potrf_l", 97 | "potrf_u" 98 | ] 99 | } 100 | -------------------------------------------------------------------------------- /utils/change_dollar_immediate.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | #for file in `find kernel/ -name "*.S"`; do echo $file; done 4 | 5 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$0/\$ 0/g' $file; done 6 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$1/\$ 1/g' $file; done 7 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$2/\$ 2/g' $file; done 8 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$3/\$ 3/g' $file; done 9 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$4/\$ 4/g' $file; done 10 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$5/\$ 5/g' $file; done 11 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$6/\$ 6/g' $file; done 12 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$7/\$ 7/g' $file; done 13 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$8/\$ 8/g' $file; done 14 | for file in `find kernel/ -name "*.S"`; do sed -i 's/\$9/\$ 9/g' $file; done 15 | -------------------------------------------------------------------------------- /utils/change_lic.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | #for file in `find blas_api/ -name "*.c"`; do echo $file; done 4 | 5 | #for file in `find blas_api/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 6 | #for file in `find blasfeo_api/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 7 | #for file in `find auxiliary/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 8 | #for file in `find kernel/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 9 | #for file in `find benchmarks/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 10 | #for file in `find tests/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 11 | #for file in `find experimental/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 12 | #for file in `find sandbox/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 13 | #for file in `find examples/ -name "*.c"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 14 | 15 | #for file in `find kernel/ -name "*.S"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 16 | #for file in `find experimental/ -name "*.S"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 17 | #for file in `find sandbox/ -name "*.S"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 18 | 19 | #for file in `find include/ -name "*.h"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 20 | #for file in `find benchmarks/ -name "*.h"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 21 | #for file in `find tests/ -name 
"*.h"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 22 | #for file in `find experimental/ -name "*.h"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 23 | #for file in `find sandbox/ -name "*.h"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 24 | #for file in `find examples/ -name "*.h"`; do cat utils/new_lic_c > temp && tail -n +35 $file >> temp && cat temp > $file; done 25 | 26 | #for file in `find . -name "Makefil*"`; do cat utils/new_lic_makefile > temp && tail -n +35 $file >> temp && cat temp > $file; done 27 | #for file in `find . -name "CMake*"`; do cat utils/new_lic_makefile > temp && tail -n +35 $file >> temp && cat temp > $file; done 28 | -------------------------------------------------------------------------------- /utils/new_lic_c: -------------------------------------------------------------------------------- 1 | /************************************************************************************************** 2 | * * 3 | * This file is part of BLASFEO. * 4 | * * 5 | * BLASFEO -- BLAS For Embedded Optimization. * 6 | * Copyright (C) 2019 by Gianluca Frison. * 7 | * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * 8 | * All rights reserved. * 9 | * * 10 | * The 2-Clause BSD License * 11 | * * 12 | * Redistribution and use in source and binary forms, with or without * 13 | * modification, are permitted provided that the following conditions are met: * 14 | * * 15 | * 1. Redistributions of source code must retain the above copyright notice, this * 16 | * list of conditions and the following disclaimer. * 17 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 18 | * this list of conditions and the following disclaimer in the documentation * 19 | * and/or other materials provided with the distribution. 
* 20 | * * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * 25 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 31 | * * 32 | * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * 33 | * * 34 | **************************************************************************************************/ 35 | -------------------------------------------------------------------------------- /utils/new_lic_makefile: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | # # 3 | # This file is part of BLASFEO. # 4 | # # 5 | # BLASFEO -- BLAS For Embedded Optimization. # 6 | # Copyright (C) 2019 by Gianluca Frison. # 7 | # Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. # 8 | # All rights reserved. # 9 | # # 10 | # The 2-Clause BSD License # 11 | # # 12 | # Redistribution and use in source and binary forms, with or without # 13 | # modification, are permitted provided that the following conditions are met: # 14 | # # 15 | # 1. Redistributions of source code must retain the above copyright notice, this # 16 | # list of conditions and the following disclaimer. 
# 17 | # 2. Redistributions in binary form must reproduce the above copyright notice, # 18 | # this list of conditions and the following disclaimer in the documentation # 19 | # and/or other materials provided with the distribution. # 20 | # # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # 22 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # 23 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR # 25 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # 26 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # 28 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # 30 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # 31 | # # 32 | # Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de # 33 | # # 34 | ################################################################################################### 35 | -------------------------------------------------------------------------------- /version: -------------------------------------------------------------------------------- 1 | 0.1.4.2 2 | --------------------------------------------------------------------------------