├── .appveyor.yml ├── .dir-locals.el ├── .gitignore ├── .travis.yml ├── CHANGELOG ├── CONTRIBUTING.md ├── CREDITS ├── INSTALL ├── LICENSE ├── Makefile ├── README.md ├── README_BLIS.md ├── RELEASING ├── blastest ├── Makefile ├── check-blastest.sh ├── f2c │ ├── abs.c │ ├── acos.c │ ├── arith.h │ ├── asin.c │ ├── atan.c │ ├── atn2.c │ ├── close.c │ ├── cnjg.c │ ├── cos.c │ ├── cosh.c │ ├── dim.c │ ├── div.c │ ├── dolio.c │ ├── endfile.c │ ├── epsilon.c │ ├── err.c │ ├── exit_.c │ ├── exp.c │ ├── f2c.h │ ├── f2c_config.h │ ├── f2c_inline.h │ ├── f2c_types.h │ ├── f2c_types_win.h │ ├── fio.h │ ├── fmt.c │ ├── fmt.h │ ├── fmtlib.c │ ├── fp.h │ ├── h_dnnt.c │ ├── hl_cmp.c │ ├── i_dnnt.c │ ├── i_len.c │ ├── imag.c │ ├── int.c │ ├── l_cmp.c │ ├── lg10.c │ ├── lio.h │ ├── log.c │ ├── lread.c │ ├── lwrite.c │ ├── mod.c │ ├── nint.c │ ├── open.c │ ├── pow.c │ ├── prod.c │ ├── rdfmt.c │ ├── rewind.c │ ├── rsfe.c │ ├── s_cmp.c │ ├── s_copy.c │ ├── s_stop.c │ ├── sfe.c │ ├── sig_die.c │ ├── sign.c │ ├── sin.c │ ├── sinh.c │ ├── sqrt.c │ ├── tan.c │ ├── tanh.c │ ├── util.c │ ├── wref.c │ ├── wrtfmt.c │ ├── wsfe.c │ └── wsle.c ├── input │ ├── cblat2.in │ ├── cblat3.in │ ├── dblat2.in │ ├── dblat3.in │ ├── sblat2.in │ ├── sblat3.in │ ├── zblat2.in │ └── zblat3.in ├── obj │ └── .gitkeep └── src │ ├── cblat1.c │ ├── cblat2.c │ ├── cblat3.c │ ├── dblat1.c │ ├── dblat2.c │ ├── dblat3.c │ ├── fortran │ ├── cblat1.f │ ├── cblat2.f │ ├── cblat3.f │ ├── dblat1.f │ ├── dblat2.f │ ├── dblat3.f │ ├── run-f2c.sh │ ├── sblat1.f │ ├── sblat2.f │ ├── sblat3.f │ ├── zblat1.f │ ├── zblat2.f │ └── zblat3.f │ ├── sblat1.c │ ├── sblat2.c │ ├── sblat3.c │ ├── zblat1.c │ ├── zblat2.c │ └── zblat3.c ├── blis.pc.in ├── build ├── add-copyright.py ├── bli_config.h.in ├── bump-version.sh ├── config.mk.in ├── detect │ ├── config │ │ ├── config_detect.c │ │ └── old │ │ │ ├── arch_detect.c │ │ │ ├── auto-detect.sh │ │ │ ├── cpuid_arm.c │ │ │ └── cpuid_x86.c │ ├── iset │ │ ├── avx.s │ │ ├── avx512dq.s │ │ ├── 
avx512f.s │ │ ├── fma3.s │ │ └── fma4.s │ ├── memkind │ │ └── libmemkind_detect.c │ └── omp_simd │ │ └── omp_simd_detect.c ├── flatten-headers.py ├── flatten-headers.sh ├── gen-make-frags │ ├── fragment.mk │ ├── gen-make-frag.sh │ ├── ignore_list │ ├── special_list │ └── suffix_list ├── irun.py ├── libblis-symbols.def ├── mirror-tree.sh ├── old │ └── flatten-headers-pass.sh ├── regen-symbols.sh └── templates │ ├── license.c │ ├── license.h │ └── license.sh ├── common.mk ├── config ├── README.md ├── a64fx │ ├── bli_a64fx_sector_cache.h │ ├── bli_cntx_init_a64fx.c │ ├── bli_family_a64fx.h │ └── make_defs.mk ├── aaplmx │ ├── bli_cntx_init_aaplmx.c │ ├── bli_family_aaplmx.h │ └── make_defs.mk ├── amd64 │ ├── bli_family_amd64.h │ └── make_defs.mk ├── arm32 │ ├── bli_family_arm32.h │ └── make_defs.mk ├── arm64 │ ├── bli_family_arm64.h │ └── make_defs.mk ├── armsve │ ├── bli_armsve_config_utils.c │ ├── bli_armsve_config_utils.h │ ├── bli_cntx_init_armsve.c │ ├── bli_family_armsve.h │ └── make_defs.mk ├── bgq │ ├── bli_cntx_init_bgq.c │ ├── bli_family_bgq.h │ └── make_defs.mk ├── bulldozer │ ├── bli_cntx_init_bulldozer.c │ ├── bli_family_bulldozer.h │ └── make_defs.mk ├── cortexa15 │ ├── bli_cntx_init_cortexa15.c │ ├── bli_family_cortexa15.h │ └── make_defs.mk ├── cortexa53 │ ├── bli_cntx_init_cortexa53.c │ ├── bli_family_cortexa53.h │ └── make_defs.mk ├── cortexa57 │ ├── bli_cntx_init_cortexa57.c │ ├── bli_family_cortexa57.h │ └── make_defs.mk ├── cortexa9 │ ├── bli_cntx_init_cortexa9.c │ ├── bli_family_cortexa9.h │ └── make_defs.mk ├── excavator │ ├── bli_cntx_init_excavator.c │ ├── bli_family_excavator.h │ └── make_defs.mk ├── generic │ ├── bli_cntx_init_generic.c │ ├── bli_family_generic.h │ └── make_defs.mk ├── haswell │ ├── bli_cntx_init_haswell.c │ ├── bli_family_haswell.h │ └── make_defs.mk ├── intel64 │ ├── bli_family_intel64.h │ └── make_defs.mk ├── knc │ ├── bli_cntx_init_knc.c │ ├── bli_family_knc.h │ └── make_defs.mk ├── knl │ ├── bli_cntx_init_knl.c │ ├── 
bli_family_knl.h │ └── make_defs.mk ├── old │ ├── armv7a │ │ ├── bli_cntx_init_armv7a.c │ │ ├── bli_family_armv7a.h │ │ └── make_defs.mk │ ├── emscripten │ │ ├── bli_kernel.h │ │ └── make_defs.mk │ ├── haswellbb │ │ ├── bli_cntx_init_haswell.c │ │ ├── bli_family_haswell.h │ │ └── make_defs.mk │ ├── loongson3a │ │ ├── bli_kernel.h │ │ └── make_defs.mk │ ├── newarch │ │ ├── bli_kernel.h │ │ └── make_defs.mk │ └── pnacl │ │ ├── bli_kernel.h │ │ └── make_defs.mk ├── penryn │ ├── bli_cntx_init_penryn.c │ ├── bli_family_penryn.h │ └── make_defs.mk ├── piledriver │ ├── bli_cntx_init_piledriver.c │ ├── bli_family_piledriver.h │ └── make_defs.mk ├── power10 │ ├── bli_cntx_init_power10.c │ ├── bli_family_power10.h │ └── make_defs.mk ├── power7 │ ├── bli_cntx_init_power7.c │ ├── bli_family_power7.h │ └── make_defs.mk ├── power9 │ ├── bli_cntx_init_power9.c │ ├── bli_family_power9.h │ └── make_defs.mk ├── sandybridge │ ├── bli_cntx_init_sandybridge.c │ ├── bli_family_sandybridge.h │ └── make_defs.mk ├── skx │ ├── bli_cntx_init_skx.c │ ├── bli_family_skx.h │ └── make_defs.mk ├── steamroller │ ├── bli_cntx_init_steamroller.c │ ├── bli_family_steamroller.h │ └── make_defs.mk ├── template │ ├── bli_cntx_init_template.c │ ├── bli_family_template.h │ ├── kernels │ │ ├── 1 │ │ │ ├── bli_axpyv_template_noopt_var1.c │ │ │ └── bli_dotv_template_noopt_var1.c │ │ ├── 3 │ │ │ ├── bli_gemm_template_noopt_mxn.c │ │ │ ├── bli_gemmtrsm_l_template_noopt_mxn.c │ │ │ ├── bli_gemmtrsm_u_template_noopt_mxn.c │ │ │ ├── bli_trsm_l_template_noopt_mxn.c │ │ │ └── bli_trsm_u_template_noopt_mxn.c │ │ └── 1f │ │ │ ├── bli_axpy2v_template_noopt_var1.c │ │ │ ├── bli_axpyf_template_noopt_var1.c │ │ │ ├── bli_dotaxpyv_template_noopt_var1.c │ │ │ ├── bli_dotxaxpyf_template_noopt_var1.c │ │ │ └── bli_dotxf_template_noopt_var1.c │ └── make_defs.mk ├── thunderx2 │ ├── bli_cntx_init_thunderx2.c │ ├── bli_family_thunderx2.h │ └── make_defs.mk ├── x86_64 │ ├── bli_family_x86_64.h │ └── make_defs.mk ├── zen │ ├── 
amd_config.mk │ ├── bli_cntx_init_zen.c │ ├── bli_family_zen.h │ ├── make_defs.mk │ └── old │ │ └── bli_kernel.h └── zen2 │ ├── bli_cntx_init_zen2.c │ ├── bli_family_zen2.h │ └── make_defs.mk ├── config_registry ├── configure ├── docs ├── BLISObjectAPI.md ├── BLISTypedAPI.md ├── BuildSystem.md ├── CodingConventions.md ├── ConfigurationHowTo.md ├── FAQ.md ├── HardwareSupport.md ├── KernelsHowTo.md ├── MixedDatatypes.md ├── Multithreading.md ├── Performance.md ├── PerformanceSmall.md ├── ReleaseNotes.md ├── Sandboxes.md ├── Testsuite.md └── graphs │ ├── aaplmx │ ├── output_st_dgemm_asm_blis.png │ └── output_st_sgemm_asm_blis.png │ ├── large │ ├── l3_perf_a64fx_jc1ic1jr12_nt12.pdf │ ├── l3_perf_a64fx_jc1ic1jr12_nt12.png │ ├── l3_perf_a64fx_jc1ic4jr12_nt48.pdf │ ├── l3_perf_a64fx_jc1ic4jr12_nt48.png │ ├── l3_perf_a64fx_nt1.pdf │ ├── l3_perf_a64fx_nt1.png │ ├── l3_perf_has_jc2ic3jr2_nt12.pdf │ ├── l3_perf_has_jc2ic3jr2_nt12.png │ ├── l3_perf_has_jc4ic3jr2_nt24.pdf │ ├── l3_perf_has_jc4ic3jr2_nt24.png │ ├── l3_perf_has_nt1.pdf │ ├── l3_perf_has_nt1.png │ ├── l3_perf_skx_jc2ic13_nt26.pdf │ ├── l3_perf_skx_jc2ic13_nt26.png │ ├── l3_perf_skx_jc4ic13_nt52.pdf │ ├── l3_perf_skx_jc4ic13_nt52.png │ ├── l3_perf_skx_nt1.pdf │ ├── l3_perf_skx_nt1.png │ ├── l3_perf_tx2_jc4ic7_nt28.pdf │ ├── l3_perf_tx2_jc4ic7_nt28.png │ ├── l3_perf_tx2_jc8ic7_nt56.pdf │ ├── l3_perf_tx2_jc8ic7_nt56.png │ ├── l3_perf_tx2_nt1.pdf │ ├── l3_perf_tx2_nt1.png │ ├── l3_perf_zen2_jc4ic4jr4_nt64.pdf │ ├── l3_perf_zen2_jc4ic4jr4_nt64.png │ ├── l3_perf_zen2_jc8ic4jr4_nt128.pdf │ ├── l3_perf_zen2_jc8ic4jr4_nt128.png │ ├── l3_perf_zen2_nt1.pdf │ ├── l3_perf_zen2_nt1.png │ ├── l3_perf_zen_jc1ic8jr4_nt32.pdf │ ├── l3_perf_zen_jc1ic8jr4_nt32.png │ ├── l3_perf_zen_jc2ic8jr4_nt64.pdf │ ├── l3_perf_zen_jc2ic8jr4_nt64.png │ ├── l3_perf_zen_nt1.pdf │ └── l3_perf_zen_nt1.png │ └── sup │ ├── dgemm_ccc_has_nt1.pdf │ ├── dgemm_ccc_has_nt1.png │ ├── dgemm_ccc_has_nt12.pdf │ ├── dgemm_ccc_has_nt12.png │ ├── 
dgemm_ccc_kbl_nt1.pdf │ ├── dgemm_ccc_kbl_nt1.png │ ├── dgemm_ccc_kbl_nt4.pdf │ ├── dgemm_ccc_kbl_nt4.png │ ├── dgemm_ccc_zen2_nt1.pdf │ ├── dgemm_ccc_zen2_nt1.png │ ├── dgemm_ccc_zen2_nt32.pdf │ ├── dgemm_ccc_zen2_nt32.png │ ├── dgemm_ccc_zen_nt1.pdf │ ├── dgemm_ccc_zen_nt1.png │ ├── dgemm_ccc_zen_nt32.pdf │ ├── dgemm_ccc_zen_nt32.png │ ├── dgemm_rrr_has_nt1.pdf │ ├── dgemm_rrr_has_nt1.png │ ├── dgemm_rrr_has_nt12.pdf │ ├── dgemm_rrr_has_nt12.png │ ├── dgemm_rrr_kbl_nt1.pdf │ ├── dgemm_rrr_kbl_nt1.png │ ├── dgemm_rrr_kbl_nt4.pdf │ ├── dgemm_rrr_kbl_nt4.png │ ├── dgemm_rrr_zen2_nt1.pdf │ ├── dgemm_rrr_zen2_nt1.png │ ├── dgemm_rrr_zen2_nt32.pdf │ ├── dgemm_rrr_zen2_nt32.png │ ├── dgemm_rrr_zen_nt1.pdf │ ├── dgemm_rrr_zen_nt1.png │ ├── dgemm_rrr_zen_nt32.pdf │ ├── dgemm_rrr_zen_nt32.png │ ├── sgemm_ccc_zen2_nt1.pdf │ ├── sgemm_ccc_zen2_nt1.png │ ├── sgemm_ccc_zen2_nt32.pdf │ ├── sgemm_ccc_zen2_nt32.png │ ├── sgemm_rrr_zen2_nt1.pdf │ ├── sgemm_rrr_zen2_nt1.png │ ├── sgemm_rrr_zen2_nt32.pdf │ └── sgemm_rrr_zen2_nt32.png ├── examples ├── oapi │ ├── 00obj_basic.c │ ├── 01obj_attach.c │ ├── 02obj_ij.c │ ├── 03obj_view.c │ ├── 04level0.c │ ├── 05level1v.c │ ├── 06level1m.c │ ├── 07level1m_diag.c │ ├── 08level2.c │ ├── 09level3.c │ ├── 10util.c │ ├── 11gemm_md.c │ ├── Makefile │ └── README └── tapi │ ├── 00level1v.c │ ├── 01level1m.c │ ├── 02level1m_diag.c │ ├── 03level2.c │ ├── 04level3.c │ ├── 05util.c │ ├── Makefile │ └── README ├── frame ├── 0 │ ├── bli_l0.h │ ├── bli_l0_check.c │ ├── bli_l0_check.h │ ├── bli_l0_fpa.c │ ├── bli_l0_fpa.h │ ├── bli_l0_ft.h │ ├── bli_l0_oapi.c │ ├── bli_l0_oapi.h │ ├── bli_l0_tapi.c │ ├── bli_l0_tapi.h │ └── copysc │ │ ├── bli_copysc.c │ │ └── bli_copysc.h ├── 1 │ ├── bli_l1v.h │ ├── bli_l1v_check.c │ ├── bli_l1v_check.h │ ├── bli_l1v_fpa.c │ ├── bli_l1v_fpa.h │ ├── bli_l1v_ft.h │ ├── bli_l1v_ft_ker.h │ ├── bli_l1v_ker.h │ ├── bli_l1v_ker_prot.h │ ├── bli_l1v_oapi.c │ ├── bli_l1v_oapi.h │ ├── bli_l1v_oapi_ba.c │ ├── bli_l1v_oapi_ex.c │ ├── 
bli_l1v_tapi.c │ ├── bli_l1v_tapi.h │ ├── bli_l1v_tapi_ba.c │ ├── bli_l1v_tapi_ex.c │ └── other │ │ ├── packv │ │ ├── bli_packv.c │ │ ├── bli_packv.h │ │ ├── bli_packv_check.c │ │ ├── bli_packv_check.h │ │ ├── bli_packv_cntl.c │ │ ├── bli_packv_cntl.h │ │ ├── bli_packv_init.c │ │ ├── bli_packv_init.h │ │ ├── bli_packv_int.c │ │ ├── bli_packv_int.h │ │ ├── bli_packv_unb_var1.c │ │ └── bli_packv_unb_var1.h │ │ ├── scalv │ │ ├── bli_scalv_cntl.c │ │ ├── bli_scalv_cntl.h │ │ ├── bli_scalv_int.c │ │ └── bli_scalv_int.h │ │ └── unpackv │ │ ├── bli_unpackv.c │ │ ├── bli_unpackv.h │ │ ├── bli_unpackv_check.c │ │ ├── bli_unpackv_check.h │ │ ├── bli_unpackv_cntl.c │ │ ├── bli_unpackv_cntl.h │ │ ├── bli_unpackv_int.c │ │ ├── bli_unpackv_int.h │ │ ├── bli_unpackv_unb_var1.c │ │ └── bli_unpackv_unb_var1.h ├── 2 │ ├── bli_l2.h │ ├── bli_l2_check.c │ ├── bli_l2_check.h │ ├── bli_l2_fpa.c │ ├── bli_l2_fpa.h │ ├── bli_l2_ft.h │ ├── bli_l2_ft_unb.h │ ├── bli_l2_oapi.c │ ├── bli_l2_oapi.h │ ├── bli_l2_oapi_ba.c │ ├── bli_l2_oapi_ex.c │ ├── bli_l2_tapi.c │ ├── bli_l2_tapi.h │ ├── bli_l2_tapi_ba.c │ ├── bli_l2_tapi_ex.c │ ├── gemv │ │ ├── bli_gemv.h │ │ ├── bli_gemv_unb_var1.c │ │ ├── bli_gemv_unb_var2.c │ │ ├── bli_gemv_unf_var1.c │ │ ├── bli_gemv_unf_var2.c │ │ ├── bli_gemv_var.h │ │ ├── bli_gemv_var_oapi.c │ │ ├── bli_gemv_var_oapi.c.prev │ │ └── other │ │ │ ├── bli_gemv_blk_var1.c │ │ │ ├── bli_gemv_blk_var2.c │ │ │ ├── bli_gemv_cntl.c │ │ │ ├── bli_gemv_cntl.h │ │ │ ├── bli_gemv_front.c │ │ │ ├── bli_gemv_front.h │ │ │ ├── bli_gemv_int.c │ │ │ └── bli_gemv_int.h │ ├── ger │ │ ├── bli_ger.h │ │ ├── bli_ger_unb_var1.c │ │ ├── bli_ger_unb_var2.c │ │ ├── bli_ger_var.h │ │ ├── bli_ger_var_oapi.c │ │ └── other │ │ │ ├── bli_ger_blk_var1.c │ │ │ ├── bli_ger_blk_var2.c │ │ │ ├── bli_ger_cntl.c │ │ │ ├── bli_ger_cntl.h │ │ │ ├── bli_ger_front.c │ │ │ ├── bli_ger_front.h │ │ │ ├── bli_ger_int.c │ │ │ └── bli_ger_int.h │ ├── hemv │ │ ├── bli_hemv.h │ │ ├── bli_hemv_unb_var1.c │ │ ├── 
bli_hemv_unb_var2.c │ │ ├── bli_hemv_unb_var3.c │ │ ├── bli_hemv_unb_var4.c │ │ ├── bli_hemv_unf_var1.c │ │ ├── bli_hemv_unf_var1a.c │ │ ├── bli_hemv_unf_var3.c │ │ ├── bli_hemv_unf_var3a.c │ │ ├── bli_hemv_var.h │ │ ├── bli_hemv_var_oapi.c │ │ └── other │ │ │ ├── bli_hemv_blk_var1.c │ │ │ ├── bli_hemv_blk_var2.c │ │ │ ├── bli_hemv_blk_var3.c │ │ │ ├── bli_hemv_blk_var4.c │ │ │ ├── bli_hemv_cntl.c │ │ │ ├── bli_hemv_cntl.h │ │ │ ├── bli_hemv_front.c │ │ │ ├── bli_hemv_front.h │ │ │ ├── bli_hemv_int.c │ │ │ └── bli_hemv_int.h │ ├── her │ │ ├── bli_her.h │ │ ├── bli_her_unb_var1.c │ │ ├── bli_her_unb_var2.c │ │ ├── bli_her_var.h │ │ ├── bli_her_var_oapi.c │ │ └── other │ │ │ ├── bli_her_blk_var1.c │ │ │ ├── bli_her_blk_var2.c │ │ │ ├── bli_her_cntl.c │ │ │ ├── bli_her_cntl.h │ │ │ ├── bli_her_front.c │ │ │ ├── bli_her_front.h │ │ │ ├── bli_her_int.c │ │ │ └── bli_her_int.h │ ├── her2 │ │ ├── bli_her2.h │ │ ├── bli_her2_unb_var1.c │ │ ├── bli_her2_unb_var2.c │ │ ├── bli_her2_unb_var3.c │ │ ├── bli_her2_unb_var4.c │ │ ├── bli_her2_unf_var1.c │ │ ├── bli_her2_unf_var4.c │ │ ├── bli_her2_var.h │ │ ├── bli_her2_var_oapi.c │ │ └── other │ │ │ ├── bli_her2_blk_var1.c │ │ │ ├── bli_her2_blk_var2.c │ │ │ ├── bli_her2_blk_var3.c │ │ │ ├── bli_her2_blk_var4.c │ │ │ ├── bli_her2_cntl.c │ │ │ ├── bli_her2_cntl.h │ │ │ ├── bli_her2_front.c │ │ │ ├── bli_her2_front.h │ │ │ ├── bli_her2_int.c │ │ │ └── bli_her2_int.h │ ├── symv │ │ ├── bli_symv.h │ │ └── other │ │ │ ├── bli_symv_front.c │ │ │ └── bli_symv_front.h │ ├── syr │ │ ├── bli_syr.h │ │ └── other │ │ │ ├── bli_syr_front.c │ │ │ └── bli_syr_front.h │ ├── syr2 │ │ ├── bli_syr2.h │ │ └── other │ │ │ ├── bli_syr2_front.c │ │ │ └── bli_syr2_front.h │ ├── trmv │ │ ├── bli_trmv.h │ │ ├── bli_trmv_unb_var1.c │ │ ├── bli_trmv_unb_var2.c │ │ ├── bli_trmv_unf_var1.c │ │ ├── bli_trmv_unf_var2.c │ │ ├── bli_trmv_var.h │ │ ├── bli_trmv_var_oapi.c │ │ └── other │ │ │ ├── bli_trmv_cntl.c │ │ │ ├── bli_trmv_cntl.h │ │ │ ├── bli_trmv_front.c 
│ │ │ ├── bli_trmv_front.h │ │ │ ├── bli_trmv_int.c │ │ │ ├── bli_trmv_int.h │ │ │ ├── bli_trmv_l_blk_var1.c │ │ │ ├── bli_trmv_l_blk_var2.c │ │ │ ├── bli_trmv_u_blk_var1.c │ │ │ └── bli_trmv_u_blk_var2.c │ └── trsv │ │ ├── bli_trsv.h │ │ ├── bli_trsv_unb_var1.c │ │ ├── bli_trsv_unb_var2.c │ │ ├── bli_trsv_unf_var1.c │ │ ├── bli_trsv_unf_var2.c │ │ ├── bli_trsv_var.h │ │ ├── bli_trsv_var_oapi.c │ │ └── other │ │ ├── bli_trsv_cntl.c │ │ ├── bli_trsv_cntl.h │ │ ├── bli_trsv_front.c │ │ ├── bli_trsv_front.h │ │ ├── bli_trsv_int.c │ │ ├── bli_trsv_int.h │ │ ├── bli_trsv_l_blk_var1.c │ │ ├── bli_trsv_l_blk_var2.c │ │ ├── bli_trsv_u_blk_var1.c │ │ └── bli_trsv_u_blk_var2.c ├── 3 │ ├── bli_l3.h │ ├── bli_l3_blocksize.c │ ├── bli_l3_blocksize.h │ ├── bli_l3_check.c │ ├── bli_l3_check.h │ ├── bli_l3_cntl.c │ ├── bli_l3_cntl.h │ ├── bli_l3_direct.c │ ├── bli_l3_direct.h │ ├── bli_l3_ft_ukr.h │ ├── bli_l3_oapi.c │ ├── bli_l3_oapi.h │ ├── bli_l3_oapi_ba.c │ ├── bli_l3_oapi_ex.c │ ├── bli_l3_oft.h │ ├── bli_l3_oft_var.h │ ├── bli_l3_packm.c │ ├── bli_l3_packm.h │ ├── bli_l3_prune.c │ ├── bli_l3_prune.h │ ├── bli_l3_sup.c │ ├── bli_l3_sup.h │ ├── bli_l3_sup_ft_ker.h │ ├── bli_l3_sup_int.c │ ├── bli_l3_sup_int.h │ ├── bli_l3_sup_ker.h │ ├── bli_l3_sup_ker_prot.h │ ├── bli_l3_sup_oft.h │ ├── bli_l3_sup_packm_a.c │ ├── bli_l3_sup_packm_a.h │ ├── bli_l3_sup_packm_b.c │ ├── bli_l3_sup_packm_b.h │ ├── bli_l3_sup_packm_var.c │ ├── bli_l3_sup_packm_var.h │ ├── bli_l3_sup_ref.c │ ├── bli_l3_sup_ref.h │ ├── bli_l3_sup_var12.c │ ├── bli_l3_sup_var1n2m.c │ ├── bli_l3_sup_vars.h │ ├── bli_l3_tapi.c │ ├── bli_l3_tapi.h │ ├── bli_l3_tapi_ba.c │ ├── bli_l3_tapi_ex.c │ ├── bli_l3_thrinfo.c │ ├── bli_l3_thrinfo.h │ ├── bli_l3_ukr.h │ ├── bli_l3_ukr_fpa.c │ ├── bli_l3_ukr_fpa.h │ ├── bli_l3_ukr_oapi.c │ ├── bli_l3_ukr_oapi.h │ ├── bli_l3_ukr_prot.h │ ├── bli_l3_ukr_tapi.c │ ├── bli_l3_ukr_tapi.h │ ├── gemm │ │ ├── bli_gemm.h │ │ ├── bli_gemm_blk_var1.c │ │ ├── bli_gemm_blk_var2.c │ │ ├── 
bli_gemm_blk_var3.c │ │ ├── bli_gemm_cntl.c │ │ ├── bli_gemm_cntl.h │ │ ├── bli_gemm_front.c │ │ ├── bli_gemm_front.h │ │ ├── bli_gemm_int.c │ │ ├── bli_gemm_int.h │ │ ├── bli_gemm_ker_var1.c │ │ ├── bli_gemm_ker_var2.c │ │ ├── bli_gemm_ker_var2_md.c │ │ ├── bli_gemm_md.c │ │ ├── bli_gemm_md.h │ │ ├── bli_gemm_md_c2r_ref.c │ │ ├── bli_gemm_md_c2r_ref.h │ │ ├── bli_gemm_packab.c │ │ ├── bli_gemm_var.h │ │ ├── ind │ │ │ ├── bli_gemm4mb_ker_var2.c │ │ │ ├── bli_gemm_ind_opt.h │ │ │ └── old │ │ │ │ ├── bli_gemm3m2_ker_var2.c │ │ │ │ └── bli_gemm3m3_packa.c │ │ └── other │ │ │ ├── bli_gemm_ker_var2.c │ │ │ ├── bli_gemm_ker_var2rr.c │ │ │ ├── bli_gemm_ker_var2sl.c │ │ │ ├── bli_gemm_ker_var5.c │ │ │ └── bli_gemm_ker_var5.h │ ├── gemmt │ │ ├── bli_gemmt.h │ │ ├── bli_gemmt_front.c │ │ └── bli_gemmt_front.h │ ├── hemm │ │ ├── bli_hemm.h │ │ ├── bli_hemm_front.c │ │ └── bli_hemm_front.h │ ├── her2k │ │ ├── bli_her2k.h │ │ ├── bli_her2k_front.c │ │ └── bli_her2k_front.h │ ├── herk │ │ ├── bli_herk.h │ │ ├── bli_herk_front.c │ │ ├── bli_herk_front.h │ │ ├── bli_herk_l_ker_var2.c │ │ ├── bli_herk_u_ker_var2.c │ │ ├── bli_herk_var.h │ │ ├── bli_herk_x_ker_var2.c │ │ └── other │ │ │ ├── bli_herk_l_ker_var2.1looprr.c │ │ │ ├── bli_herk_l_ker_var2.c │ │ │ ├── bli_herk_l_ker_var2rr.c │ │ │ ├── bli_herk_l_ker_var2sl.c │ │ │ ├── bli_herk_u_ker_var2.1looprr.c │ │ │ ├── bli_herk_u_ker_var2.c │ │ │ ├── bli_herk_u_ker_var2rr.c │ │ │ └── bli_herk_u_ker_var2sl.c │ ├── old │ │ ├── bli_l3_ft_ex.h │ │ ├── bli_l3_sup_edge.h │ │ └── bli_l3_sup_var1n2m.c │ ├── symm │ │ ├── bli_symm.h │ │ ├── bli_symm_front.c │ │ └── bli_symm_front.h │ ├── syr2k │ │ ├── bli_syr2k.h │ │ ├── bli_syr2k_front.c │ │ └── bli_syr2k_front.h │ ├── syrk │ │ ├── bli_syrk.h │ │ ├── bli_syrk_front.c │ │ └── bli_syrk_front.h │ ├── trmm │ │ ├── bli_trmm.h │ │ ├── bli_trmm_front.c │ │ ├── bli_trmm_front.h │ │ ├── bli_trmm_ll_ker_var2.c │ │ ├── bli_trmm_lu_ker_var2.c │ │ ├── bli_trmm_rl_ker_var2.c │ │ ├── bli_trmm_ru_ker_var2.c │ 
│ ├── bli_trmm_var.h │ │ ├── bli_trmm_xx_ker_var2.c │ │ └── other │ │ │ ├── bli_trmm_ll_ker_var2.c │ │ │ ├── bli_trmm_ll_ker_var2rr.c │ │ │ ├── bli_trmm_ll_ker_var2sl.c │ │ │ ├── bli_trmm_lu_ker_var2.c │ │ │ ├── bli_trmm_lu_ker_var2rr.c │ │ │ ├── bli_trmm_lu_ker_var2sl.c │ │ │ ├── bli_trmm_rl_ker_var2.c │ │ │ ├── bli_trmm_rl_ker_var2rr.c │ │ │ ├── bli_trmm_rl_ker_var2sl.c │ │ │ ├── bli_trmm_ru_ker_var2.c │ │ │ ├── bli_trmm_ru_ker_var2rr.c │ │ │ └── bli_trmm_ru_ker_var2sl.c │ ├── trmm3 │ │ ├── bli_trmm3.h │ │ ├── bli_trmm3_front.c │ │ └── bli_trmm3_front.h │ └── trsm │ │ ├── bli_trsm.h │ │ ├── bli_trsm_blk_var1.c │ │ ├── bli_trsm_blk_var2.c │ │ ├── bli_trsm_blk_var3.c │ │ ├── bli_trsm_cntl.c │ │ ├── bli_trsm_cntl.h │ │ ├── bli_trsm_front.c │ │ ├── bli_trsm_front.h │ │ ├── bli_trsm_int.c │ │ ├── bli_trsm_int.h │ │ ├── bli_trsm_ll_ker_var2.c │ │ ├── bli_trsm_lu_ker_var2.c │ │ ├── bli_trsm_packab.c │ │ ├── bli_trsm_rl_ker_var2.c │ │ ├── bli_trsm_ru_ker_var2.c │ │ ├── bli_trsm_var.h │ │ ├── bli_trsm_xx_ker_var2.c │ │ └── other │ │ ├── bli_trsm_ll_ker_var2.c │ │ ├── bli_trsm_ll_ker_var2rr.c │ │ ├── bli_trsm_ll_ker_var2sl.c │ │ ├── bli_trsm_lu_ker_var2.c │ │ ├── bli_trsm_lu_ker_var2rr.c │ │ ├── bli_trsm_lu_ker_var2sl.c │ │ ├── bli_trsm_rl_ker_var2.c │ │ └── bli_trsm_ru_ker_var2.c ├── 1d │ ├── bli_l1d.h │ ├── bli_l1d_check.c │ ├── bli_l1d_check.h │ ├── bli_l1d_fpa.c │ ├── bli_l1d_fpa.h │ ├── bli_l1d_ft.h │ ├── bli_l1d_oapi.c │ ├── bli_l1d_oapi.h │ ├── bli_l1d_oapi_ba.c │ ├── bli_l1d_oapi_ex.c │ ├── bli_l1d_tapi.c │ ├── bli_l1d_tapi.h │ ├── bli_l1d_tapi_ba.c │ └── bli_l1d_tapi_ex.c ├── 1f │ ├── bli_l1f.h │ ├── bli_l1f_check.c │ ├── bli_l1f_check.h │ ├── bli_l1f_fpa.c │ ├── bli_l1f_fpa.h │ ├── bli_l1f_ft.h │ ├── bli_l1f_ft_ker.h │ ├── bli_l1f_ker.h │ ├── bli_l1f_ker_prot.h │ ├── bli_l1f_oapi.c │ ├── bli_l1f_oapi.h │ ├── bli_l1f_oapi_ba.c │ ├── bli_l1f_oapi_ex.c │ ├── bli_l1f_tapi.c │ ├── bli_l1f_tapi.h │ ├── bli_l1f_tapi_ba.c │ └── bli_l1f_tapi_ex.c ├── 1m │ ├── bli_l1m.h │ 
├── bli_l1m_check.c │ ├── bli_l1m_check.h │ ├── bli_l1m_fpa.c │ ├── bli_l1m_fpa.h │ ├── bli_l1m_ft.h │ ├── bli_l1m_ft_ker.h │ ├── bli_l1m_ker.h │ ├── bli_l1m_ker_prot.h │ ├── bli_l1m_oapi.c │ ├── bli_l1m_oapi.h │ ├── bli_l1m_oapi_ba.c │ ├── bli_l1m_oapi_ex.c │ ├── bli_l1m_oft_var.h │ ├── bli_l1m_tapi.c │ ├── bli_l1m_tapi.h │ ├── bli_l1m_tapi_ba.c │ ├── bli_l1m_tapi_ex.c │ ├── bli_l1m_unb_var1.c │ ├── bli_l1m_unb_var1.h │ ├── other │ │ ├── bli_scalm.h │ │ ├── bli_scalm_cntl.c │ │ ├── bli_scalm_cntl.h │ │ ├── bli_scalm_int.c │ │ └── bli_scalm_int.h │ ├── packm │ │ ├── bli_packm.h │ │ ├── bli_packm_blk_var1.c │ │ ├── bli_packm_blk_var1_md.c │ │ ├── bli_packm_blk_var1_md.h │ │ ├── bli_packm_check.c │ │ ├── bli_packm_check.h │ │ ├── bli_packm_cntl.c │ │ ├── bli_packm_cntl.h │ │ ├── bli_packm_cxk.c │ │ ├── bli_packm_cxk.h │ │ ├── bli_packm_cxk_1er.c │ │ ├── bli_packm_cxk_1er.h │ │ ├── bli_packm_cxk_3mis.c │ │ ├── bli_packm_cxk_3mis.h │ │ ├── bli_packm_cxk_4mi.c │ │ ├── bli_packm_cxk_4mi.h │ │ ├── bli_packm_cxk_rih.c │ │ ├── bli_packm_cxk_rih.h │ │ ├── bli_packm_init.c │ │ ├── bli_packm_init.h │ │ ├── bli_packm_int.c │ │ ├── bli_packm_int.h │ │ ├── bli_packm_md.h │ │ ├── bli_packm_part.c │ │ ├── bli_packm_part.h │ │ ├── bli_packm_struc_cxk.c │ │ ├── bli_packm_struc_cxk.h │ │ ├── bli_packm_struc_cxk_1er.c │ │ ├── bli_packm_struc_cxk_1er.h │ │ ├── bli_packm_struc_cxk_3mis.c │ │ ├── bli_packm_struc_cxk_3mis.h │ │ ├── bli_packm_struc_cxk_4mi.c │ │ ├── bli_packm_struc_cxk_4mi.h │ │ ├── bli_packm_struc_cxk_md.c │ │ ├── bli_packm_struc_cxk_md.h │ │ ├── bli_packm_struc_cxk_rih.c │ │ ├── bli_packm_struc_cxk_rih.h │ │ ├── bli_packm_thrinfo.c │ │ ├── bli_packm_thrinfo.h │ │ ├── bli_packm_unb_var1.c │ │ ├── bli_packm_unb_var1.h │ │ └── bli_packm_var.h │ └── unpackm │ │ ├── bli_unpackm.h │ │ ├── bli_unpackm_blk_var1.c │ │ ├── bli_unpackm_blk_var1.h │ │ ├── bli_unpackm_check.c │ │ ├── bli_unpackm_check.h │ │ ├── bli_unpackm_cntl.c │ │ ├── bli_unpackm_cntl.h │ │ ├── bli_unpackm_cxk.c │ 
│ ├── bli_unpackm_cxk.h │ │ ├── bli_unpackm_int.c │ │ ├── bli_unpackm_int.h │ │ ├── bli_unpackm_unb_var1.c │ │ └── bli_unpackm_unb_var1.h ├── base │ ├── bli_apool.c │ ├── bli_apool.h │ ├── bli_arch.c │ ├── bli_arch.h │ ├── bli_array.c │ ├── bli_array.h │ ├── bli_auxinfo.h │ ├── bli_blksz.c │ ├── bli_blksz.h │ ├── bli_check.c │ ├── bli_check.h │ ├── bli_clock.c │ ├── bli_clock.h │ ├── bli_cntl.c │ ├── bli_cntl.h │ ├── bli_cntx.c │ ├── bli_cntx.h │ ├── bli_const.c │ ├── bli_const.h │ ├── bli_cpuid.c │ ├── bli_cpuid.h │ ├── bli_env.c │ ├── bli_env.h │ ├── bli_error.c │ ├── bli_error.h │ ├── bli_func.c │ ├── bli_func.h │ ├── bli_getopt.c │ ├── bli_getopt.h │ ├── bli_gks.c │ ├── bli_gks.h │ ├── bli_info.c │ ├── bli_info.h │ ├── bli_init.c │ ├── bli_init.h │ ├── bli_machval.c │ ├── bli_machval.h │ ├── bli_malloc.c │ ├── bli_malloc.h │ ├── bli_mbool.c │ ├── bli_mbool.h │ ├── bli_mem.h │ ├── bli_memsys.c │ ├── bli_memsys.h │ ├── bli_obj.c │ ├── bli_obj.h │ ├── bli_obj_scalar.c │ ├── bli_obj_scalar.h │ ├── bli_opid.h │ ├── bli_pack.c │ ├── bli_pack.h │ ├── bli_param_map.c │ ├── bli_param_map.h │ ├── bli_part.c │ ├── bli_part.h │ ├── bli_pba.c │ ├── bli_pba.h │ ├── bli_pool.c │ ├── bli_pool.h │ ├── bli_prune.c │ ├── bli_prune.h │ ├── bli_query.c │ ├── bli_query.h │ ├── bli_rntm.c │ ├── bli_rntm.h │ ├── bli_sba.c │ ├── bli_sba.h │ ├── bli_setgetijm.c │ ├── bli_setgetijm.h │ ├── bli_setgetijv.c │ ├── bli_setgetijv.h │ ├── bli_setri.c │ ├── bli_setri.h │ ├── bli_string.c │ ├── bli_string.h │ ├── bli_winsys.c │ ├── bli_winsys.h │ ├── cast │ │ ├── bli_castm.c │ │ ├── bli_castm.h │ │ ├── bli_castnzm.c │ │ ├── bli_castnzm.h │ │ ├── bli_castv.c │ │ ├── bli_castv.h │ │ └── old │ │ │ ├── bli_cast_check.c │ │ │ └── bli_cast_check.h │ ├── check │ │ ├── bli_obj_check.c │ │ ├── bli_obj_check.h │ │ ├── bli_part_check.c │ │ └── bli_part_check.h │ ├── noopt │ │ ├── bli_dlamch.c │ │ ├── bli_dlamch.h │ │ ├── bli_lsame.c │ │ ├── bli_lsame.h │ │ ├── bli_slamch.c │ │ └── bli_slamch.h │ └── proj │ 
│ ├── bli_projm.c │ │ ├── bli_projm.h │ │ ├── bli_projv.c │ │ ├── bli_projv.h │ │ └── old │ │ ├── bli_proj_check.c │ │ └── bli_proj_check.h ├── compat │ ├── attic │ │ ├── bla_gbmv.c │ │ ├── bla_gbmv.h │ │ ├── bla_hbmv.c │ │ ├── bla_hbmv.h │ │ ├── bla_hpmv.c │ │ ├── bla_hpmv.h │ │ ├── bla_hpr.c │ │ ├── bla_hpr.h │ │ ├── bla_hpr2.c │ │ ├── bla_hpr2.h │ │ ├── bla_rot.c │ │ ├── bla_rot.h │ │ ├── bla_rotg.c │ │ ├── bla_rotg.h │ │ ├── bla_rotm.c │ │ ├── bla_rotm.h │ │ ├── bla_rotmg.c │ │ ├── bla_rotmg.h │ │ ├── bla_sbmv.c │ │ ├── bla_sbmv.h │ │ ├── bla_spmv.c │ │ ├── bla_spmv.h │ │ ├── bla_spr.c │ │ ├── bla_spr.h │ │ ├── bla_spr2.c │ │ ├── bla_spr2.h │ │ ├── bla_tbmv.c │ │ ├── bla_tbmv.h │ │ ├── bla_tbsv.c │ │ ├── bla_tbsv.h │ │ ├── bla_tpmv.c │ │ ├── bla_tpmv.h │ │ ├── bla_tpsv.c │ │ └── bla_tpsv.h │ ├── bla_amax.c │ ├── bla_amax.h │ ├── bla_asum.c │ ├── bla_asum.h │ ├── bla_axpy.c │ ├── bla_axpy.h │ ├── bla_copy.c │ ├── bla_copy.h │ ├── bla_dot.c │ ├── bla_dot.h │ ├── bla_gemm.c │ ├── bla_gemm.h │ ├── bla_gemmt.c │ ├── bla_gemmt.h │ ├── bla_gemv.c │ ├── bla_gemv.h │ ├── bla_ger.c │ ├── bla_ger.h │ ├── bla_hemm.c │ ├── bla_hemm.h │ ├── bla_hemv.c │ ├── bla_hemv.h │ ├── bla_her.c │ ├── bla_her.h │ ├── bla_her2.c │ ├── bla_her2.h │ ├── bla_her2k.c │ ├── bla_her2k.h │ ├── bla_herk.c │ ├── bla_herk.h │ ├── bla_nrm2.c │ ├── bla_nrm2.h │ ├── bla_scal.c │ ├── bla_scal.h │ ├── bla_swap.c │ ├── bla_swap.h │ ├── bla_symm.c │ ├── bla_symm.h │ ├── bla_symv.c │ ├── bla_symv.h │ ├── bla_syr.c │ ├── bla_syr.h │ ├── bla_syr2.c │ ├── bla_syr2.h │ ├── bla_syr2k.c │ ├── bla_syr2k.h │ ├── bla_syrk.c │ ├── bla_syrk.h │ ├── bla_trmm.c │ ├── bla_trmm.h │ ├── bla_trmv.c │ ├── bla_trmv.h │ ├── bla_trsm.c │ ├── bla_trsm.h │ ├── bla_trsv.c │ ├── bla_trsv.h │ ├── bli_blas.h │ ├── blis │ │ └── thread │ │ │ ├── b77_thread.c │ │ │ └── b77_thread.h │ ├── cblas │ │ ├── bli_cblas.h │ │ ├── cblas.tgz │ │ ├── f77_sub │ │ │ ├── f77_amax_sub.c │ │ │ ├── f77_amax_sub.h │ │ │ ├── f77_asum_sub.c │ │ │ ├── 
f77_asum_sub.h │ │ │ ├── f77_dot_sub.c │ │ │ ├── f77_dot_sub.h │ │ │ ├── f77_nrm2_sub.c │ │ │ └── f77_nrm2_sub.h │ │ ├── integrate-cblas-tarball.sh │ │ └── src │ │ │ ├── cblas.h │ │ │ ├── cblas_caxpy.c │ │ │ ├── cblas_ccopy.c │ │ │ ├── cblas_cdotc_sub.c │ │ │ ├── cblas_cdotu_sub.c │ │ │ ├── cblas_cgbmv.c │ │ │ ├── cblas_cgemm.c │ │ │ ├── cblas_cgemmt.c │ │ │ ├── cblas_cgemv.c │ │ │ ├── cblas_cgerc.c │ │ │ ├── cblas_cgeru.c │ │ │ ├── cblas_chbmv.c │ │ │ ├── cblas_chemm.c │ │ │ ├── cblas_chemv.c │ │ │ ├── cblas_cher.c │ │ │ ├── cblas_cher2.c │ │ │ ├── cblas_cher2k.c │ │ │ ├── cblas_cherk.c │ │ │ ├── cblas_chpmv.c │ │ │ ├── cblas_chpr.c │ │ │ ├── cblas_chpr2.c │ │ │ ├── cblas_cscal.c │ │ │ ├── cblas_csscal.c │ │ │ ├── cblas_cswap.c │ │ │ ├── cblas_csymm.c │ │ │ ├── cblas_csyr2k.c │ │ │ ├── cblas_csyrk.c │ │ │ ├── cblas_ctbmv.c │ │ │ ├── cblas_ctbsv.c │ │ │ ├── cblas_ctpmv.c │ │ │ ├── cblas_ctpsv.c │ │ │ ├── cblas_ctrmm.c │ │ │ ├── cblas_ctrmv.c │ │ │ ├── cblas_ctrsm.c │ │ │ ├── cblas_ctrsv.c │ │ │ ├── cblas_dasum.c │ │ │ ├── cblas_daxpy.c │ │ │ ├── cblas_dcopy.c │ │ │ ├── cblas_ddot.c │ │ │ ├── cblas_dgbmv.c │ │ │ ├── cblas_dgemm.c │ │ │ ├── cblas_dgemmt.c │ │ │ ├── cblas_dgemv.c │ │ │ ├── cblas_dger.c │ │ │ ├── cblas_dnrm2.c │ │ │ ├── cblas_drot.c │ │ │ ├── cblas_drotg.c │ │ │ ├── cblas_drotm.c │ │ │ ├── cblas_drotmg.c │ │ │ ├── cblas_dsbmv.c │ │ │ ├── cblas_dscal.c │ │ │ ├── cblas_dsdot.c │ │ │ ├── cblas_dspmv.c │ │ │ ├── cblas_dspr.c │ │ │ ├── cblas_dspr2.c │ │ │ ├── cblas_dswap.c │ │ │ ├── cblas_dsymm.c │ │ │ ├── cblas_dsymv.c │ │ │ ├── cblas_dsyr.c │ │ │ ├── cblas_dsyr2.c │ │ │ ├── cblas_dsyr2k.c │ │ │ ├── cblas_dsyrk.c │ │ │ ├── cblas_dtbmv.c │ │ │ ├── cblas_dtbsv.c │ │ │ ├── cblas_dtpmv.c │ │ │ ├── cblas_dtpsv.c │ │ │ ├── cblas_dtrmm.c │ │ │ ├── cblas_dtrmv.c │ │ │ ├── cblas_dtrsm.c │ │ │ ├── cblas_dtrsv.c │ │ │ ├── cblas_dzasum.c │ │ │ ├── cblas_dznrm2.c │ │ │ ├── cblas_f77.h │ │ │ ├── cblas_globals.c │ │ │ ├── cblas_icamax.c │ │ │ ├── cblas_idamax.c │ │ │ ├── 
cblas_isamax.c │ │ │ ├── cblas_izamax.c │ │ │ ├── cblas_sasum.c │ │ │ ├── cblas_saxpy.c │ │ │ ├── cblas_scasum.c │ │ │ ├── cblas_scnrm2.c │ │ │ ├── cblas_scopy.c │ │ │ ├── cblas_sdot.c │ │ │ ├── cblas_sdsdot.c │ │ │ ├── cblas_sgbmv.c │ │ │ ├── cblas_sgemm.c │ │ │ ├── cblas_sgemmt.c │ │ │ ├── cblas_sgemv.c │ │ │ ├── cblas_sger.c │ │ │ ├── cblas_snrm2.c │ │ │ ├── cblas_srot.c │ │ │ ├── cblas_srotg.c │ │ │ ├── cblas_srotm.c │ │ │ ├── cblas_srotmg.c │ │ │ ├── cblas_ssbmv.c │ │ │ ├── cblas_sscal.c │ │ │ ├── cblas_sspmv.c │ │ │ ├── cblas_sspr.c │ │ │ ├── cblas_sspr2.c │ │ │ ├── cblas_sswap.c │ │ │ ├── cblas_ssymm.c │ │ │ ├── cblas_ssymv.c │ │ │ ├── cblas_ssyr.c │ │ │ ├── cblas_ssyr2.c │ │ │ ├── cblas_ssyr2k.c │ │ │ ├── cblas_ssyrk.c │ │ │ ├── cblas_stbmv.c │ │ │ ├── cblas_stbsv.c │ │ │ ├── cblas_stpmv.c │ │ │ ├── cblas_stpsv.c │ │ │ ├── cblas_strmm.c │ │ │ ├── cblas_strmv.c │ │ │ ├── cblas_strsm.c │ │ │ ├── cblas_strsv.c │ │ │ ├── cblas_xerbla.c │ │ │ ├── cblas_zaxpy.c │ │ │ ├── cblas_zcopy.c │ │ │ ├── cblas_zdotc_sub.c │ │ │ ├── cblas_zdotu_sub.c │ │ │ ├── cblas_zdscal.c │ │ │ ├── cblas_zgbmv.c │ │ │ ├── cblas_zgemm.c │ │ │ ├── cblas_zgemmt.c │ │ │ ├── cblas_zgemv.c │ │ │ ├── cblas_zgerc.c │ │ │ ├── cblas_zgeru.c │ │ │ ├── cblas_zhbmv.c │ │ │ ├── cblas_zhemm.c │ │ │ ├── cblas_zhemv.c │ │ │ ├── cblas_zher.c │ │ │ ├── cblas_zher2.c │ │ │ ├── cblas_zher2k.c │ │ │ ├── cblas_zherk.c │ │ │ ├── cblas_zhpmv.c │ │ │ ├── cblas_zhpr.c │ │ │ ├── cblas_zhpr2.c │ │ │ ├── cblas_zscal.c │ │ │ ├── cblas_zswap.c │ │ │ ├── cblas_zsymm.c │ │ │ ├── cblas_zsyr2k.c │ │ │ ├── cblas_zsyrk.c │ │ │ ├── cblas_ztbmv.c │ │ │ ├── cblas_ztbsv.c │ │ │ ├── cblas_ztpmv.c │ │ │ ├── cblas_ztpsv.c │ │ │ ├── cblas_ztrmm.c │ │ │ ├── cblas_ztrmv.c │ │ │ ├── cblas_ztrsm.c │ │ │ └── cblas_ztrsv.c │ ├── check │ │ ├── bla_gemm_check.h │ │ ├── bla_gemmt_check.h │ │ ├── bla_gemv_check.h │ │ ├── bla_ger_check.h │ │ ├── bla_hemm_check.h │ │ ├── bla_hemv_check.h │ │ ├── bla_her2_check.h │ │ ├── bla_her2k_check.h │ │ 
├── bla_her_check.h │ │ ├── bla_herk_check.h │ │ ├── bla_symm_check.h │ │ ├── bla_symv_check.h │ │ ├── bla_syr2_check.h │ │ ├── bla_syr2k_check.h │ │ ├── bla_syr_check.h │ │ ├── bla_syrk_check.h │ │ ├── bla_trmm_check.h │ │ ├── bla_trmv_check.h │ │ ├── bla_trsm_check.h │ │ └── bla_trsv_check.h │ └── f2c │ │ ├── bla_cabs1.c │ │ ├── bla_cabs1.h │ │ ├── bla_gbmv.c │ │ ├── bla_gbmv.h │ │ ├── bla_hbmv.c │ │ ├── bla_hbmv.h │ │ ├── bla_hpmv.c │ │ ├── bla_hpmv.h │ │ ├── bla_hpr.c │ │ ├── bla_hpr.h │ │ ├── bla_hpr2.c │ │ ├── bla_hpr2.h │ │ ├── bla_lsame.c │ │ ├── bla_lsame.h │ │ ├── bla_rot.c │ │ ├── bla_rot.h │ │ ├── bla_rotg.c │ │ ├── bla_rotg.h │ │ ├── bla_rotm.c │ │ ├── bla_rotm.h │ │ ├── bla_rotmg.c │ │ ├── bla_rotmg.h │ │ ├── bla_sbmv.c │ │ ├── bla_sbmv.h │ │ ├── bla_spmv.c │ │ ├── bla_spmv.h │ │ ├── bla_spr.c │ │ ├── bla_spr.h │ │ ├── bla_spr2.c │ │ ├── bla_spr2.h │ │ ├── bla_tbmv.c │ │ ├── bla_tbmv.h │ │ ├── bla_tbsv.c │ │ ├── bla_tbsv.h │ │ ├── bla_tpmv.c │ │ ├── bla_tpmv.h │ │ ├── bla_tpsv.c │ │ ├── bla_tpsv.h │ │ ├── bla_xerbla.c │ │ ├── bla_xerbla.h │ │ ├── bla_xerbla_array.c │ │ ├── bla_xerbla_array.h │ │ └── util │ │ ├── bla_c_abs.c │ │ ├── bla_c_abs.h │ │ ├── bla_c_div.c │ │ ├── bla_c_div.h │ │ ├── bla_d_abs.c │ │ ├── bla_d_abs.h │ │ ├── bla_d_cnjg.c │ │ ├── bla_d_cnjg.h │ │ ├── bla_d_imag.c │ │ ├── bla_d_imag.h │ │ ├── bla_d_sign.c │ │ ├── bla_d_sign.h │ │ ├── bla_f__cabs.c │ │ ├── bla_f__cabs.h │ │ ├── bla_r_abs.c │ │ ├── bla_r_abs.h │ │ ├── bla_r_cnjg.c │ │ ├── bla_r_cnjg.h │ │ ├── bla_r_imag.c │ │ ├── bla_r_imag.h │ │ ├── bla_r_sign.c │ │ ├── bla_r_sign.h │ │ ├── bla_z_abs.c │ │ ├── bla_z_abs.h │ │ ├── bla_z_div.c │ │ └── bla_z_div.h ├── include │ ├── bli_arch_config.h │ ├── bli_arch_config_pre.h │ ├── bli_blas_macro_defs.h │ ├── bli_builtin_macro_defs.h │ ├── bli_complex_macro_defs.h │ ├── bli_config_macro_defs.h │ ├── bli_error_macro_defs.h │ ├── bli_extern_defs.h │ ├── bli_f2c.h │ ├── bli_genarray_macro_defs.h │ ├── bli_gentdef_macro_defs.h │ ├── 
bli_gentfunc_macro_defs.h │ ├── bli_gentprot_macro_defs.h │ ├── bli_kernel_macro_defs.h │ ├── bli_macro_defs.h │ ├── bli_misc_macro_defs.h │ ├── bli_oapi_ba.h │ ├── bli_oapi_ex.h │ ├── bli_oapi_macro_defs.h │ ├── bli_obj_macro_defs.h │ ├── bli_param_macro_defs.h │ ├── bli_pragma_macro_defs.h │ ├── bli_sbox.h │ ├── bli_scalar_macro_defs.h │ ├── bli_system.h │ ├── bli_tapi_ba.h │ ├── bli_tapi_ex.h │ ├── bli_tapi_macro_defs.h │ ├── bli_type_defs.h │ ├── bli_x86_asm_macros.h │ ├── bli_xapi_undef.h │ ├── blis.h │ └── level0 │ │ ├── 1e │ │ ├── bli_copy1es.h │ │ ├── bli_copyj1es.h │ │ ├── bli_invert1es.h │ │ ├── bli_scal1es.h │ │ ├── bli_scal21es.h │ │ └── bli_scal2j1es.h │ │ ├── 1m │ │ ├── bli_invert1ms_mxn_diag.h │ │ ├── bli_scal1ms_mxn.h │ │ ├── bli_scal21ms_mxn.h │ │ ├── bli_scal21ms_mxn_diag.h │ │ ├── bli_scal21ms_mxn_uplo.h │ │ ├── bli_set1ms_mxn.h │ │ ├── bli_set1ms_mxn_diag.h │ │ ├── bli_set1ms_mxn_uplo.h │ │ └── bli_seti01ms_mxn_diag.h │ │ ├── 1r │ │ ├── bli_copy1rs.h │ │ ├── bli_copyj1rs.h │ │ ├── bli_invert1rs.h │ │ ├── bli_scal1rs.h │ │ ├── bli_scal21rs.h │ │ └── bli_scal2j1rs.h │ │ ├── bb │ │ ├── bli_bcastbbs_mxn.h │ │ ├── bli_scal2bbs_mxn.h │ │ └── bli_set0bbs_mxn.h │ │ ├── bli_absq2s.h │ │ ├── bli_abval2s.h │ │ ├── bli_add3s.h │ │ ├── bli_addjs.h │ │ ├── bli_adds.h │ │ ├── bli_adds_mxn.h │ │ ├── bli_adds_mxn_uplo.h │ │ ├── bli_axmys.h │ │ ├── bli_axpbyjs.h │ │ ├── bli_axpbys.h │ │ ├── bli_axpyjs.h │ │ ├── bli_axpys.h │ │ ├── bli_conjs.h │ │ ├── bli_constants.h │ │ ├── bli_copycjs.h │ │ ├── bli_copyjnzs.h │ │ ├── bli_copyjs.h │ │ ├── bli_copynzs.h │ │ ├── bli_copys.h │ │ ├── bli_copys_mxn.h │ │ ├── bli_dotjs.h │ │ ├── bli_dots.h │ │ ├── bli_eq.h │ │ ├── bli_fprints.h │ │ ├── bli_gets.h │ │ ├── bli_inverts.h │ │ ├── bli_invscaljs.h │ │ ├── bli_invscals.h │ │ ├── bli_neg2s.h │ │ ├── bli_randnp2s.h │ │ ├── bli_rands.h │ │ ├── bli_scal2js.h │ │ ├── bli_scal2s.h │ │ ├── bli_scal2s_mxn.h │ │ ├── bli_scalcjs.h │ │ ├── bli_scaljs.h │ │ ├── bli_scals.h │ │ ├── 
bli_set0s.h │ │ ├── bli_set0s_mxn.h │ │ ├── bli_set1s.h │ │ ├── bli_seti0s.h │ │ ├── bli_setis.h │ │ ├── bli_setrs.h │ │ ├── bli_sets.h │ │ ├── bli_sqrt2s.h │ │ ├── bli_subjs.h │ │ ├── bli_subs.h │ │ ├── bli_swaps.h │ │ ├── bli_xpbyjs.h │ │ ├── bli_xpbys.h │ │ ├── bli_xpbys_mxn.h │ │ ├── bli_xpbys_mxn_uplo.h │ │ ├── io │ │ ├── bli_scal2ios.h │ │ └── bli_scal2jios.h │ │ ├── old │ │ ├── bli_cast.h │ │ ├── bli_castfrom.h │ │ ├── bli_castto.h │ │ ├── bli_copynzjs.h │ │ ├── bli_copynzs.h │ │ ├── bli_invscalcjs.h │ │ ├── bli_scalcjs.h │ │ └── bli_set0ris_mxn.h │ │ ├── ri │ │ ├── bli_absq2ris.h │ │ ├── bli_abval2ris.h │ │ ├── bli_add3ris.h │ │ ├── bli_addjris.h │ │ ├── bli_addris.h │ │ ├── bli_axmyris.h │ │ ├── bli_axpbyjris.h │ │ ├── bli_axpbyris.h │ │ ├── bli_axpyjris.h │ │ ├── bli_axpyris.h │ │ ├── bli_conjris.h │ │ ├── bli_copycjris.h │ │ ├── bli_copyjris.h │ │ ├── bli_copyris.h │ │ ├── bli_eqris.h │ │ ├── bli_invertris.h │ │ ├── bli_invscaljris.h │ │ ├── bli_invscalris.h │ │ ├── bli_neg2ris.h │ │ ├── bli_scal2jris.h │ │ ├── bli_scal2ris.h │ │ ├── bli_scal2ris_mxn.h │ │ ├── bli_scalcjris.h │ │ ├── bli_scaljris.h │ │ ├── bli_scalris.h │ │ ├── bli_scalris_mxn_uplo.h │ │ ├── bli_set0ris.h │ │ ├── bli_sqrt2ris.h │ │ ├── bli_subjris.h │ │ ├── bli_subris.h │ │ ├── bli_swapris.h │ │ ├── bli_xpbyjris.h │ │ └── bli_xpbyris.h │ │ ├── ri3 │ │ ├── bli_copyjri3s.h │ │ ├── bli_copyri3s.h │ │ ├── bli_scal2jri3s.h │ │ ├── bli_scal2ri3s.h │ │ └── bli_scal2ri3s_mxn.h │ │ ├── rih │ │ ├── bli_scal2rihs_mxn.h │ │ ├── bli_scal2rihs_mxn_diag.h │ │ ├── bli_scal2rihs_mxn_uplo.h │ │ └── bli_setrihs_mxn_diag.h │ │ ├── ro │ │ ├── bli_scal2jros.h │ │ └── bli_scal2ros.h │ │ └── rpi │ │ ├── bli_scal2jrpis.h │ │ └── bli_scal2rpis.h ├── ind │ ├── bli_ind.c │ ├── bli_ind.h │ ├── bli_l3_ind.c │ ├── bli_l3_ind.h │ ├── cntx │ │ ├── bli_cntx_ind_stage.c │ │ └── bli_cntx_ind_stage.h │ ├── oapi │ │ ├── bli_l3_3m4m1m_oapi.c │ │ ├── bli_l3_ind_oapi.c │ │ ├── bli_l3_ind_oapi.h │ │ └── bli_l3_nat_oapi.c │ ├── 
tapi │ │ ├── bli_l3_ind_tapi.c │ │ └── bli_l3_ind_tapi.h │ └── ukernels │ │ └── bli_l3_ind_ukr.h ├── thread │ ├── bli_l3_decor.h │ ├── bli_l3_decor_openmp.c │ ├── bli_l3_decor_openmp.h │ ├── bli_l3_decor_pthreads.c │ ├── bli_l3_decor_pthreads.h │ ├── bli_l3_decor_single.c │ ├── bli_l3_decor_single.h │ ├── bli_l3_sup_decor.h │ ├── bli_l3_sup_decor_openmp.c │ ├── bli_l3_sup_decor_openmp.h │ ├── bli_l3_sup_decor_pthreads.c │ ├── bli_l3_sup_decor_pthreads.h │ ├── bli_l3_sup_decor_single.c │ ├── bli_l3_sup_decor_single.h │ ├── bli_pthread.c │ ├── bli_pthread.h │ ├── bli_thrcomm.c │ ├── bli_thrcomm.h │ ├── bli_thrcomm_openmp.c │ ├── bli_thrcomm_openmp.h │ ├── bli_thrcomm_pthreads.c │ ├── bli_thrcomm_pthreads.h │ ├── bli_thrcomm_single.c │ ├── bli_thrcomm_single.h │ ├── bli_thread.c │ ├── bli_thread.h │ ├── bli_thrinfo.c │ ├── bli_thrinfo.h │ ├── bli_thrinfo_sup.c │ ├── bli_thrinfo_sup.h │ └── old │ │ ├── bli_mutex.h │ │ ├── bli_mutex_openmp.h │ │ ├── bli_mutex_pthreads.h │ │ └── bli_mutex_single.h └── util │ ├── bli_util.h │ ├── bli_util_check.c │ ├── bli_util_check.h │ ├── bli_util_fpa.c │ ├── bli_util_fpa.h │ ├── bli_util_ft.h │ ├── bli_util_oapi.c │ ├── bli_util_oapi.h │ ├── bli_util_oapi_ba.c │ ├── bli_util_oapi_ex.c │ ├── bli_util_tapi.c │ ├── bli_util_tapi.h │ ├── bli_util_tapi_ba.c │ ├── bli_util_tapi_ex.c │ ├── bli_util_unb_var1.c │ └── bli_util_unb_var1.h ├── kernels ├── aaplmx │ ├── 3 │ │ ├── amx.h │ │ ├── amx_ext.h │ │ ├── bli_gemm_aaplmx_mac_d16x16.c │ │ ├── bli_gemm_aaplmx_mac_d32x16.c │ │ ├── bli_gemm_aaplmx_mac_s_sh32x32.c │ │ ├── bli_gemm_aaplmx_mac_sh64x32.c │ │ └── sup │ │ │ ├── bli_gemmsup_aaplmx_ref.c │ │ │ ├── bli_gemmsup_rv_aaplmx_mac_d32x16mn.c │ │ │ └── bli_gemmsup_rv_aaplmx_mac_s32x32mn.c │ ├── 1m │ │ ├── bli_packm_aaplmx_mac_d16xk.c │ │ ├── bli_packm_aaplmx_mac_d32xk.c │ │ └── bli_packm_aaplmx_mac_s32xk.c │ └── bli_kernels_aaplmx.h ├── armsve │ ├── 3 │ │ ├── armsve_asm_2vx10.h │ │ ├── armsve_asm_macros.h │ │ ├── armsve_asm_macros_double.h │ │ 
├── armsve_asm_macros_half.h │ │ ├── armsve_asm_macros_single.h │ │ ├── bli_gemm_armsve256_asm_d8x8.c │ │ ├── bli_gemm_armsve_asm_d2vx10_unindexed.c │ │ ├── bli_gemm_armsve_asm_s2vx10_unindexed.c │ │ ├── bli_gemm_armsve_asm_sh2vx10_unindexed.c │ │ └── sup │ │ │ ├── bli_gemmsup_armsve_ref.c │ │ │ ├── bli_gemmsup_cv_armsve_asm_d2vx10_unindexed.c │ │ │ └── bli_gemmsup_rv_armsve_asm_d2vx10_unindexed.c │ ├── 1m │ │ ├── armsve512_asm_transpose_d8x2.h │ │ ├── armsve512_asm_transpose_d8x8.h │ │ ├── bli_dpackm_armsve256_asm_8xk.c │ │ ├── bli_dpackm_armsve512_asm_10xk.c │ │ ├── bli_dpackm_armsve512_asm_12xk.c │ │ └── bli_dpackm_armsve512_asm_16xk.c │ └── bli_kernels_armsve.h ├── armv7a │ ├── 3 │ │ ├── bli_cgemm_armv7a_asm_2x2.S │ │ ├── bli_dgemm_armv7a_asm_4x4.S │ │ ├── bli_gemm_armv7a_asm_d4x4.c │ │ ├── bli_gemm_armv7a_int_d4x4.c │ │ ├── bli_sgemm_armv7a_asm_4x4.S │ │ └── bli_zgemm_armv7a_asm_2x2.S │ └── bli_kernels_armv7a.h ├── armv8a │ ├── 3 │ │ ├── armv8a_asm_utils.h │ │ └── bli_gemm_armv8a_asm_d6x8.c │ └── bli_kernels_armv8a.h ├── bgq │ ├── 1 │ │ ├── bli_axpyv_bgq_int.c │ │ └── bli_dotv_bgq_int.c │ ├── 3 │ │ └── bli_gemm_bgq_int_8x8.c │ ├── 1f │ │ └── bli_axpyf_bgq_int.c │ └── bli_kernels_bgq.h ├── bulldozer │ ├── 3 │ │ └── bli_gemm_bulldozer_asm_d4x6_fma4.c │ └── bli_kernels_bulldozer.h ├── generic │ └── generic.txt ├── haswell │ ├── 3 │ │ ├── bli_gemm_haswell_asm_d6x8.c │ │ ├── bli_gemm_haswell_asm_d8x6.c │ │ ├── bli_gemmtrsm_l_haswell_asm_d6x8.c │ │ ├── bli_gemmtrsm_u_haswell_asm_d6x8.c │ │ ├── old │ │ │ ├── bli_gemm_haswell_asm_d12x4.c │ │ │ ├── bli_gemm_haswell_asm_d4x12.c │ │ │ ├── bli_gemm_haswell_asm_d6x8.c │ │ │ └── bli_gemm_haswell_asm_d8x6.c │ │ └── sup │ │ │ ├── bli_gemmsup_rd_haswell_asm_d6x8m.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_d6x8n.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_s6x16m.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_s6x16n.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_d6x8m.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_d6x8n.c │ │ │ ├── 
bli_gemmsup_rv_haswell_asm_s6x16m.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_s6x16n.c │ │ │ ├── d6x8 │ │ │ ├── bli_gemmsup_r_haswell_ref_dMx1.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_dMx1.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_dMx2.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_dMx4.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_dMx8.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_dMx2.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_dMx4.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_dMx6.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_dMx8.c │ │ │ └── old │ │ │ │ ├── bli_gemmsup_rd_haswell_asm_d6x8.c │ │ │ │ └── bli_gemmsup_rv_haswell_asm_d6x8.c │ │ │ ├── old │ │ │ ├── bli_gemmsup_rd_haswell_asm_d6x8.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_d6x8m.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_d6x8m.c.newji │ │ │ ├── bli_gemmsup_rd_haswell_asm_d6x8m.c.worksij │ │ │ └── bli_gemmsup_rd_haswell_asm_d6x8n.c │ │ │ └── s6x16 │ │ │ ├── bli_gemmsup_r_haswell_ref_sMx1.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_sMx1.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_sMx12.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_sMx16.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_sMx2.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_sMx4.c │ │ │ ├── bli_gemmsup_rd_haswell_asm_sMx8.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_sMx12.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_sMx16.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_sMx2.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_sMx4.c │ │ │ ├── bli_gemmsup_rv_haswell_asm_sMx6.c │ │ │ └── bli_gemmsup_rv_haswell_asm_sMx8.c │ ├── 1m │ │ ├── bli_packm_haswell_asm_c3xk.c │ │ ├── bli_packm_haswell_asm_c8xk.c │ │ ├── bli_packm_haswell_asm_d6xk.c │ │ ├── bli_packm_haswell_asm_d8xk.c │ │ ├── bli_packm_haswell_asm_s16xk.c │ │ ├── bli_packm_haswell_asm_s6xk.c │ │ ├── bli_packm_haswell_asm_z3xk.c │ │ └── bli_packm_haswell_asm_z4xk.c │ └── bli_kernels_haswell.h ├── knc │ ├── 3 │ │ ├── bli_dgemm_knc_asm_30x8.c │ │ └── bli_sgemm_knc_asm_30x16.c │ └── bli_kernels_knc.h ├── knl │ ├── 3 │ │ ├── bli_dgemm_knl_asm_24x8.c │ │ ├── bli_sgemm_knl_asm_24x16.c │ │ └── other │ │ │ ├── 
bli_dgemm_knl_asm_12x16.c │ │ │ ├── bli_dgemm_knl_asm_30x8.c │ │ │ ├── bli_dgemm_knl_asm_30x8_knc.c │ │ │ ├── bli_dgemm_knl_asm_8x24.c │ │ │ └── bli_sgemm_knl_asm_30x16_knc.c │ ├── 1m │ │ ├── bli_dpackm_knl_asm_24x8.c │ │ ├── bli_spackm_knl_asm_24x16.c │ │ └── old │ │ │ └── bli_packm_knl_asm_30x8.c │ └── bli_kernels_knl.h ├── old │ ├── c99 │ │ ├── 3 │ │ │ ├── bli_gemm_c99_4x4.c │ │ │ ├── bli_gemmtrsm_l_c99_4x4.c │ │ │ ├── bli_gemmtrsm_u_c99_4x4.c │ │ │ ├── bli_trsm_l_c99_4x4.c │ │ │ └── bli_trsm_u_c99_4x4.c │ │ └── bli_kernels_c99.h │ ├── loongson3a │ │ └── 3 │ │ │ └── bli_gemm_loongson3a_opt_d4x4.c │ ├── nacl │ │ └── pnacl │ │ │ ├── 1 │ │ │ ├── bli_axpyv_opt.c │ │ │ └── bli_dotv_opt.c │ │ │ └── 3 │ │ │ └── bli_gemm_opt.c │ └── x86 │ │ ├── 3 │ │ ├── bli_gemm_opt_d2x4.c │ │ ├── bli_gemm_opt_d4x2.c │ │ ├── bli_gemmtrsm_l_opt_d4x2.c │ │ ├── bli_gemmtrsm_u_opt_d4x2.c │ │ └── bli_trsm_l_opt_d4x2.c │ │ └── 1m │ │ ├── bli_packm_2xk.c │ │ ├── bli_packm_2xk.h │ │ ├── bli_packm_4xk.c │ │ └── bli_packm_4xk.h ├── penryn │ ├── 1 │ │ ├── bli_axpyv_penryn_int.c │ │ └── bli_dotv_penryn_int.c │ ├── 3 │ │ ├── bli_gemm_penryn_asm_d4x4.c │ │ ├── bli_gemmtrsm_l_penryn_asm_d4x4.c │ │ ├── bli_gemmtrsm_u_penryn_asm_d4x4.c │ │ ├── bli_trsm_l_penryn_asm_d4x4.c │ │ └── bli_trsm_u_penryn_asm_d4x4.c │ ├── 1f │ │ ├── bli_axpy2v_penryn_int.c │ │ ├── bli_axpyf_penryn_int.c │ │ ├── bli_dotaxpyv_penryn_int.c │ │ ├── bli_dotxaxpyf_penryn_int.c │ │ └── bli_dotxf_penryn_int.c │ └── bli_kernels_penryn.h ├── piledriver │ ├── 3 │ │ └── bli_gemm_piledriver_asm_d8x3.c │ └── bli_kernels_piledriver.h ├── power10 │ ├── 3 │ │ ├── bli_dgemm_power10_mma.c │ │ ├── bli_i16gemm_power10_mma.c │ │ ├── bli_i16sgemm_power10_mma.c │ │ ├── bli_i4gemm_power10_mma.c │ │ ├── bli_i8gemm_power10_mma.c │ │ ├── bli_sbgemm_power10_mma.c │ │ ├── bli_sgemm_power10_mma.c │ │ ├── bli_shgemm_power10_mma.c │ │ └── vector_int_macros.h │ └── bli_kernels_power10.h ├── power7 │ ├── 3 │ │ ├── bli_gemm_power7_int_8x4.c │ │ └── test │ │ │ 
├── Makefile │ │ │ ├── bli_gemm_power7_int_8x4.c │ │ │ ├── bli_gemm_power7_int_8x4.h │ │ │ ├── blis_utest.h │ │ │ └── exp.c │ └── bli_kernels_power7.h ├── power9 │ ├── 3 │ │ ├── bli_gemm_power9_asm_d12x6.c │ │ └── bli_pwr9_asm_macros_12x6.h │ └── bli_kernels_power9.h ├── sandybridge │ ├── 3 │ │ ├── bli_gemm_sandybridge_asm_d8x4.c │ │ └── bli_gemm_sandybridge_int_d8x4.c │ └── bli_kernels_sandybridge.h ├── skx │ ├── 3 │ │ ├── bli_dgemm_skx_asm_16x12_l2.c │ │ ├── bli_dgemm_skx_asm_16x14.c │ │ └── bli_sgemm_skx_asm_32x12_l2.c │ └── bli_kernels_skx.h ├── zen │ ├── 1 │ │ ├── bli_amaxv_zen_int.c │ │ ├── bli_axpyv_zen_int.c │ │ ├── bli_axpyv_zen_int10.c │ │ ├── bli_copyv_zen_int.c │ │ ├── bli_dotv_zen_int.c │ │ ├── bli_dotv_zen_int10.c │ │ ├── bli_dotxv_zen_int.c │ │ ├── bli_scalv_zen_int.c │ │ ├── bli_scalv_zen_int10.c │ │ ├── bli_setv_zen_int.c │ │ └── bli_swapv_zen_int8.c │ ├── 3 │ │ ├── bli_gemm_small.c │ │ ├── bli_syrk_small.c │ │ ├── bli_trsm_small.c │ │ └── sup │ │ │ ├── broken │ │ │ ├── bli_gemmsup_rv_zen_asm_c3x8.c │ │ │ ├── bli_gemmsup_rv_zen_asm_c3x8m.c │ │ │ ├── bli_gemmsup_rv_zen_asm_c3x8n.c │ │ │ ├── bli_gemmsup_rv_zen_asm_z3x4.c │ │ │ ├── bli_gemmsup_rv_zen_asm_z3x4m.c │ │ │ └── bli_gemmsup_rv_zen_asm_z3x4n.c │ │ │ └── other │ │ │ ├── bli_gemmsup_rd_zen_asm_s6x16.c │ │ │ ├── bli_gemmsup_rd_zen_asm_s6x16m.c │ │ │ ├── bli_gemmsup_rd_zen_asm_s6x16n.c │ │ │ ├── bli_gemmsup_rv_zen_asm_s6x16.c │ │ │ ├── bli_gemmsup_rv_zen_asm_s6x16m.c │ │ │ └── bli_gemmsup_rv_zen_asm_s6x16n.c │ ├── 1f │ │ ├── bli_axpyf_zen_int_8.c │ │ └── bli_dotxf_zen_int_8.c │ └── bli_kernels_zen.h └── zen2 │ ├── 1f │ └── bli_axpyf_zen_int_5.c │ └── bli_kernels_zen2.h ├── mpi_test ├── Makefile ├── test_gemm.c ├── test_hemm.c ├── test_her2k.c ├── test_herk.c ├── test_trmm.c └── test_trsm.c ├── ref_kernels ├── 1 │ ├── bli_addv_ref.c │ ├── bli_amaxv_ref.c │ ├── bli_axpbyv_ref.c │ ├── bli_axpyv_ref.c │ ├── bli_copyv_ref.c │ ├── bli_dotv_ref.c │ ├── bli_dotxv_ref.c │ ├── bli_invertv_ref.c │ ├── 
bli_scal2v_ref.c │ ├── bli_scalv_ref.c │ ├── bli_setv_ref.c │ ├── bli_subv_ref.c │ ├── bli_swapv_ref.c │ └── bli_xpbyv_ref.c ├── 3 │ ├── bb │ │ ├── bli_gemmbb_ref.c │ │ ├── bli_gemmtrsmbb_ref.c │ │ └── bli_trsmbb_ref.c │ ├── bli_gemm_ref.c │ ├── bli_gemmsup_ref.c │ ├── bli_gemmtrsm_ref.c │ ├── bli_trsm_ref.c │ └── old │ │ ├── bli_gemm_simd_ref.c │ │ ├── bli_gemm_unrl_ref.c │ │ └── bli_trsm_simd_ref.c ├── 1f │ ├── bli_axpy2v_ref.c │ ├── bli_axpyf_ref.c │ ├── bli_dotaxpyv_ref.c │ ├── bli_dotxaxpyf_ref.c │ ├── bli_dotxf_ref.c │ └── other │ │ └── bli_dotxaxpyf_ref_alt.c ├── 1m │ ├── bli_packm_cxk_1er_ref.c │ ├── bli_packm_cxk_3mis_ref.c │ ├── bli_packm_cxk_4mi_ref.c │ ├── bli_packm_cxk_bb_ref.c │ ├── bli_packm_cxk_ref.c │ ├── bli_packm_cxk_rih_ref.c │ └── bli_unpackm_cxk_ref.c ├── bli_cntx_ref.c └── ind │ ├── bli_gemm1m_ref.c │ ├── bli_gemm3m1_ref.c │ ├── bli_gemm3mh_ref.c │ ├── bli_gemm4m1_ref.c │ ├── bli_gemm4mb_ref.c │ ├── bli_gemm4mh_ref.c │ ├── bli_gemmtrsm1m_ref.c │ ├── bli_gemmtrsm3m1_ref.c │ ├── bli_gemmtrsm4m1_ref.c │ ├── bli_trsm1m_ref.c │ ├── bli_trsm3m1_ref.c │ └── bli_trsm4m1_ref.c ├── sandbox ├── appleamx2 │ ├── bli_gemmnat.c │ ├── bli_sandbox.h │ ├── gemm.c │ ├── gemm_prototypes.h │ ├── old │ │ └── amx_testsuite │ │ │ ├── Makefile │ │ │ ├── common.h │ │ │ ├── correctness.c │ │ │ ├── correctness.h │ │ │ ├── performance.c │ │ │ └── performance.h │ └── packm.c ├── gemmlike │ ├── bli_gemmnat.c │ ├── bli_sandbox.h │ ├── bls_gemm.c │ ├── bls_gemm.h │ ├── bls_gemm_bp_var1.c │ ├── bls_gemm_bp_var2.c │ ├── bls_gemm_var.h │ ├── bls_l3_packm_a.c │ ├── bls_l3_packm_a.h │ ├── bls_l3_packm_b.c │ ├── bls_l3_packm_b.h │ ├── bls_l3_packm_var.c │ ├── bls_l3_packm_var.h │ └── thread │ │ ├── bls_l3_decor.h │ │ ├── bls_l3_decor_openmp.c │ │ ├── bls_l3_decor_openmp.h │ │ ├── bls_l3_decor_pthreads.c │ │ ├── bls_l3_decor_pthreads.h │ │ ├── bls_l3_decor_single.c │ │ └── bls_l3_decor_single.h ├── power10 │ ├── POWER10.md │ ├── bli_gemmnat.c │ ├── bli_sandbox.h │ ├── gemm.c │ ├── 
gemm_prototypes.h │ ├── gemm_template.h │ ├── i4_macros.h │ ├── p10_testsuite │ │ ├── Makefile │ │ ├── cast_funcs.c │ │ ├── cast_funcs.h │ │ ├── common.h │ │ ├── correctness.c │ │ ├── correctness.h │ │ ├── performance.c │ │ └── performance.h │ ├── pack_a_templates.h │ ├── pack_b_templates.h │ └── pack_kernels.c └── ref99 │ ├── bli_gemmnat.c │ ├── bli_sandbox.h │ ├── blix.h │ ├── blx_gemm_ref_var2.c │ ├── blx_gemm_ref_var2.h │ └── old │ ├── base │ ├── blx_blksz.c │ └── blx_blksz.h │ ├── blx_gemm.h │ ├── blx_gemm_front.c │ ├── blx_gemm_front.h │ ├── blx_gemm_int.c │ ├── blx_gemm_int.h │ ├── cntl │ ├── blx_gemm_cntl.c │ ├── blx_gemm_cntl.h │ ├── blx_l3_cntl_if.c │ ├── blx_l3_cntl_if.h │ ├── blx_packm_cntl.c │ └── blx_packm_cntl.h │ ├── packm │ ├── blx_l3_packm.c │ └── blx_l3_packm.h │ ├── thread │ ├── blx_gemm_thread.c │ └── blx_gemm_thread.h │ └── vars │ ├── blx_gemm_blk_var1.c │ ├── blx_gemm_blk_var2.c │ ├── blx_gemm_blk_var3.c │ ├── blx_gemm_ker_var2.c │ ├── blx_gemm_packab.c │ ├── blx_gemm_var.h │ └── other │ ├── blx_gemm_ker_var2rr.c │ └── blx_gemm_ker_var2sl.c ├── so_version ├── test ├── 3 │ ├── Makefile │ ├── octave │ │ ├── gen_opnames.m │ │ ├── plot_l3_perf.m │ │ ├── plot_panel_4x5.m │ │ ├── read_data.m │ │ ├── runthese.m │ │ └── subplot_tight.m │ ├── runme.sh │ ├── test_gemm.c │ ├── test_hemm.c │ ├── test_herk.c │ ├── test_trmm.c │ └── test_trsm.c ├── 1m4m │ ├── Makefile │ ├── runme.sh │ └── test_gemm.c ├── Makefile ├── exec_sizes │ ├── Makefile │ ├── grab_top_outputs.sh │ ├── makefile.prev │ └── test_size.c ├── mixeddt │ ├── Makefile │ ├── matlab │ │ ├── gen_dt_combos.m │ │ ├── gen_prec_combos.m │ │ ├── output │ │ │ └── .gitkeep │ │ ├── plot_all.m │ │ ├── plot_dom_all.m │ │ ├── plot_dom_case.m │ │ ├── plot_dt_all.m │ │ ├── plot_dt_select.m │ │ ├── plot_gemm_perf.m │ │ ├── prec_dom_to_dt.m │ │ ├── testrand.m │ │ └── wawoxmem │ │ │ ├── dt_to_dom.m │ │ │ ├── gen_prec_combos.m │ │ │ ├── plot_dom_all.m │ │ │ ├── plot_dom_case.m │ │ │ ├── plot_dt_select.m │ │ │ 
├── plot_gemm_perf.m │ │ │ ├── prec_dom_to_dt.m │ │ │ └── uses_xmem.m │ ├── runme.sh │ └── test_gemm.c ├── other │ ├── test_copyv.c │ ├── test_gemm.c │ ├── test_scalv.c │ ├── test_swapv.c │ └── test_trsm.c ├── runme.sh ├── studies │ ├── skx │ │ ├── Makefile │ │ ├── plot_gemm_mt_perf.m │ │ ├── plot_gemm_st_perf.m │ │ ├── plot_hemm_mt_perf.m │ │ ├── plot_hemm_st_perf.m │ │ ├── plot_skx_perf.m │ │ ├── plot_syrk_mt_perf.m │ │ ├── plot_syrk_st_perf.m │ │ ├── plot_trmm_mt_perf.m │ │ ├── plot_trmm_st_perf.m │ │ ├── runme.sh │ │ ├── test_gemm.c │ │ ├── test_hemm.c │ │ ├── test_syrk.c │ │ └── test_trmm.c │ └── thunderx2 │ │ ├── Makefile │ │ ├── plot_gemm_mt_perf.m │ │ ├── plot_gemm_st_perf.m │ │ ├── plot_hemm_mt_perf.m │ │ ├── plot_hemm_st_perf.m │ │ ├── plot_syrk_mt_perf.m │ │ ├── plot_syrk_st_perf.m │ │ ├── plot_thunderx2_perf.m │ │ ├── plot_trmm_mt_perf.m │ │ ├── plot_trmm_st_perf.m │ │ ├── runme.sh │ │ ├── test_gemm.c │ │ ├── test_hemm.c │ │ ├── test_syrk.c │ │ └── test_trmm.c ├── sup │ ├── Makefile │ ├── octave │ │ ├── bkup │ │ │ ├── gen_opsupnames.m │ │ │ ├── plot_l3sup_perf.m │ │ │ ├── plot_panel_trxsh.m │ │ │ └── runthese.m │ │ ├── gen_opsupnames.m │ │ ├── load_data.m │ │ ├── plot_l3sup_perf.m │ │ ├── plot_panel_trxsh.m │ │ ├── runthese.m │ │ └── subplot_tight.m │ ├── old │ │ ├── octave_mt │ │ │ ├── gen_opsupnames.m │ │ │ ├── plot_l3sup_perf.m │ │ │ ├── plot_panel_trxsh.m │ │ │ └── runthese.m │ │ ├── octave_st │ │ │ ├── gen_opsupnames.m │ │ │ ├── plot_l3sup_perf.m │ │ │ ├── plot_panel_trxsh.m │ │ │ └── runthese.m │ │ ├── supmt │ │ │ ├── Makefile │ │ │ ├── octave │ │ │ │ ├── gen_opsupnames.m │ │ │ │ ├── plot_l3sup_perf.m │ │ │ │ ├── plot_panel_trxsh.m │ │ │ │ └── runthese.m │ │ │ ├── runme.sh │ │ │ └── test_gemm.c │ │ └── supst │ │ │ ├── Makefile │ │ │ ├── octave │ │ │ ├── gen_opsupnames.m │ │ │ ├── plot_l3sup_perf.m │ │ │ ├── plot_panel_trxsh.m │ │ │ └── runthese.m │ │ │ ├── runme.sh │ │ │ └── test_gemm.c │ ├── runme.sh │ └── test_gemm.c ├── test_axpyv.c ├── 
test_dotv.c ├── test_gemm.c ├── test_gemmt.c ├── test_gemv.c ├── test_ger.c ├── test_hemm.c ├── test_hemv.c ├── test_her.c ├── test_her2.c ├── test_her2k.c ├── test_herk.c ├── test_trmm.c ├── test_trmv.c ├── test_trsm.c ├── test_trsv.c └── thread_ranges │ ├── Makefile │ └── test_ranges.c ├── testsuite ├── Makefile ├── check-blistest.sh ├── input.general ├── input.general.fast ├── input.general.mixed ├── input.general.salt ├── input.operations ├── input.operations.fast ├── input.operations.mixed ├── input.operations.salt ├── obj │ └── .gitkeep ├── old │ └── jobscripts │ │ ├── cfig.out │ │ ├── cfig.sh │ │ ├── jb-cfig.sh │ │ ├── jb-mk.sh │ │ ├── jb-runtest.sh │ │ ├── mk.out │ │ ├── mk.sh │ │ └── runtest.sh └── src │ ├── test_addm.c │ ├── test_addm.h │ ├── test_addv.c │ ├── test_addv.h │ ├── test_amaxv.c │ ├── test_amaxv.h │ ├── test_axpbyv.c │ ├── test_axpbyv.h │ ├── test_axpy2v.c │ ├── test_axpy2v.h │ ├── test_axpyf.c │ ├── test_axpyf.h │ ├── test_axpym.c │ ├── test_axpym.h │ ├── test_axpyv.c │ ├── test_axpyv.h │ ├── test_copym.c │ ├── test_copym.h │ ├── test_copyv.c │ ├── test_copyv.h │ ├── test_dotaxpyv.c │ ├── test_dotaxpyv.h │ ├── test_dotv.c │ ├── test_dotv.h │ ├── test_dotxaxpyf.c │ ├── test_dotxaxpyf.h │ ├── test_dotxf.c │ ├── test_dotxf.h │ ├── test_dotxv.c │ ├── test_dotxv.h │ ├── test_gemm.c │ ├── test_gemm.h │ ├── test_gemm_ukr.c │ ├── test_gemm_ukr.h │ ├── test_gemmt.c │ ├── test_gemmt.h │ ├── test_gemmtrsm_ukr.c │ ├── test_gemmtrsm_ukr.h │ ├── test_gemv.c │ ├── test_gemv.h │ ├── test_ger.c │ ├── test_ger.h │ ├── test_hemm.c │ ├── test_hemm.h │ ├── test_hemv.c │ ├── test_hemv.h │ ├── test_her.c │ ├── test_her.h │ ├── test_her2.c │ ├── test_her2.h │ ├── test_her2k.c │ ├── test_her2k.h │ ├── test_herk.c │ ├── test_herk.h │ ├── test_libblis.c │ ├── test_libblis.h │ ├── test_normfm.c │ ├── test_normfm.h │ ├── test_normfv.c │ ├── test_normfv.h │ ├── test_randm.c │ ├── test_randm.h │ ├── test_randv.c │ ├── test_randv.h │ ├── test_scal2m.c │ ├── test_scal2m.h │ 
├── test_scal2v.c │ ├── test_scal2v.h │ ├── test_scalm.c │ ├── test_scalm.h │ ├── test_scalv.c │ ├── test_scalv.h │ ├── test_setm.c │ ├── test_setm.h │ ├── test_setv.c │ ├── test_setv.h │ ├── test_subm.c │ ├── test_subm.h │ ├── test_subv.c │ ├── test_subv.h │ ├── test_symm.c │ ├── test_symm.h │ ├── test_symv.c │ ├── test_symv.h │ ├── test_syr.c │ ├── test_syr.h │ ├── test_syr2.c │ ├── test_syr2.h │ ├── test_syr2k.c │ ├── test_syr2k.h │ ├── test_syrk.c │ ├── test_syrk.h │ ├── test_trmm.c │ ├── test_trmm.h │ ├── test_trmm3.c │ ├── test_trmm3.h │ ├── test_trmv.c │ ├── test_trmv.h │ ├── test_trsm.c │ ├── test_trsm.h │ ├── test_trsm_ukr.c │ ├── test_trsm_ukr.h │ ├── test_trsv.c │ ├── test_trsv.h │ ├── test_xpbym.c │ ├── test_xpbym.h │ ├── test_xpbyv.c │ └── test_xpbyv.h ├── travis ├── cpuid │ ├── excavator.def │ ├── haswell.def │ ├── penryn.def │ ├── piledriver.def │ ├── sandybridge.def │ ├── skx.def │ ├── skx1.def │ ├── steamroller.def │ └── zen.def ├── do_sde.sh ├── do_testsuite.sh └── patch-ld-so.py ├── vendor ├── cpp │ ├── blis.hh │ └── cblas.hh └── testcpp │ ├── Makefile │ ├── test.hh │ ├── test.sh │ ├── test_asum.cc │ ├── test_axpy.cc │ ├── test_copy.cc │ ├── test_dot.cc │ ├── test_dotc.cc │ ├── test_gbmv.cc │ ├── test_gemm.cc │ ├── test_gemm.hh │ ├── test_gemv.cc │ ├── test_ger.cc │ ├── test_gerc.cc │ ├── test_geru.cc │ ├── test_hemm.cc │ ├── test_hemv.cc │ ├── test_her.cc │ ├── test_her2.cc │ ├── test_herk.cc │ ├── test_hpr.cc │ ├── test_hpr2.cc │ ├── test_nrm2.cc │ ├── test_rot.cc │ ├── test_rotg.cc │ ├── test_rotm.cc │ ├── test_rotmg.cc │ ├── test_scal.cc │ ├── test_sdsdot.cc │ ├── test_spr.cc │ ├── test_spr2.cc │ ├── test_swap.cc │ ├── test_symm.cc │ ├── test_syr.cc │ ├── test_syr2.cc │ ├── test_syr2k.cc │ ├── test_syrk.cc │ ├── test_tbmv.cc │ ├── test_tbsv.cc │ ├── test_tpmv.cc │ ├── test_tpsv.cc │ ├── test_trmm.cc │ ├── test_trsm.cc │ └── test_trsv.cc └── version /.dir-locals.el: 
-------------------------------------------------------------------------------- 1 | ;; First (minimal) attempt at configuring Emacs CC mode for the BLIS 2 | ;; layout requirements. 3 | ((c-mode . ((c-file-style . "stroustrup") 4 | (c-basic-offset . 4) 5 | (comment-start . "// ") 6 | (comment-end . "") 7 | (indent-tabs-mode . t) 8 | (tab-width . 4) 9 | (parens-require-spaces . nil)))) 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # -- generic files to ignore -- 2 | 3 | # emacs backup files 4 | *~ 5 | # vim backup files 6 | *.swp 7 | # NFS file 8 | .nfs* 9 | 10 | # -- compiler-related -- 11 | 12 | # object files 13 | # NOTE: This will result in git also excluding the top-level obj directory 14 | # since its only contents are .o files. 15 | *.o 16 | # static library archives 17 | # NOTE: This will result in git also excluding the top-level lib directory 18 | # since its only contents are .a files. 19 | *.a 20 | *.so 21 | *.so.* 22 | # test executables 23 | *.x 24 | *.pexe 25 | *.nexe 26 | *.js 27 | # link map files 28 | *.map 29 | 30 | # -- build system files -- 31 | 32 | config.mk 33 | bli_config.h 34 | 35 | # -- monolithic headers -- 36 | 37 | include/*/*.h 38 | 39 | # -- makefile fragments -- 40 | 41 | .fragment.mk 42 | 43 | # -- misc.
-- 44 | 45 | # BLIS testsuite output file 46 | output.testsuite.* 47 | 48 | # BLAS test output files 49 | out.* 50 | 51 | # GTAGS database 52 | GPATH 53 | GRTAGS 54 | GTAGS 55 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | 2 | BLIS framework 3 | INSTALL 4 | --- 5 | 6 | For a detailed description of how to configure, compile, install, and link 7 | to a BLIS library on your local system, please read the build system 8 | documentation located in: 9 | 10 | docs/BuildSystem.md 11 | 12 | Note that the document's markdown content can be conveniently rendered by 13 | viewing the file over GitHub via a web browser: 14 | 15 | https://github.com/flame/blis/blob/master/docs/BuildSystem.md 16 | 17 | This document will always contain the most up-to-date information related 18 | to instantiating a BLIS library from the framework source code. If you have 19 | any further questions or wish to provide feedback, please contact the BLIS 20 | community by posting your message to the BLIS developer's mailing list: 21 | 22 | https://groups.google.com/d/forum/blis-devel 23 | 24 | Thanks for your interest in the BLIS framework! 25 | 26 | Field Van Zee 27 | 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This file describes this port of BLIS to Apple's matrix coprocessor. 2 | 3 | - Here is the original BLIS [README](README_BLIS.md). 4 | 5 | As of July 2021, the coprocessor is undocumented, but access to it is not restricted either. (Any user / program is allowed to invoke this coprocessor and it's supposed to be safe.) This work is based on Dougall Johnson's effort to analyze the related instructions. 6 | 7 | Known issues: 8 | - Generic-strided is not supported by our microkernels for the destination matrix.
The program will `assert(false)` upon encountering such a situation. 9 | - TRSM might fail. Try commenting out the function call to `bli_cntx_set_packm_kers` in `config/aaplmx/bli_cntx_init_aaplmx.c` if you need TRSM to work. 10 | 11 | Performance: 12 | 13 | ![](docs/graphs/aaplmx/output_st_dgemm_asm_blis.png) 14 | ![](docs/graphs/aaplmx/output_st_sgemm_asm_blis.png) 15 | 16 | -------------------------------------------------------------------------------- /RELEASING: -------------------------------------------------------------------------------- 1 | Here are the steps to follow to create a new release (version) of BLIS: 2 | 3 | 1. Make sure there are no commits that have yet to be pulled into 4 | the local repository. 5 | 6 | $ git pull 7 | 8 | If there are any commits upstream, merge them as appropriate. 9 | 10 | 2. Verify that the code builds properly. 11 | 12 | $ ./configure auto; make 13 | 14 | 3. Verify that the code passes BLIS and BLAS tests: 15 | 16 | $ make check # BLIS testsuite (fast) + BLAS test drivers 17 | $ make checkblis # BLIS testsuite (full ex. mixed-datatype) 18 | $ make checkblis-md # BLIS testsuite (mixed-datatype only) 19 | $ make checkblis-salt # BLIS testsuite (fast + salt) 20 | 21 | 4. Draft a new announcement to blis-devel, crediting those who 22 | contributed towards this version by browsing 'git log'. 23 | 24 | 5. Update CREDITS file if 'git log' reveals any new contributors. 25 | 26 | 6. Update docs/ReleaseNotes.md file with body of finalized announcement 27 | and the date of the release. 28 | 29 | 7. Commit changes from steps 5 and 6. 30 | 31 | 8. Bump the version number: 32 | 33 | $ ./build/bump-version.sh "0.3.2" 34 | 35 | This will result in two new commits: a version file update and a CHANGELOG 36 | file update. 37 | 38 | 9. Push the new commits and new tag associated with the new version: 39 | 40 | $ git push 41 | $ git push --tag 42 | 43 | 10. Send finalized announcement to blis-devel.
44 | 45 | -------------------------------------------------------------------------------- /blastest/f2c/acos.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double r_acos(real *x) 31 | { 32 | return( acos(*x) ); 33 | } 34 | double d_acos(const doublereal *x) 35 | { 36 | return( acos(*x) ); 37 | } 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | -------------------------------------------------------------------------------- /blastest/f2c/asin.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double r_asin(real *x) 31 | { 32 | return( asin(*x) ); 33 | } 34 | double d_asin(const doublereal *x) 35 | { 36 | return( asin(*x) ); 37 | } 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | -------------------------------------------------------------------------------- /blastest/f2c/atan.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double r_atan(real *x) 31 | { 32 | return( atan(*x) ); 33 | } 34 | double d_atan(const doublereal *x) 35 | { 36 | return( atan(*x) ); 37 | } 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | -------------------------------------------------------------------------------- /blastest/f2c/atn2.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double r_atn2(real *x, real *y) 31 | { 32 | return( atan2(*x,*y) ); 33 | } 34 | double d_atn2(const doublereal *x, const doublereal *y) 35 | { 36 | return( atan2(*x,*y) ); 37 | } 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | -------------------------------------------------------------------------------- /blastest/f2c/cnjg.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | void d_cnjg(doublecomplex *r, const doublecomplex *z) 31 | { 32 | doublereal zi = z->i; 33 | r->r = z->r; 34 | r->i = -zi; 35 | } 36 | void r_cnjg(complex *r, complex *z) 37 | { 38 | real zi = z->i; 39 | r->r = z->r; 40 | r->i = -zi; 41 | } 42 | 43 | #ifdef __cplusplus 44 | } 45 | #endif 46 | -------------------------------------------------------------------------------- /blastest/f2c/cosh.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double r_cosh(real *x) 31 | { 32 | return( cosh(*x) ); 33 | } 34 | double d_cosh(const doublereal *x) 35 | { 36 | return( cosh(*x) ); 37 | } 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | -------------------------------------------------------------------------------- /blastest/f2c/dolio.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include 25 | #include "f2c.h" 26 | #include "fio.h" 27 | 28 | integer do_lio(ftnint *type, ftnint *number, char *ptr, ftnlen len) 29 | { 30 | return((*f__lioproc)(number,ptr,len,*type)); 31 | } 32 | -------------------------------------------------------------------------------- /blastest/f2c/epsilon.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | #include "float.h" 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | real s_epsilon_( real* x ) 32 | { 33 | return FLT_EPSILON; 34 | } 35 | doublereal d_epsilon_( doublereal* x ) 36 | { 37 | return DBL_EPSILON; 38 | } 39 | 40 | #ifdef __cplusplus 41 | } 42 | #endif 43 | -------------------------------------------------------------------------------- /blastest/f2c/exit_.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | /* This gives the effect of 25 | 26 | subroutine exit(rc) 27 | integer*4 rc 28 | stop 29 | end 30 | 31 | * with the added side effect of supplying rc as the program's exit code. 32 | */ 33 | 34 | #include 35 | #include "f2c.h" 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | void exit_(integer *rc) 42 | { 43 | exit(*rc); 44 | } 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | -------------------------------------------------------------------------------- /blastest/f2c/h_dnnt.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | shortint h_dnnt(const doublereal *x) 31 | { 32 | return (shortint)round(*x); 33 | } 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /blastest/f2c/i_dnnt.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | integer i_dnnt(const doublereal *x) 31 | { 32 | return (integer)round(*x); 33 | } 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /blastest/f2c/i_len.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | integer i_len(const char *s, ftnlen n) 31 | { 32 | return(n); 33 | } 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /blastest/f2c/imag.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double r_imag(complex *z) 31 | { 32 | return z->i; 33 | } 34 | double d_imag(const doublecomplex *z) 35 | { 36 | return z->i; 37 | } 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | -------------------------------------------------------------------------------- /blastest/f2c/int.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double d_int(const doublereal *x) 31 | { 32 | return( (*x>0) ? floor(*x) : -floor(- *x) ); 33 | } 34 | double r_int(real *x) 35 | { 36 | return( (*x>0) ? floor(*x) : -floor(- *x) ); 37 | } 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | -------------------------------------------------------------------------------- /blastest/f2c/lg10.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double r_lg10(real *x) 31 | { 32 | return( log10(*x) ); 33 | } 34 | double d_lg10(const doublereal *x) 35 | { 36 | return( log10(*x) ); 37 | } 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | -------------------------------------------------------------------------------- /blastest/f2c/prod.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | #include "f2c.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | double d_prod(const real *x, const real *y) 31 | { 32 | return( (double)(*x) * (double)(*y) ); 33 | } 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /blastest/f2c/rewind.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | //#include 25 | #include "f2c.h" 26 | #include "fio.h" 27 | 28 | integer f_rew(alist *a) 29 | { 30 | unit *b; 31 | if(a->aunit>=MXUNIT || a->aunit<0) 32 | err(a->aerr,101,"rewind"); 33 | b = &f__units[a->aunit]; 34 | if(b->ufd == NULL || b->uwrt == 3) 35 | return(0); 36 | if(!b->useek) 37 | err(a->aerr,106,"rewind") 38 | if(b->uwrt) { 39 | (void) t_runc(a); 40 | b->uwrt = 3; 41 | } 42 | rewind(b->ufd); 43 | b->uend=0; 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /blastest/f2c/s_stop.c: -------------------------------------------------------------------------------- 1 | /**************************************************************** 2 | Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 3 | 4 | Permission to use, copy, modify, and distribute this software 5 | and its documentation for any purpose and without fee is hereby 6 | granted, provided that the above copyright notice appear in all 7 | copies and that both that the copyright notice and this 8 | permission notice and warranty disclaimer appear in supporting 9 | documentation, and that the names of AT&T, Bell Laboratories, 10 | Lucent or Bellcore or any of their entities not be used in 11 | advertising or publicity pertaining to distribution of the 12 | software without specific, written prior permission. 13 | 14 | AT&T, Lucent and Bellcore disclaim all warranties with regard to 15 | this software, including all implied warranties of 16 | merchantability and fitness. In no event shall AT&T, Lucent or 17 | Bellcore be liable for any special, indirect or consequential 18 | damages or any damages whatsoever resulting from loss of use, 19 | data or profits, whether in an action of contract, negligence or 20 | other tortious action, arising out of or in connection with the 21 | use or performance of this software. 
22 | ****************************************************************/ 23 | 24 | //#include 25 | #include 26 | #include 27 | #include "f2c.h" 28 | 29 | int s_stop(char *s, ftnlen n) 30 | { 31 | int i; 32 | 33 | if(n > 0) 34 | { 35 | fprintf(stderr, "STOP "); 36 | for(i = 0; i 2 | #include 3 | 4 | int main( int argc, char **argv ) 5 | { 6 | void* p = hbw_malloc( 4096 ); 7 | 8 | printf( "%s: hbw_malloc() returned %p\n", __FILE__, p ); 9 | 10 | return 0; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /build/detect/omp_simd/omp_simd_detect.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define ARRAY_LEN 4096 5 | 6 | double x[ ARRAY_LEN ]; 7 | double y[ ARRAY_LEN ]; 8 | 9 | int main( int argc, char **argv ) 10 | { 11 | const double alpha = 2.1; 12 | 13 | for ( int i = 0; i < ARRAY_LEN; ++i ) 14 | { 15 | y[ i ] = 0.0; 16 | x[ i ] = 1.0; 17 | } 18 | 19 | #pragma omp simd 20 | for ( int i = 0; i < ARRAY_LEN; ++i ) 21 | { 22 | y[ i ] += alpha * x[ i ]; 23 | } 24 | 25 | #if 0 26 | _Pragma( "omp simd" ) 27 | for ( int i = 0; i < ARRAY_LEN; ++i ) 28 | { 29 | x[ i ] += alpha * y[ i ]; 30 | } 31 | #endif 32 | 33 | return 0; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /build/gen-make-frags/ignore_list: -------------------------------------------------------------------------------- 1 | attic 2 | broken 3 | old 4 | other 5 | temp 6 | tmp 7 | test 8 | p10_testsuite -------------------------------------------------------------------------------- /build/gen-make-frags/special_list: -------------------------------------------------------------------------------- 1 | noopt 2 | kernels 3 | -------------------------------------------------------------------------------- /build/gen-make-frags/suffix_list: -------------------------------------------------------------------------------- 1 | c 2 | cc 3 | cpp 4 | cxx 5 
| s 6 | S 7 | -------------------------------------------------------------------------------- /build/templates/license.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2019, The University of Texas at Austin 8 | Copyright (C) 2018, Advanced Micro Devices, Inc. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are 12 | met: 13 | - Redistributions of source code must retain the above copyright 14 | notice, this list of conditions and the following disclaimer. 15 | - Redistributions in binary form must reproduce the above copyright 16 | notice, this list of conditions and the following disclaimer in the 17 | documentation and/or other materials provided with the distribution. 18 | - Neither the name(s) of the copyright holder(s) nor the names of its 19 | contributors may be used to endorse or promote products derived 20 | from this software without specific prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | 34 | */ 35 | -------------------------------------------------------------------------------- /build/templates/license.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2019, The University of Texas at Austin 8 | Copyright (C) 2018, Advanced Micro Devices, Inc. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are 12 | met: 13 | - Redistributions of source code must retain the above copyright 14 | notice, this list of conditions and the following disclaimer. 15 | - Redistributions in binary form must reproduce the above copyright 16 | notice, this list of conditions and the following disclaimer in the 17 | documentation and/or other materials provided with the distribution. 18 | - Neither the name(s) of the copyright holder(s) nor the names of its 19 | contributors may be used to endorse or promote products derived 20 | from this software without specific prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | 34 | */ 35 | -------------------------------------------------------------------------------- /build/templates/license.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # BLIS 4 | # An object-based framework for developing high-performance BLAS-like 5 | # libraries. 6 | # 7 | # Copyright (C) 2019, The University of Texas at Austin 8 | # Copyright (C) 2018, Advanced Micro Devices, Inc. 9 | # 10 | # Redistribution and use in source and binary forms, with or without 11 | # modification, are permitted provided that the following conditions are 12 | # met: 13 | # - Redistributions of source code must retain the above copyright 14 | # notice, this list of conditions and the following disclaimer. 15 | # - Redistributions in binary form must reproduce the above copyright 16 | # notice, this list of conditions and the following disclaimer in the 17 | # documentation and/or other materials provided with the distribution. 18 | # - Neither the name(s) of the copyright holder(s) nor the names of its 19 | # contributors may be used to endorse or promote products derived 20 | # from this software without specific prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | # 34 | # 35 | -------------------------------------------------------------------------------- /config/README.md: -------------------------------------------------------------------------------- 1 | 2 | For more information on sub-configurations and configuration families in BLIS, 3 | please read the Configuration Guide, which can be viewed in markdown-rendered 4 | form [from the BLIS wiki page](https://github.com/flame/blis/wiki/). 5 | 6 | If you don't have time, or are impatient, take a look at the `config_registry` 7 | file in the top-level directory of the BLIS distribution. It contains a 8 | grammar-like mapping of configuration names, or families, to sub-configurations, 9 | which may be other families. Keep in mind that the `/` notation: 10 | ``` 11 | <config>: <config>/<kernels> 12 | ``` 13 | means that the kernel set associated with `<kernels>` should be made available to 14 | the configuration `<config>` if `<config>` is targeted at configure-time. 15 | (Some configurations borrow kernels from other configurations, and this is how 16 | we specify that requirement.) 17 | 18 | -------------------------------------------------------------------------------- /config/intel64/bli_family_intel64.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | 33 | */ 34 | 35 | //#ifndef BLIS_FAMILY_H 36 | //#define BLIS_FAMILY_H 37 | 38 | 39 | 40 | //#endif 41 | 42 | -------------------------------------------------------------------------------- /config/old/newarch/bli_kernel.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #ifndef BLIS_KERNEL_H 36 | #define BLIS_KERNEL_H 37 | 38 | 39 | 40 | #endif 41 | 42 | -------------------------------------------------------------------------------- /config/x86_64/bli_family_x86_64.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | //#ifndef BLIS_FAMILY_H 36 | //#define BLIS_FAMILY_H 37 | 38 | 39 | 40 | //#endif 41 | 42 | -------------------------------------------------------------------------------- /config_registry: -------------------------------------------------------------------------------- 1 | # 2 | # config_registry 3 | # 4 | # Please refer to the BLIS wiki on configurations for information on the 5 | # syntax and semantics of this file [1]. 6 | # 7 | # [1] https://github.com/flame/blis/blob/master/docs/ConfigurationHowTo.md 8 | # 9 | 10 | # Processor families. 11 | x86_64: intel64 amd64 12 | intel64: skx knl haswell sandybridge penryn generic 13 | amd64: zen2 zen excavator steamroller piledriver bulldozer generic 14 | # NOTE: ARM families will remain disabled until runtime hardware detection 15 | # logic is added to BLIS. 16 | # NOTE: Additionally, Apple processors cannot be detected. Hence no 17 | # auto-detection code is hereby written for A14 or M1. 18 | #arm64: cortexa57 generic 19 | #arm32: cortexa15 cortexa9 generic 20 | 21 | # Intel architectures. 
22 | skx: skx/skx/haswell/zen 23 | knl: knl/knl/haswell/zen 24 | haswell: haswell/haswell/zen 25 | sandybridge: sandybridge 26 | penryn: penryn 27 | 28 | # AMD architectures. 29 | zen2: zen2/zen2/zen/haswell 30 | zen: zen/zen/haswell 31 | excavator: excavator/piledriver 32 | steamroller: steamroller/piledriver 33 | piledriver: piledriver 34 | bulldozer: bulldozer 35 | 36 | # ARM architectures. 37 | armsve: armsve/armsve 38 | a64fx: a64fx/armsve 39 | aaplmx: aaplmx/armv8a/aaplmx 40 | thunderx2: thunderx2/armv8a 41 | cortexa57: cortexa57/armv8a 42 | cortexa53: cortexa53/armv8a 43 | cortexa15: cortexa15/armv7a 44 | cortexa9: cortexa9/armv7a 45 | 46 | # IBM architectures. 47 | power10: power10 48 | power9: power9 49 | bgq: bgq 50 | 51 | # Generic architectures. 52 | generic: generic 53 | -------------------------------------------------------------------------------- /docs/graphs/aaplmx/output_st_dgemm_asm_blis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/aaplmx/output_st_dgemm_asm_blis.png -------------------------------------------------------------------------------- /docs/graphs/aaplmx/output_st_sgemm_asm_blis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/aaplmx/output_st_sgemm_asm_blis.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_a64fx_jc1ic1jr12_nt12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_a64fx_jc1ic1jr12_nt12.pdf -------------------------------------------------------------------------------- 
/docs/graphs/large/l3_perf_a64fx_jc1ic1jr12_nt12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_a64fx_jc1ic1jr12_nt12.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_a64fx_jc1ic4jr12_nt48.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_a64fx_jc1ic4jr12_nt48.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_a64fx_jc1ic4jr12_nt48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_a64fx_jc1ic4jr12_nt48.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_a64fx_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_a64fx_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_a64fx_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_a64fx_nt1.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_has_jc2ic3jr2_nt12.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_has_jc2ic3jr2_nt12.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_has_jc2ic3jr2_nt12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_has_jc2ic3jr2_nt12.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_has_jc4ic3jr2_nt24.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_has_jc4ic3jr2_nt24.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_has_jc4ic3jr2_nt24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_has_jc4ic3jr2_nt24.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_has_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_has_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_has_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_has_nt1.png -------------------------------------------------------------------------------- 
/docs/graphs/large/l3_perf_skx_jc2ic13_nt26.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_skx_jc2ic13_nt26.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_skx_jc2ic13_nt26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_skx_jc2ic13_nt26.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_skx_jc4ic13_nt52.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_skx_jc4ic13_nt52.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_skx_jc4ic13_nt52.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_skx_jc4ic13_nt52.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_skx_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_skx_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_skx_nt1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_skx_nt1.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_tx2_jc4ic7_nt28.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_tx2_jc4ic7_nt28.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_tx2_jc4ic7_nt28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_tx2_jc4ic7_nt28.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_tx2_jc8ic7_nt56.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_tx2_jc8ic7_nt56.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_tx2_jc8ic7_nt56.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_tx2_jc8ic7_nt56.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_tx2_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_tx2_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_tx2_nt1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_tx2_nt1.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen2_jc4ic4jr4_nt64.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen2_jc4ic4jr4_nt64.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen2_jc4ic4jr4_nt64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen2_jc4ic4jr4_nt64.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen2_jc8ic4jr4_nt128.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen2_jc8ic4jr4_nt128.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen2_jc8ic4jr4_nt128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen2_jc8ic4jr4_nt128.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen2_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen2_nt1.pdf 
-------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen2_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen2_nt1.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen_jc1ic8jr4_nt32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen_jc1ic8jr4_nt32.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen_jc1ic8jr4_nt32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen_jc1ic8jr4_nt32.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen_jc2ic8jr4_nt64.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen_jc2ic8jr4_nt64.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen_jc2ic8jr4_nt64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen_jc2ic8jr4_nt64.png -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen_nt1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/large/l3_perf_zen_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/large/l3_perf_zen_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_has_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_has_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_has_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_has_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_has_nt12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_has_nt12.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_has_nt12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_has_nt12.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_kbl_nt1.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_kbl_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_kbl_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_kbl_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_kbl_nt4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_kbl_nt4.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_kbl_nt4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_kbl_nt4.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_zen2_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_zen2_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_zen2_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_zen2_nt1.png -------------------------------------------------------------------------------- 
/docs/graphs/sup/dgemm_ccc_zen2_nt32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_zen2_nt32.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_zen2_nt32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_zen2_nt32.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_zen_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_zen_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_zen_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_zen_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_zen_nt32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_zen_nt32.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_ccc_zen_nt32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_ccc_zen_nt32.png 
-------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_has_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_has_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_has_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_has_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_has_nt12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_has_nt12.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_has_nt12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_has_nt12.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_kbl_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_kbl_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_kbl_nt1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_kbl_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_kbl_nt4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_kbl_nt4.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_kbl_nt4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_kbl_nt4.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_zen2_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_zen2_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_zen2_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_zen2_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_zen2_nt32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_zen2_nt32.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_zen2_nt32.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_zen2_nt32.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_zen_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_zen_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_zen_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_zen_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_zen_nt32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_zen_nt32.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/dgemm_rrr_zen_nt32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/dgemm_rrr_zen_nt32.png -------------------------------------------------------------------------------- /docs/graphs/sup/sgemm_ccc_zen2_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/sgemm_ccc_zen2_nt1.pdf -------------------------------------------------------------------------------- 
/docs/graphs/sup/sgemm_ccc_zen2_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/sgemm_ccc_zen2_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/sgemm_ccc_zen2_nt32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/sgemm_ccc_zen2_nt32.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/sgemm_ccc_zen2_nt32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/sgemm_ccc_zen2_nt32.png -------------------------------------------------------------------------------- /docs/graphs/sup/sgemm_rrr_zen2_nt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/sgemm_rrr_zen2_nt1.pdf -------------------------------------------------------------------------------- /docs/graphs/sup/sgemm_rrr_zen2_nt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/sgemm_rrr_zen2_nt1.png -------------------------------------------------------------------------------- /docs/graphs/sup/sgemm_rrr_zen2_nt32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/sgemm_rrr_zen2_nt32.pdf 
-------------------------------------------------------------------------------- /docs/graphs/sup/sgemm_rrr_zen2_nt32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/docs/graphs/sup/sgemm_rrr_zen2_nt32.png -------------------------------------------------------------------------------- /examples/oapi/README: -------------------------------------------------------------------------------- 1 | 2 | BLIS object API examples 3 | ------------------------ 4 | 5 | This directory contains several files, each containing various pieces of 6 | example code that demonstrate core functionality of the object API in BLIS. 7 | These example files should be thought of collectively like a tutorial, and 8 | therefore it is recommended to start from the beginning (the file that 9 | starts in '00'). 10 | 11 | You can build all of the examples by simply running 'make' from this 12 | directory. (You can also run 'make clean'.) The makefile assumes that 13 | you've already configured and built (but not necessarily installed) BLIS 14 | two directories up, in "../..". If you have already installed BLIS to 15 | some permanent directory, you may refer to that installation by setting 16 | the environment variable BLIS_INSTALL_PATH prior to running make: 17 | 18 | export BLIS_INSTALL_PATH=/usr/local; make 19 | 20 | or by setting the same variable as part of the make command: 21 | 22 | make BLIS_INSTALL_PATH=/usr/local 23 | 24 | Once the executable files have been built, we recommend reading the code in 25 | one terminal window alongside the executable output in another terminal. 26 | This will help you see the effects of each section of code. 27 | 28 | This tutorial is not exhaustive or complete; several object API functions 29 | were omitted (mostly for brevity's sake) and thus more examples could be 30 | written. 
If you've found object functionality in BLIS and are unsure how to 31 | use it, or if you are unsure of what additional functionality is present in 32 | BLIS, please feel free to join and then start a discussion on the blis-devel 33 | mailing list [1]. 34 | 35 | Thanks for your interest in BLIS! 36 | 37 | [1] https://groups.google.com/d/forum/blis-devel 38 | 39 | -------------------------------------------------------------------------------- /examples/tapi/README: -------------------------------------------------------------------------------- 1 | 2 | BLIS typed API examples 3 | ----------------------- 4 | 5 | This directory contains several files, each containing various pieces of 6 | example code that demonstrate core functionality of the typed API in BLIS. 7 | These example files should be thought of collectively like a tutorial, and 8 | therefore it is recommended to start from the beginning (the file that 9 | starts in '00'). 10 | 11 | You can build all of the examples by simply running 'make' from this 12 | directory. (You can also run 'make clean'.) The makefile assumes that 13 | you've already configured and built (but not necessarily installed) BLIS 14 | two directories up, in "../..". If you have already installed BLIS to 15 | some permanent directory, you may refer to that installation by setting 16 | the environment variable BLIS_INSTALL_PATH prior to running make: 17 | 18 | export BLIS_INSTALL_PATH=/usr/local; make 19 | 20 | or by setting the same variable as part of the make command: 21 | 22 | make BLIS_INSTALL_PATH=/usr/local 23 | 24 | Once the executable files have been built, we recommend reading the code in 25 | one terminal window alongside the executable output in another terminal. 26 | This will help you see the effects of each section of code. 27 | 28 | This tutorial is not exhaustive or complete; many typed API functions 29 | were omitted (mostly for brevity's sake) and thus more examples could be 30 | written. 
If you've found typed functionality in BLIS and are unsure how to 31 | use it, or if you are unsure of what additional functionality is present in 32 | BLIS, please feel free to join and then start a discussion on the blis-devel 33 | mailing list [1]. 34 | 35 | Thanks for your interest in BLIS! 36 | 37 | [1] https://groups.google.com/d/forum/blis-devel 38 | 39 | -------------------------------------------------------------------------------- /frame/1/other/packv/bli_packv.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "blis.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/1/other/unpackv/bli_unpackv.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "blis.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/1m/other/bli_scalm.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_scalm_cntl.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/1m/packm/bli_packm_md.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_packm_blk_var1_md.h" 36 | #include "bli_packm_struc_cxk_md.h" 37 | 38 | -------------------------------------------------------------------------------- /frame/2/symv/bli_symv.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | // NOTE: level-2 control tree code is temporarily disabled. 36 | //#include "bli_symv_front.h" 37 | 38 | -------------------------------------------------------------------------------- /frame/2/syr/bli_syr.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 
17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | // NOTE: level-2 control tree code is temporarily disabled. 36 | //#include "bli_syr_front.h" 37 | 38 | -------------------------------------------------------------------------------- /frame/2/syr2/bli_syr2.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 
14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | // NOTE: level-2 control tree code is temporarily disabled. 36 | //#include "bli_syr2_front.h" 37 | 38 | -------------------------------------------------------------------------------- /frame/3/gemmt/bli_gemmt.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2020, Advanced Micro Devices, Inc. 
8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_gemmt_front.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/3/hemm/bli_hemm.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_hemm_front.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/3/her2k/bli_her2k.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_her2k_front.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/3/herk/bli_herk.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_herk_front.h" 36 | 37 | #include "bli_herk_var.h" 38 | 39 | -------------------------------------------------------------------------------- /frame/3/symm/bli_symm.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_symm_front.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/3/syr2k/bli_syr2k.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_syr2k_front.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/3/syrk/bli_syrk.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_syrk_front.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/3/trmm/bli_trmm.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | 33 | */ 34 | 35 | #include "bli_trmm_front.h" 36 | 37 | #include "bli_trmm_var.h" 38 | 39 | -------------------------------------------------------------------------------- /frame/3/trmm3/bli_trmm3.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #include "bli_trmm3_front.h" 36 | 37 | -------------------------------------------------------------------------------- /frame/base/bli_const.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | void bli_const_init( void ); 36 | void bli_const_finalize( void ); 37 | 38 | -------------------------------------------------------------------------------- /frame/base/bli_string.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | void bli_string_mkupper( char* s ); 36 | -------------------------------------------------------------------------------- /frame/base/noopt/bli_dlamch.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | bla_double bli_dlamch( bla_character* cmach, ftnlen cmach_len ); 36 | -------------------------------------------------------------------------------- /frame/base/noopt/bli_lsame.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | bla_logical bli_lsame( bla_character* ca, bla_character* cb, ftnlen ca_len, ftnlen cb_len ); 36 | -------------------------------------------------------------------------------- /frame/base/noopt/bli_slamch.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 
17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | bla_real bli_slamch( bla_character* cmach, ftnlen cmach_len ); 36 | -------------------------------------------------------------------------------- /frame/compat/cblas/cblas.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/frame/compat/cblas/cblas.tgz -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_caxpy.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_caxpy.c 5 | * 6 | * The program is a C interface to caxpy. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_caxpy( f77_int N, const void *alpha, const void *X, 14 | f77_int incX, void *Y, f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_caxpy( &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_ccopy.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_ccopy.c 5 | * 6 | * The program is a C interface to ccopy. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_ccopy( f77_int N, const void *X, 14 | f77_int incX, void *Y, f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_ccopy( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_cdotc_sub.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_cdotc_sub.c 5 | * 6 | * The program is a C interface to cdotc. 7 | * It calls the fortran wrapper before calling cdotc. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | void cblas_cdotc_sub( f77_int N, const void *X, f77_int incX, 15 | const void *Y, f77_int incY,void *dotc) 16 | { 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #define F77_incY incY 23 | #endif 24 | F77_cdotc_sub( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)dotc); 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_cdotu_sub.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_cdotu_sub.c 5 | * 6 | * The program is a C interface to cdotu. 7 | * It calls the fortran wrapper before calling cdotu. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | void cblas_cdotu_sub( f77_int N, const void *X, 15 | f77_int incX, const void *Y, f77_int incY,void *dotu) 16 | { 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #define F77_incY incY 23 | #endif 24 | F77_cdotu_sub( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)dotu); 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_cgeru.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_cgeru.c 5 | * The program is a C interface to cgeru.
6 | * 7 | * Keita Teranishi 5/20/98 8 | * 9 | */ 10 | #include "cblas.h" 11 | #include "cblas_f77.h" 12 | void cblas_cgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, 13 | const void *alpha, const void *X, f77_int incX, 14 | const void *Y, f77_int incY, void *A, f77_int lda) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_M M 20 | #define F77_N N 21 | #define F77_incX incX 22 | #define F77_incY incY 23 | #define F77_lda lda 24 | #endif 25 | 26 | extern int CBLAS_CallFromC; 27 | extern int RowMajorStrg; 28 | RowMajorStrg = 0; 29 | 30 | CBLAS_CallFromC = 1; 31 | 32 | if (order == CblasColMajor) 33 | { 34 | F77_cgeru( &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)A, 35 | &F77_lda); 36 | } 37 | else if (order == CblasRowMajor) 38 | { 39 | RowMajorStrg = 1; 40 | F77_cgeru( &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)Y, &F77_incY, (scomplex*)X, &F77_incX, (scomplex*)A, 41 | &F77_lda); 42 | } 43 | else cblas_xerbla(1, "cblas_cgeru","Illegal Order setting, %d\n", order); 44 | CBLAS_CallFromC = 0; 45 | RowMajorStrg = 0; 46 | return; 47 | } 48 | #endif 49 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_cscal.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_cscal.c 5 | * 6 | * The program is a C interface to cscal.f. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_cscal( f77_int N, const void *alpha, void *X, 14 | f77_int incX) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #endif 22 | F77_cscal( &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX); 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_csscal.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_csscal.c 5 | * 6 | * The program is a C interface to csscal. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_csscal( f77_int N, float alpha, void *X, 14 | f77_int incX) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #endif 22 | F77_csscal( &F77_N, &alpha, (scomplex*)X, &F77_incX); 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_cswap.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_cswap.c 5 | * 6 | * The program is a C interface to cswap. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_cswap( f77_int N, void *X, f77_int incX, void *Y, 14 | f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_cswap( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dasum.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_dasum.c 5 | * 6 | * The program is a C interface to dasum. 7 | * It calls the fortran wrapper before calling dasum. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | double cblas_dasum( f77_int N, const double *X, f77_int incX) 15 | { 16 | double asum; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_dasum_sub( &F77_N, X, &F77_incX, &asum); 24 | return asum; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_daxpy.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_daxpy.c 5 | * 6 | * The program is a C interface to daxpy. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_daxpy( f77_int N, double alpha, const double *X, 14 | f77_int incX, double *Y, f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_daxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dcopy.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_dcopy.c 5 | * 6 | * The program is a C interface to dcopy. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_dcopy( f77_int N, const double *X, 14 | f77_int incX, double *Y, f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_dcopy( &F77_N, X, &F77_incX, Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_ddot.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_ddot.c 5 | * 6 | * The program is a C interface to ddot. 7 | * It calls the fortran wrapper before calling ddot. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | double cblas_ddot( f77_int N, const double *X, 15 | f77_int incX, const double *Y, f77_int incY) 16 | { 17 | double dot; 18 | #ifdef F77_INT 19 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 20 | #else 21 | #define F77_N N 22 | #define F77_incX incX 23 | #define F77_incY incY 24 | #endif 25 | F77_ddot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot); 26 | return dot; 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dger.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * 5 | * cblas_dger.c 6 | * This program is a C interface to dger. 7 | * Written by Keita Teranishi 8 | * 4/6/1998 9 | * 10 | */ 11 | 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | void cblas_dger(enum CBLAS_ORDER order, f77_int M, f77_int N, 15 | double alpha, const double *X, f77_int incX, 16 | const double *Y, f77_int incY, double *A, f77_int lda) 17 | { 18 | #ifdef F77_INT 19 | F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; 20 | #else 21 | #define F77_M M 22 | #define F77_N N 23 | #define F77_incX incX 24 | #define F77_incY incY 25 | #define F77_lda lda 26 | #endif 27 | 28 | extern int CBLAS_CallFromC; 29 | extern int RowMajorStrg; 30 | RowMajorStrg = 0; 31 | 32 | CBLAS_CallFromC = 1; 33 | if (order == CblasColMajor) 34 | { 35 | F77_dger( &F77_M, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A, 36 | &F77_lda); 37 | } 38 | else if (order == CblasRowMajor) 39 | { 40 | RowMajorStrg = 1; 41 | F77_dger( &F77_N, &F77_M ,&alpha, Y, &F77_incY, X, &F77_incX, A, 42 | &F77_lda); 43 | 44 | } 45 | else cblas_xerbla(1, "cblas_dger", "Illegal Order setting, %d\n", order); 46 | CBLAS_CallFromC = 0; 47 | RowMajorStrg = 0; 48 | return; 49 | } 50 | #endif 51 | 
-------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dnrm2.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_dnrm2.c 5 | * 6 | * The program is a C interface to dnrm2. 7 | * It calls the fortran wrapper before calling dnrm2. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | double cblas_dnrm2( f77_int N, const double *X, f77_int incX) 15 | { 16 | double nrm2; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_dnrm2_sub( &F77_N, X, &F77_incX, &nrm2); 24 | return nrm2; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_drot.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_drot.c 5 | * 6 | * The program is a C interface to drot. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_drot(f77_int N, double *X, f77_int incX, 14 | double *Y, f77_int incY, const double c, const double s) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_drot(&F77_N, X, &F77_incX, Y, &F77_incY, &c, &s); 24 | return; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_drotg.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_drotg.c 5 | * 6 | * The program is a C interface to drotg.
7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_drotg( double *a, double *b, double *c, double *s) 14 | { 15 | F77_drotg(a,b,c,s); 16 | } 17 | #endif 18 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_drotm.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | #include "cblas.h" 4 | #include "cblas_f77.h" 5 | void cblas_drotm( f77_int N, double *X, f77_int incX, double *Y, 6 | f77_int incY, const double *P) 7 | { 8 | #ifdef F77_INT 9 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 10 | #else 11 | #define F77_N N 12 | #define F77_incX incX 13 | #define F77_incY incY 14 | #endif 15 | F77_drotm( &F77_N, X, &F77_incX, Y, &F77_incY, P); 16 | } 17 | #endif 18 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_drotmg.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_drotmg.c 5 | * 6 | * The program is a C interface to drotmg. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_drotmg( double *d1, double *d2, double *b1, 14 | const double b2, double *p) 15 | { 16 | F77_drotmg(d1,d2,b1,&b2,p); 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dscal.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_dscal.c 5 | * 6 | * The program is a C interface to dscal. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_dscal( f77_int N, double alpha, double *X, 14 | f77_int incX) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #endif 22 | F77_dscal( &F77_N, &alpha, X, &F77_incX); 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dsdot.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_dsdot.c 5 | * 6 | * The program is a C interface to dsdot. 7 | * It calls the fortran wrapper before calling dsdot. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | double cblas_dsdot( f77_int N, const float *X, 15 | f77_int incX, const float *Y, f77_int incY) 16 | { 17 | double dot; 18 | #ifdef F77_INT 19 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 20 | #else 21 | #define F77_N N 22 | #define F77_incX incX 23 | #define F77_incY incY 24 | #endif 25 | F77_dsdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot); 26 | return dot; 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dswap.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_dswap.c 5 | * 6 | * The program is a C interface to dswap. 7 | * 8 | * Written by Keita Teranishi.
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y, 14 | f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_dswap( &F77_N, X, &F77_incX, Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dzasum.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_dzasum.c 5 | * 6 | * The program is a C interface to dzasum. 7 | * It calls the fortran wrapper before calling dzasum. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | double cblas_dzasum( f77_int N, const void *X, f77_int incX) 15 | { 16 | double asum; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_dzasum_sub( &F77_N, X, &F77_incX, &asum); 24 | return asum; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_dznrm2.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_dznrm2.c 5 | * 6 | * The program is a C interface to dznrm2. 7 | * It calls the fortran wrapper before calling dznrm2. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | double cblas_dznrm2( f77_int N, const void *X, f77_int incX) 15 | { 16 | double nrm2; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_dznrm2_sub( &F77_N, X, &F77_incX, &nrm2); 24 | return nrm2; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_globals.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | int CBLAS_CallFromC=0; 4 | int RowMajorStrg=0; 5 | #endif 6 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_icamax.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_icamax.c 5 | * 6 | * The program is a C interface to icamax. 7 | * It calls the fortran wrapper before calling icamax. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | f77_int cblas_icamax( f77_int N, const void *X, f77_int incX) 15 | { 16 | f77_int iamax; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_icamax_sub( &F77_N, (scomplex*)X, &F77_incX, &iamax); 24 | return iamax ? iamax-1 : 0; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_idamax.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_idamax.c 5 | * 6 | * The program is a C interface to idamax. 7 | * It calls the fortran wrapper before calling idamax. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | f77_int cblas_idamax( f77_int N, const double *X, f77_int incX) 15 | { 16 | f77_int iamax; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_idamax_sub( &F77_N, X, &F77_incX, &iamax); 24 | return iamax ? iamax-1 : 0; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_isamax.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_isamax.c 5 | * 6 | * The program is a C interface to isamax. 7 | * It calls the fortran wrapper before calling isamax. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | f77_int cblas_isamax( f77_int N, const float *X, f77_int incX) 15 | { 16 | f77_int iamax; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_isamax_sub( &F77_N, X, &F77_incX, &iamax); 24 | return iamax ? iamax-1 : 0; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_izamax.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_izamax.c 5 | * 6 | * The program is a C interface to izamax. 7 | * It calls the fortran wrapper before calling izamax. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | f77_int cblas_izamax( f77_int N, const void *X, f77_int incX) 15 | { 16 | f77_int iamax; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_izamax_sub( &F77_N, (dcomplex*)X, &F77_incX, &iamax); 24 | return (iamax ? iamax-1 : 0); 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_sasum.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_sasum.c 5 | * 6 | * The program is a C interface to sasum. 7 | * It calls the fortran wrapper before calling sasum. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | float cblas_sasum( f77_int N, const float *X, f77_int incX) 15 | { 16 | float asum; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_sasum_sub( &F77_N, X, &F77_incX, &asum); 24 | return asum; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_saxpy.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_saxpy.c 5 | * 6 | * The program is a C interface to saxpy. 7 | * It calls the fortran wrapper before calling saxpy. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | void cblas_saxpy( f77_int N, float alpha, const float *X, 15 | f77_int incX, float *Y, f77_int incY) 16 | { 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #define F77_incY incY 23 | #endif 24 | F77_saxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY); 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_scasum.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_scasum.c 5 | * 6 | * The program is a C interface to scasum. 7 | * It calls the fortran wrapper before calling scasum. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | float cblas_scasum( f77_int N, const void *X, f77_int incX) 15 | { 16 | float asum; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_scasum_sub( &F77_N, X, &F77_incX, &asum); 24 | return asum; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_scnrm2.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_scnrm2.c 5 | * 6 | * The program is a C interface to scnrm2. 7 | * It calls the fortran wrapper before calling scnrm2. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | float cblas_scnrm2( f77_int N, const void *X, f77_int incX) 15 | { 16 | float nrm2; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_scnrm2_sub( &F77_N, X, &F77_incX, &nrm2); 24 | return nrm2; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_scopy.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_scopy.c 5 | * 6 | * The program is a C interface to scopy. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_scopy( f77_int N, const float *X, 14 | f77_int incX, float *Y, f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_scopy( &F77_N, X, &F77_incX, Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_sdot.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_sdot.c 5 | * 6 | * The program is a C interface to sdot. 7 | * It calls the fortran wrapper before calling sdot. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | float cblas_sdot( f77_int N, const float *X, 15 | f77_int incX, const float *Y, f77_int incY) 16 | { 17 | float dot; 18 | #ifdef F77_INT 19 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 20 | #else 21 | #define F77_N N 22 | #define F77_incX incX 23 | #define F77_incY incY 24 | #endif 25 | F77_sdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot); 26 | return dot; 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_sdsdot.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_sdsdot.c 5 | * 6 | * The program is a C interface to sdsdot. 7 | * It calls the fortran wrapper before calling sdsdot. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | float cblas_sdsdot( f77_int N, float alpha, const float *X, 15 | f77_int incX, const float *Y, f77_int incY) 16 | { 17 | float dot; 18 | #ifdef F77_INT 19 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 20 | #else 21 | #define F77_N N 22 | #define F77_incX incX 23 | #define F77_incY incY 24 | #endif 25 | F77_sdsdot_sub( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, &dot); 26 | return dot; 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_sger.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * 5 | * cblas_sger.c 6 | * This program is a C interface to sger. 
7 | * Written by Keita Teranishi 8 | * 4/6/1998 9 | * 10 | */ 11 | 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | void cblas_sger(enum CBLAS_ORDER order, f77_int M, f77_int N, 15 | const float alpha, const float *X, f77_int incX, 16 | const float *Y, f77_int incY, float *A, f77_int lda) 17 | { 18 | #ifdef F77_INT 19 | F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; 20 | #else 21 | #define F77_M M 22 | #define F77_N N 23 | #define F77_incX incX 24 | #define F77_incY incY 25 | #define F77_lda lda 26 | #endif 27 | 28 | extern int CBLAS_CallFromC; 29 | extern int RowMajorStrg; 30 | RowMajorStrg = 0; 31 | 32 | CBLAS_CallFromC = 1; 33 | if (order == CblasColMajor) 34 | { 35 | F77_sger( &F77_M, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A, 36 | &F77_lda); 37 | } 38 | else if (order == CblasRowMajor) 39 | { 40 | RowMajorStrg = 1; 41 | F77_sger( &F77_N, &F77_M, &alpha, Y, &F77_incY, X, &F77_incX, A, 42 | &F77_lda); 43 | } 44 | else cblas_xerbla(1, "cblas_sger", "Illegal Order setting, %d\n", order); 45 | CBLAS_CallFromC = 0; 46 | RowMajorStrg = 0; 47 | return; 48 | } 49 | #endif 50 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_snrm2.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_snrm2.c 5 | * 6 | * The program is a C interface to snrm2. 7 | * It calls the fortran wrapper before calling snrm2. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | float cblas_snrm2( f77_int N, const float *X, f77_int incX) 15 | { 16 | float nrm2; 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #endif 23 | F77_snrm2_sub( &F77_N, X, &F77_incX, &nrm2); 24 | return nrm2; 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_srot.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_srot.c 5 | * 6 | * The program is a C interface to srot. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_srot( f77_int N, float *X, f77_int incX, float *Y, 14 | f77_int incY, const float c, const float s) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_srot(&F77_N, X, &F77_incX, Y, &F77_incY, &c, &s); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_srotg.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_srotg.c 5 | * 6 | * The program is a C interface to srotg. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_srotg( float *a, float *b, float *c, float *s) 14 | { 15 | F77_srotg(a,b,c,s); 16 | } 17 | #endif 18 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_srotm.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_srotm.c 5 | * 6 | * The program is a C interface to srotm. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_srotm( f77_int N, float *X, f77_int incX, float *Y, 14 | f77_int incY, const float *P) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_srotm( &F77_N, X, &F77_incX, Y, &F77_incY, P); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_srotmg.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_srotmg.c 5 | * 6 | * The program is a C interface to srotmg. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_srotmg( float *d1, float *d2, float *b1, 14 | const float b2, float *p) 15 | { 16 | F77_srotmg(d1,d2,b1,&b2,p); 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_sscal.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_sscal.c 5 | * 6 | * The program is a C interface to sscal. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_sscal( f77_int N, float alpha, float *X, 14 | f77_int incX) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #endif 22 | F77_sscal( &F77_N, &alpha, X, &F77_incX); 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_sswap.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_sswap.c 5 | * 6 | * The program is a C interface to sswap. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y, 14 | f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_sswap( &F77_N, X, &F77_incX, Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_zaxpy.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_zaxpy.c 5 | * 6 | * The program is a C interface to zaxpy. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_zaxpy( f77_int N, const void *alpha, const void *X, 14 | f77_int incX, void *Y, f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_zaxpy( &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_zcopy.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_zcopy.c 5 | * 6 | * The program is a C interface to zcopy. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_zcopy( f77_int N, const void *X, 14 | f77_int incX, void *Y, f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_zcopy( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_zdotc_sub.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_zdotc_sub.c 5 | * 6 | * The program is a C interface to zdotc. 7 | * It calls the fortran wrapper before calling zdotc. 8 | * 9 | * Written by Keita Teranishi. 
2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | void cblas_zdotc_sub( f77_int N, const void *X, f77_int incX, 15 | const void *Y, f77_int incY, void *dotc) 16 | { 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #define F77_incY incY 23 | #endif 24 | F77_zdotc_sub( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)dotc); 25 | return; 26 | } 27 | #endif 28 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_zdotu_sub.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_zdotu_sub.c 5 | * 6 | * The program is a C interface to zdotu. 7 | * It calls the fortran wrapper before calling zdotu. 8 | * 9 | * Written by Keita Teranishi. 2/11/1998 10 | * 11 | */ 12 | #include "cblas.h" 13 | #include "cblas_f77.h" 14 | void cblas_zdotu_sub( f77_int N, const void *X, f77_int incX, 15 | const void *Y, f77_int incY, void *dotu) 16 | { 17 | #ifdef F77_INT 18 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 19 | #else 20 | #define F77_N N 21 | #define F77_incX incX 22 | #define F77_incY incY 23 | #endif 24 | F77_zdotu_sub( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)dotu); 25 | return; 26 | } 27 | #endif 28 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_zdscal.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_zdscal.c 5 | * 6 | * The program is a C interface to zdscal. 7 | * 8 | * Written by Keita Teranishi. 
2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_zdscal( f77_int N, double alpha, void *X, 14 | f77_int incX) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #endif 22 | F77_zdscal( &F77_N, &alpha, (dcomplex*)X, &F77_incX); 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_zgeru.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_zgeru.c 5 | * The program is a C interface to zgeru. 6 | * 7 | * Keita Teranishi 5/20/98 8 | * 9 | */ 10 | #include "cblas.h" 11 | #include "cblas_f77.h" 12 | void cblas_zgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, 13 | const void *alpha, const void *X, f77_int incX, 14 | const void *Y, f77_int incY, void *A, f77_int lda) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_M M 20 | #define F77_N N 21 | #define F77_incX incX 22 | #define F77_incY incY 23 | #define F77_lda lda 24 | #endif 25 | 26 | extern int CBLAS_CallFromC; 27 | extern int RowMajorStrg; 28 | RowMajorStrg = 0; 29 | CBLAS_CallFromC = 1; 30 | 31 | if (order == CblasColMajor) 32 | { 33 | F77_zgeru( &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)A, 34 | &F77_lda); 35 | } 36 | else if (order == CblasRowMajor) 37 | { 38 | RowMajorStrg = 1; 39 | F77_zgeru( &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)Y, &F77_incY, (dcomplex*)X, &F77_incX, (dcomplex*)A, 40 | &F77_lda); 41 | } 42 | else cblas_xerbla(1, "cblas_zgeru", "Illegal Order setting, %d\n", order); 43 | CBLAS_CallFromC = 0; 44 | RowMajorStrg = 0; 45 | return; 46 | } 47 | #endif 48 | -------------------------------------------------------------------------------- 
/frame/compat/cblas/src/cblas_zscal.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_zscal.c 5 | * 6 | * The program is a C interface to zscal. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_zscal( f77_int N, const void *alpha, void *X, 14 | f77_int incX) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #endif 22 | F77_zscal( &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX); 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /frame/compat/cblas/src/cblas_zswap.c: -------------------------------------------------------------------------------- 1 | #include "blis.h" 2 | #ifdef BLIS_ENABLE_CBLAS 3 | /* 4 | * cblas_zswap.c 5 | * 6 | * The program is a C interface to zswap. 7 | * 8 | * Written by Keita Teranishi. 2/11/1998 9 | * 10 | */ 11 | #include "cblas.h" 12 | #include "cblas_f77.h" 13 | void cblas_zswap( f77_int N, void *X, f77_int incX, void *Y, 14 | f77_int incY) 15 | { 16 | #ifdef F77_INT 17 | F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; 18 | #else 19 | #define F77_N N 20 | #define F77_incX incX 21 | #define F77_incY incY 22 | #endif 23 | F77_zswap( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /frame/compat/check/bla_symm_check.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 
6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | 33 | */ 34 | 35 | #ifdef BLIS_ENABLE_BLAS 36 | 37 | #define bla_symm_check bla_hemm_check 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /frame/compat/check/bla_syr_check.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | 35 | #ifdef BLIS_ENABLE_BLAS 36 | 37 | #define bla_syr_check bla_her_check 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /frame/include/level0/old/bli_castfrom.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | -------------------------------------------------------------------------------- /frame/include/level0/old/bli_castto.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BLIS 4 | An object-based framework for developing high-performance BLAS-like 5 | libraries. 6 | 7 | Copyright (C) 2014, The University of Texas at Austin 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | - Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | - Neither the name(s) of the copyright holder(s) nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | */ 34 | -------------------------------------------------------------------------------- /kernels/generic/generic.txt: -------------------------------------------------------------------------------- 1 | 2 | generic.txt 3 | ----------- 4 | 5 | This file in 'kernels/generic' exists only to force 'git' to track what 6 | would otherwise be an empty directory. Having this empty directory is 7 | necessary because the 'generic' singleton family is defined in the 8 | configuration registry as: 9 | 10 | generic: generic 11 | 12 | which implies that the 'generic' sub-configuration depends on the 13 | 'generic' kernel set (because there were no complementary kernel sets 14 | specified via '/'). Thus, we need there to be a kernel set named 15 | 'generic', but we don't actually refer to any such kernels in BLIS. 16 | In other words, this file is simply a workaround to a quirk in the 17 | syntax and semantics of the config_registry file. 
18 | 19 | -FGVZ 20 | -------------------------------------------------------------------------------- /kernels/power7/3/test/Makefile: -------------------------------------------------------------------------------- 1 | 2 | CC = gcc 3 | TARGET_ARCH = -m64 -mvsx 4 | 5 | TGTS = exp 6 | 7 | KERNEL = bli_gemm_opt_8x4.o 8 | 9 | CFLAGS = -DUTEST -std=gnu99 -ggdb3 -Wall 10 | CFLAGS += -O3 11 | 12 | all: $(TGTS) 13 | 14 | exp: exp.o $(KERNEL) 15 | 16 | clean: 17 | rm -f $(TGTS) *.o 18 | 19 | -------------------------------------------------------------------------------- /kernels/power7/3/test/bli_gemm_power7_int_8x4.c: -------------------------------------------------------------------------------- 1 | ../bli_gemm_power7_int_8x4.c -------------------------------------------------------------------------------- /kernels/power7/3/test/blis_utest.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _BLIS_UTEST_H_ 3 | #define _BLIS_UTEST_H_ 4 | 5 | #define BLIS_DEFAULT_MR_S 8 6 | #define BLIS_DEFAULT_NR_S 4 7 | 8 | #define BLIS_DEFAULT_MR_D 8 9 | #define BLIS_DEFAULT_NR_D 4 10 | 11 | #define BLIS_DEFAULT_MR_C 8 12 | #define BLIS_DEFAULT_NR_C 4 13 | 14 | #define BLIS_DEFAULT_MR_Z 8 15 | #define BLIS_DEFAULT_NR_Z 4 16 | 17 | typedef unsigned long dim_t; 18 | typedef long inc_t; 19 | 20 | // Complex types 21 | typedef struct scomplex_s 22 | { 23 | float real; 24 | float imag; 25 | } scomplex; 26 | 27 | typedef struct dcomplex_s 28 | { 29 | double real; 30 | double imag; 31 | } dcomplex; 32 | 33 | #define bli_check_error_code(x) 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /kernels/zen/3/sup/broken/bli_gemmsup_rv_zen_asm_c3x8.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/kernels/zen/3/sup/broken/bli_gemmsup_rv_zen_asm_c3x8.c 
-------------------------------------------------------------------------------- /kernels/zen/3/sup/broken/bli_gemmsup_rv_zen_asm_c3x8m.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/kernels/zen/3/sup/broken/bli_gemmsup_rv_zen_asm_c3x8m.c -------------------------------------------------------------------------------- /kernels/zen/3/sup/broken/bli_gemmsup_rv_zen_asm_z3x4.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/kernels/zen/3/sup/broken/bli_gemmsup_rv_zen_asm_z3x4.c -------------------------------------------------------------------------------- /kernels/zen/3/sup/broken/bli_gemmsup_rv_zen_asm_z3x4m.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/kernels/zen/3/sup/broken/bli_gemmsup_rv_zen_asm_z3x4m.c -------------------------------------------------------------------------------- /sandbox/appleamx2/old/amx_testsuite/Makefile: -------------------------------------------------------------------------------- 1 | BLIS_PATH := ../../../.. 
2 | 3 | -include $(BLIS_PATH)/config.mk 4 | 5 | INC_PATH = $(BLIS_PATH)/include/$(CONFIG_NAME) 6 | LIBBLIS_L = $(BLIS_PATH)/lib/$(CONFIG_NAME)/libblis.a 7 | 8 | LINKER := $(CC) 9 | CFLAGS := -I $(INC_PATH) 10 | LDFLAGS := -lpthread -lm 11 | 12 | OBJS := $(patsubst %.c,%.o, $(wildcard *.c)) 13 | PERF_OBJS := performance.o 14 | COR_OBJS := correctness.o 15 | 16 | all: performance correctness 17 | 18 | $(OBJS): %.o: %.c %.h common.h ../../bli_sandbox.h $(INC_PATH)/blis.h 19 | $(CC) $(CFLAGS) -c $< -o $@ 20 | 21 | performance: $(PERF_OBJS) 22 | $(LINKER) $(PERF_OBJS) $(LIBBLIS_L) -o ./gather_perf.x $(LDFLAGS) 23 | 24 | correctness: $(COR_OBJS) 25 | $(LINKER) $(COR_OBJS) $(LIBBLIS_L) -o ./test_correctness.x $(LDFLAGS) 26 | 27 | csv_clean: 28 | rm -rf *.csv 29 | 30 | clean: 31 | rm -rf *.x *.o 32 | -------------------------------------------------------------------------------- /sandbox/appleamx2/old/amx_testsuite/common.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef COMMON_H 3 | #define COMMON_H 4 | 5 | // enumerate all datatypes that will be tested 6 | enum DATATYPES { 7 | FLOAT64, 8 | FLOAT32, 9 | FLOAT16, 10 | FLOAT16_32, 11 | INT16, 12 | INT16_32 13 | }; 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /sandbox/appleamx2/old/amx_testsuite/performance.h: -------------------------------------------------------------------------------- 1 | 2 | // function name template 3 | // each function that will gather perform will be named test_api 4 | #define GEN_PERF_FUNC_NAME_(ch) test_ ## ch ## api 5 | #define GEN_PERF_FUNC_NAME(ch) GEN_PERF_FUNC_NAME_(ch) 6 | 7 | /* 8 | Macro template for getting the best GEMM kernel runtime out of `num_runs` 9 | for matrices of size (m x n x k). 
10 | */ 11 | #define GET_PERF_API_TEMP(ch, kernel, input_t, output_t) \ 12 | double GEN_PERF_FUNC_NAME(ch) ( \ 13 | int num_runs, \ 14 | int m, \ 15 | int n, \ 16 | int k \ 17 | ) \ 18 | { \ 19 | input_t *A,*B; \ 20 | output_t *C; \ 21 | output_t alpha,beta; \ 22 | \ 23 | A = (input_t*) malloc(m*k*sizeof(input_t)); \ 24 | B = (input_t*) malloc(n*k*sizeof(input_t)); \ 25 | C = (output_t*) malloc(m*n*sizeof(output_t)); \ 26 | \ 27 | alpha = 1; \ 28 | beta = 0; \ 29 | \ 30 | double best = 1e9; \ 31 | \ 32 | for (int irep=0; irepapi 4 | #define GEN_PERF_FUNC_NAME_(ch) test_ ## ch ## api 5 | #define GEN_PERF_FUNC_NAME(ch) GEN_PERF_FUNC_NAME_(ch) 6 | 7 | /* 8 | Macro template for getting the best GEMM kernel runtime out of `num_runs` 9 | for matrices of size (m x n x k). 10 | */ 11 | #define GET_PERF_API_TEMP(ch, kernel, input_t, output_t) \ 12 | double GEN_PERF_FUNC_NAME(ch) ( \ 13 | int num_runs, \ 14 | int m, \ 15 | int n, \ 16 | int k \ 17 | ) \ 18 | { \ 19 | input_t *A,*B; \ 20 | output_t *C; \ 21 | output_t alpha,beta; \ 22 | \ 23 | A = (input_t*) malloc(m*k*sizeof(input_t)); \ 24 | B = (input_t*) malloc(n*k*sizeof(input_t)); \ 25 | C = (output_t*) malloc(m*n*sizeof(output_t)); \ 26 | \ 27 | alpha = 1; \ 28 | beta = 1; \ 29 | \ 30 | double best = 1e9; \ 31 | \ 32 | for (int irep=0; irep ${out_file}" 27 | 28 | # Run executable. 
29 | ./${exec_name} > ${out_file} 30 | 31 | sleep 1 32 | 33 | done 34 | done 35 | -------------------------------------------------------------------------------- /test/studies/skx/plot_skx_perf.m: -------------------------------------------------------------------------------- 1 | fontsize = 6; 2 | numcores = 4; 3 | freq = 3.5; 4 | sflopspercycle = 64; 5 | dflopspercycle = 32; 6 | 7 | speak = sflopspercycle*freq; 8 | dpeak = dflopspercycle*freq; 9 | 10 | xmax_mt = 5000; 11 | 12 | fig1 = figure(1); 13 | clf(fig1) 14 | % 15 | pathname = './20180711/'; 16 | plot_gemm_st_perf 17 | plot_syrk_st_perf 18 | plot_hemm_st_perf 19 | plot_trmm_st_perf 20 | 21 | fig1.PaperPositionMode = 'auto'; 22 | orient(fig1,'landscape') 23 | print(fig1, 'skx-st', '-dpdf','-fillpage') 24 | 25 | % fig1 = figure(2); 26 | % clf; 27 | % 28 | % plot_gemm_mt_perf 29 | % plot_syrk_mt_perf 30 | % plot_hemm_mt_perf 31 | % plot_trmm_mt_perf 32 | % 33 | % fig1.PaperPositionMode = 'auto'; 34 | % orient(fig1,'landscape') 35 | % print(fig1, 'A57-mt', '-dpdf','-fillpage') 36 | -------------------------------------------------------------------------------- /test/sup/octave/bkup/runthese.m: -------------------------------------------------------------------------------- 1 | % kabylake 2 | plot_panel_trxsh(3.80,16,1,'st','d','rrr',[ 6 8 4 ],'lds','uaub','../results/kabylake/20200302/mnkt100000_st','kbl','MKL','octave'); close; clear all; 3 | 4 | % haswell 5 | plot_panel_trxsh(3.5,16,1,'st','d','rrr',[ 6 8 4 ],'lds','uaub','../results/haswell/20200302/mnkt100000_st','has','MKL','octave'); close; clear all; 6 | 7 | % epyc 8 | plot_panel_trxsh(3.00, 8,1,'st','d','rrr',[ 6 8 4 ],'lds','uaub','../results/epyc/20200302/mnkt100000_st','epyc','MKL','octave'); close; clear all; 9 | -------------------------------------------------------------------------------- /test/sup/octave/load_data.m: -------------------------------------------------------------------------------- 1 | function [ r_val ] = load_data( ... 
2 | filetemp, ... 3 | dirpath, ... 4 | thr_str, ... 5 | opsupname, ... 6 | vartemp, ... 7 | opname, ... 8 | impl_str ... 9 | ) 10 | 11 | filepath = sprintf( filetemp, dirpath, thr_str, opsupname ); 12 | run( filepath ) 13 | varname = sprintf( vartemp, thr_str, opname, impl_str ); 14 | data = eval( varname ); % e.g. data_st_dgemm_blissup( :, : ); 15 | 16 | r_val = data; 17 | 18 | -------------------------------------------------------------------------------- /test/sup/old/octave_mt/runthese.m: -------------------------------------------------------------------------------- 1 | % kabylake 2 | plot_panel_trxsh(3.80,16,4,'mt','d','rrr',[ 6 8 10 ],'lds','uaub','../results/kabylake/20200302/mnkt100000_mt4','kbl','MKL','octave'); close; clear all; 3 | 4 | % haswell 5 | plot_panel_trxsh(3.1,16,12,'mt','d','rrr',[ 6 8 10 ],'lds','uaub','../results/haswell/20200302/mnkt100000_mt12','has','MKL','octave'); close; clear all; 6 | 7 | % epyc 8 | plot_panel_trxsh(2.55,8,32,'mt','d','rrr',[ 6 8 10 ],'lds','uaub','../results/epyc/20200302/mnkt100000_mt32','epyc','MKL','octave'); close; clear all; 9 | -------------------------------------------------------------------------------- /test/sup/old/octave_st/gen_opsupnames.m: -------------------------------------------------------------------------------- 1 | function [ r_val1, r_val2 ] = gen_opsupnames( ops, stor, smalldims, ldim, pack ) 2 | 3 | nops = size( ops, 1 ); 4 | 5 | smallm = smalldims( 1 ); 6 | smalln = smalldims( 2 ); 7 | smallk = smalldims( 3 ); 8 | 9 | i = 1; 10 | 11 | for io = 1:nops 12 | 13 | op = ops( io, : ); 14 | 15 | opsupnames( i+0, : ) = sprintf( '%s_%s_m%dnpkp_%s_%s', op, stor, smallm, ldim, pack ); 16 | opsupnames( i+1, : ) = sprintf( '%s_%s_mpn%dkp_%s_%s', op, stor, smalln, ldim, pack ); 17 | opsupnames( i+2, : ) = sprintf( '%s_%s_mpnpk%d_%s_%s', op, stor, smallk, ldim, pack ); 18 | opsupnames( i+3, : ) = sprintf( '%s_%s_mpn%dk%d_%s_%s', op, stor, smalln, smallk, ldim, pack ); 19 | opsupnames( i+4, : ) = 
sprintf( '%s_%s_m%dnpk%d_%s_%s', op, stor, smallm, smallk, ldim, pack ); 20 | opsupnames( i+5, : ) = sprintf( '%s_%s_m%dn%dkp_%s_%s', op, stor, smallm, smalln, ldim, pack ); 21 | opsupnames( i+6, : ) = sprintf( '%s_%s_mpnpkp_%s_%s', op, stor, ldim, pack ); 22 | 23 | opnames( i+0, : ) = sprintf( '%s', op ); 24 | opnames( i+1, : ) = sprintf( '%s', op ); 25 | opnames( i+2, : ) = sprintf( '%s', op ); 26 | opnames( i+3, : ) = sprintf( '%s', op ); 27 | opnames( i+4, : ) = sprintf( '%s', op ); 28 | opnames( i+5, : ) = sprintf( '%s', op ); 29 | opnames( i+6, : ) = sprintf( '%s', op ); 30 | 31 | i = i + 7; 32 | end 33 | 34 | r_val1 = opsupnames; 35 | r_val2 = opnames; 36 | 37 | -------------------------------------------------------------------------------- /test/sup/old/octave_st/runthese.m: -------------------------------------------------------------------------------- 1 | % kabylake 2 | plot_panel_trxsh(3.80,16,1,'st','d','rrr',[ 6 8 4 ],'lds','uaub','../results/kabylake/20200302/mnkt100000_st','kbl','MKL','octave'); close; clear all; 3 | 4 | % haswell 5 | plot_panel_trxsh(3.5,16,1,'st','d','rrr',[ 6 8 4 ],'lds','uaub','../results/haswell/20200302/mnkt100000_st','has','MKL','octave'); close; clear all; 6 | 7 | % epyc 8 | plot_panel_trxsh(3.00, 8,1,'st','d','rrr',[ 6 8 4 ],'lds','uaub','../results/epyc/20200302/mnkt100000_st','epyc','MKL','octave'); close; clear all; 9 | -------------------------------------------------------------------------------- /test/sup/old/supmt/octave/runthese.m: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | % kabylake 19 | plot_panel_trxsh(3.80,16,4,'mt','d','rrr',[ 6 8 10 ],'../../sup/results/kabylake/20200302/mnkt100000_mt4','kbl','MKL','octave'); close; clear all; 20 | 21 | % haswell 22 | plot_panel_trxsh(3.1,16,12,'mt','d','rrr',[ 6 8 10 ],'../../sup/results/haswell/20200302/mnkt100000_mt12','has','MKL','octave'); close; clear 
all; 23 | 24 | % epyc 25 | plot_panel_trxsh(2.55,8,32,'mt','d','rrr',[ 6 8 10 ],'../../sup/results/epyc/20200302/mnkt100000_mt32','epyc','MKL','octave'); close; clear all; 26 | -------------------------------------------------------------------------------- /test/sup/old/supst/octave/gen_opsupnames.m: -------------------------------------------------------------------------------- 1 | function [ r_val1, r_val2 ] = gen_opsupnames( ops, stor, smalldims ) 2 | 3 | nops = size( ops, 1 ); 4 | 5 | smallm = smalldims( 1 ); 6 | smalln = smalldims( 2 ); 7 | smallk = smalldims( 3 ); 8 | 9 | i = 1; 10 | 11 | for io = 1:nops 12 | 13 | op = ops( io, : ); 14 | 15 | opsupnames( i+0, : ) = sprintf( '%s_%s_m%dnpkp', op, stor, smallm ); 16 | opsupnames( i+1, : ) = sprintf( '%s_%s_mpn%dkp', op, stor, smalln ); 17 | opsupnames( i+2, : ) = sprintf( '%s_%s_mpnpk%d', op, stor, smallk ); 18 | opsupnames( i+3, : ) = sprintf( '%s_%s_mpn%dk%d', op, stor, smalln, smallk ); 19 | opsupnames( i+4, : ) = sprintf( '%s_%s_m%dnpk%d', op, stor, smallm, smallk ); 20 | opsupnames( i+5, : ) = sprintf( '%s_%s_m%dn%dkp', op, stor, smallm, smalln ); 21 | opsupnames( i+6, : ) = sprintf( '%s_%s_mpnpkp', op, stor ); 22 | 23 | opnames( i+0, : ) = sprintf( '%s', op ); 24 | opnames( i+1, : ) = sprintf( '%s', op ); 25 | opnames( i+2, : ) = sprintf( '%s', op ); 26 | opnames( i+3, : ) = sprintf( '%s', op ); 27 | opnames( i+4, : ) = sprintf( '%s', op ); 28 | opnames( i+5, : ) = sprintf( '%s', op ); 29 | opnames( i+6, : ) = sprintf( '%s', op ); 30 | 31 | i = i + 7; 32 | end 33 | 34 | r_val1 = opsupnames; 35 | r_val2 = opnames; 36 | 37 | -------------------------------------------------------------------------------- /test/sup/old/supst/octave/runthese.m: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | % kabylake 19 | plot_panel_trxsh(3.80,16,1,'st','d','rrr',[ 6 8 4 
],'../results/kabylake/20200302/mnkt100000_st','kbl','MKL','octave'); close; clear all; 20 | 21 | % haswell 22 | plot_panel_trxsh(3.5,16,1,'st','d','rrr',[ 6 8 4 ],'../results/haswell/20200302/mnkt100000_st','has','MKL','octave'); close; clear all; 23 | 24 | % epyc 25 | plot_panel_trxsh(3.00, 8,1,'st','d','rrr',[ 6 8 4 ],'../results/epyc/20200302/mnkt100000_st','epyc','MKL','octave'); close; clear all; 26 | -------------------------------------------------------------------------------- /testsuite/obj/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xrq-phys/blis_apple/ac36b825a0c8c599761ff21af768d82a85698ede/testsuite/obj/.gitkeep -------------------------------------------------------------------------------- /testsuite/old/jobscripts/cfig.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ~/blis 4 | ./configure power9 5 | echo "CONFIGURE DONE" 6 | -------------------------------------------------------------------------------- /testsuite/old/jobscripts/jb-cfig.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # execute in the general partition 4 | #SBATCH --partition=general 5 | 6 | # execute with 40 processes/tasks 7 | #SBATCH --ntasks=1 8 | 9 | # maximum time is 30 minutes 10 | #SBATCH --time=00:30:00 11 | 12 | # job name is my_job 13 | #SBATCH --job-name=blis 14 | 15 | # send email for status updates 16 | #SBATCH --mail-type=ALL,TIME_LIMIT 17 | #SBATCH --mail-user=ntukanov 18 | 19 | # change default output file name 20 | #SBATCH --output=cfig.out 21 | 22 | # load environment 23 | module load gcc/8.2 24 | 25 | # application execution 26 | srun cfig.sh 27 | -------------------------------------------------------------------------------- /testsuite/old/jobscripts/jb-mk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 
| 3 | # execute in the general partition 4 | #SBATCH --partition=general 5 | 6 | # execute with 40 processes/tasks 7 | #SBATCH --ntasks=1 8 | 9 | # maximum time is 30 minutes 10 | #SBATCH --time=00:30:00 11 | 12 | # job name is my_job 13 | #SBATCH --job-name=blis 14 | 15 | # send email for status updates 16 | #SBATCH --mail-type=ALL,TIME_LIMIT 17 | #SBATCH --mail-user=ntukanov 18 | 19 | # change default output file name 20 | #SBATCH --output=mk.out 21 | 22 | # load environment 23 | module load gcc/8.2 24 | 25 | # application execution 26 | srun mk.sh 27 | -------------------------------------------------------------------------------- /testsuite/old/jobscripts/jb-runtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # execute in the general partition 4 | #SBATCH --partition=general 5 | 6 | # execute with 40 processes/tasks 7 | #SBATCH --ntasks=1 8 | 9 | # maximum time is 30 minutes 10 | #SBATCH --time=00:30:00 11 | 12 | # job name is my_job 13 | #SBATCH --job-name=blis 14 | 15 | # send email for status updates 16 | #SBATCH --mail-type=ALL,TIME_LIMIT 17 | #SBATCH --mail-user=ntukanov 18 | 19 | # change default output file name 20 | #SBATCH --output=runtest.out 21 | 22 | # load environment 23 | module load gcc/8.2 24 | 25 | # application execution 26 | srun runtest.sh 27 | -------------------------------------------------------------------------------- /testsuite/old/jobscripts/mk.out: -------------------------------------------------------------------------------- 1 | Removing flattened header files from include/power9 2 | Removing object files from ./obj/power9 3 | srun: Job step aborted: Waiting up to 32 seconds for job step to finish. 
4 | srun: got SIGCONT 5 | slurmstepd: error: *** JOB 1155 ON lookout00 CANCELLED AT 2019-06-10T17:29:07 *** 6 | srun: forcing job termination 7 | slurmstepd: error: *** STEP 1155.0 ON lookout00 CANCELLED AT 2019-06-10T17:29:07 *** 8 | make: *** [cleanlib] Terminated 9 | srun: error: lookout00: task 0: Terminated 10 | -------------------------------------------------------------------------------- /testsuite/old/jobscripts/mk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ~/blis 4 | make clean 5 | make 6 | echo "MAKE DONE" 7 | -------------------------------------------------------------------------------- /testsuite/old/jobscripts/runtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ~/blis/testsuite 4 | rm -rf test_libblis.out 5 | make clean 6 | make -j 7 | ./test_libblis.x > test_libblis.out 8 | echo "TEST DONE" 9 | -------------------------------------------------------------------------------- /travis/do_testsuite.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | export BLIS_JC_NT=1 7 | export BLIS_IC_NT=2 8 | export BLIS_JR_NT=1 9 | export BLIS_IR_NT=1 10 | 11 | if [ "$TEST" = "FAST" ]; then 12 | make testblis-fast 13 | elif [ "$TEST" = "MD" ]; then 14 | make testblis-md 15 | elif [ "$TEST" = "SALT" ]; then 16 | # Disable multithreading within BLIS. 
17 | export BLIS_JC_NT=1 BLIS_IC_NT=1 BLIS_JR_NT=1 BLIS_IR_NT=1 18 | make testblis-salt 19 | else 20 | make testblis 21 | fi 22 | 23 | $DIST_PATH/testsuite/check-blistest.sh ./output.testsuite 24 | make testblas 25 | $DIST_PATH/blastest/check-blastest.sh 26 | 27 | -------------------------------------------------------------------------------- /travis/patch-ld-so.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # 4 | # Patch ld.so to disable runtime CPUID detection 5 | # Taken from https://stackoverflow.com/a/44483482 6 | # 7 | 8 | import re 9 | import sys 10 | 11 | infile, outfile = sys.argv[1:] 12 | d = open(infile, 'rb').read() 13 | # Match CPUID(eax=0), "xor eax,eax" followed closely by "cpuid" 14 | o = re.sub(b'(\x31\xc0.{0,32})\x0f\xa2', b'\\1\x66\x90', d) 15 | #assert d != o 16 | open(outfile, 'wb').write(o) 17 | -------------------------------------------------------------------------------- /vendor/testcpp/test.sh: -------------------------------------------------------------------------------- 1 | 2 | echo Build BLIS CPP Template tests 3 | make clean 4 | make 5 | 6 | echo Run tests 7 | ./test_asum_blis.x 8 | ./test_axpy_blis.x 9 | ./test_copy_blis.x 10 | ./test_dot_blis.x 11 | ./test_dotc_blis.x 12 | ./test_gbmv_blis.x 13 | ./test_gemm_blis.x 14 | ./test_gemv_blis.x 15 | ./test_ger_blis.x 16 | ./test_gerc_blis.x 17 | ./test_geru_blis.x 18 | ./test_hemm_blis.x 19 | ./test_hemv_blis.x 20 | ./test_her2_blis.x 21 | ./test_her_blis.x 22 | ./test_herk_blis.x 23 | ./test_hpr2_blis.x 24 | ./test_hpr_blis.x 25 | ./test_nrm2_blis.x 26 | ./test_rot_blis.x 27 | ./test_rotg_blis.x 28 | ./test_rotm_blis.x 29 | ./test_rotmg_blis.x 30 | ./test_scal_blis.x 31 | ./test_sdsdot_blis.x 32 | ./test_spr2_blis.x 33 | ./test_spr_blis.x 34 | ./test_swap_blis.x 35 | ./test_symm_blis.x 36 | ./test_syr2_blis.x 37 | ./test_syr2k_blis.x 38 | ./test_syr_blis.x 39 | ./test_syrk_blis.x 40 | ./test_tbmv_blis.x 41 | 
./test_tbsv_blis.x 42 | ./test_tpmv_blis.x 43 | ./test_tpsv_blis.x 44 | ./test_trmm_blis.x 45 | ./test_trsm_blis.x 46 | ./test_trsv_blis.x 47 | -------------------------------------------------------------------------------- /version: -------------------------------------------------------------------------------- 1 | 0.8.1 2 | --------------------------------------------------------------------------------