├── .gitignore ├── README.md ├── armv7a ├── armv7afp │ ├── README.md │ ├── clear.sh │ ├── gflops_benchmark.c │ ├── make.sh │ └── test.S └── src │ ├── MMult0.h │ ├── MMult1.h │ ├── MMult2.h │ ├── MMult_1x4_3.h │ ├── MMult_1x4_4.h │ ├── MMult_1x4_5.h │ ├── MMult_1x4_7.h │ ├── MMult_1x4_8.h │ ├── MMult_1x4x6.h │ ├── MMult_4x4_10.h │ ├── MMult_4x4_11.h │ ├── MMult_4x4_12.h │ ├── MMult_4x4_13.h │ ├── MMult_4x4_14.h │ ├── MMult_4x4_18.h │ ├── MMult_4x4_19.h │ ├── MMult_4x4_20.h │ ├── MMult_4x4_3.h │ ├── MMult_4x4_4.h │ ├── MMult_4x4_5.h │ ├── MMult_4x4_6.h │ ├── MMult_4x4_7.h │ ├── MMult_4x4_8.h │ ├── Makefile │ ├── README.md │ ├── convolution1x1s1_12x4.h │ ├── convolution1x1s1_4x4.h │ ├── convolution1x1s1_8x4.h │ ├── dclock.h │ ├── matrix_multiply_origin.h │ ├── now.txt │ ├── plot_gflops.py │ ├── test_matrix_multiply.cpp │ └── test_matrix_multiply_conv1x1.cpp ├── armv8a ├── armv8afp │ ├── README.md │ ├── clear.sh │ ├── gflops_benchmark.c │ ├── make.sh │ └── test.S └── src │ ├── MMult1.h │ ├── MMult2.h │ ├── MMult_1x4_3.h │ ├── MMult_1x4_4.h │ ├── MMult_1x4_5.h │ ├── MMult_1x4_7.h │ ├── MMult_1x4_8.h │ ├── MMult_4x4_10.h │ ├── MMult_4x4_11.h │ ├── MMult_4x4_13.h │ ├── MMult_4x4_18.h │ ├── MMult_4x4_3.h │ ├── MMult_4x4_4.h │ ├── MMult_4x4_5.h │ ├── MMult_4x4_6.h │ ├── MMult_4x4_7.h │ ├── MMult_4x4_8.h │ ├── Makefile │ ├── dclock.h │ ├── matrix_multiply_origin.h │ ├── plot_gflops.py │ └── test_matrix_multiply.cpp └── x86 ├── cpufp ├── LICENSE ├── README.md ├── build.sh ├── clean.sh ├── cpufp_kernel_x86.h ├── cpufp_kernel_x86_avx.s ├── cpufp_kernel_x86_avx512_vnni.s ├── cpufp_kernel_x86_avx512f.s ├── cpufp_kernel_x86_fma.s ├── cpufp_kernel_x86_sse.s ├── cpufp_x86.c ├── cpuid_x86.cpp ├── smtl.c └── smtl.h └── src ├── MMult0.h ├── MMult1.h ├── MMult2.h ├── MMult_1x4_3.h ├── MMult_1x4_4.h ├── MMult_1x4_5.h ├── MMult_1x4_6.h ├── MMult_1x4_7.h ├── MMult_1x4_8.h ├── MMult_1x4_9.h ├── MMult_1x4x6.h ├── MMult_4x4_10.h ├── MMult_4x4_11.h ├── MMult_4x4_12.h ├── MMult_4x4_13.h ├── MMult_4x4_14.h ├── MMult_4x4_3.h ├── MMult_4x4_4.h ├── MMult_4x4_5.h ├── MMult_4x4_6.h ├── MMult_4x4_7.h ├── MMult_4x4_8.h ├── Makefile ├── README.md ├── dclock.h ├── matrix_multiply_origin.h ├── now.txt ├── plot_gflops.py └── test_matrix_multiply.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | unit_test 3 | *.png 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/README.md -------------------------------------------------------------------------------- /armv7a/armv7afp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/armv7afp/README.md -------------------------------------------------------------------------------- /armv7a/armv7afp/clear.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/armv7afp/clear.sh -------------------------------------------------------------------------------- /armv7a/armv7afp/gflops_benchmark.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/armv7afp/gflops_benchmark.c -------------------------------------------------------------------------------- /armv7a/armv7afp/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/armv7afp/make.sh -------------------------------------------------------------------------------- /armv7a/armv7afp/test.S: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/armv7afp/test.S -------------------------------------------------------------------------------- /armv7a/src/MMult0.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult0.h -------------------------------------------------------------------------------- /armv7a/src/MMult1.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult1.h -------------------------------------------------------------------------------- /armv7a/src/MMult2.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult2.h -------------------------------------------------------------------------------- /armv7a/src/MMult_1x4_3.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_1x4_3.h -------------------------------------------------------------------------------- /armv7a/src/MMult_1x4_4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_1x4_4.h -------------------------------------------------------------------------------- /armv7a/src/MMult_1x4_5.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_1x4_5.h -------------------------------------------------------------------------------- /armv7a/src/MMult_1x4_7.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_1x4_7.h -------------------------------------------------------------------------------- /armv7a/src/MMult_1x4_8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_1x4_8.h -------------------------------------------------------------------------------- /armv7a/src/MMult_1x4x6.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_1x4x6.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_10.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_10.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_11.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_11.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_12.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_12.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_13.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_13.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_14.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_14.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_18.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_18.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_19.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_19.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_20.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_20.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_3.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_3.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_4.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_5.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_5.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_6.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_6.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_7.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_7.h -------------------------------------------------------------------------------- /armv7a/src/MMult_4x4_8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/MMult_4x4_8.h -------------------------------------------------------------------------------- /armv7a/src/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/Makefile -------------------------------------------------------------------------------- /armv7a/src/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/README.md -------------------------------------------------------------------------------- /armv7a/src/convolution1x1s1_12x4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/convolution1x1s1_12x4.h -------------------------------------------------------------------------------- /armv7a/src/convolution1x1s1_4x4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/convolution1x1s1_4x4.h -------------------------------------------------------------------------------- /armv7a/src/convolution1x1s1_8x4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/convolution1x1s1_8x4.h -------------------------------------------------------------------------------- /armv7a/src/dclock.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/dclock.h -------------------------------------------------------------------------------- /armv7a/src/matrix_multiply_origin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/matrix_multiply_origin.h -------------------------------------------------------------------------------- /armv7a/src/now.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/now.txt -------------------------------------------------------------------------------- /armv7a/src/plot_gflops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/plot_gflops.py -------------------------------------------------------------------------------- /armv7a/src/test_matrix_multiply.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/test_matrix_multiply.cpp -------------------------------------------------------------------------------- /armv7a/src/test_matrix_multiply_conv1x1.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv7a/src/test_matrix_multiply_conv1x1.cpp -------------------------------------------------------------------------------- /armv8a/armv8afp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/armv8afp/README.md -------------------------------------------------------------------------------- /armv8a/armv8afp/clear.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/armv8afp/clear.sh -------------------------------------------------------------------------------- /armv8a/armv8afp/gflops_benchmark.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/armv8afp/gflops_benchmark.c -------------------------------------------------------------------------------- /armv8a/armv8afp/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/armv8afp/make.sh -------------------------------------------------------------------------------- /armv8a/armv8afp/test.S: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/armv8afp/test.S -------------------------------------------------------------------------------- /armv8a/src/MMult1.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult1.h -------------------------------------------------------------------------------- /armv8a/src/MMult2.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult2.h -------------------------------------------------------------------------------- /armv8a/src/MMult_1x4_3.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_1x4_3.h -------------------------------------------------------------------------------- /armv8a/src/MMult_1x4_4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_1x4_4.h -------------------------------------------------------------------------------- /armv8a/src/MMult_1x4_5.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_1x4_5.h -------------------------------------------------------------------------------- /armv8a/src/MMult_1x4_7.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_1x4_7.h -------------------------------------------------------------------------------- /armv8a/src/MMult_1x4_8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_1x4_8.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_10.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_10.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_11.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_11.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_13.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_13.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_18.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_18.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_3.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_3.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_4.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_5.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_5.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_6.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_6.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_7.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_7.h -------------------------------------------------------------------------------- /armv8a/src/MMult_4x4_8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/MMult_4x4_8.h -------------------------------------------------------------------------------- /armv8a/src/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/Makefile -------------------------------------------------------------------------------- /armv8a/src/dclock.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/dclock.h -------------------------------------------------------------------------------- /armv8a/src/matrix_multiply_origin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/matrix_multiply_origin.h -------------------------------------------------------------------------------- /armv8a/src/plot_gflops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/plot_gflops.py -------------------------------------------------------------------------------- /armv8a/src/test_matrix_multiply.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/armv8a/src/test_matrix_multiply.cpp -------------------------------------------------------------------------------- /x86/cpufp/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/LICENSE -------------------------------------------------------------------------------- /x86/cpufp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/README.md -------------------------------------------------------------------------------- /x86/cpufp/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/build.sh -------------------------------------------------------------------------------- /x86/cpufp/clean.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/clean.sh -------------------------------------------------------------------------------- /x86/cpufp/cpufp_kernel_x86.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/cpufp_kernel_x86.h -------------------------------------------------------------------------------- /x86/cpufp/cpufp_kernel_x86_avx.s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/cpufp_kernel_x86_avx.s -------------------------------------------------------------------------------- /x86/cpufp/cpufp_kernel_x86_avx512_vnni.s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/cpufp_kernel_x86_avx512_vnni.s -------------------------------------------------------------------------------- /x86/cpufp/cpufp_kernel_x86_avx512f.s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/cpufp_kernel_x86_avx512f.s -------------------------------------------------------------------------------- /x86/cpufp/cpufp_kernel_x86_fma.s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/cpufp_kernel_x86_fma.s -------------------------------------------------------------------------------- /x86/cpufp/cpufp_kernel_x86_sse.s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/cpufp_kernel_x86_sse.s -------------------------------------------------------------------------------- /x86/cpufp/cpufp_x86.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/cpufp_x86.c -------------------------------------------------------------------------------- /x86/cpufp/cpuid_x86.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/cpuid_x86.cpp -------------------------------------------------------------------------------- /x86/cpufp/smtl.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/smtl.c -------------------------------------------------------------------------------- /x86/cpufp/smtl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/cpufp/smtl.h -------------------------------------------------------------------------------- /x86/src/MMult0.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult0.h -------------------------------------------------------------------------------- /x86/src/MMult1.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult1.h -------------------------------------------------------------------------------- /x86/src/MMult2.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult2.h -------------------------------------------------------------------------------- /x86/src/MMult_1x4_3.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_1x4_3.h -------------------------------------------------------------------------------- /x86/src/MMult_1x4_4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_1x4_4.h -------------------------------------------------------------------------------- /x86/src/MMult_1x4_5.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_1x4_5.h -------------------------------------------------------------------------------- /x86/src/MMult_1x4_6.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_1x4_6.h -------------------------------------------------------------------------------- /x86/src/MMult_1x4_7.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_1x4_7.h -------------------------------------------------------------------------------- /x86/src/MMult_1x4_8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_1x4_8.h -------------------------------------------------------------------------------- /x86/src/MMult_1x4_9.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_1x4_9.h -------------------------------------------------------------------------------- /x86/src/MMult_1x4x6.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_1x4x6.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_10.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_10.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_11.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_11.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_12.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_12.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_13.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_13.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_14.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_14.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_3.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_3.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_4.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_5.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_5.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_6.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_6.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_7.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_7.h -------------------------------------------------------------------------------- /x86/src/MMult_4x4_8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/MMult_4x4_8.h -------------------------------------------------------------------------------- /x86/src/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/Makefile -------------------------------------------------------------------------------- /x86/src/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/README.md -------------------------------------------------------------------------------- /x86/src/dclock.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/dclock.h -------------------------------------------------------------------------------- /x86/src/matrix_multiply_origin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/matrix_multiply_origin.h -------------------------------------------------------------------------------- /x86/src/now.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/now.txt -------------------------------------------------------------------------------- /x86/src/plot_gflops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/plot_gflops.py -------------------------------------------------------------------------------- /x86/src/test_matrix_multiply.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BBuf/how-to-optimize-gemm/HEAD/x86/src/test_matrix_multiply.cpp --------------------------------------------------------------------------------