├── .bazelrc ├── .clang-format ├── .github └── workflows │ ├── build-linux-windows.yml │ ├── build.yml │ ├── on-pr-approved.yml │ ├── on-pr-merge-to-master.yml │ ├── on-push.yml │ └── sde-tests-linux-windows.yml ├── .gitignore ├── BUILD.bazel ├── BUILD.md ├── CMakeLists.txt ├── CONTRIBUTING.md ├── DEPS ├── LICENSE ├── MODULE.bazel ├── README.md ├── WORKSPACE ├── WORKSPACE.bzlmod ├── bench ├── BUILD.bazel ├── CMakeLists.txt ├── bf16-gemm.cc ├── bgemm.h ├── conv.h ├── dconv.h ├── dwconv.h ├── f16-conv-hwc2chw.cc ├── f16-dwconv.cc ├── f16-dwconv2d-chw.cc ├── f16-f32acc-gemm.cc ├── f16-f32acc-igemm.cc ├── f16-gemm-minmax.cc ├── f16-gemm.cc ├── f16-igemm.cc ├── f16-raddstoreexpminusmax.cc ├── f16-vcmul.cc ├── f32-bgemm.cc ├── f32-conv-hwc.cc ├── f32-conv-hwc2chw.cc ├── f32-dwconv.cc ├── f32-dwconv2d-chw.cc ├── f32-gemm-minmax.cc ├── f32-gemm.cc ├── f32-igemm.cc ├── f32-qc4w-gemm.cc ├── f32-qc8w-gemm.cc ├── f32-raddexpminusmax.cc ├── f32-raddextexp.cc ├── f32-raddstoreexpminusmax.cc ├── f32-softmax.cc ├── f32-vcmul.cc ├── f32-vscaleexpminusmax.cc ├── f32-vscaleextexp.cc ├── gemm-benchmark.cc ├── gemm-benchmark.h ├── gemm.h ├── operators │ ├── BUILD │ ├── CMakeLists.txt │ ├── average-pooling.cc │ ├── batch-matrix-multiply.cc │ ├── binary.cc │ ├── convolution.cc │ ├── deconvolution.cc │ ├── fully-connected.cc │ ├── max-pooling.cc │ ├── prelu.cc │ ├── resize-bilinear-nhwc.cc │ ├── softmax.cc │ └── unary.cc ├── packq-benchmark.cc ├── packq-benchmark.h ├── packw-benchmark.h ├── pf16-gemm-minmax.cc ├── pf32-gemm-minmax.cc ├── pqs8-qc8w-gemm-minmax.cc ├── qb4-packw.cc ├── qd8-f16-qb4w-gemm.cc ├── qd8-f16-qc4w-gemm.cc ├── qd8-f16-qc8w-gemm.cc ├── qd8-f32-qb4w-gemm.cc ├── qd8-f32-qc2w-gemm.cc ├── qd8-f32-qc4w-gemm.cc ├── qd8-f32-qc8w-gemm.cc ├── qp8-f32-qb4w-gemm.cc ├── qp8-f32-qc4w-gemm.cc ├── qp8-f32-qc8w-gemm.cc ├── qs8-dwconv.cc ├── qs8-gemm.cc ├── qs8-packw.cc ├── qs8-qc4w-gemm-fp32.cc ├── qs8-qc4w-packw.cc ├── qs8-qc8w-gemm-fp32.cc ├── qu8-gemm-fp32.cc ├── qu8-gemm-rndnu.cc ├── qu8-gemm.cc ├── rdminmax.cc ├── rdsum.cc ├── rdsum2.cc ├── rminmax.cc ├── rsum.cc ├── rsum2.cc ├── spmm.cc ├── spmm.h ├── subgraph │ ├── BUILD │ ├── CMakeLists.txt │ ├── attention.cc │ ├── benchmark.cc │ ├── benchmark.h │ ├── binary.cc │ ├── convolution.cc │ ├── depthwise-separable.cc │ ├── elementwise.cc │ ├── fp32-mobilenet-v1.cc │ ├── fp32-mobilenet-v2.cc │ ├── fp32-mobilenet-v3-large.cc │ ├── fp32-mobilenet-v3-small.cc │ ├── fp32-transformer.cc │ ├── fully-connected.cc │ ├── l2-norm.cc │ ├── layer-norm.cc │ ├── mobilenet.cc │ ├── models.h │ ├── qd8-transformer.cc │ ├── qs8-mobilenet-v2.cc │ ├── simple_scheduler.h │ ├── softmax.cc │ ├── static-reduce.cc │ ├── transformer.cc │ └── unary.cc ├── utils.cc ├── utils.h ├── vbinary.cc ├── vunary.cc ├── x16-packw.cc ├── x16-x32-packw.cc ├── x32-packw.cc ├── x8-lut.cc ├── x8-packq.cc ├── x8-packw.cc ├── xN-transposec.cc └── xx-transposev.cc ├── cmake ├── DownloadCpuinfo.cmake ├── DownloadFXdiv.cmake ├── DownloadGoogleBenchmark.cmake ├── DownloadGoogleTest.cmake ├── DownloadKleidiAI.cmake ├── DownloadPThreadPool.cmake ├── aarch64.toolchain ├── armhf.toolchain ├── gen │ ├── aarch32_microkernels.cmake │ ├── aarch64_microkernels.cmake │ ├── amd64_microkernels.cmake │ ├── armsimd32_microkernels.cmake │ ├── avx256skx_microkernels.cmake │ ├── avx256vnni_microkernels.cmake │ ├── avx256vnnigfni_microkernels.cmake │ ├── avx2_microkernels.cmake │ ├── avx512amx_microkernels.cmake │ ├── avx512f_microkernels.cmake │ ├── avx512fp16_microkernels.cmake │ ├── avx512skx_microkernels.cmake │ ├── avx512vbmi_microkernels.cmake │ ├── avx512vnni_microkernels.cmake │ ├── avx512vnnigfni_microkernels.cmake │ ├── avx_microkernels.cmake │ ├── avxvnni_microkernels.cmake │ ├── avxvnniint8_microkernels.cmake │ ├── f16c_microkernels.cmake │ ├── fma3_microkernels.cmake │ ├── fp16arith_microkernels.cmake │ ├── hexagon_microkernels.cmake │ ├── hvx_microkernels.cmake │ ├── microkernels.cmake │ ├── neon_aarch64_microkernels.cmake │ ├── neon_microkernels.cmake │ ├── neonbf16_microkernels.cmake │ ├── neondot_aarch64_microkernels.cmake │ ├── neondot_microkernels.cmake │ ├── neondotfp16arith_microkernels.cmake │ ├── neonfma_aarch64_microkernels.cmake │ ├── neonfma_microkernels.cmake │ ├── neonfp16_microkernels.cmake │ ├── neonfp16arith_aarch64_microkernels.cmake │ ├── neonfp16arith_microkernels.cmake │ ├── neoni8mm_microkernels.cmake │ ├── neonsme2_microkernels.cmake │ ├── neonsme_microkernels.cmake │ ├── neonv8_microkernels.cmake │ ├── rvv_microkernels.cmake │ ├── rvvfp16arith_microkernels.cmake │ ├── scalar_microkernels.cmake │ ├── sme_aarch64_microkernels.cmake │ ├── sme_microkernels.cmake │ ├── sse2_microkernels.cmake │ ├── sse2fma_microkernels.cmake │ ├── sse41_microkernels.cmake │ ├── sse_microkernels.cmake │ ├── ssse3_microkernels.cmake │ ├── wasm32_microkernels.cmake │ ├── wasmrelaxedsimd32_microkernels.cmake │ ├── wasmrelaxedsimd_microkernels.cmake │ ├── wasmsimd32_microkernels.cmake │ └── wasmsimd_microkernels.cmake ├── hexagon.toolchain ├── hexagon.toolchain.v73 ├── riscv64.toolchain └── x64_arm64.toolchain ├── doc ├── dwconv.md ├── microkernel-enumerators.md └── microkernel-naming-conventions.md ├── emscripten.bzl ├── gemm_compiler ├── BUILD ├── aarch32_template.py ├── aarch64_template.py ├── arm_template.py ├── avx512bf16_template.py ├── avx512f_template.py ├── avx512vnni_template.py ├── base_architecture.py ├── fma3_template.py ├── generate.py ├── generate_bf16_f32_gemm_microkernels.py ├── generate_f32_gemm_microkernels.py ├── generate_gemm_microkernels_main.py ├── generate_qd8_f32_qc4w_gemm_microkernels.py ├── generate_qd8_f32_qc8w_gemm_microkernels.py ├── generate_qs8_qc4w_gemm_microkernels.py ├── generate_qs8_qc8w_gemm_microkernels.py ├── neondot_template.py ├── neonfma_template.py ├── neonmlal_aarch32_template.py └── x64_template.py ├── gen ├── BUILD ├── aarch32_microkernels.bzl ├── aarch64_microkernels.bzl ├── amd64_microkernels.bzl ├── armsimd32_microkernels.bzl ├── avx256skx_microkernels.bzl ├── avx256vnni_microkernels.bzl ├── avx256vnnigfni_microkernels.bzl ├── avx2_microkernels.bzl ├── avx512amx_microkernels.bzl ├── avx512f_microkernels.bzl ├── avx512fp16_microkernels.bzl ├── avx512skx_microkernels.bzl ├── avx512vbmi_microkernels.bzl ├── avx512vnni_microkernels.bzl ├── avx512vnnigfni_microkernels.bzl ├── avx_microkernels.bzl ├── avxvnni_microkernels.bzl ├── avxvnniint8_microkernels.bzl ├── f16c_microkernels.bzl ├── fma3_microkernels.bzl ├── fp16arith_microkernels.bzl ├── hexagon_microkernels.bzl ├── hvx_microkernels.bzl ├── microkernels.bzl ├── neon_aarch64_microkernels.bzl ├── neon_microkernels.bzl ├── neonbf16_microkernels.bzl ├── neondot_aarch64_microkernels.bzl ├── neondot_microkernels.bzl ├── neondotfp16arith_microkernels.bzl ├── neonfma_aarch64_microkernels.bzl ├── neonfma_microkernels.bzl ├── neonfp16_microkernels.bzl ├── neonfp16arith_aarch64_microkernels.bzl ├── neonfp16arith_microkernels.bzl ├── neoni8mm_microkernels.bzl ├── neonsme2_microkernels.bzl ├── neonsme_microkernels.bzl ├── neonv8_microkernels.bzl ├── rvv_microkernels.bzl ├── rvvfp16arith_microkernels.bzl ├── scalar_microkernels.bzl ├── sme_aarch64_microkernels.bzl ├── sme_microkernels.bzl ├── sse2_microkernels.bzl ├── sse2fma_microkernels.bzl ├── sse41_microkernels.bzl ├── sse_microkernels.bzl ├── ssse3_microkernels.bzl ├── wasm32_microkernels.bzl ├── wasmrelaxedsimd32_microkernels.bzl ├── wasmrelaxedsimd_microkernels.bzl ├── wasmsimd32_microkernels.bzl └── wasmsimd_microkernels.bzl ├── generated_file.bzl ├── include ├── experimental.h └── xnnpack.h ├── preamble.js.lds ├── register_extension_info.bzl ├── scripts ├── build-android-arm64.sh ├── build-android-armv7.sh ├── build-android-x86.sh ├── build-linux-aarch64.sh ├── build-linux-armhf.sh ├── build-linux-riscv64.sh ├── build-local.sh ├── build-qurt-v68.sh ├── build-qurt-v73.sh ├── build-wasm.sh ├── build-windows-arm64.cmd ├── build-windows-x64.cmd ├── build-windows-x86.cmd ├── check_config.py ├── check_files_changed.py ├── generate-bf16-gemm.sh ├── generate-build-identifier.py ├── generate-f16-avgpool.sh ├── generate-f16-dwconv.sh ├── generate-f16-dwconv2d-chw.sh ├── generate-f16-f32-vcvt.sh ├── generate-f16-f32acc-rdsum.sh ├── generate-f16-f32acc-rdsum2.sh ├── generate-f16-f32acc-rsum.sh ├── generate-f16-f32acc-rsum2.sh ├── generate-f16-gemm.sh ├── generate-f16-ibilinear-chw.sh ├── generate-f16-ibilinear.sh ├── generate-f16-igemm.sh ├── generate-f16-maxpool.sh ├── generate-f16-raddstoreexpminusmax.sh ├── generate-f16-rdminmax.sh ├── generate-f16-rminmax.sh ├── generate-f16-rsum.sh ├── generate-f16-spmm.sh ├── generate-f16-vapproxgelu.sh ├── generate-f16-vbinary.sh ├── generate-f16-vclamp.sh ├── generate-f16-vcos.sh ├── generate-f16-velu.sh ├── generate-f16-vexp.sh ├── generate-f16-vgelu.sh ├── generate-f16-vhswish.sh ├── generate-f16-vlrelu.sh ├── generate-f16-vmulcaddc.sh ├── generate-f16-vrnd.sh ├── generate-f16-vrsqrt.sh ├── generate-f16-vsigmoid.sh ├── generate-f16-vsin.sh ├── generate-f16-vsqrt.sh ├── generate-f16-vtanh.sh ├── generate-f16-vunary.sh ├── generate-f32-avgpool.sh ├── generate-f32-conv-hwc.sh ├── generate-f32-dwconv.sh ├── generate-f32-dwconv2d-chw.sh ├── generate-f32-f16-vcvt.sh ├── generate-f32-gemm.sh ├── generate-f32-ibilinear-chw.sh ├── generate-f32-ibilinear.sh ├── generate-f32-igemm.sh ├── generate-f32-maxpool.sh ├── generate-f32-ppmm.sh ├── generate-f32-qc4w-gemm.sh ├── generate-f32-qc8w-gemm.sh ├── generate-f32-qc8w-spmm.sh ├── generate-f32-qs8-vcvt.sh ├── generate-f32-raddexpminusmax.sh ├── generate-f32-raddextexp.sh ├── generate-f32-raddstoreexpminusmax.sh ├── generate-f32-rdminmax.sh ├── generate-f32-rdsum.sh ├── generate-f32-rdsum2.sh ├── generate-f32-rminmax.sh ├── generate-f32-rsum.sh ├── generate-f32-rsum2.sh ├── generate-f32-spmm.sh ├── generate-f32-vapproxgelu.sh ├── generate-f32-vbinary.sh ├── generate-f32-vclamp.sh ├── generate-f32-vcmul.sh ├── generate-f32-vcopysign.sh ├── generate-f32-vcos.sh ├── generate-f32-velu.sh ├── generate-f32-vexp.sh ├── generate-f32-vgelu.sh ├── generate-f32-vhswish.sh ├── generate-f32-vlog.sh ├── generate-f32-vlrelu.sh ├── generate-f32-vmulcaddc.sh ├── generate-f32-vrnd.sh ├── generate-f32-vrsqrt.sh ├── generate-f32-vscaleexpminusmax.sh ├── generate-f32-vscaleextexp.sh ├── generate-f32-vsigmoid.sh ├── generate-f32-vsin.sh ├── generate-f32-vsqrt.sh ├── generate-f32-vtanh.sh ├── generate-f32-vunary.sh ├── generate-qb4-packw.sh ├── generate-qs8-dwconv.sh ├── generate-qs8-f16-vcvt.sh ├── generate-qs8-f32-vcvt.sh ├── generate-qs8-gemm.sh ├── generate-qs8-igemm.sh ├── generate-qs8-rdsum.sh ├── generate-qs8-rsum.sh ├── generate-qs8-vadd.sh ├── generate-qs8-vcvt.sh ├── generate-qs8-vlrelu.sh ├── generate-qs8-vmul.sh ├── generate-qs8-vprelu.sh ├── generate-qs8-vpreluc.sh ├── generate-qs8-vrpreluc.sh ├── generate-qu8-rdsum.sh ├── generate-qu8-rsum.sh ├── generate-s8-ibilinear.sh ├── generate-s8-maxpool.sh ├── generate-s8-rdminmax.sh ├── generate-s8-rminmax.sh ├── generate-tests.sh ├── generate-u8-maxpool.sh ├── generate-u8-rdminmax.sh ├── generate-u8-rminmax.sh ├── generate-x16-packw.sh ├── generate-x16-x32-packw.sh ├── generate-x32-packb.sh ├── generate-x32-packw.sh ├── generate-x32-packx.sh ├── generate-x4-packw.sh ├── generate-x8-lut.sh ├── generate-x8-packw.sh ├── generate-x8-vclamp.sh ├── generate-xN-transpose.sh ├── genxnn ├── run-on-hexagon-device.sh ├── run-on-hexagon-sim.sh └── sort-filenames.py ├── src ├── allocator.c ├── bf16-f32-gemm │ ├── bf16-f32-gemm-1x4c2-minmax-scalar.c │ └── gen │ │ ├── bf16-f32-gemm-10x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-10x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-11x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-11x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-1x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-1x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-1x64c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-2x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-2x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-2x64c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-3x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-3x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-3x64c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-4x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-4x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-4x64c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-5x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-5x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-5x64c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-6x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-6x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-7x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-7x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-8x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-8x32c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ ├── bf16-f32-gemm-9x16c2-minmax-asm-amd64-avx512bf16-broadcast.S │ │ └── bf16-f32-gemm-9x32c2-minmax-asm-amd64-avx512bf16-broadcast.S ├── bf16-gemm │ ├── c2-neonbf16-bfdot-lane-ld128.c.in │ ├── c8-neon-shland.c.in │ ├── c8-neon-zip.c.in │ ├── c8-neonbf16.c.in │ └── gen │ │ ├── bf16-gemm-1x4c8-minmax-neonbf16-bfdot.c │ │ ├── bf16-gemm-1x4c8-minmax-neonbf16-bfmlal.c │ │ ├── bf16-gemm-1x4c8-minmax-neonfma-shland.c │ │ ├── bf16-gemm-1x4c8-minmax-neonfma-zip.c │ │ ├── bf16-gemm-1x8c2-minmax-neonbf16-bfdot-lane-ld128.c │ │ ├── bf16-gemm-2x4c8-minmax-neonbf16-bfdot.c │ │ ├── bf16-gemm-2x4c8-minmax-neonbf16-bfmlal.c │ │ ├── bf16-gemm-2x4c8-minmax-neonfma-shland.c │ │ ├── bf16-gemm-2x4c8-minmax-neonfma-zip.c │ │ ├── bf16-gemm-3x4c8-minmax-neonbf16-bfdot.c │ │ ├── bf16-gemm-3x4c8-minmax-neonbf16-bfmlal.c │ │ ├── bf16-gemm-3x4c8-minmax-neonfma-shland.c │ │ ├── bf16-gemm-3x4c8-minmax-neonfma-zip.c │ │ ├── bf16-gemm-4x4c8-minmax-neonbf16-bfdot.c │ │ ├── bf16-gemm-4x4c8-minmax-neonbf16-bfmlal.c │ │ ├── bf16-gemm-4x4c8-minmax-neonfma-shland.c │ │ ├── bf16-gemm-4x4c8-minmax-neonfma-zip.c │ │ ├── bf16-gemm-4x8c2-minmax-neonbf16-bfdot-lane-ld128.c │ │ ├── bf16-gemm-5x4c8-minmax-neonbf16-bfdot.c │ │ ├── bf16-gemm-5x4c8-minmax-neonbf16-bfmlal.c │ │ ├── bf16-gemm-5x4c8-minmax-neonfma-shland.c │ │ ├── bf16-gemm-5x4c8-minmax-neonfma-zip.c │ │ ├── bf16-gemm-5x8c2-minmax-neonbf16-bfdot-lane-ld128.c │ │ └── bf16-gemm-6x8c2-minmax-neonbf16-bfdot-lane-ld128.c ├── bf16-vunary │ └── neon.c.in ├── cache.c ├── configs │ ├── BUILD │ ├── argmaxpool-config.c │ ├── avgpool-config.c │ ├── binary-elementwise-config.c │ ├── cmul-config.c │ ├── conv-hwc2chw-config.c │ ├── dwconv-config.c │ ├── dwconv2d-chw-config.c │ ├── gemm-config.c │ ├── hardware-config.c │ ├── ibilinear-chw-config.c │ ├── ibilinear-config.c │ ├── lut32norm-config.c │ ├── maxpool-config.c │ ├── pack-lh-config.c │ ├── raddstoreexpminusmax-config.c │ ├── reduce-config.c │ ├── spmm-config.c │ ├── transpose-config.c │ ├── unary-elementwise-config.c │ ├── unpool-config.c │ ├── vmulcaddc-config.c │ ├── x8-lut-config.c │ ├── xx-fill-config.c │ └── xx-pad-config.c ├── datatype.c ├── enums │ ├── allocation-type.c │ ├── datatype-strings.c │ ├── microkernel-type.c │ ├── node-type.c │ └── operator-type.c ├── f16-avgpool │ ├── f16-avgpool-minmax.inc │ ├── f16c.c.in │ └── gen │ │ ├── f16-avgpool-9p-minmax-f16c.c │ │ └── f16-avgpool-9p-minmax-neonfp16arith.c ├── f16-conv-hwc2chw │ └── f16-conv-hwc2chw-3x3s2p1c3x4-neonfp16arith-2x2.c ├── f16-dwconv │ ├── f16-dwconv-minmax.inc │ ├── gen │ │ ├── f16-dwconv-25p16c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-25p16c-minmax-fma3.c │ │ ├── f16-dwconv-25p16c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-25p16c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-25p32c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-25p32c-minmax-fma3.c │ │ ├── f16-dwconv-25p32c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-25p32c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-25p8c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-25p8c-minmax-fma3.c │ │ ├── f16-dwconv-25p8c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-25p8c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-3p16c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-3p16c-minmax-fma3.c │ │ ├── f16-dwconv-3p16c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-3p16c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-3p32c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-3p32c-minmax-fma3.c │ │ ├── f16-dwconv-3p32c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-3p32c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-3p8c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-3p8c-minmax-fma3.c │ │ ├── f16-dwconv-3p8c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-3p8c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-4p16c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-4p16c-minmax-fma3.c │ │ ├── f16-dwconv-4p16c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-4p16c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-4p32c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-4p32c-minmax-fma3.c │ │ ├── f16-dwconv-4p32c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-4p32c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-4p8c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-4p8c-minmax-fma3.c │ │ ├── f16-dwconv-4p8c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-4p8c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-9p16c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-9p16c-minmax-fma3.c │ │ ├── f16-dwconv-9p16c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-9p16c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-9p32c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-9p32c-minmax-fma3.c │ │ ├── f16-dwconv-9p32c-minmax-neonfp16arith-acc2.c │ │ ├── f16-dwconv-9p32c-minmax-neonfp16arith.c │ │ ├── f16-dwconv-9p8c-minmax-fma3-acc2.c │ │ ├── f16-dwconv-9p8c-minmax-fma3.c │ │ ├── f16-dwconv-9p8c-minmax-neonfp16arith-acc2.c │ │ └── f16-dwconv-9p8c-minmax-neonfp16arith.c │ ├── unipass-fma3.c.in │ └── unipass-neonfp16arith.c.in ├── f16-dwconv2d-chw │ ├── 3x3p1-neonfp16arith.c.in │ ├── 3x3s2p1-neonfp16arith.c.in │ ├── 5x5p2-neonfp16arith.c.in │ ├── 5x5s2p2-neonfp16arith.c.in │ └── gen │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-1x8-acc2.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-1x8-acc3.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-1x8-acc4.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-1x8.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-2x8-acc2.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-2x8.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-3x8.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-4x8.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-5x8.c │ │ ├── f16-dwconv2d-chw-3x3p1-minmax-neonfp16arith-6x8.c │ │ ├── f16-dwconv2d-chw-3x3s2p1-minmax-neonfp16arith-1x8-acc2.c │ │ ├── f16-dwconv2d-chw-3x3s2p1-minmax-neonfp16arith-1x8-acc3.c │ │ ├── f16-dwconv2d-chw-3x3s2p1-minmax-neonfp16arith-1x8-acc4.c │ │ ├── f16-dwconv2d-chw-3x3s2p1-minmax-neonfp16arith-1x8.c │ │ ├── f16-dwconv2d-chw-3x3s2p1-minmax-neonfp16arith-2x8-acc2.c │ │ ├── f16-dwconv2d-chw-3x3s2p1-minmax-neonfp16arith-2x8.c │ │ ├── f16-dwconv2d-chw-3x3s2p1-minmax-neonfp16arith-3x8.c │ │ ├── f16-dwconv2d-chw-3x3s2p1-minmax-neonfp16arith-4x8.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-1x8-acc2.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-1x8-acc3.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-1x8-acc4.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-1x8-acc5.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-1x8.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-2x8-acc2.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-2x8-acc3.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-2x8.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-3x8-acc2.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-3x8.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-4x8-acc2.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-4x8.c │ │ ├── f16-dwconv2d-chw-5x5p2-minmax-neonfp16arith-5x8.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-1x8-acc2.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-1x8-acc3.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-1x8-acc4.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-1x8-acc5.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-1x8.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-2x8-acc2.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-2x8-acc3.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-2x8.c │ │ ├── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-3x8-acc2.c │ │ └── f16-dwconv2d-chw-5x5s2p2-minmax-neonfp16arith-3x8.c ├── f16-f32-vcvt │ ├── avx512skx.c.in │ ├── f16-f32-vcvt.inc │ ├── f16c.c.in │ ├── gen │ │ ├── f16-f32-vcvt-avx-int16-u16.c │ │ ├── f16-f32-vcvt-avx-int16-u24.c │ │ ├── f16-f32-vcvt-avx-int16-u32.c │ │ ├── f16-f32-vcvt-avx-int16-u8.c │ │ ├── f16-f32-vcvt-avx-int32-u16.c │ │ ├── f16-f32-vcvt-avx-int32-u24.c │ │ ├── f16-f32-vcvt-avx-int32-u32.c │ │ ├── f16-f32-vcvt-avx-int32-u8.c │ │ ├── f16-f32-vcvt-avx512skx-u16.c │ │ ├── f16-f32-vcvt-avx512skx-u32.c │ │ ├── f16-f32-vcvt-f16c-u16.c │ │ ├── f16-f32-vcvt-f16c-u8.c │ │ ├── f16-f32-vcvt-neon-int16-u16.c │ │ ├── f16-f32-vcvt-neon-int16-u24.c │ │ ├── f16-f32-vcvt-neon-int16-u32.c │ │ ├── f16-f32-vcvt-neon-int16-u8.c │ │ ├── f16-f32-vcvt-neon-int32-u16.c │ │ ├── f16-f32-vcvt-neon-int32-u24.c │ │ ├── f16-f32-vcvt-neon-int32-u32.c │ │ ├── f16-f32-vcvt-neon-int32-u8.c │ │ ├── f16-f32-vcvt-neonfp16-u16.c │ │ ├── f16-f32-vcvt-neonfp16-u8.c │ │ ├── f16-f32-vcvt-rvvfp16arith-u1v.c │ │ ├── f16-f32-vcvt-rvvfp16arith-u2v.c │ │ ├── f16-f32-vcvt-rvvfp16arith-u4v.c │ │ ├── f16-f32-vcvt-scalar-u1.c │ │ ├── f16-f32-vcvt-scalar-u2.c │ │ ├── f16-f32-vcvt-scalar-u3.c │ │ ├── f16-f32-vcvt-scalar-u4.c │ │ ├── f16-f32-vcvt-sse2-int16-u16.c │ │ ├── f16-f32-vcvt-sse2-int16-u24.c │ │ ├── f16-f32-vcvt-sse2-int16-u32.c │ │ ├── f16-f32-vcvt-sse2-int16-u8.c │ │ ├── f16-f32-vcvt-sse2-int32-u16.c │ │ ├── f16-f32-vcvt-sse2-int32-u24.c │ │ ├── f16-f32-vcvt-sse2-int32-u32.c │ │ ├── f16-f32-vcvt-sse2-int32-u8.c │ │ ├── f16-f32-vcvt-sse41-int16-u16.c │ │ ├── f16-f32-vcvt-sse41-int16-u24.c │ │ ├── f16-f32-vcvt-sse41-int16-u32.c │ │ ├── f16-f32-vcvt-sse41-int16-u8.c │ │ ├── f16-f32-vcvt-sse41-int32-u16.c │ │ ├── f16-f32-vcvt-sse41-int32-u24.c │ │ ├── f16-f32-vcvt-sse41-int32-u32.c │ │ ├── f16-f32-vcvt-sse41-int32-u8.c │ │ ├── f16-f32-vcvt-wasmrelaxedsimd-int16-u16.c │ │ ├── f16-f32-vcvt-wasmrelaxedsimd-int16-u24.c │ │ ├── f16-f32-vcvt-wasmrelaxedsimd-int16-u32.c │ │ ├── f16-f32-vcvt-wasmrelaxedsimd-int16-u8.c │ │ ├── f16-f32-vcvt-wasmrelaxedsimd-int32-u16.c │ │ ├── f16-f32-vcvt-wasmrelaxedsimd-int32-u24.c │ │ ├── f16-f32-vcvt-wasmrelaxedsimd-int32-u32.c │ │ ├── f16-f32-vcvt-wasmrelaxedsimd-int32-u8.c │ │ ├── f16-f32-vcvt-wasmsimd-int16-u16.c │ │ ├── f16-f32-vcvt-wasmsimd-int16-u24.c │ │ ├── f16-f32-vcvt-wasmsimd-int16-u32.c │ │ ├── f16-f32-vcvt-wasmsimd-int16-u8.c │ │ ├── f16-f32-vcvt-wasmsimd-int32-u16.c │ │ ├── f16-f32-vcvt-wasmsimd-int32-u24.c │ │ ├── f16-f32-vcvt-wasmsimd-int32-u32.c │ │ └── f16-f32-vcvt-wasmsimd-int32-u8.c │ ├── neon-int16.c.in │ ├── neon-int32.c.in │ ├── neonfp16.c.in │ ├── rvvfp16arith.c.in │ ├── scalar.c.in │ ├── sse-int16.c.in │ ├── sse-int32.c.in │ ├── wasmsimd-int16.c.in │ └── wasmsimd-int32.c.in ├── f16-f32acc-gemm │ └── gen │ │ ├── f16-f32acc-gemm-1x16-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-gemm-1x8-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-gemm-3x16-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-gemm-4x16-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-gemm-4x8-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-gemm-5x16-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-gemm-5x8-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-gemm-6x8-minmax-avx2-broadcast.c │ │ └── f16-f32acc-gemm-7x8-minmax-avx2-broadcast.c ├── f16-f32acc-igemm │ └── gen │ │ ├── f16-f32acc-igemm-1x16-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-igemm-1x8-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-igemm-3x16-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-igemm-4x16-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-igemm-4x8-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-igemm-5x16-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-igemm-5x8-minmax-avx2-broadcast.c │ │ ├── f16-f32acc-igemm-6x8-minmax-avx2-broadcast.c │ │ └── f16-f32acc-igemm-7x8-minmax-avx2-broadcast.c ├── f16-f32acc-rdsum │ ├── avx.c.in │ ├── avx512skx.c.in │ ├── f16-f32acc-rdsum.inc │ ├── gen │ │ ├── f16-f32acc-rdsum-7p7x-minmax-avx512skx-u128.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-avx512skx-u16.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-avx512skx-u32.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-avx512skx-u64.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-f16c-u128.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-f16c-u16.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-f16c-u32.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-f16c-u64.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-neonfp16arith-u16.c │ │ ├── f16-f32acc-rdsum-7p7x-minmax-neonfp16arith-u32.c │ │ └── f16-f32acc-rdsum-7p7x-minmax-neonfp16arith-u64.c │ └── neon.c.in ├── f16-f32acc-rdsum2 │ ├── avx.c.in │ ├── avx512skx.c.in │ ├── f16-f32acc-rdsum2.inc │ ├── gen │ │ ├── f16-f32acc-rdsum2-7p7x-avx512skx.c │ │ ├── f16-f32acc-rdsum2-7p7x-f16c.c │ │ └── f16-f32acc-rdsum2-7p7x-minmax-neonfp16arith.c │ └── neon.c.in ├── f16-f32acc-rsum │ ├── avx512skx.c.in │ ├── f16-f32acc-rsum.inc │ ├── f16c.c.in │ ├── gen │ │ ├── f16-f32acc-rsum-avx512skx-u128-acc4.c │ │ ├── f16-f32acc-rsum-avx512skx-u16.c │ │ ├── f16-f32acc-rsum-avx512skx-u32-acc2.c │ │ ├── f16-f32acc-rsum-avx512skx-u48-acc3.c │ │ ├── f16-f32acc-rsum-avx512skx-u64-acc2.c │ │ ├── f16-f32acc-rsum-avx512skx-u64-acc4.c │ │ ├── f16-f32acc-rsum-f16c-u16-acc2.c │ │ ├── f16-f32acc-rsum-f16c-u24-acc3.c │ │ ├── f16-f32acc-rsum-f16c-u32-acc2.c │ │ ├── f16-f32acc-rsum-f16c-u32-acc4.c │ │ ├── f16-f32acc-rsum-f16c-u8.c │ │ ├── f16-f32acc-rsum-neonfp16arith-u16-acc2.c │ │ ├── f16-f32acc-rsum-neonfp16arith-u24-acc3.c │ │ ├── f16-f32acc-rsum-neonfp16arith-u32-acc2.c │ │ ├── f16-f32acc-rsum-neonfp16arith-u32-acc4.c │ │ └── f16-f32acc-rsum-neonfp16arith-u8.c │ └── neonfp16arith.c.in ├── f16-f32acc-rsum2 │ ├── avx512skx.c.in │ ├── f16-f32acc-rsum2.inc │ ├── f16c.c.in │ ├── gen │ │ ├── f16-f32acc-rsum2-avx512skx.c │ │ ├── f16-f32acc-rsum2-f16c.c │ │ └── f16-f32acc-rsum2-neonfp16arith.c │ └── neonfp16arith.c.in ├── f16-gemm │ ├── 1x16-aarch64-neonfp16arith-ld32.S.in │ ├── 1x16-aarch64-neonfp16arith-ld64.S.in │ ├── 1x8-aarch64-neonfp16arith-ld64.S.in │ ├── 4x16-aarch64-neonfp16arith-ld32.S.in │ ├── 4x16-aarch64-neonfp16arith-ld64.S.in │ ├── 4x8-aarch64-neonfp16arith-ld64.S.in │ ├── 6x16-aarch64-neonfp16arith-cortex-a55.S.in │ ├── 6x16-aarch64-neonfp16arith-cortex-a55r0.S.in │ ├── 6x16-aarch64-neonfp16arith-cortex-a75.S.in │ ├── 6x16-aarch64-neonfp16arith-ld32.S.in │ ├── 6x16-aarch64-neonfp16arith-ld64.S.in │ ├── 6x8-aarch64-neonfp16arith-ld64.S.in │ ├── 8x8-aarch64-neonfp16arith-ld64.S.in │ ├── avx2-broadcast.c.in │ ├── avx512fp16-broadcast.c.in │ ├── gen │ │ ├── f16-gemm-1x16-minmax-asm-aarch64-neonfp16arith-ld32.S │ │ ├── f16-gemm-1x16-minmax-asm-aarch64-neonfp16arith-ld64.S │ │ ├── f16-gemm-1x16-minmax-avx2-broadcast.c │ │ ├── f16-gemm-1x16-minmax-neonfp16arith-ld64.c │ │ ├── f16-gemm-1x16-minmax-wasmrelaxedsimd-splat.c │ │ ├── f16-gemm-1x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-1x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-1x8-minmax-asm-aarch64-neonfp16arith-ld64.S │ │ ├── f16-gemm-1x8-minmax-avx2-broadcast.c │ │ ├── f16-gemm-1x8-minmax-neonfp16arith-ld64.c │ │ ├── f16-gemm-1x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f16-gemm-3x16-minmax-avx2-broadcast.c │ │ ├── f16-gemm-4x16-minmax-asm-aarch64-neonfp16arith-ld32.S │ │ ├── f16-gemm-4x16-minmax-asm-aarch64-neonfp16arith-ld64.S │ │ ├── f16-gemm-4x16-minmax-avx2-broadcast.c │ │ ├── f16-gemm-4x16-minmax-neonfp16arith-ld64.c │ │ ├── f16-gemm-4x16-minmax-wasmrelaxedsimd-splat.c │ │ ├── f16-gemm-4x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-4x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-4x8-minmax-asm-aarch64-neonfp16arith-ld64.S │ │ ├── f16-gemm-4x8-minmax-avx2-broadcast.c │ │ ├── f16-gemm-4x8-minmax-neonfp16arith-ld64.c │ │ ├── f16-gemm-4x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f16-gemm-5x16-minmax-avx2-broadcast.c │ │ ├── f16-gemm-5x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-5x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-5x8-minmax-avx2-broadcast.c │ │ ├── f16-gemm-6x16-minmax-asm-aarch64-neonfp16arith-cortex-a55.S │ │ ├── f16-gemm-6x16-minmax-asm-aarch64-neonfp16arith-cortex-a55r0.S │ │ ├── f16-gemm-6x16-minmax-asm-aarch64-neonfp16arith-cortex-a75.S │ │ ├── f16-gemm-6x16-minmax-asm-aarch64-neonfp16arith-ld32.S │ │ ├── f16-gemm-6x16-minmax-asm-aarch64-neonfp16arith-ld64.S │ │ ├── f16-gemm-6x16-minmax-neonfp16arith-ld64.c │ │ ├── f16-gemm-6x16-minmax-wasmrelaxedsimd-splat.c │ │ ├── f16-gemm-6x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-6x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-6x8-minmax-asm-aarch64-neonfp16arith-ld64.S │ │ ├── f16-gemm-6x8-minmax-avx2-broadcast.c │ │ ├── f16-gemm-6x8-minmax-neonfp16arith-ld64.c │ │ ├── f16-gemm-6x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f16-gemm-7x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-7x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-7x8-minmax-avx2-broadcast.c │ │ ├── f16-gemm-8x16-minmax-neonfp16arith-ld64.c │ │ ├── f16-gemm-8x16-minmax-wasmrelaxedsimd-splat.c │ │ ├── f16-gemm-8x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-8x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-gemm-8x8-minmax-asm-aarch64-neonfp16arith-ld64.S │ │ ├── f16-gemm-8x8-minmax-neonfp16arith-ld64.c │ │ └── f16-gemm-8x8-minmax-wasmrelaxedsimd-splat.c │ ├── neonfp16arith-ld64.c.in │ └── wasmrelaxedsimd-splat.c.in ├── f16-ibilinear-chw │ ├── gen │ │ ├── f16-ibilinear-chw-neonfp16arith-p16.c │ │ ├── f16-ibilinear-chw-neonfp16arith-p4.c │ │ └── f16-ibilinear-chw-neonfp16arith-p8.c │ └── neonfp16arith.c.in ├── f16-ibilinear │ ├── fma3.c.in │ ├── gen │ │ ├── f16-ibilinear-fma3-u16.c │ │ ├── f16-ibilinear-fma3-u8.c │ │ ├── f16-ibilinear-neonfp16arith-u16.c │ │ └── f16-ibilinear-neonfp16arith-u8.c │ └── neonfp16arith.c.in ├── f16-igemm │ ├── avx2-broadcast.c.in │ ├── avx512fp16-broadcast.c.in │ ├── f16-igemm-1x16-minmax-asm-aarch64-neonfp16arith-ld32.S │ ├── f16-igemm-1x16-minmax-asm-aarch64-neonfp16arith-ld64.S │ ├── f16-igemm-4x16-minmax-asm-aarch64-neonfp16arith-ld32.S │ ├── f16-igemm-4x16-minmax-asm-aarch64-neonfp16arith-ld64.S │ ├── f16-igemm-6x16-minmax-asm-aarch64-neonfp16arith-cortex-a55.S │ ├── f16-igemm-6x16-minmax-asm-aarch64-neonfp16arith-cortex-a55r0.S │ ├── f16-igemm-6x16-minmax-asm-aarch64-neonfp16arith-cortex-a75.S │ ├── f16-igemm-6x16-minmax-asm-aarch64-neonfp16arith-ld32.S │ ├── f16-igemm-6x16-minmax-asm-aarch64-neonfp16arith-ld64.S │ ├── gen │ │ ├── f16-igemm-1x16-minmax-avx2-broadcast.c │ │ ├── f16-igemm-1x16-minmax-neonfp16arith-ld64.c │ │ ├── f16-igemm-1x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-1x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-1x8-minmax-avx2-broadcast.c │ │ ├── f16-igemm-1x8-minmax-neonfp16arith-ld64.c │ │ ├── f16-igemm-3x16-minmax-avx2-broadcast.c │ │ ├── f16-igemm-4x16-minmax-avx2-broadcast.c │ │ ├── f16-igemm-4x16-minmax-neonfp16arith-ld64.c │ │ ├── f16-igemm-4x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-4x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-4x8-minmax-avx2-broadcast.c │ │ ├── f16-igemm-4x8-minmax-neonfp16arith-ld64.c │ │ ├── f16-igemm-5x16-minmax-avx2-broadcast.c │ │ ├── f16-igemm-5x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-5x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-5x8-minmax-avx2-broadcast.c │ │ ├── f16-igemm-6x16-minmax-neonfp16arith-ld64.c │ │ ├── f16-igemm-6x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-6x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-6x8-minmax-avx2-broadcast.c │ │ ├── f16-igemm-6x8-minmax-neonfp16arith-ld64.c │ │ ├── f16-igemm-7x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-7x64-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-7x8-minmax-avx2-broadcast.c │ │ ├── f16-igemm-8x16-minmax-neonfp16arith-ld64.c │ │ ├── f16-igemm-8x32-minmax-avx512fp16-broadcast.c │ │ ├── f16-igemm-8x64-minmax-avx512fp16-broadcast.c │ │ └── f16-igemm-8x8-minmax-neonfp16arith-ld64.c │ └── neonfp16arith-ld64.c.in ├── f16-maxpool │ ├── f16-maxpool-minmax.inc │ └── gen │ │ ├── f16-maxpool-9p-minmax-avx2-u16.c │ │ ├── f16-maxpool-9p-minmax-neonfp16arith-u8.c │ │ └── f16-maxpool-9p-minmax-sse41-u8.c ├── f16-qs8-vcvt │ ├── f16-qs8-vcvt.inc │ ├── gen │ │ ├── f16-qs8-vcvt-neonfp16arith-u16.c │ │ ├── f16-qs8-vcvt-neonfp16arith-u24.c │ │ ├── f16-qs8-vcvt-neonfp16arith-u32.c │ │ ├── f16-qs8-vcvt-neonfp16arith-u64.c │ │ ├── f16-qs8-vcvt-neonfp16arith-u8.c │ │ ├── f16-qs8-vcvt-scalar-fmagic-u1.c │ │ ├── f16-qs8-vcvt-scalar-fmagic-u2.c │ │ ├── f16-qs8-vcvt-scalar-fmagic-u3.c │ │ ├── f16-qs8-vcvt-scalar-fmagic-u4.c │ │ ├── f16-qs8-vcvt-scalar-imagic-u1.c │ │ ├── f16-qs8-vcvt-scalar-imagic-u2.c │ │ ├── f16-qs8-vcvt-scalar-imagic-u3.c │ │ └── f16-qs8-vcvt-scalar-imagic-u4.c │ └── neonfp16arith.c.in ├── f16-qu8-vcvt │ ├── f16-qu8-vcvt.inc │ └── gen │ │ ├── f16-qu8-vcvt-scalar-imagic-u1.c │ │ ├── f16-qu8-vcvt-scalar-imagic-u2.c │ │ ├── f16-qu8-vcvt-scalar-imagic-u3.c │ │ └── f16-qu8-vcvt-scalar-imagic-u4.c ├── f16-raddstoreexpminusmax │ ├── avx2-rr1-p2.c.in │ ├── gen │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u16-acc2.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u16.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u32-acc2.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u32-acc4.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u32.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u40-acc2.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u40-acc5.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u40.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u48-acc2.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u48-acc3.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u48.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u64-acc2.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u64-acc4.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u64.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u72-acc3.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u72.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u80-acc2.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u80-acc5.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u80.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u96-acc2.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u96-acc3.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u96-acc6.c │ │ ├── f16-raddstoreexpminusmax-avx2-rr1-p2-u96.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u32-acc2.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u32-acc4.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u32.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u40-acc2.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u40-acc5.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u40.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u48-acc2.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u48-acc3.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u48.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u64-acc2.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u64-acc4.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u64.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u72-acc3.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u72.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u80-acc2.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u80-acc5.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u80.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u96-acc2.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u96-acc3.c │ │ ├── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u96-acc6.c │ │ └── f16-raddstoreexpminusmax-neonfp16arith-rr2-p2-u96.c │ └── neonfp16arith-rr2-p2.c.in ├── f16-rdminmax │ ├── f16-rdmax.inc │ ├── f16-rdmin.inc │ ├── gen │ │ ├── f16-rdmax-2p2x-neonfp16arith-u32.c │ │ ├── f16-rdmax-2p2x-scalar-u2.c │ │ ├── f16-rdmin-2p2x-neonfp16arith-u32.c │ │ └── f16-rdmin-2p2x-scalar-u2.c │ └── simd.c.in ├── f16-rminmax │ ├── avx512fp16.c.in │ ├── avx512skx.c.in │ ├── f16-rmax-f16c-u32.c │ ├── f16-rmax.inc │ ├── f16-rmin.inc │ ├── f16-rminmax.inc │ ├── gen │ │ ├── f16-rmax-avx512fp16-u128-acc2.c │ │ ├── f16-rmax-avx512fp16-u128-acc4.c │ │ ├── f16-rmax-avx512fp16-u32.c │ │ ├── f16-rmax-avx512fp16-u64-acc2.c │ │ ├── f16-rmax-avx512fp16-u96-acc3.c │ │ ├── f16-rmax-avx512skx-u16.c │ │ ├── f16-rmax-avx512skx-u32-acc2.c │ │ ├── f16-rmax-avx512skx-u48-acc3.c │ │ ├── f16-rmax-avx512skx-u64-acc2.c │ │ ├── f16-rmax-avx512skx-u64-acc4.c │ │ ├── f16-rmax-neonfp16arith-u16-acc2.c │ │ ├── f16-rmax-neonfp16arith-u24-acc3.c │ │ ├── f16-rmax-neonfp16arith-u32-acc2.c │ │ ├── f16-rmax-neonfp16arith-u32-acc4.c │ │ ├── f16-rmax-neonfp16arith-u8.c │ │ ├── f16-rmax-scalar-u1.c │ │ ├── f16-rmax-scalar-u2-acc2.c │ │ ├── f16-rmax-scalar-u3-acc3.c │ │ ├── f16-rmax-scalar-u4-acc2.c │ │ ├── f16-rmax-scalar-u4-acc4.c │ │ ├── f16-rmin-avx512fp16-u128-acc2.c │ │ ├── f16-rmin-avx512fp16-u128-acc4.c │ │ ├── f16-rmin-avx512fp16-u32.c │ │ ├── f16-rmin-avx512fp16-u64-acc2.c │ │ ├── f16-rmin-avx512fp16-u96-acc3.c │ │ ├── f16-rmin-avx512skx-u16.c │ │ ├── f16-rmin-avx512skx-u32-acc2.c │ │ ├── f16-rmin-avx512skx-u48-acc3.c │ │ ├── f16-rmin-avx512skx-u64-acc2.c │ │ ├── f16-rmin-avx512skx-u64-acc4.c │ │ ├── f16-rmin-neonfp16arith-u16-acc2.c │ │ ├── f16-rmin-neonfp16arith-u24-acc3.c │ │ ├── f16-rmin-neonfp16arith-u32-acc2.c │ │ ├── f16-rmin-neonfp16arith-u32-acc4.c │ │ ├── f16-rmin-neonfp16arith-u8.c │ │ ├── f16-rmin-scalar-u1.c │ │ ├── f16-rmin-scalar-u2-acc2.c │ │ ├── f16-rmin-scalar-u3-acc3.c │ │ ├── f16-rmin-scalar-u4-acc2.c │ │ ├── f16-rmin-scalar-u4-acc4.c │ │ ├── f16-rminmax-avx512fp16-u128-acc2.c │ │ ├── f16-rminmax-avx512fp16-u128-acc4.c │ │ ├── f16-rminmax-avx512fp16-u32.c │ │ ├── f16-rminmax-avx512fp16-u64-acc2.c │ │ ├── f16-rminmax-avx512fp16-u96-acc3.c │ │ ├── f16-rminmax-avx512skx-u16.c │ │ ├── f16-rminmax-avx512skx-u32-acc2.c │ │ ├── f16-rminmax-avx512skx-u48-acc3.c │ │ ├── f16-rminmax-avx512skx-u64-acc2.c │ │ ├── f16-rminmax-avx512skx-u64-acc4.c │ │ ├── f16-rminmax-neonfp16arith-u16-acc2.c │ │ ├── f16-rminmax-neonfp16arith-u24-acc3.c │ │ ├── f16-rminmax-neonfp16arith-u32-acc2.c │ │ ├── f16-rminmax-neonfp16arith-u32-acc4.c │ │ ├── f16-rminmax-neonfp16arith-u8.c │ │ ├── f16-rminmax-scalar-u1.c │ │ ├── f16-rminmax-scalar-u2-acc2.c │ │ ├── f16-rminmax-scalar-u3-acc3.c │ │ ├── f16-rminmax-scalar-u4-acc2.c │ │ └── f16-rminmax-scalar-u4-acc4.c │ ├── neonfp16arith.c.in │ └── scalar.c.in ├── f16-rsum │ ├── avx512fp16.c.in │ ├── f16-rsum.inc │ ├── gen │ │ ├── f16-rsum-avx512fp16-u128-acc2.c │ │ ├── f16-rsum-avx512fp16-u128-acc4.c │ │ ├── f16-rsum-avx512fp16-u32.c │ │ ├── f16-rsum-avx512fp16-u64-acc2.c │ │ ├── f16-rsum-avx512fp16-u96-acc3.c │ │ ├── f16-rsum-neonfp16arith-u16-acc2.c │ │ ├── f16-rsum-neonfp16arith-u24-acc3.c │ │ ├── f16-rsum-neonfp16arith-u32-acc2.c │ │ ├── f16-rsum-neonfp16arith-u32-acc4.c │ │ └── f16-rsum-neonfp16arith-u8.c │ └── neonfp16arith.c.in ├── f16-spmm │ ├── f16-spmm-minmax.inc │ ├── gen │ │ ├── f16-spmm-16x1-minmax-neonfp16arith-pipelined.c │ │ ├── f16-spmm-16x1-minmax-neonfp16arith-u2.c │ │ ├── f16-spmm-16x1-minmax-neonfp16arith.c │ │ ├── f16-spmm-24x1-minmax-neonfp16arith-pipelined.c │ │ ├── f16-spmm-24x1-minmax-neonfp16arith-u2.c │ │ ├── f16-spmm-24x1-minmax-neonfp16arith.c │ │ ├── f16-spmm-32x1-minmax-neonfp16arith-pipelined.c │ │ ├── f16-spmm-32x1-minmax-neonfp16arith-u2.c │ │ ├── f16-spmm-32x1-minmax-neonfp16arith.c │ │ ├── f16-spmm-8x1-minmax-neonfp16arith-pipelined.c │ │ ├── f16-spmm-8x1-minmax-neonfp16arith-u2.c │ │ └── f16-spmm-8x1-minmax-neonfp16arith.c │ ├── neonfp16arith-pipelined.c.in │ └── neonfp16arith.c.in ├── f16-vabs │ └── f16-vabs.inc ├── f16-vapproxgelu │ ├── f16-vapproxgelu.inc │ ├── gen │ │ ├── f16-vapproxgelu-neonfp16arith-rational-6-4-div.c │ │ └── f16-vapproxgelu-scalar-rational-6-4-div.c │ └── rational-6-4.c.in ├── f16-vbinary │ ├── f16-vadd.inc │ ├── f16-vaddc.inc │ ├── f16-vcmul.inc │ ├── f16-vdiv.inc │ ├── f16-vdivc.inc │ ├── f16-vmax.inc │ ├── f16-vmaxc.inc │ ├── f16-vmin.inc │ ├── f16-vminc.inc │ ├── f16-vmul.inc │ ├── f16-vmulc.inc │ ├── f16-vprelu.inc │ ├── f16-vpreluc.inc │ ├── f16-vrdivc.inc │ ├── f16-vrpreluc.inc │ ├── f16-vrsubc.inc │ ├── f16-vsqrdiff.inc │ ├── f16-vsqrdiffc.inc │ ├── f16-vsub.inc │ ├── f16-vsubc.inc │ ├── gen │ │ ├── f16-vadd-avx512fp16-u32.c │ │ ├── f16-vadd-avx512fp16-u64.c │ │ ├── f16-vadd-f16c-u16.c │ │ ├── f16-vadd-f16c-u8.c │ │ ├── f16-vadd-fp16arith-u1.c │ │ ├── f16-vadd-fp16arith-u2.c │ │ ├── f16-vadd-fp16arith-u4.c │ │ ├── f16-vadd-neonfp16arith-u16.c │ │ ├── f16-vadd-neonfp16arith-u8.c │ │ ├── f16-vaddc-avx512fp16-u32.c │ │ ├── f16-vaddc-avx512fp16-u64.c │ │ ├── f16-vaddc-f16c-u16.c │ │ ├── f16-vaddc-f16c-u8.c │ │ ├── f16-vaddc-fp16arith-u1.c │ │ ├── f16-vaddc-fp16arith-u2.c │ │ ├── f16-vaddc-fp16arith-u4.c │ │ ├── f16-vaddc-neonfp16arith-u16.c │ │ ├── f16-vaddc-neonfp16arith-u8.c │ │ ├── f16-vdiv-aarch64-neonfp16arith-u16.c │ │ ├── f16-vdiv-aarch64-neonfp16arith-u8.c │ │ ├── f16-vdiv-avx512fp16-u32.c │ │ ├── f16-vdiv-avx512fp16-u64.c │ │ ├── f16-vdiv-f16c-u16.c │ │ ├── f16-vdiv-f16c-u8.c │ │ ├── f16-vdiv-fp16arith-u1.c │ │ ├── f16-vdiv-fp16arith-u2.c │ │ ├── f16-vdiv-fp16arith-u4.c │ │ ├── f16-vdivc-aarch64-neonfp16arith-u16.c │ │ ├── f16-vdivc-aarch64-neonfp16arith-u8.c │ │ ├── f16-vdivc-avx512fp16-u32.c │ │ ├── f16-vdivc-avx512fp16-u64.c │ │ ├── f16-vdivc-f16c-u16.c │ │ ├── f16-vdivc-f16c-u8.c │ │ ├── f16-vdivc-fp16arith-u1.c │ │ ├── f16-vdivc-fp16arith-u2.c │ │ ├── f16-vdivc-fp16arith-u4.c │ │ ├── f16-vmax-avx512fp16-u32.c │ │ ├── f16-vmax-avx512fp16-u64.c │ │ ├── f16-vmax-f16c-u16.c │ │ ├── f16-vmax-f16c-u8.c │ │ ├── f16-vmax-fp16arith-u1.c │ │ ├── f16-vmax-fp16arith-u2.c │ │ ├── f16-vmax-fp16arith-u4.c │ │ ├── f16-vmax-neonfp16arith-u16.c │ │ ├── f16-vmax-neonfp16arith-u8.c │ │ ├── f16-vmaxc-avx512fp16-u32.c │ │ ├── f16-vmaxc-avx512fp16-u64.c │ │ ├── f16-vmaxc-f16c-u16.c │ │ ├── f16-vmaxc-f16c-u8.c │ │ ├── f16-vmaxc-fp16arith-u1.c │ │ ├── f16-vmaxc-fp16arith-u2.c │ │ ├── f16-vmaxc-fp16arith-u4.c │ │ ├── f16-vmaxc-neonfp16arith-u16.c │ │ ├── f16-vmaxc-neonfp16arith-u8.c │ │ ├── f16-vmin-avx512fp16-u32.c │ │ ├── f16-vmin-avx512fp16-u64.c │ │ ├── f16-vmin-f16c-u16.c │ │ ├── f16-vmin-f16c-u8.c │ │ ├── f16-vmin-fp16arith-u1.c │ │ ├── f16-vmin-fp16arith-u2.c │ │ ├── f16-vmin-fp16arith-u4.c │ │ ├── f16-vmin-neonfp16arith-u16.c │ │ ├── f16-vmin-neonfp16arith-u8.c │ │ ├── f16-vminc-avx512fp16-u32.c │ │ ├── f16-vminc-avx512fp16-u64.c │ │ ├── f16-vminc-f16c-u16.c │ │ ├── f16-vminc-f16c-u8.c │ │ ├── f16-vminc-fp16arith-u1.c │ │ ├── f16-vminc-fp16arith-u2.c │ │ ├── f16-vminc-fp16arith-u4.c │ │ ├── f16-vminc-neonfp16arith-u16.c │ │ ├── f16-vminc-neonfp16arith-u8.c │ │ ├── f16-vmul-avx512fp16-u32.c │ │ ├── f16-vmul-avx512fp16-u64.c │ │ ├── f16-vmul-f16c-u16.c │ │ ├── f16-vmul-f16c-u8.c │ │ ├── f16-vmul-fp16arith-u1.c │ │ ├── f16-vmul-fp16arith-u2.c │ │ ├── f16-vmul-fp16arith-u4.c │ │ ├── f16-vmul-neonfp16arith-u16.c │ │ ├── f16-vmul-neonfp16arith-u8.c │ │ ├── f16-vmulc-avx512fp16-u32.c │ │ ├── f16-vmulc-avx512fp16-u64.c │ │ ├── f16-vmulc-f16c-u16.c │ │ ├── f16-vmulc-f16c-u8.c │ │ ├── f16-vmulc-fp16arith-u1.c │ │ ├── f16-vmulc-fp16arith-u2.c │ │ ├── f16-vmulc-fp16arith-u4.c │ │ ├── f16-vmulc-neonfp16arith-u16.c │ │ ├── f16-vmulc-neonfp16arith-u8.c │ │ ├── f16-vprelu-avx512fp16-u32.c │ │ ├── f16-vprelu-avx512fp16-u64.c │ │ ├── f16-vprelu-f16c-u16.c │ │ ├── f16-vprelu-f16c-u8.c │ │ ├── f16-vprelu-neonfp16arith-u16.c │ │ ├── f16-vprelu-neonfp16arith-u8.c │ │ ├── f16-vpreluc-avx512fp16-u32.c │ │ ├── f16-vpreluc-avx512fp16-u64.c │ │ ├── f16-vpreluc-f16c-u16.c │ │ ├── f16-vpreluc-f16c-u8.c │ │ ├── f16-vpreluc-neonfp16arith-u16.c │ │ ├── f16-vpreluc-neonfp16arith-u8.c │ │ ├── f16-vrdivc-aarch64-neonfp16arith-u16.c │ │ ├── f16-vrdivc-aarch64-neonfp16arith-u8.c │ │ ├── f16-vrdivc-avx512fp16-u32.c │ │ ├── f16-vrdivc-avx512fp16-u64.c │ │ ├── f16-vrdivc-f16c-u16.c │ │ ├── f16-vrdivc-f16c-u8.c │ │ ├── f16-vrdivc-fp16arith-u1.c │ │ ├── f16-vrdivc-fp16arith-u2.c │ │ ├── f16-vrdivc-fp16arith-u4.c │ │ ├── f16-vrpreluc-avx512fp16-u32.c │ │ ├── f16-vrpreluc-avx512fp16-u64.c │ │ ├── f16-vrpreluc-f16c-u16.c │ │ ├── f16-vrpreluc-f16c-u8.c │ │ ├── f16-vrpreluc-neonfp16arith-u16.c │ │ ├── f16-vrpreluc-neonfp16arith-u8.c │ │ ├── f16-vrsubc-avx512fp16-u32.c │ │ ├── f16-vrsubc-avx512fp16-u64.c │ │ ├── f16-vrsubc-f16c-u16.c │ │ ├── f16-vrsubc-f16c-u8.c │ │ ├── f16-vrsubc-fp16arith-u1.c │ │ ├── f16-vrsubc-fp16arith-u2.c │ │ ├── f16-vrsubc-fp16arith-u4.c │ │ ├── f16-vrsubc-neonfp16arith-u16.c │ │ ├── f16-vrsubc-neonfp16arith-u8.c │ │ ├── f16-vsqrdiff-avx512fp16-u32.c │ │ ├── f16-vsqrdiff-avx512fp16-u64.c │ │ ├── f16-vsqrdiff-f16c-u16.c │ │ ├── f16-vsqrdiff-f16c-u8.c │ │ ├── f16-vsqrdiff-fp16arith-u1.c │ │ ├── f16-vsqrdiff-fp16arith-u2.c │ │ ├── f16-vsqrdiff-fp16arith-u4.c │ │ ├── f16-vsqrdiff-neonfp16arith-u16.c │ │ ├── f16-vsqrdiff-neonfp16arith-u8.c │ │ ├── f16-vsqrdiffc-avx512fp16-u32.c │ │ ├── f16-vsqrdiffc-avx512fp16-u64.c │ │ ├── f16-vsqrdiffc-f16c-u16.c │ │ ├── f16-vsqrdiffc-f16c-u8.c │ │ ├── f16-vsqrdiffc-fp16arith-u1.c │ │ ├── f16-vsqrdiffc-fp16arith-u2.c │ │ ├── f16-vsqrdiffc-fp16arith-u4.c │ │ ├── f16-vsqrdiffc-neonfp16arith-u16.c │ │ ├── f16-vsqrdiffc-neonfp16arith-u8.c │ │ ├── f16-vsub-avx512fp16-u32.c │ │ ├── f16-vsub-avx512fp16-u64.c │ │ ├── f16-vsub-f16c-u16.c │ │ ├── f16-vsub-f16c-u8.c │ │ ├── f16-vsub-fp16arith-u1.c │ │ ├── f16-vsub-fp16arith-u2.c │ │ ├── f16-vsub-fp16arith-u4.c │ │ ├── f16-vsub-neonfp16arith-u16.c │ │ ├── f16-vsub-neonfp16arith-u8.c │ │ ├── f16-vsubc-avx512fp16-u32.c │ │ ├── f16-vsubc-avx512fp16-u64.c │ │ ├── f16-vsubc-f16c-u16.c │ │ ├── f16-vsubc-f16c-u8.c │ │ ├── f16-vsubc-fp16arith-u1.c │ │ ├── f16-vsubc-fp16arith-u2.c │ │ ├── f16-vsubc-fp16arith-u4.c │ │ ├── f16-vsubc-neonfp16arith-u16.c │ │ └── f16-vsubc-neonfp16arith-u8.c │ ├── vop-avx512fp16.c.in │ ├── vop-f16c.c.in │ ├── vop-fp16arith.c.in │ ├── vop-neonfp16arith.c.in │ ├── vopc-avx512fp16.c.in │ ├── vopc-f16c.c.in │ ├── vopc-fp16arith.c.in │ └── vopc-neonfp16arith.c.in ├── f16-vclamp │ ├── f16-vclamp.inc │ ├── f16c.c.in │ ├── gen │ │ ├── f16-vclamp-f16c-u16.c │ │ ├── f16-vclamp-f16c-u8.c │ │ ├── f16-vclamp-neonfp16arith-u16.c │ │ ├── f16-vclamp-neonfp16arith-u8.c │ │ ├── f16-vclamp-rvvfp16arith-u1v.c │ │ ├── f16-vclamp-rvvfp16arith-u2v.c │ │ ├── f16-vclamp-rvvfp16arith-u4v.c │ │ └── f16-vclamp-rvvfp16arith-u8v.c │ ├── neonfp16arith.c.in │ └── rvvfp16arith.c.in ├── f16-vcmul │ ├── gen │ │ ├── f16-vcmul-neonfp16arith-u16.c │ │ ├── f16-vcmul-neonfp16arith-u32.c │ │ └── f16-vcmul-neonfp16arith-u8.c │ └── neonfp16arith.c.in ├── f16-vcos │ ├── f16-vcos.inc │ └── gen │ │ ├── f16-vcos-avx512fp16-rational-3-2-div.c │ │ ├── f16-vcos-neonfp16arith-rational-3-2-div.c │ │ └── f16-vcos-scalar-rational-3-2-div.c ├── f16-velu │ ├── avx2-rr1-p3.c.in │ ├── f16-velu.inc │ ├── gen │ │ ├── f16-velu-avx2-rr1-p3-u16.c │ │ ├── f16-velu-avx2-rr1-p3-u8.c │ │ ├── f16-velu-neonfp16arith-rr1-p3-u16.c │ │ └── f16-velu-neonfp16arith-rr1-p3-u8.c │ └── neonfp16arith-rr1-p3.c.in ├── f16-vexp │ ├── f16-vexp.inc │ ├── gen │ │ ├── f16-vexp-neonfp16arith-poly-3.c │ │ └── f16-vexp-scalar-poly-3.c │ └── poly-3.c.in ├── f16-vgelu │ ├── f16-vgelu.inc │ ├── gen │ │ ├── f16-vgelu-avx512fp16-rational-6-4-div.c │ │ ├── f16-vgelu-neonfp16arith-rational-6-4-div.c │ │ └── f16-vgelu-scalar-rational-6-4-div.c │ └── rational-6-4.c.in ├── f16-vhswish │ ├── f16-vhswish.inc │ ├── f16c.c.in │ ├── gen │ │ ├── f16-vhswish-f16c-u16.c │ │ ├── f16-vhswish-f16c-u8.c │ │ ├── f16-vhswish-neonfp16arith-u16.c │ │ └── f16-vhswish-neonfp16arith-u8.c │ └── neonfp16arith.c.in ├── f16-vlrelu │ ├── f16-vlrelu.inc │ ├── f16c.c.in │ ├── gen │ │ ├── f16-vlrelu-f16c-u16.c │ │ ├── f16-vlrelu-f16c-u8.c │ │ ├── f16-vlrelu-neonfp16arith-u16.c │ │ └── f16-vlrelu-neonfp16arith-u8.c │ └── neonfp16arith.c.in ├── f16-vmulcaddc │ ├── f16-vmulcaddc.inc │ ├── fma3.c.in │ ├── gen │ │ ├── f16-vmulcaddc-c16-minmax-fma3-2x.c │ │ ├── f16-vmulcaddc-c16-minmax-neonfp16arith-2x.c │ │ ├── f16-vmulcaddc-c8-minmax-fma3-2x.c │ │ └── f16-vmulcaddc-c8-minmax-neonfp16arith-2x.c │ └── neonfp16arith.c.in ├── f16-vneg │ └── f16-vneg.inc ├── f16-vrnd │ ├── f16-vrndd.inc │ ├── f16-vrndne.inc │ ├── f16-vrndu.inc │ ├── f16-vrndz.inc │ ├── f16c.c.in │ ├── gen │ │ ├── f16-vrndd-f16c-u16.c │ │ ├── f16-vrndd-f16c-u8.c │ │ ├── f16-vrndd-neonfp16arith-u16.c │ │ ├── f16-vrndd-neonfp16arith-u8.c │ │ ├── f16-vrndne-f16c-u16.c │ │ ├── f16-vrndne-f16c-u8.c │ │ ├── f16-vrndne-neonfp16arith-u16.c │ │ ├── f16-vrndne-neonfp16arith-u8.c │ │ ├── f16-vrndu-f16c-u16.c │ │ ├── f16-vrndu-f16c-u8.c │ │ ├── f16-vrndu-neonfp16arith-u16.c │ │ ├── f16-vrndu-neonfp16arith-u8.c │ │ ├── f16-vrndz-f16c-u16.c │ │ ├── f16-vrndz-f16c-u8.c │ │ ├── f16-vrndz-neonfp16arith-u16.c │ │ └── f16-vrndz-neonfp16arith-u8.c │ └── neonfp16arith.c.in ├── f16-vrsqrt │ ├── f16-vrsqrt.inc │ ├── f16c-rsqrt.c.in │ ├── gen │ │ ├── f16-vrsqrt-f16c-rsqrt-u16.c │ │ ├── f16-vrsqrt-f16c-rsqrt-u32.c │ │ ├── f16-vrsqrt-f16c-rsqrt-u8.c │ │ ├── f16-vrsqrt-neonfp16arith-rsqrt-u16.c │ │ ├── f16-vrsqrt-neonfp16arith-rsqrt-u32.c │ │ └── f16-vrsqrt-neonfp16arith-rsqrt-u8.c │ └── neonfp16arith-rsqrt.c.in ├── f16-vsigmoid │ ├── avx2.c.in │ ├── f16-vsigmoid.inc │ ├── gen │ │ ├── f16-vsigmoid-aarch64-neonfp16arith-rr2-p2-div-u16.c │ │ ├── f16-vsigmoid-aarch64-neonfp16arith-rr2-p2-div-u24.c │ │ ├── f16-vsigmoid-aarch64-neonfp16arith-rr2-p2-div-u32.c │ │ ├── f16-vsigmoid-aarch64-neonfp16arith-rr2-p2-div-u8.c │ │ ├── f16-vsigmoid-avx2-rr1-p2-div-u16.c │ │ ├── f16-vsigmoid-avx2-rr1-p2-div-u24.c │ │ ├── f16-vsigmoid-avx2-rr1-p2-div-u32.c │ │ ├── f16-vsigmoid-avx2-rr1-p2-div-u8.c │ │ ├── f16-vsigmoid-avx2-rr1-p2-rcp-u16.c │ │ ├── f16-vsigmoid-avx2-rr1-p2-rcp-u24.c │ │ ├── f16-vsigmoid-avx2-rr1-p2-rcp-u32.c │ │ ├── f16-vsigmoid-avx2-rr1-p2-rcp-u8.c │ │ ├── f16-vsigmoid-neonfp16arith-rr2-p2-nr1fma-u16.c │ │ ├── f16-vsigmoid-neonfp16arith-rr2-p2-nr1fma-u24.c │ │ ├── f16-vsigmoid-neonfp16arith-rr2-p2-nr1fma-u32.c │ │ ├── f16-vsigmoid-neonfp16arith-rr2-p2-nr1fma-u8.c │ │ ├── f16-vsigmoid-neonfp16arith-rr2-p2-nr1recps-u16.c │ │ ├── f16-vsigmoid-neonfp16arith-rr2-p2-nr1recps-u24.c │ │ ├── f16-vsigmoid-neonfp16arith-rr2-p2-nr1recps-u32.c │ │ └── f16-vsigmoid-neonfp16arith-rr2-p2-nr1recps-u8.c │ └── neonfp16arith.c.in ├── f16-vsin │ ├── f16-vsin.inc │ ├── gen │ │ ├── f16-vsin-avx512fp16-rational-3-2-div.c │ │ ├── f16-vsin-neonfp16arith-rational-3-2-div.c │ │ └── f16-vsin-scalar-rational-3-2-div.c │ └── rational-3-2.c.in ├── f16-vsqr │ └── f16-vsqr.inc ├── f16-vsqrt │ ├── avx512fp16-sqrt.c.in │ ├── avx512skx-sqrt.c.in │ ├── f16-vsqrt.inc │ ├── f16c-rsqrt.c.in │ ├── f16c-sqrt.c.in │ ├── fp16arith-sqrt.c.in │ ├── gen │ │ ├── f16-vsqrt-aarch64-neonfp16arith-sqrt-u16.c │ │ ├── f16-vsqrt-aarch64-neonfp16arith-sqrt-u32.c │ │ ├── f16-vsqrt-aarch64-neonfp16arith-sqrt-u8.c │ │ ├── f16-vsqrt-avx512fp16-sqrt-u128.c │ │ ├── f16-vsqrt-avx512fp16-sqrt-u32.c │ │ ├── f16-vsqrt-avx512fp16-sqrt-u64.c │ │ ├── f16-vsqrt-avx512skx-sqrt-u16.c │ │ ├── f16-vsqrt-avx512skx-sqrt-u32.c │ │ ├── f16-vsqrt-avx512skx-sqrt-u64.c │ │ ├── f16-vsqrt-f16c-rsqrt-u16.c │ │ ├── f16-vsqrt-f16c-rsqrt-u32.c │ │ ├── f16-vsqrt-f16c-rsqrt-u8.c │ │ ├── f16-vsqrt-f16c-sqrt-u16.c │ │ ├── f16-vsqrt-f16c-sqrt-u32.c │ │ ├── f16-vsqrt-f16c-sqrt-u8.c │ │ ├── f16-vsqrt-fp16arith-sqrt-u1.c │ │ ├── f16-vsqrt-fp16arith-sqrt-u2.c │ │ ├── f16-vsqrt-fp16arith-sqrt-u4.c │ │ ├── f16-vsqrt-neonfp16arith-nr1fma1adj-u16.c │ │ ├── f16-vsqrt-neonfp16arith-nr1fma1adj-u32.c │ │ └── f16-vsqrt-neonfp16arith-nr1fma1adj-u8.c │ ├── neonfp16arith-nr1fma1adj.c.in │ └── neonfp16arith-sqrt.c.in ├── f16-vtanh │ ├── avx-expm1minus.c.in │ ├── avx-polynomial.c.in │ ├── f16-vtanh.inc │ ├── gen │ │ ├── f16-vtanh-aarch64-neonfp16arith-expm1minus-rr1-p3h2ts-div-u16.c │ │ ├── f16-vtanh-aarch64-neonfp16arith-expm1minus-rr1-p3h2ts-div-u24.c │ │ ├── f16-vtanh-aarch64-neonfp16arith-expm1minus-rr1-p3h2ts-div-u32.c │ │ ├── f16-vtanh-aarch64-neonfp16arith-expm1minus-rr1-p3h2ts-div-u8.c │ │ ├── f16-vtanh-avx2-expm1minus-rr1-p3h2ts-div-u16.c │ │ ├── f16-vtanh-avx2-expm1minus-rr1-p3h2ts-div-u24.c │ │ ├── f16-vtanh-avx2-expm1minus-rr1-p3h2ts-div-u32.c │ │ ├── f16-vtanh-avx2-expm1minus-rr1-p3h2ts-div-u8.c │ │ ├── f16-vtanh-avx2-expm1minus-rr1-p3h2ts-rcp-u16.c │ │ ├── f16-vtanh-avx2-expm1minus-rr1-p3h2ts-rcp-u24.c │ │ ├── f16-vtanh-avx2-expm1minus-rr1-p3h2ts-rcp-u32.c │ │ ├── f16-vtanh-avx2-expm1minus-rr1-p3h2ts-rcp-u8.c │ │ ├── f16-vtanh-f16c-expm1minus-rr1-p3h2ts-div-u16.c │ │ ├── f16-vtanh-f16c-expm1minus-rr1-p3h2ts-div-u24.c │ │ ├── f16-vtanh-f16c-expm1minus-rr1-p3h2ts-div-u32.c │ │ ├── f16-vtanh-f16c-expm1minus-rr1-p3h2ts-div-u8.c │ │ ├── f16-vtanh-f16c-expm1minus-rr1-p3h2ts-rcp-u16.c │ │ ├── f16-vtanh-f16c-expm1minus-rr1-p3h2ts-rcp-u24.c │ │ ├── f16-vtanh-f16c-expm1minus-rr1-p3h2ts-rcp-u32.c │ │ ├── f16-vtanh-f16c-expm1minus-rr1-p3h2ts-rcp-u8.c │ │ ├── f16-vtanh-f16c-polynomial-p19h9t2-u16.c │ │ ├── f16-vtanh-f16c-polynomial-p19h9t2-u24.c │ │ ├── f16-vtanh-f16c-polynomial-p19h9t2-u32.c │ │ ├── f16-vtanh-f16c-polynomial-p19h9t2-u8.c │ │ ├── f16-vtanh-fma3-expm1minus-rr1-p3h2ts-div-u16.c │ │ ├── f16-vtanh-fma3-expm1minus-rr1-p3h2ts-div-u24.c │ │ ├── f16-vtanh-fma3-expm1minus-rr1-p3h2ts-div-u32.c │ │ ├── f16-vtanh-fma3-expm1minus-rr1-p3h2ts-div-u8.c │ │ ├── f16-vtanh-fma3-expm1minus-rr1-p3h2ts-rcp-u16.c │ │ ├── f16-vtanh-fma3-expm1minus-rr1-p3h2ts-rcp-u24.c │ │ ├── f16-vtanh-fma3-expm1minus-rr1-p3h2ts-rcp-u32.c │ │ ├── f16-vtanh-fma3-expm1minus-rr1-p3h2ts-rcp-u8.c │ │ ├── f16-vtanh-fma3-polynomial-p19h9t2-u16.c │ │ ├── f16-vtanh-fma3-polynomial-p19h9t2-u24.c │ │ ├── f16-vtanh-fma3-polynomial-p19h9t2-u32.c │ │ ├── f16-vtanh-fma3-polynomial-p19h9t2-u8.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-nr1fma-u16.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-nr1fma-u24.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-nr1fma-u32.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-nr1fma-u8.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-nr1recps-u16.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-nr1recps-u24.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-nr1recps-u32.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-nr1recps-u8.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-recpeadj-u16.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-recpeadj-u24.c │ │ ├── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-recpeadj-u32.c │ │ └── f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-recpeadj-u8.c │ └── neonfp16arith-expm1minus.c.in ├── f16-vunary │ ├── f16c.c.in │ ├── gen │ │ ├── f16-vabs-neonfp16arith-u16.c │ │ ├── f16-vabs-neonfp16arith-u8.c │ │ ├── f16-vabs-sse2-u16.c │ │ ├── f16-vabs-sse2-u8.c │ │ ├── f16-vneg-neonfp16arith-u16.c │ │ ├── f16-vneg-neonfp16arith-u8.c │ │ ├── f16-vneg-sse2-u16.c │ │ ├── f16-vneg-sse2-u8.c │ │ ├── f16-vsqr-f16c-u16.c │ │ ├── f16-vsqr-f16c-u8.c │ │ ├── f16-vsqr-neonfp16arith-u16.c │ │ └── f16-vsqr-neonfp16arith-u8.c │ ├── neonfp16arith.c.in │ └── sse2.c.in ├── f32-argmaxpool │ ├── f32-argmaxpool-9p8x-neon-c4.c │ ├── f32-argmaxpool-9p8x-rvv-u1v.c │ ├── f32-argmaxpool-9p8x-scalar-c1.c │ ├── f32-argmaxpool-9p8x-sse2-c4.c │ └── f32-argmaxpool-9p8x-wasmsimd-c4.c ├── f32-avgpool │ ├── avgpool.c.in │ ├── f32-avgpool-minmax.inc │ └── gen │ │ ├── f32-avgpool-9p-minmax-avx-u8.c │ │ ├── f32-avgpool-9p-minmax-avx512f-u16.c │ │ ├── f32-avgpool-9p-minmax-hvx-u32.c │ │ ├── f32-avgpool-9p-minmax-neon-u4.c │ │ ├── f32-avgpool-9p-minmax-scalar-u1.c │ │ ├── f32-avgpool-9p-minmax-sse2-u4.c │ │ └── f32-avgpool-9p-minmax-wasmsimd-u4.c ├── f32-conv-hwc │ ├── 3x3s2p0p1c3-neon-x1.c.in │ ├── 3x3s2p0p1c3-neon-x2.c.in │ ├── 3x3s2p1c3-neon-x1.c.in │ ├── 3x3s2p1c3-neon-x2.c.in │ ├── f32-conv-hwc-3x3s2p0p1c3x4-scalar-1x1.c │ ├── f32-conv-hwc-3x3s2p1c3x4-scalar-1x1.c │ ├── f32-conv-hwc.inc │ └── gen │ │ ├── f32-conv-hwc-3x3s2p0p1c3x4-aarch64-neonfma-2x1.c │ │ ├── f32-conv-hwc-3x3s2p0p1c3x4-aarch64-neonfma-2x2.c │ │ ├── f32-conv-hwc-3x3s2p0p1c3x4-neon-2x1.c │ │ ├── f32-conv-hwc-3x3s2p0p1c3x4-neon-2x2.c │ │ ├── f32-conv-hwc-3x3s2p0p1c3x8-aarch64-neonfma-2x1.c │ │ ├── f32-conv-hwc-3x3s2p0p1c3x8-aarch64-neonfma-2x2.c │ │ ├── f32-conv-hwc-3x3s2p0p1c3x8-neon-2x1.c │ │ ├── f32-conv-hwc-3x3s2p0p1c3x8-neon-2x2.c │ │ ├── f32-conv-hwc-3x3s2p1c3x4-aarch64-neonfma-2x1.c │ │ ├── f32-conv-hwc-3x3s2p1c3x4-aarch64-neonfma-2x2.c │ │ ├── f32-conv-hwc-3x3s2p1c3x4-neon-2x1.c │ │ ├── f32-conv-hwc-3x3s2p1c3x4-neon-2x2.c │ │ ├── f32-conv-hwc-3x3s2p1c3x8-aarch64-neonfma-2x1.c │ │ ├── f32-conv-hwc-3x3s2p1c3x8-aarch64-neonfma-2x2.c │ │ ├── f32-conv-hwc-3x3s2p1c3x8-neon-2x1.c │ │ └── f32-conv-hwc-3x3s2p1c3x8-neon-2x2.c ├── f32-conv-hwc2chw │ ├── f32-conv-hwc2chw-3x3s2p1c3x2v-rvv-1x1.c │ ├── f32-conv-hwc2chw-3x3s2p1c3x2v-rvv-2x1.c │ ├── f32-conv-hwc2chw-3x3s2p1c3x2v-rvv-2x2.c │ ├── f32-conv-hwc2chw-3x3s2p1c3x4-aarch64-neonfma-2x2.c │ ├── f32-conv-hwc2chw-3x3s2p1c3x4-neon-2x2.c │ ├── f32-conv-hwc2chw-3x3s2p1c3x4-scalar-1x1.c │ ├── f32-conv-hwc2chw-3x3s2p1c3x4-sse-1x1.c │ ├── f32-conv-hwc2chw-3x3s2p1c3x4-sse-2x2.c │ └── f32-conv-hwc2chw-3x3s2p1c3x4-wasmsimd-2x2.c ├── f32-dwconv │ ├── f32-dwconv-9p4c-minmax-asm-aarch64-neonfma-cortex-a55.S │ ├── f32-dwconv-9p4c-minmax-asm-aarch64-neonfma.S │ ├── f32-dwconv-minmax.inc │ ├── f32-dwconv.inc │ ├── gen │ │ ├── f32-dwconv-25p16c-minmax-avx-acc2.c │ │ ├── f32-dwconv-25p16c-minmax-avx.c │ │ ├── f32-dwconv-25p16c-minmax-avx512f-acc2.c │ │ ├── f32-dwconv-25p16c-minmax-avx512f.c │ │ ├── f32-dwconv-25p16c-minmax-fma3-acc2.c │ │ ├── f32-dwconv-25p16c-minmax-fma3.c │ │ ├── f32-dwconv-25p16c-minmax-neon-acc2.c │ │ ├── f32-dwconv-25p16c-minmax-neon.c │ │ ├── f32-dwconv-25p16c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-25p16c-minmax-neonfma.c │ │ ├── f32-dwconv-25p1c-minmax-scalar-acc2.c │ │ ├── f32-dwconv-25p1c-minmax-scalar.c │ │ ├── f32-dwconv-25p1c-scalar-acc2.c │ │ ├── f32-dwconv-25p1c-scalar.c │ │ ├── f32-dwconv-25p2c-minmax-scalar-acc2.c │ │ ├── f32-dwconv-25p2c-minmax-scalar.c │ │ ├── f32-dwconv-25p2c-scalar-acc2.c │ │ ├── f32-dwconv-25p2c-scalar.c │ │ ├── f32-dwconv-25p32c-minmax-avx512f-acc2.c │ │ ├── f32-dwconv-25p32c-minmax-avx512f.c │ │ ├── f32-dwconv-25p32c-minmax-hvx-acc2.c │ │ ├── f32-dwconv-25p32c-minmax-hvx.c │ │ ├── f32-dwconv-25p4c-minmax-neon-acc2.c │ │ ├── f32-dwconv-25p4c-minmax-neon.c │ │ ├── f32-dwconv-25p4c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-25p4c-minmax-neonfma.c │ │ ├── f32-dwconv-25p4c-minmax-sse-acc2.c │ │ ├── f32-dwconv-25p4c-minmax-sse.c │ │ ├── f32-dwconv-25p4c-minmax-wasmrelaxedsimd-acc2.c │ │ ├── f32-dwconv-25p4c-minmax-wasmrelaxedsimd-fma-acc2.c │ │ ├── f32-dwconv-25p4c-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-25p4c-minmax-wasmrelaxedsimd.c │ │ ├── f32-dwconv-25p4c-minmax-wasmsimd-arm-acc2.c │ │ ├── f32-dwconv-25p4c-minmax-wasmsimd-arm.c │ │ ├── f32-dwconv-25p4c-minmax-wasmsimd-x86-acc2.c │ │ ├── f32-dwconv-25p4c-minmax-wasmsimd-x86.c │ │ ├── f32-dwconv-25p4c-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-25p4c-wasmsimd.c │ │ ├── f32-dwconv-25p64c-minmax-hvx-acc2.c │ │ ├── f32-dwconv-25p64c-minmax-hvx.c │ │ ├── f32-dwconv-25p8c-minmax-avx-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-avx.c │ │ ├── f32-dwconv-25p8c-minmax-fma3-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-fma3.c │ │ ├── f32-dwconv-25p8c-minmax-neon-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-neon.c │ │ ├── f32-dwconv-25p8c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-neonfma.c │ │ ├── f32-dwconv-25p8c-minmax-sse-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-sse.c │ │ ├── f32-dwconv-25p8c-minmax-wasmrelaxedsimd-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-wasmrelaxedsimd-fma-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-25p8c-minmax-wasmrelaxedsimd.c │ │ ├── f32-dwconv-25p8c-minmax-wasmsimd-arm-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-wasmsimd-arm.c │ │ ├── f32-dwconv-25p8c-minmax-wasmsimd-x86-acc2.c │ │ ├── f32-dwconv-25p8c-minmax-wasmsimd-x86.c │ │ ├── f32-dwconv-25p8c-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-25p8c-wasmsimd.c │ │ ├── f32-dwconv-25p8vc-minmax-rvv.c │ │ ├── f32-dwconv-25p8vc-rvv.c │ │ ├── f32-dwconv-3p16c-minmax-avx-acc2.c │ │ ├── f32-dwconv-3p16c-minmax-avx.c │ │ ├── f32-dwconv-3p16c-minmax-avx512f-acc2.c │ │ ├── f32-dwconv-3p16c-minmax-avx512f.c │ │ ├── f32-dwconv-3p16c-minmax-fma3-acc2.c │ │ ├── f32-dwconv-3p16c-minmax-fma3.c │ │ ├── f32-dwconv-3p16c-minmax-neon-acc2.c │ │ ├── f32-dwconv-3p16c-minmax-neon.c │ │ ├── f32-dwconv-3p16c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-3p16c-minmax-neonfma.c │ │ ├── f32-dwconv-3p1c-minmax-scalar-acc2.c │ │ ├── f32-dwconv-3p1c-minmax-scalar.c │ │ ├── f32-dwconv-3p1c-scalar-acc2.c │ │ ├── f32-dwconv-3p1c-scalar.c │ │ ├── f32-dwconv-3p2c-minmax-scalar-acc2.c │ │ ├── f32-dwconv-3p2c-minmax-scalar.c │ │ ├── f32-dwconv-3p2c-scalar-acc2.c │ │ ├── f32-dwconv-3p2c-scalar.c │ │ ├── f32-dwconv-3p32c-minmax-avx512f-acc2.c │ │ ├── f32-dwconv-3p32c-minmax-avx512f.c │ │ ├── f32-dwconv-3p32c-minmax-hvx-acc2.c │ │ ├── f32-dwconv-3p32c-minmax-hvx.c │ │ ├── f32-dwconv-3p4c-minmax-neon-acc2.c │ │ ├── f32-dwconv-3p4c-minmax-neon.c │ │ ├── f32-dwconv-3p4c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-3p4c-minmax-neonfma.c │ │ ├── f32-dwconv-3p4c-minmax-sse-acc2.c │ │ ├── f32-dwconv-3p4c-minmax-sse.c │ │ ├── f32-dwconv-3p4c-minmax-wasmrelaxedsimd-acc2.c │ │ ├── f32-dwconv-3p4c-minmax-wasmrelaxedsimd-fma-acc2.c │ │ ├── f32-dwconv-3p4c-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-3p4c-minmax-wasmrelaxedsimd.c │ │ ├── f32-dwconv-3p4c-minmax-wasmsimd-arm-acc2.c │ │ ├── f32-dwconv-3p4c-minmax-wasmsimd-arm.c │ │ ├── f32-dwconv-3p4c-minmax-wasmsimd-x86-acc2.c │ │ ├── f32-dwconv-3p4c-minmax-wasmsimd-x86.c │ │ ├── f32-dwconv-3p4c-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-3p4c-wasmsimd.c │ │ ├── f32-dwconv-3p64c-minmax-hvx-acc2.c │ │ ├── f32-dwconv-3p64c-minmax-hvx.c │ │ ├── f32-dwconv-3p8c-minmax-avx-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-avx.c │ │ ├── f32-dwconv-3p8c-minmax-fma3-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-fma3.c │ │ ├── f32-dwconv-3p8c-minmax-neon-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-neon.c │ │ ├── f32-dwconv-3p8c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-neonfma.c │ │ ├── f32-dwconv-3p8c-minmax-sse-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-sse.c │ │ ├── f32-dwconv-3p8c-minmax-wasmrelaxedsimd-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-wasmrelaxedsimd-fma-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-3p8c-minmax-wasmrelaxedsimd.c │ │ ├── f32-dwconv-3p8c-minmax-wasmsimd-arm-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-wasmsimd-arm.c │ │ ├── f32-dwconv-3p8c-minmax-wasmsimd-x86-acc2.c │ │ ├── f32-dwconv-3p8c-minmax-wasmsimd-x86.c │ │ ├── f32-dwconv-3p8c-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-3p8c-wasmsimd.c │ │ ├── f32-dwconv-3p8vc-minmax-rvv.c │ │ ├── f32-dwconv-3p8vc-rvv.c │ │ ├── f32-dwconv-4p16c-minmax-avx-acc2.c │ │ ├── f32-dwconv-4p16c-minmax-avx.c │ │ ├── f32-dwconv-4p16c-minmax-avx512f-acc2.c │ │ ├── f32-dwconv-4p16c-minmax-avx512f.c │ │ ├── f32-dwconv-4p16c-minmax-fma3-acc2.c │ │ ├── f32-dwconv-4p16c-minmax-fma3.c │ │ ├── f32-dwconv-4p16c-minmax-neon-acc2.c │ │ ├── f32-dwconv-4p16c-minmax-neon.c │ │ ├── f32-dwconv-4p16c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-4p16c-minmax-neonfma.c │ │ ├── f32-dwconv-4p1c-minmax-scalar-acc2.c │ │ ├── f32-dwconv-4p1c-minmax-scalar.c │ │ ├── f32-dwconv-4p1c-scalar-acc2.c │ │ ├── f32-dwconv-4p1c-scalar.c │ │ ├── f32-dwconv-4p2c-minmax-scalar-acc2.c │ │ ├── f32-dwconv-4p2c-minmax-scalar.c │ │ ├── f32-dwconv-4p2c-scalar-acc2.c │ │ ├── f32-dwconv-4p2c-scalar.c │ │ ├── f32-dwconv-4p32c-minmax-avx512f-acc2.c │ │ ├── f32-dwconv-4p32c-minmax-avx512f.c │ │ ├── f32-dwconv-4p32c-minmax-hvx-acc2.c │ │ ├── f32-dwconv-4p32c-minmax-hvx.c │ │ ├── f32-dwconv-4p4c-minmax-neon-acc2.c │ │ ├── f32-dwconv-4p4c-minmax-neon.c │ │ ├── f32-dwconv-4p4c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-4p4c-minmax-neonfma.c │ │ ├── f32-dwconv-4p4c-minmax-sse-acc2.c │ │ ├── f32-dwconv-4p4c-minmax-sse.c │ │ ├── f32-dwconv-4p4c-minmax-wasmrelaxedsimd-acc2.c │ │ ├── f32-dwconv-4p4c-minmax-wasmrelaxedsimd-fma-acc2.c │ │ ├── f32-dwconv-4p4c-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-4p4c-minmax-wasmrelaxedsimd.c │ │ ├── f32-dwconv-4p4c-minmax-wasmsimd-arm-acc2.c │ │ ├── f32-dwconv-4p4c-minmax-wasmsimd-arm.c │ │ ├── f32-dwconv-4p4c-minmax-wasmsimd-x86-acc2.c │ │ ├── f32-dwconv-4p4c-minmax-wasmsimd-x86.c │ │ ├── f32-dwconv-4p4c-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-4p4c-wasmsimd.c │ │ ├── f32-dwconv-4p64c-minmax-hvx-acc2.c │ │ ├── f32-dwconv-4p64c-minmax-hvx.c │ │ ├── f32-dwconv-4p8c-minmax-avx-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-avx.c │ │ ├── f32-dwconv-4p8c-minmax-fma3-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-fma3.c │ │ ├── f32-dwconv-4p8c-minmax-neon-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-neon.c │ │ ├── f32-dwconv-4p8c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-neonfma.c │ │ ├── f32-dwconv-4p8c-minmax-sse-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-sse.c │ │ ├── f32-dwconv-4p8c-minmax-wasmrelaxedsimd-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-wasmrelaxedsimd-fma-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-4p8c-minmax-wasmrelaxedsimd.c │ │ ├── f32-dwconv-4p8c-minmax-wasmsimd-arm-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-wasmsimd-arm.c │ │ ├── f32-dwconv-4p8c-minmax-wasmsimd-x86-acc2.c │ │ ├── f32-dwconv-4p8c-minmax-wasmsimd-x86.c │ │ ├── f32-dwconv-4p8c-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-4p8c-wasmsimd.c │ │ ├── f32-dwconv-4p8vc-minmax-rvv.c │ │ ├── f32-dwconv-4p8vc-rvv.c │ │ ├── f32-dwconv-9p16c-minmax-avx-acc2.c │ │ ├── f32-dwconv-9p16c-minmax-avx.c │ │ ├── f32-dwconv-9p16c-minmax-avx512f-acc2.c │ │ ├── f32-dwconv-9p16c-minmax-avx512f.c │ │ ├── f32-dwconv-9p16c-minmax-fma3-acc2.c │ │ ├── f32-dwconv-9p16c-minmax-fma3.c │ │ ├── f32-dwconv-9p16c-minmax-neon-acc2.c │ │ ├── f32-dwconv-9p16c-minmax-neon.c │ │ ├── f32-dwconv-9p16c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-9p16c-minmax-neonfma.c │ │ ├── f32-dwconv-9p1c-minmax-scalar-acc2.c │ │ ├── f32-dwconv-9p1c-minmax-scalar.c │ │ ├── f32-dwconv-9p1c-scalar-acc2.c │ │ ├── f32-dwconv-9p1c-scalar.c │ │ ├── f32-dwconv-9p2c-minmax-scalar-acc2.c │ │ ├── f32-dwconv-9p2c-minmax-scalar.c │ │ ├── f32-dwconv-9p2c-scalar-acc2.c │ │ ├── f32-dwconv-9p2c-scalar.c │ │ ├── f32-dwconv-9p32c-minmax-avx512f-acc2.c │ │ ├── f32-dwconv-9p32c-minmax-avx512f.c │ │ ├── f32-dwconv-9p32c-minmax-hvx-acc2.c │ │ ├── f32-dwconv-9p32c-minmax-hvx.c │ │ ├── f32-dwconv-9p4c-minmax-neon-acc2.c │ │ ├── f32-dwconv-9p4c-minmax-neon.c │ │ ├── f32-dwconv-9p4c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-9p4c-minmax-neonfma.c │ │ ├── f32-dwconv-9p4c-minmax-sse-acc2.c │ │ ├── f32-dwconv-9p4c-minmax-sse.c │ │ ├── f32-dwconv-9p4c-minmax-wasmrelaxedsimd-acc2.c │ │ ├── f32-dwconv-9p4c-minmax-wasmrelaxedsimd-fma-acc2.c │ │ ├── f32-dwconv-9p4c-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-9p4c-minmax-wasmrelaxedsimd.c │ │ ├── f32-dwconv-9p4c-minmax-wasmsimd-arm-acc2.c │ │ ├── f32-dwconv-9p4c-minmax-wasmsimd-arm.c │ │ ├── f32-dwconv-9p4c-minmax-wasmsimd-x86-acc2.c │ │ ├── f32-dwconv-9p4c-minmax-wasmsimd-x86.c │ │ ├── f32-dwconv-9p4c-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-9p4c-wasmsimd-acc2.c │ │ ├── f32-dwconv-9p4c-wasmsimd.c │ │ ├── f32-dwconv-9p64c-minmax-hvx-acc2.c │ │ ├── f32-dwconv-9p64c-minmax-hvx.c │ │ ├── f32-dwconv-9p8c-minmax-avx-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-avx.c │ │ ├── f32-dwconv-9p8c-minmax-fma3-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-fma3.c │ │ ├── f32-dwconv-9p8c-minmax-neon-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-neon.c │ │ ├── f32-dwconv-9p8c-minmax-neonfma-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-neonfma.c │ │ ├── f32-dwconv-9p8c-minmax-sse-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-sse.c │ │ ├── f32-dwconv-9p8c-minmax-wasmrelaxedsimd-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-wasmrelaxedsimd-fma-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-9p8c-minmax-wasmrelaxedsimd.c │ │ ├── f32-dwconv-9p8c-minmax-wasmsimd-arm-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-wasmsimd-arm.c │ │ ├── f32-dwconv-9p8c-minmax-wasmsimd-x86-acc2.c │ │ ├── f32-dwconv-9p8c-minmax-wasmsimd-x86.c │ │ ├── f32-dwconv-9p8c-wasmrelaxedsimd-fma.c │ │ ├── f32-dwconv-9p8c-wasmsimd-acc2.c │ │ ├── f32-dwconv-9p8c-wasmsimd.c │ │ ├── f32-dwconv-9p8vc-minmax-rvv.c │ │ └── f32-dwconv-9p8vc-rvv.c │ ├── simd.c.in │ ├── unipass-avx.c.in │ ├── unipass-avx512.c.in │ ├── unipass-neon.c.in │ ├── unipass-rvv.c.in │ ├── unipass-scalar.c.in │ ├── unipass-sse.c.in │ └── unipass-wasmsimd.c.in ├── f32-dwconv2d-chw │ ├── 3x3p1-neon.c.in │ ├── 3x3p1-rvv.c.in │ ├── 3x3p1-scalar.c.in │ ├── 3x3p1-sse.c.in │ ├── 3x3p1-ssse3.c.in │ ├── 3x3p1-wasmsimd-loadsplat.c.in │ ├── 3x3p1-wasmsimd-splat.c.in │ ├── 3x3s2p1-neon.c.in │ ├── 3x3s2p1-rvv.c.in │ ├── 3x3s2p1-scalar.c.in │ ├── 3x3s2p1-sse.c.in │ ├── 3x3s2p1-wasmsimd-loadsplat.c.in │ ├── 3x3s2p1-wasmsimd-splat.c.in │ ├── 5x5p2-neon.c.in │ ├── 5x5p2-scalar.c.in │ ├── 5x5p2-sse.c.in │ ├── 5x5p2-wasmsimd-loadsplat.c.in │ ├── 5x5p2-wasmsimd-splat.c.in │ ├── 5x5s2p2-neon.c.in │ ├── 5x5s2p2-scalar.c.in │ ├── 5x5s2p2-sse.c.in │ ├── 5x5s2p2-wasmsimd-loadsplat.c.in │ ├── 5x5s2p2-wasmsimd-splat.c.in │ └── gen │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-1x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-2x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-3x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-4x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-5x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-aarch64-neonfma-6x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-1x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-2x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-3x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-4x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-5x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-neon-6x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-rvv-1x2v.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-rvv-2x2v.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-rvv-3x2v.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-rvv-4x2v.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-rvv-5x1v.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-rvv-6x1v.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-rvv-7x1v.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-rvv-8x1v.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-1x1-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-1x1-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-1x1-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-1x1.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-2x1-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-2x1.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-3x1.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-4x1.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-5x1.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-scalar-6x1.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-1x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-2x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-3x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-4x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-5x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-sse-6x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-1x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-2x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-3x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-4x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-5x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-ssse3-6x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-1x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-2x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-3x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-4x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-1x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-2x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-3x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-4x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-5x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-arm-splat-6x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-1x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-2x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-3x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-4x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-5x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-1x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-2x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-3x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-4x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-5x4.c │ │ ├── f32-dwconv2d-chw-3x3p1-minmax-wasmsimd-x86-splat-6x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-aarch64-neonfma-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-aarch64-neonfma-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-aarch64-neonfma-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-aarch64-neonfma-1x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-aarch64-neonfma-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-aarch64-neonfma-2x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-aarch64-neonfma-3x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-aarch64-neonfma-4x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-neon-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-neon-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-neon-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-neon-1x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-neon-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-neon-2x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-neon-3x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-neon-4x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-rvv-1x2v.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-rvv-2x2v.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-rvv-3x2v.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-rvv-4x2v.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-rvv-5x1v.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-rvv-6x1v.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-rvv-7x1v.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-rvv-8x1v.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-scalar-1x1-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-scalar-1x1-acc3.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-scalar-1x1-acc4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-scalar-1x1.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-scalar-2x1-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-scalar-2x1.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-scalar-3x1.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-scalar-4x1.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-sse-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-sse-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-sse-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-sse-1x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-sse-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-sse-2x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-sse-3x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-sse-4x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-loadsplat-2x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-splat-1x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-splat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-splat-2x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-splat-1x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-splat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-splat-2x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c │ │ ├── f32-dwconv2d-chw-3x3s2p1-minmax-wasmsimd-x86-splat-4x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-1x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-2x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-3x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-4x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-4x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-aarch64-neonfma-5x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-1x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-2x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-3x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-4x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-4x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-neon-5x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-1x1-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-1x1-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-1x1-acc4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-1x1-acc5.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-1x1.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-2x1-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-2x1-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-2x1.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-3x1-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-scalar-3x1.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-1x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-2x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-3x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-4x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-4x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-sse-5x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-1x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-2x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-3x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-4x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-1x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-2x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-3x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-4x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-4x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-arm-splat-5x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-1x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-2x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-3x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-4x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-1x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-2x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-3x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-4x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-4x4.c │ │ ├── f32-dwconv2d-chw-5x5p2-minmax-wasmsimd-x86-splat-5x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-1x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-2x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-aarch64-neonfma-3x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-1x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-2x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-neon-3x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-1x1-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-1x1-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-1x1-acc4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-1x1-acc5.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-1x1.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-2x1-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-2x1-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-2x1.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-3x1-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-scalar-3x1.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-1x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-2x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-sse-3x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-1x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-2x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-arm-splat-3x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc5.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-1x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-2x4-acc2.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-2x4-acc3.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-2x4.c │ │ ├── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-3x4-acc2.c │ │ └── f32-dwconv2d-chw-5x5s2p2-minmax-wasmsimd-x86-splat-3x4.c ├── f32-f16-vcvt │ ├── avx512skx.c.in │ ├── f16c.c.in │ ├── f32-f16-vcvt.inc │ ├── gen │ │ ├── f32-f16-vcvt-avx-u16.c │ │ ├── f32-f16-vcvt-avx-u24.c │ │ ├── f32-f16-vcvt-avx-u32.c │ │ ├── f32-f16-vcvt-avx-u8.c │ │ ├── f32-f16-vcvt-avx512skx-u16.c │ │ ├── f32-f16-vcvt-avx512skx-u32.c │ │ ├── f32-f16-vcvt-f16c-u16.c │ │ ├── f32-f16-vcvt-f16c-u8.c │ │ ├── f32-f16-vcvt-neon-u16.c │ │ ├── f32-f16-vcvt-neon-u24.c │ │ ├── f32-f16-vcvt-neon-u32.c │ │ ├── f32-f16-vcvt-neon-u8.c │ │ ├── f32-f16-vcvt-neonfp16-u16.c │ │ ├── f32-f16-vcvt-neonfp16-u8.c │ │ ├── f32-f16-vcvt-rvvfp16arith-u1v.c │ │ ├── f32-f16-vcvt-rvvfp16arith-u2v.c │ │ ├── f32-f16-vcvt-rvvfp16arith-u4v.c │ │ ├── f32-f16-vcvt-rvvfp16arith-u8v.c │ │ ├── f32-f16-vcvt-scalar-bitcast-u1.c │ │ ├── f32-f16-vcvt-scalar-bitcast-u2.c │ │ ├── f32-f16-vcvt-scalar-bitcast-u3.c │ │ ├── f32-f16-vcvt-scalar-bitcast-u4.c │ │ ├── f32-f16-vcvt-scalar-fabsf-u1.c │ │ ├── f32-f16-vcvt-scalar-fabsf-u2.c │ │ ├── f32-f16-vcvt-scalar-fabsf-u3.c │ │ ├── f32-f16-vcvt-scalar-fabsf-u4.c │ │ ├── f32-f16-vcvt-sse2-u16.c │ │ ├── f32-f16-vcvt-sse2-u24.c │ │ ├── f32-f16-vcvt-sse2-u32.c │ │ ├── f32-f16-vcvt-sse2-u8.c │ │ ├── f32-f16-vcvt-sse41-u16.c │ │ ├── f32-f16-vcvt-sse41-u24.c │ │ ├── f32-f16-vcvt-sse41-u32.c │ │ ├── f32-f16-vcvt-sse41-u8.c │ │ ├── f32-f16-vcvt-wasmrelaxedsimd-u16.c │ │ ├── f32-f16-vcvt-wasmrelaxedsimd-u24.c │ │ ├── f32-f16-vcvt-wasmrelaxedsimd-u32.c │ │ ├── f32-f16-vcvt-wasmrelaxedsimd-u8.c │ │ ├── f32-f16-vcvt-wasmsimd-u16.c │ │ ├── f32-f16-vcvt-wasmsimd-u24.c │ │ ├── f32-f16-vcvt-wasmsimd-u32.c │ │ └── f32-f16-vcvt-wasmsimd-u8.c │ ├── neon.c.in │ ├── neonfp16.c.in │ ├── rvvfp16arith.c.in │ ├── scalar-bitcast.c.in │ ├── scalar-fabsf.c.in │ ├── sse.c.in │ └── wasmsimd.c.in ├── f32-gemm │ ├── 1x12-aarch64-neonfma-cortex-a53.S.in │ ├── 1x8-aarch32-neon-cortex-a53.S.in │ ├── 1x8-aarch64-neon-ld128-acc2.S.in │ ├── 1x8-aarch64-neonfma-cortex-a53.S.in │ ├── 1x8-aarch64-neonfma-cortex-a75.S.in │ ├── 1x8-aarch64-neonfma-ld128-acc2.S.in │ ├── 1x8-aarch64-neonfma-ld128-acc4.S.in │ ├── 1x8-aarch64-neonfma-ld128.S.in │ ├── 1x8-aarch64-neonfma-ld64-acc2.S.in │ ├── 1x8-aarch64-neonfma-ld64-acc4.S.in │ ├── 1x8-aarch64-neonfma-ld64.S.in │ ├── 4x1-aarch64-neonfma-ld128.S.in │ ├── 4x1-aarch64-neonfma-ld64.S.in │ ├── 4x12-aarch64-neonfma-cortex-a53.S.in │ ├── 4x2-aarch64-neonfma-cortex-a75.S.in │ ├── 4x2-aarch64-neonfma-ld128.S.in │ ├── 4x2-aarch64-neonfma-ld64.S.in │ ├── 4x4-aarch32-vfp-ld64.S.in │ ├── 4x4-linear-aarch32-vfp-ld64.S.in │ ├── 4x8-aarch32-neon-cortex-a53.S.in │ ├── 4x8-aarch32-neon-cortex-a55.S.in │ ├── 4x8-aarch32-neon-cortex-a7.S.in │ ├── 4x8-aarch32-neon-cortex-a75.S.in │ ├── 4x8-aarch32-neon-ld64.S.in │ ├── 4x8-aarch64-neonfma-cortex-a53.S.in │ ├── 4x8-aarch64-neonfma-cortex-a55.S.in │ ├── 4x8-aarch64-neonfma-cortex-a75.S.in │ ├── 4x8-aarch64-neonfma-ld128.S.in │ ├── 4x8-aarch64-neonfma-ld64.S.in │ ├── 5x8-aarch64-neonfma-cortex-a75.S.in │ ├── 6x8-aarch64-neonfma-cortex-a53.S.in │ ├── 6x8-aarch64-neonfma-cortex-a55.S.in │ ├── 6x8-aarch64-neonfma-cortex-a73.S.in │ ├── 6x8-aarch64-neonfma-cortex-a75.S.in │ ├── 6x8-aarch64-neonfma-ld128.S.in │ ├── 6x8-aarch64-neonfma-ld64.S.in │ ├── MRx2-neon-ld64.c.in │ ├── MRx2c4-sse.c.in │ ├── MRx2c4-wasmsimd.c.in │ ├── MRxNRv-rvv.c.in │ ├── avx-broadcast.c.in │ ├── avx-shuffle4.c.in │ ├── avx512-broadcast.c.in │ ├── gen │ │ ├── f32-gemm-10x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-10x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-10x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-10x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-10x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-10x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-10x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-10x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-10x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-10x8-minmax-fma3-broadcast.c │ │ ├── f32-gemm-11x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-11x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-11x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-11x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-11x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-11x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-12x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-12x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-12x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-13x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-13x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-13x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-14x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-14x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-14x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-15x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-15x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-15x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-16x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-16x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-16x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-1x12-minmax-asm-aarch64-neonfma-cortex-a53.S │ │ ├── f32-gemm-1x128-minmax-hvx-broadcast.c │ │ ├── f32-gemm-1x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-1x16-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-1x16-minmax-asm-aarch64-neonfma-ld32.S │ │ ├── f32-gemm-1x16-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-1x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-1x16-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-1x16-minmax-avx-broadcast.c │ │ ├── f32-gemm-1x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-1x16-minmax-fma3-broadcast.c │ │ ├── f32-gemm-1x16-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-1x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-1x16s4-minmax-fma3-broadcast.c │ │ ├── f32-gemm-1x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-1x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-1x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-1x32c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-1x4-minmax-scalar.c │ │ ├── f32-gemm-1x4-relu-scalar.c │ │ ├── f32-gemm-1x4-scalar.c │ │ ├── f32-gemm-1x4v-minmax-rvv.c │ │ ├── f32-gemm-1x4v-relu-rvv.c │ │ ├── f32-gemm-1x4v-rvv.c │ │ ├── f32-gemm-1x64-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-1x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-1x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-1x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-1x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-gemm-1x8-minmax-asm-aarch32-neon-cortex-a53-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch32-neon-cortex-a53.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a53-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a53.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-2.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld32-2.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-2.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc2-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc2.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc4-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc4.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-prfm.S │ │ ├── f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-1x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-1x8-minmax-avx-broadcast.c │ │ ├── f32-gemm-1x8-minmax-fma3-broadcast.c │ │ ├── f32-gemm-1x8-minmax-neon-dup-ld64.c │ │ ├── f32-gemm-1x8-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-1x8-minmax-neon-lane-ld64.c │ │ ├── f32-gemm-1x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-gemm-1x8-minmax-sse-dup.c │ │ ├── f32-gemm-1x8-minmax-sse-load1.c │ │ ├── f32-gemm-1x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-1x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-1x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-gemm-1x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-gemm-1x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-gemm-1x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-gemm-1x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-gemm-1x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-gemm-1x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-1x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-1x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-gemm-1x8-relu-wasmsimd-splat.c │ │ ├── f32-gemm-1x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-1x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-1x8-wasmsimd-loadsplat.c │ │ ├── f32-gemm-1x8-wasmsimd-splat.c │ │ ├── f32-gemm-1x8s4-minmax-neon.c │ │ ├── f32-gemm-1x8s4-minmax-neonfma.c │ │ ├── f32-gemm-1x8s4-minmax-sse.c │ │ ├── f32-gemm-1x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-1x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-gemm-1x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-gemm-1x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-gemm-1x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-1x8s4-relu-wasmsimd.c │ │ ├── f32-gemm-1x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-1x8s4-wasmsimd.c │ │ ├── f32-gemm-2x128-minmax-hvx-broadcast.c │ │ ├── f32-gemm-2x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-2x16-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-2x16-minmax-asm-aarch64-neonfma-ld32.S │ │ ├── f32-gemm-2x16-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-2x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-2x16-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-2x16-minmax-fma3-broadcast.c │ │ ├── f32-gemm-2x16-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-2x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-2x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-2x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-2x32c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-2x4-minmax-scalar.c │ │ ├── f32-gemm-2x4-relu-scalar.c │ │ ├── f32-gemm-2x4-scalar.c │ │ ├── f32-gemm-2x64-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-2x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-2x8-minmax-asm-aarch64-neonfma-ld128-2.S │ │ ├── f32-gemm-2x8-minmax-asm-aarch64-neonfma-ld32-2.S │ │ ├── f32-gemm-2x8-minmax-asm-aarch64-neonfma-ld64-2.S │ │ ├── f32-gemm-2x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-3x128-minmax-hvx-broadcast.c │ │ ├── f32-gemm-3x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-3x16-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-3x16-minmax-asm-aarch64-neonfma-ld32.S │ │ ├── f32-gemm-3x16-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-3x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-3x16-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-3x16-minmax-avx-broadcast.c │ │ ├── f32-gemm-3x16-minmax-fma3-broadcast.c │ │ ├── f32-gemm-3x16-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-3x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-3x16s4-minmax-fma3-broadcast.c │ │ ├── f32-gemm-3x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-3x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-3x32c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-3x64-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-3x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-3x8-minmax-asm-aarch64-neonfma-ld128-2.S │ │ ├── f32-gemm-3x8-minmax-asm-aarch64-neonfma-ld32-2.S │ │ ├── f32-gemm-3x8-minmax-asm-aarch64-neonfma-ld64-2.S │ │ ├── f32-gemm-3x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-3x8-minmax-sse-dup.c │ │ ├── f32-gemm-3x8-minmax-sse-load1.c │ │ ├── f32-gemm-3x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-3x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-3x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-gemm-3x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-gemm-3x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-gemm-3x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-gemm-3x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-gemm-3x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-gemm-3x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-3x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-3x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-gemm-3x8-relu-wasmsimd-splat.c │ │ ├── f32-gemm-3x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-3x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-3x8-wasmsimd-loadsplat.c │ │ ├── f32-gemm-3x8-wasmsimd-splat.c │ │ ├── f32-gemm-3x8s4-minmax-sse.c │ │ ├── f32-gemm-3x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-3x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-gemm-3x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-gemm-3x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-gemm-3x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-3x8s4-relu-wasmsimd.c │ │ ├── f32-gemm-3x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-3x8s4-wasmsimd.c │ │ ├── f32-gemm-4x1-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-4x1-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-4x12-minmax-asm-aarch64-neonfma-cortex-a53.S │ │ ├── f32-gemm-4x128-minmax-hvx-broadcast.c │ │ ├── f32-gemm-4x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-4x16-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-4x16-minmax-asm-aarch64-neonfma-ld32.S │ │ ├── f32-gemm-4x16-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-4x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-4x16-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-4x16-minmax-avx-broadcast.c │ │ ├── f32-gemm-4x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-4x16-minmax-fma3-broadcast.c │ │ ├── f32-gemm-4x16-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-4x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-4x16s4-minmax-fma3-broadcast.c │ │ ├── f32-gemm-4x2-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-gemm-4x2-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-gemm-4x2-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-gemm-4x2-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-4x2-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-4x2-minmax-neon-lane-ld64.c │ │ ├── f32-gemm-4x2-minmax-scalar.c │ │ ├── f32-gemm-4x2-relu-scalar.c │ │ ├── f32-gemm-4x2-scalar.c │ │ ├── f32-gemm-4x2c4-minmax-sse.c │ │ ├── f32-gemm-4x2c4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-4x2c4-minmax-wasmrelaxedsimd.c │ │ ├── f32-gemm-4x2c4-minmax-wasmsimd-arm.c │ │ ├── f32-gemm-4x2c4-minmax-wasmsimd-x86.c │ │ ├── f32-gemm-4x2c4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-4x2c4-relu-wasmsimd.c │ │ ├── f32-gemm-4x2c4-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-4x2c4-wasmsimd.c │ │ ├── f32-gemm-4x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-4x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-4x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-4x32c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-4x4-asm-aarch32-vfp-ld64.S │ │ ├── f32-gemm-4x4-minmax-asm-aarch32-vfp-ld64.S │ │ ├── f32-gemm-4x4-minmax-scalar.c │ │ ├── f32-gemm-4x4-relu-scalar.c │ │ ├── f32-gemm-4x4-scalar.c │ │ ├── f32-gemm-4x64-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-4x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-4x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-4x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-4x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-gemm-4x8-minmax-asm-aarch32-neon-cortex-a53-prfm.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch32-neon-cortex-a53.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch32-neon-cortex-a55.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch32-neon-cortex-a7.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch32-neon-cortex-a75-prfm.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch32-neon-cortex-a75.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch32-neon-ld64.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a53-prfm.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a53.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a55.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128-2.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld32-2.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld64-2.S │ │ ├── f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-4x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-4x8-minmax-avx-broadcast.c │ │ ├── f32-gemm-4x8-minmax-fma3-broadcast.c │ │ ├── f32-gemm-4x8-minmax-neon-dup-ld128.c │ │ ├── f32-gemm-4x8-minmax-neon-dup-ld64.c │ │ ├── f32-gemm-4x8-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-4x8-minmax-neon-lane-ld64.c │ │ ├── f32-gemm-4x8-minmax-neonfma-dup-ld128.c │ │ ├── f32-gemm-4x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-gemm-4x8-minmax-sse-dup.c │ │ ├── f32-gemm-4x8-minmax-sse-load1.c │ │ ├── f32-gemm-4x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-4x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-4x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-gemm-4x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-gemm-4x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-gemm-4x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-gemm-4x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-gemm-4x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-gemm-4x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-4x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-4x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-gemm-4x8-relu-wasmsimd-splat.c │ │ ├── f32-gemm-4x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-4x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-4x8-wasmsimd-loadsplat.c │ │ ├── f32-gemm-4x8-wasmsimd-splat.c │ │ ├── f32-gemm-4x8s4-minmax-neon.c │ │ ├── f32-gemm-4x8s4-minmax-neonfma.c │ │ ├── f32-gemm-4x8s4-minmax-sse.c │ │ ├── f32-gemm-4x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-4x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-gemm-4x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-gemm-4x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-gemm-4x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-4x8s4-relu-wasmsimd.c │ │ ├── f32-gemm-4x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-4x8s4-wasmsimd.c │ │ ├── f32-gemm-5x128-minmax-hvx-broadcast.c │ │ ├── f32-gemm-5x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-5x16-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-5x16-minmax-asm-aarch64-neonfma-ld32.S │ │ ├── f32-gemm-5x16-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-5x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-5x16-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-5x16-minmax-avx-broadcast.c │ │ ├── f32-gemm-5x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-5x16-minmax-fma3-broadcast.c │ │ ├── f32-gemm-5x16-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-5x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-5x16s4-minmax-fma3-broadcast.c │ │ ├── f32-gemm-5x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-5x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-5x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-5x32c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-5x64-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-5x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-5x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-5x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-gemm-5x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-gemm-5x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-gemm-5x8-minmax-asm-aarch64-neonfma-ld128-2.S │ │ ├── f32-gemm-5x8-minmax-asm-aarch64-neonfma-ld32-2.S │ │ ├── f32-gemm-5x8-minmax-asm-aarch64-neonfma-ld64-2.S │ │ ├── f32-gemm-5x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-5x8-minmax-avx-broadcast.c │ │ ├── f32-gemm-5x8-minmax-fma3-broadcast.c │ │ ├── f32-gemm-5x8-minmax-neon-lane-ld64.c │ │ ├── f32-gemm-5x8-minmax-sse-dup.c │ │ ├── f32-gemm-5x8-minmax-sse-load1.c │ │ ├── f32-gemm-5x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-5x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-5x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-gemm-5x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-gemm-5x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-gemm-5x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-gemm-5x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-gemm-5x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-gemm-5x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-5x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-5x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-gemm-5x8-relu-wasmsimd-splat.c │ │ ├── f32-gemm-5x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-5x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-5x8-wasmsimd-loadsplat.c │ │ ├── f32-gemm-5x8-wasmsimd-splat.c │ │ ├── f32-gemm-5x8s4-minmax-sse.c │ │ ├── f32-gemm-5x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-5x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-gemm-5x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-gemm-5x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-gemm-5x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-5x8s4-relu-wasmsimd.c │ │ ├── f32-gemm-5x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-5x8s4-wasmsimd.c │ │ ├── f32-gemm-6x128-minmax-hvx-broadcast.c │ │ ├── f32-gemm-6x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-6x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-6x16-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-6x16-minmax-avx-broadcast.c │ │ ├── f32-gemm-6x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-6x16-minmax-fma3-broadcast.c │ │ ├── f32-gemm-6x16-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-6x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-6x16s4-minmax-fma3-broadcast.c │ │ ├── f32-gemm-6x2-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-gemm-6x2-minmax-neon-lane-ld64.c │ │ ├── f32-gemm-6x2c4-minmax-sse.c │ │ ├── f32-gemm-6x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-6x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-6x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-6x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-6x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-6x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-gemm-6x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a53-prfm.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a53.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a55.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a73.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld128-2.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld32-2.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld64-2.S │ │ ├── f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-gemm-6x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-6x8-minmax-avx-broadcast.c │ │ ├── f32-gemm-6x8-minmax-fma3-broadcast.c │ │ ├── f32-gemm-6x8-minmax-neon-dup-ld128.c │ │ ├── f32-gemm-6x8-minmax-neon-dup-ld64.c │ │ ├── f32-gemm-6x8-minmax-neon-lane-ld128.c │ │ ├── f32-gemm-6x8-minmax-neon-lane-ld64.c │ │ ├── f32-gemm-6x8-minmax-neonfma-dup-ld128.c │ │ ├── f32-gemm-6x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-gemm-6x8-minmax-sse-dup.c │ │ ├── f32-gemm-6x8-minmax-sse-load1.c │ │ ├── f32-gemm-6x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-6x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-6x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-gemm-6x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-gemm-6x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-gemm-6x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-gemm-6x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-gemm-6x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-gemm-6x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-6x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-6x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-gemm-6x8-relu-wasmsimd-splat.c │ │ ├── f32-gemm-6x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-gemm-6x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-gemm-6x8-wasmsimd-loadsplat.c │ │ ├── f32-gemm-6x8-wasmsimd-splat.c │ │ ├── f32-gemm-6x8s4-minmax-neon.c │ │ ├── f32-gemm-6x8s4-minmax-neonfma.c │ │ ├── f32-gemm-6x8s4-minmax-sse.c │ │ ├── f32-gemm-6x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-6x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-gemm-6x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-gemm-6x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-gemm-6x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-6x8s4-relu-wasmsimd.c │ │ ├── f32-gemm-6x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-gemm-6x8s4-wasmsimd.c │ │ ├── f32-gemm-7x128-minmax-hvx-broadcast.c │ │ ├── f32-gemm-7x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-7x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-7x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-7x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-7x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-7x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-7x4v-minmax-rvv.c │ │ ├── f32-gemm-7x4v-relu-rvv.c │ │ ├── f32-gemm-7x4v-rvv.c │ │ ├── f32-gemm-7x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-7x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld128-2.S │ │ ├── f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld32-2.S │ │ ├── f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld64-2.S │ │ ├── f32-gemm-7x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-7x8-minmax-avx-broadcast.c │ │ ├── f32-gemm-7x8-minmax-fma3-broadcast.c │ │ ├── f32-gemm-8x128-minmax-hvx-broadcast.c │ │ ├── f32-gemm-8x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-8x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-8x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-8x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-8x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-8x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-8x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-8x64-minmax-hvx-broadcast.c │ │ ├── f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld128-2.S │ │ ├── f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld32-2.S │ │ ├── f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld64-2.S │ │ ├── f32-gemm-8x8-minmax-asm-amd64-fma3-broadcast.S │ │ ├── f32-gemm-8x8-minmax-fma3-broadcast.c │ │ ├── f32-gemm-8x8s4-minmax-neon.c │ │ ├── f32-gemm-8x8s4-minmax-neonfma.c │ │ ├── f32-gemm-9x16-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-9x16-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-9x16c2-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-9x32-minmax-asm-amd64-avx512f-broadcast.S │ │ ├── f32-gemm-9x32-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-9x32-minmax-hvx-broadcast.c │ │ ├── f32-gemm-9x64-minmax-avx512f-broadcast.c │ │ ├── f32-gemm-9x64-minmax-hvx-broadcast.c │ │ └── f32-gemm-9x8-minmax-asm-amd64-fma3-broadcast.S │ ├── hvx-broadcast.c.in │ ├── neon-ld128.c.in │ ├── neon-ld64.c.in │ ├── neon-shuffle.c.in │ ├── scalar.c.in │ ├── sse-dup.c.in │ ├── sse-load1.c.in │ ├── sse-shuffle.c.in │ ├── wasmsimd-loadsplat.c.in │ ├── wasmsimd-s4.c.in │ └── wasmsimd-splat.c.in ├── f32-ibilinear-chw │ ├── gen │ │ ├── f32-ibilinear-chw-neon-p16.c │ │ ├── f32-ibilinear-chw-neon-p4.c │ │ ├── f32-ibilinear-chw-neon-p8.c │ │ ├── f32-ibilinear-chw-neonfma-p16.c │ │ ├── f32-ibilinear-chw-neonfma-p4.c │ │ ├── f32-ibilinear-chw-neonfma-p8.c │ │ ├── f32-ibilinear-chw-scalar-p1.c │ │ ├── f32-ibilinear-chw-scalar-p2.c │ │ ├── f32-ibilinear-chw-scalar-p4.c │ │ ├── f32-ibilinear-chw-sse-p4.c │ │ ├── f32-ibilinear-chw-sse-p8.c │ │ ├── f32-ibilinear-chw-wasmsimd-p4.c │ │ └── f32-ibilinear-chw-wasmsimd-p8.c │ ├── neon.c.in │ ├── scalar.c.in │ ├── sse.c.in │ └── wasmsimd.c.in ├── f32-ibilinear │ ├── gen │ │ ├── f32-ibilinear-neon-u4.c │ │ ├── f32-ibilinear-neon-u8.c │ │ ├── f32-ibilinear-neonfma-u4.c │ │ ├── f32-ibilinear-neonfma-u8.c │ │ ├── f32-ibilinear-scalar-u1.c │ │ ├── f32-ibilinear-scalar-u2.c │ │ ├── f32-ibilinear-scalar-u4.c │ │ ├── f32-ibilinear-sse-u4.c │ │ ├── f32-ibilinear-sse-u8.c │ │ ├── f32-ibilinear-wasmrelaxedsimd-u4.c │ │ ├── f32-ibilinear-wasmrelaxedsimd-u8.c │ │ ├── f32-ibilinear-wasmsimd-u4.c │ │ └── f32-ibilinear-wasmsimd-u8.c │ ├── neon.c.in │ ├── scalar.c.in │ ├── sse.c.in │ └── wasmsimd.c.in ├── f32-igemm │ ├── 1x8-aarch32-neon-cortex-a53.S.in │ ├── 1x8-aarch64-neonfma-cortex-a53.S.in │ ├── 1x8-aarch64-neonfma-cortex-a75.S.in │ ├── 1x8-aarch64-neonfma-ld64.S.in │ ├── 4x2-aarch64-neonfma-cortex-a75.S.in │ ├── 4x2-aarch64-neonfma-ld64.S.in │ ├── 4x8-aarch32-neon-cortex-a53.S.in │ ├── 4x8-aarch32-neon-cortex-a7.S.in │ ├── 4x8-aarch32-neon-cortex-a75.S.in │ ├── 4x8-aarch32-neon-ld64.S.in │ ├── 4x8-aarch64-neonfma-cortex-a53.S.in │ ├── 4x8-aarch64-neonfma-cortex-a75.S.in │ ├── 4x8-aarch64-neonfma-ld128.S.in │ ├── 4x8-aarch64-neonfma-ld64.S.in │ ├── 5x8-aarch64-neonfma-cortex-a75.S.in │ ├── 6x8-aarch64-neonfma-cortex-a53.S.in │ ├── 6x8-aarch64-neonfma-cortex-a75.S.in │ ├── 6x8-aarch64-neonfma-ld128.S.in │ ├── 6x8-aarch64-neonfma-ld64.S.in │ ├── MRx2-neon-ld64.c.in │ ├── MRx2c4-sse.c.in │ ├── MRx2c4-wasmsimd.c.in │ ├── MRxNRv-rvv.c.in │ ├── avx-broadcast.c.in │ ├── avx-shuffle4.c.in │ ├── avx512-broadcast.c.in │ ├── f32-igemm-1x12-minmax-asm-aarch64-neonfma-cortex-a53.S │ ├── f32-igemm-4x12-minmax-asm-aarch64-neonfma-cortex-a53.S │ ├── f32-igemm-4x8-minmax-asm-aarch32-neon-cortex-a55.S │ ├── f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a55.S │ ├── f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a55.S │ ├── f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a73.S │ ├── gen │ │ ├── f32-igemm-10x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-10x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-10x8-minmax-fma3-broadcast.c │ │ ├── f32-igemm-1x128-minmax-hvx-broadcast.c │ │ ├── f32-igemm-1x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-1x16-minmax-avx-broadcast.c │ │ ├── f32-igemm-1x16-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-1x16-minmax-fma3-broadcast.c │ │ ├── f32-igemm-1x16-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-1x16s4-minmax-fma3-broadcast.c │ │ ├── f32-igemm-1x32-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-1x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-1x4-minmax-scalar.c │ │ ├── f32-igemm-1x4-relu-scalar.c │ │ ├── f32-igemm-1x4-scalar.c │ │ ├── f32-igemm-1x4v-minmax-rvv.c │ │ ├── f32-igemm-1x4v-relu-rvv.c │ │ ├── f32-igemm-1x4v-rvv.c │ │ ├── f32-igemm-1x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-1x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-1x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-igemm-1x8-minmax-asm-aarch32-neon-cortex-a53-prfm.S │ │ ├── f32-igemm-1x8-minmax-asm-aarch32-neon-cortex-a53.S │ │ ├── f32-igemm-1x8-minmax-asm-aarch64-neonfma-cortex-a53-prfm.S │ │ ├── f32-igemm-1x8-minmax-asm-aarch64-neonfma-cortex-a53.S │ │ ├── f32-igemm-1x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-igemm-1x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-igemm-1x8-minmax-asm-aarch64-neonfma-ld64-prfm.S │ │ ├── f32-igemm-1x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-igemm-1x8-minmax-avx-broadcast.c │ │ ├── f32-igemm-1x8-minmax-fma3-broadcast.c │ │ ├── f32-igemm-1x8-minmax-neon-dup-ld64.c │ │ ├── f32-igemm-1x8-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-1x8-minmax-neon-lane-ld64.c │ │ ├── f32-igemm-1x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-igemm-1x8-minmax-sse-dup.c │ │ ├── f32-igemm-1x8-minmax-sse-load1.c │ │ ├── f32-igemm-1x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-1x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-1x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-igemm-1x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-igemm-1x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-igemm-1x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-igemm-1x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-igemm-1x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-igemm-1x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-1x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-1x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-igemm-1x8-relu-wasmsimd-splat.c │ │ ├── f32-igemm-1x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-1x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-1x8-wasmsimd-loadsplat.c │ │ ├── f32-igemm-1x8-wasmsimd-splat.c │ │ ├── f32-igemm-1x8s4-minmax-neon.c │ │ ├── f32-igemm-1x8s4-minmax-neonfma.c │ │ ├── f32-igemm-1x8s4-minmax-sse.c │ │ ├── f32-igemm-1x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-1x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-igemm-1x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-igemm-1x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-igemm-1x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-1x8s4-relu-wasmsimd.c │ │ ├── f32-igemm-1x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-1x8s4-wasmsimd.c │ │ ├── f32-igemm-2x128-minmax-hvx-broadcast.c │ │ ├── f32-igemm-2x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-2x16-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-2x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-2x4-minmax-scalar.c │ │ ├── f32-igemm-2x4-relu-scalar.c │ │ ├── f32-igemm-2x4-scalar.c │ │ ├── f32-igemm-2x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-3x128-minmax-hvx-broadcast.c │ │ ├── f32-igemm-3x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-3x16-minmax-avx-broadcast.c │ │ ├── f32-igemm-3x16-minmax-fma3-broadcast.c │ │ ├── f32-igemm-3x16-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-3x16s4-minmax-fma3-broadcast.c │ │ ├── f32-igemm-3x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-3x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-3x8-minmax-sse-dup.c │ │ ├── f32-igemm-3x8-minmax-sse-load1.c │ │ ├── f32-igemm-3x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-3x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-3x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-igemm-3x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-igemm-3x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-igemm-3x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-igemm-3x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-igemm-3x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-igemm-3x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-3x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-3x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-igemm-3x8-relu-wasmsimd-splat.c │ │ ├── f32-igemm-3x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-3x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-3x8-wasmsimd-loadsplat.c │ │ ├── f32-igemm-3x8-wasmsimd-splat.c │ │ ├── f32-igemm-3x8s4-minmax-sse.c │ │ ├── f32-igemm-3x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-3x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-igemm-3x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-igemm-3x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-igemm-3x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-3x8s4-relu-wasmsimd.c │ │ ├── f32-igemm-3x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-3x8s4-wasmsimd.c │ │ ├── f32-igemm-4x128-minmax-hvx-broadcast.c │ │ ├── f32-igemm-4x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-4x16-minmax-avx-broadcast.c │ │ ├── f32-igemm-4x16-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-4x16-minmax-fma3-broadcast.c │ │ ├── f32-igemm-4x16-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-4x16s4-minmax-fma3-broadcast.c │ │ ├── f32-igemm-4x2-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-igemm-4x2-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-igemm-4x2-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-igemm-4x2-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-igemm-4x2-minmax-neon-lane-ld64.c │ │ ├── f32-igemm-4x2-minmax-scalar.c │ │ ├── f32-igemm-4x2-relu-scalar.c │ │ ├── f32-igemm-4x2-scalar.c │ │ ├── f32-igemm-4x2c4-minmax-sse.c │ │ ├── f32-igemm-4x2c4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-4x2c4-minmax-wasmrelaxedsimd.c │ │ ├── f32-igemm-4x2c4-minmax-wasmsimd-arm.c │ │ ├── f32-igemm-4x2c4-minmax-wasmsimd-x86.c │ │ ├── f32-igemm-4x2c4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-4x2c4-relu-wasmsimd.c │ │ ├── f32-igemm-4x2c4-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-4x2c4-wasmsimd.c │ │ ├── f32-igemm-4x32-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-4x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-4x4-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-igemm-4x4-minmax-neon-lane-ld64.c │ │ ├── f32-igemm-4x4-minmax-scalar.c │ │ ├── f32-igemm-4x4-relu-scalar.c │ │ ├── f32-igemm-4x4-scalar.c │ │ ├── f32-igemm-4x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-4x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-4x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-igemm-4x8-minmax-asm-aarch32-neon-cortex-a53-prfm.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch32-neon-cortex-a53.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch32-neon-cortex-a7.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch32-neon-cortex-a75-prfm.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch32-neon-cortex-a75.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch32-neon-ld64.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a53-prfm.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a53.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-igemm-4x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-igemm-4x8-minmax-avx-broadcast.c │ │ ├── f32-igemm-4x8-minmax-fma3-broadcast.c │ │ ├── f32-igemm-4x8-minmax-neon-dup-ld128.c │ │ ├── f32-igemm-4x8-minmax-neon-dup-ld64.c │ │ ├── f32-igemm-4x8-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-4x8-minmax-neon-lane-ld64.c │ │ ├── f32-igemm-4x8-minmax-neonfma-dup-ld128.c │ │ ├── f32-igemm-4x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-igemm-4x8-minmax-sse-dup.c │ │ ├── f32-igemm-4x8-minmax-sse-load1.c │ │ ├── f32-igemm-4x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-4x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-4x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-igemm-4x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-igemm-4x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-igemm-4x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-igemm-4x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-igemm-4x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-igemm-4x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-4x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-4x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-igemm-4x8-relu-wasmsimd-splat.c │ │ ├── f32-igemm-4x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-4x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-4x8-wasmsimd-loadsplat.c │ │ ├── f32-igemm-4x8-wasmsimd-splat.c │ │ ├── f32-igemm-4x8s4-minmax-neon.c │ │ ├── f32-igemm-4x8s4-minmax-neonfma.c │ │ ├── f32-igemm-4x8s4-minmax-sse.c │ │ ├── f32-igemm-4x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-4x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-igemm-4x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-igemm-4x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-igemm-4x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-4x8s4-relu-wasmsimd.c │ │ ├── f32-igemm-4x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-4x8s4-wasmsimd.c │ │ ├── f32-igemm-5x128-minmax-hvx-broadcast.c │ │ ├── f32-igemm-5x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-5x16-minmax-avx-broadcast.c │ │ ├── f32-igemm-5x16-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-5x16-minmax-fma3-broadcast-prfm.c │ │ ├── f32-igemm-5x16-minmax-fma3-broadcast.c │ │ ├── f32-igemm-5x16-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-5x16s4-minmax-fma3-broadcast.c │ │ ├── f32-igemm-5x32-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-5x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-5x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-5x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-igemm-5x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-igemm-5x8-minmax-avx-broadcast.c │ │ ├── f32-igemm-5x8-minmax-fma3-broadcast.c │ │ ├── f32-igemm-5x8-minmax-sse-dup.c │ │ ├── f32-igemm-5x8-minmax-sse-load1.c │ │ ├── f32-igemm-5x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-5x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-5x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-igemm-5x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-igemm-5x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-igemm-5x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-igemm-5x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-igemm-5x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-igemm-5x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-5x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-5x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-igemm-5x8-relu-wasmsimd-splat.c │ │ ├── f32-igemm-5x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-5x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-5x8-wasmsimd-loadsplat.c │ │ ├── f32-igemm-5x8-wasmsimd-splat.c │ │ ├── f32-igemm-5x8s4-minmax-sse.c │ │ ├── f32-igemm-5x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-5x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-igemm-5x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-igemm-5x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-igemm-5x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-5x8s4-relu-wasmsimd.c │ │ ├── f32-igemm-5x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-5x8s4-wasmsimd.c │ │ ├── f32-igemm-6x128-minmax-hvx-broadcast.c │ │ ├── f32-igemm-6x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-6x16-minmax-avx-broadcast.c │ │ ├── f32-igemm-6x16-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-6x16-minmax-fma3-broadcast-prfm.c │ │ ├── f32-igemm-6x16-minmax-fma3-broadcast.c │ │ ├── f32-igemm-6x16-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-6x16s4-minmax-fma3-broadcast.c │ │ ├── f32-igemm-6x2-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-igemm-6x2-minmax-neon-lane-ld64.c │ │ ├── f32-igemm-6x2c4-minmax-sse.c │ │ ├── f32-igemm-6x32-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-6x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-6x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-6x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-igemm-6x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a53-prfm.S │ │ ├── f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a53.S │ │ ├── f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-igemm-6x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-igemm-6x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-igemm-6x8-minmax-avx-broadcast.c │ │ ├── f32-igemm-6x8-minmax-fma3-broadcast.c │ │ ├── f32-igemm-6x8-minmax-neon-dup-ld128.c │ │ ├── f32-igemm-6x8-minmax-neon-dup-ld64.c │ │ ├── f32-igemm-6x8-minmax-neon-lane-ld128.c │ │ ├── f32-igemm-6x8-minmax-neon-lane-ld64.c │ │ ├── f32-igemm-6x8-minmax-neonfma-dup-ld128.c │ │ ├── f32-igemm-6x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-igemm-6x8-minmax-sse-dup.c │ │ ├── f32-igemm-6x8-minmax-sse-load1.c │ │ ├── f32-igemm-6x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-6x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-6x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-igemm-6x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-igemm-6x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-igemm-6x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-igemm-6x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-igemm-6x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-igemm-6x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-6x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-6x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-igemm-6x8-relu-wasmsimd-splat.c │ │ ├── f32-igemm-6x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-igemm-6x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-igemm-6x8-wasmsimd-loadsplat.c │ │ ├── f32-igemm-6x8-wasmsimd-splat.c │ │ ├── f32-igemm-6x8s4-minmax-neon.c │ │ ├── f32-igemm-6x8s4-minmax-neonfma.c │ │ ├── f32-igemm-6x8s4-minmax-sse.c │ │ ├── f32-igemm-6x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-6x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-igemm-6x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-igemm-6x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-igemm-6x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-6x8s4-relu-wasmsimd.c │ │ ├── f32-igemm-6x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-igemm-6x8s4-wasmsimd.c │ │ ├── f32-igemm-7x128-minmax-hvx-broadcast.c │ │ ├── f32-igemm-7x16-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-7x32-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-7x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-7x4v-minmax-rvv.c │ │ ├── f32-igemm-7x4v-relu-rvv.c │ │ ├── f32-igemm-7x4v-rvv.c │ │ ├── f32-igemm-7x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-7x8-minmax-avx-broadcast.c │ │ ├── f32-igemm-7x8-minmax-fma3-broadcast.c │ │ ├── f32-igemm-8x128-minmax-hvx-broadcast.c │ │ ├── f32-igemm-8x16-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-8x32-minmax-avx512f-broadcast.c │ │ ├── f32-igemm-8x32-minmax-hvx-broadcast.c │ │ ├── f32-igemm-8x64-minmax-hvx-broadcast.c │ │ ├── f32-igemm-8x8-minmax-fma3-broadcast.c │ │ ├── f32-igemm-8x8s4-minmax-neon.c │ │ ├── f32-igemm-8x8s4-minmax-neonfma.c │ │ ├── f32-igemm-9x32-minmax-hvx-broadcast.c │ │ └── f32-igemm-9x64-minmax-hvx-broadcast.c │ ├── hvx-broadcast.c.in │ ├── neon-ld128.c.in │ ├── neon-ld64.c.in │ ├── neon-shuffle.c.in │ ├── scalar.c.in │ ├── sse-dup.c.in │ ├── sse-load1.c.in │ ├── sse-shuffle.c.in │ ├── wasmsimd-loadsplat.c.in │ ├── wasmsimd-s4.c.in │ └── wasmsimd-splat.c.in ├── f32-maxpool │ ├── f32-maxpool-minmax.inc │ ├── gen │ │ ├── f32-maxpool-9p-minmax-hvx-u32.c │ │ ├── f32-maxpool-9p-minmax-neon-u4.c │ │ ├── f32-maxpool-9p-minmax-rvv-u1v.c │ │ ├── f32-maxpool-9p-minmax-rvv-u2v.c │ │ ├── f32-maxpool-9p-minmax-scalar-u1.c │ │ ├── f32-maxpool-9p-minmax-sse2-u4.c │ │ └── f32-maxpool-9p-minmax-wasmsimd-u4.c │ ├── maxpool.c.in │ └── rvv.c.in ├── f32-ppmm │ ├── 4x8-aarch64-neonfma-cortex-a75.S.in │ ├── 4x8-aarch64-neonfma-ld128.S.in │ ├── 8x8-aarch64-neonfma-cortex-a75.S.in │ ├── 8x8-aarch64-neonfma-ld128.S.in │ ├── gen │ │ ├── f32-ppmm-2x4-minmax-scalar.c │ │ ├── f32-ppmm-3x3-minmax-scalar.c │ │ ├── f32-ppmm-4x16-minmax-aarch64-neonfma-prfm.c │ │ ├── f32-ppmm-4x16-minmax-aarch64-neonfma.c │ │ ├── f32-ppmm-4x16-minmax-neon-prfm.c │ │ ├── f32-ppmm-4x16-minmax-neon.c │ │ ├── f32-ppmm-4x2-minmax-scalar.c │ │ ├── f32-ppmm-4x4-minmax-scalar.c │ │ ├── f32-ppmm-4x8-minmax-aarch64-neonfma-prfm.c │ │ ├── f32-ppmm-4x8-minmax-aarch64-neonfma.c │ │ ├── f32-ppmm-4x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-ppmm-4x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-ppmm-4x8-minmax-asm-aarch64-neonfma-ld128-prfm.S │ │ ├── f32-ppmm-4x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-ppmm-4x8-minmax-neon-prfm.c │ │ ├── f32-ppmm-4x8-minmax-neon.c │ │ ├── f32-ppmm-4x8-minmax-sse.c │ │ ├── f32-ppmm-4x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-ppmm-4x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-ppmm-8x8-minmax-aarch64-neonfma-prfm.c │ │ ├── f32-ppmm-8x8-minmax-aarch64-neonfma.c │ │ ├── f32-ppmm-8x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S │ │ ├── f32-ppmm-8x8-minmax-asm-aarch64-neonfma-cortex-a75.S │ │ ├── f32-ppmm-8x8-minmax-asm-aarch64-neonfma-ld128-prfm.S │ │ ├── f32-ppmm-8x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-ppmm-8x8-minmax-neon-prfm.c │ │ └── f32-ppmm-8x8-minmax-neon.c │ ├── neon.c.in │ ├── scalar.c.in │ ├── sse.c.in │ └── wasmsimd-splat.c.in ├── f32-qc4w-gemm │ ├── avx-broadcast.c.in │ ├── avx512-broadcast.c.in │ └── gen │ │ ├── f32-qc4w-gemm-1x16-minmax-avx-broadcast.c │ │ ├── f32-qc4w-gemm-1x16-minmax-avx2-broadcast.c │ │ ├── f32-qc4w-gemm-1x16-minmax-fma3-broadcast.c │ │ ├── f32-qc4w-gemm-1x4-minmax-scalar.c │ │ ├── f32-qc4w-gemm-1x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-qc4w-gemm-1x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4-prfm.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-prfm.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc2-prfm.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc2.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc4-prfm.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc4.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-prfm.S │ │ ├── f32-qc4w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc4w-gemm-1x8-minmax-neon-dup-ld64.c │ │ ├── f32-qc4w-gemm-1x8-minmax-neon-lane-ld64.c │ │ ├── f32-qc4w-gemm-1x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-qc4w-gemm-1x8-minmax-sse41-dup.c │ │ ├── f32-qc4w-gemm-2x16-minmax-avx-broadcast.c │ │ ├── f32-qc4w-gemm-2x16-minmax-avx2-broadcast.c │ │ ├── f32-qc4w-gemm-2x16-minmax-fma3-broadcast.c │ │ ├── f32-qc4w-gemm-2x4-minmax-scalar.c │ │ ├── f32-qc4w-gemm-3x16-minmax-avx-broadcast.c │ │ ├── f32-qc4w-gemm-3x16-minmax-avx2-broadcast.c │ │ ├── f32-qc4w-gemm-3x16-minmax-fma3-broadcast.c │ │ ├── f32-qc4w-gemm-3x8-minmax-sse41-dup.c │ │ ├── f32-qc4w-gemm-4x1-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc4w-gemm-4x1-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc4w-gemm-4x16-minmax-avx-broadcast.c │ │ ├── f32-qc4w-gemm-4x16-minmax-avx2-broadcast.c │ │ ├── f32-qc4w-gemm-4x16-minmax-fma3-broadcast.c │ │ ├── f32-qc4w-gemm-4x2-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc4w-gemm-4x2-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc4w-gemm-4x2-minmax-scalar.c │ │ ├── f32-qc4w-gemm-4x4-minmax-scalar.c │ │ ├── f32-qc4w-gemm-4x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-qc4w-gemm-4x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc4w-gemm-4x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc4w-gemm-4x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc4w-gemm-4x8-minmax-neon-dup-ld64.c │ │ ├── f32-qc4w-gemm-4x8-minmax-neon-lane-ld64.c │ │ ├── f32-qc4w-gemm-4x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-qc4w-gemm-4x8-minmax-sse41-dup.c │ │ ├── f32-qc4w-gemm-5x16-minmax-avx-broadcast.c │ │ ├── f32-qc4w-gemm-5x16-minmax-avx2-broadcast.c │ │ ├── f32-qc4w-gemm-5x16-minmax-fma3-broadcast.c │ │ ├── f32-qc4w-gemm-5x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc4w-gemm-5x8-minmax-neon-lane-ld64.c │ │ ├── f32-qc4w-gemm-5x8-minmax-sse41-dup.c │ │ ├── f32-qc4w-gemm-6x16-minmax-avx-broadcast.c │ │ ├── f32-qc4w-gemm-6x16-minmax-avx2-broadcast.c │ │ ├── f32-qc4w-gemm-6x16-minmax-fma3-broadcast.c │ │ ├── f32-qc4w-gemm-6x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-qc4w-gemm-6x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc4w-gemm-6x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc4w-gemm-6x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc4w-gemm-6x8-minmax-neon-dup-ld64.c │ │ ├── f32-qc4w-gemm-6x8-minmax-neon-lane-ld64.c │ │ ├── f32-qc4w-gemm-6x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-qc4w-gemm-6x8-minmax-sse41-dup.c │ │ ├── f32-qc4w-gemm-7x16-minmax-avx-broadcast.c │ │ ├── f32-qc4w-gemm-7x16-minmax-avx2-broadcast.c │ │ ├── f32-qc4w-gemm-7x16-minmax-fma3-broadcast.c │ │ ├── f32-qc4w-gemm-8x16-minmax-avx-broadcast.c │ │ ├── f32-qc4w-gemm-8x16-minmax-avx2-broadcast.c │ │ └── f32-qc4w-gemm-8x16-minmax-fma3-broadcast.c ├── f32-qc8w-gemm │ └── gen │ │ ├── f32-qc4w-gemm-1x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc4w-gemm-2x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc4w-gemm-3x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc4w-gemm-4x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc4w-gemm-5x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc4w-gemm-6x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc4w-gemm-7x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc4w-gemm-8x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-1x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-qc8w-gemm-1x16-minmax-avx-broadcast.c │ │ ├── f32-qc8w-gemm-1x16-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-1x16-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-1x16-minmax-fma3-broadcast.c │ │ ├── f32-qc8w-gemm-1x16s4-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-1x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-1x4-minmax-scalar.c │ │ ├── f32-qc8w-gemm-1x4-relu-scalar.c │ │ ├── f32-qc8w-gemm-1x4-scalar.c │ │ ├── f32-qc8w-gemm-1x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-qc8w-gemm-1x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2-prfm.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neon-ld128-acc2.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4-prfm.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-prfm.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc2-prfm.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc2.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc4-prfm.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc4.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-prfm.S │ │ ├── f32-qc8w-gemm-1x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc8w-gemm-1x8-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-1x8-minmax-neon-dup-ld64.c │ │ ├── f32-qc8w-gemm-1x8-minmax-neon-lane-ld64.c │ │ ├── f32-qc8w-gemm-1x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-qc8w-gemm-1x8-minmax-sse41-dup.c │ │ ├── f32-qc8w-gemm-1x8-minmax-sse41-load1.c │ │ ├── f32-qc8w-gemm-1x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-1x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-1x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-1x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-qc8w-gemm-1x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-qc8w-gemm-1x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-qc8w-gemm-1x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-qc8w-gemm-1x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-qc8w-gemm-1x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-1x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-1x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-1x8-relu-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-1x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-1x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-1x8-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-1x8-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-1x8s4-minmax-neonfma.c │ │ ├── f32-qc8w-gemm-1x8s4-minmax-sse41.c │ │ ├── f32-qc8w-gemm-1x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-1x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-qc8w-gemm-1x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-qc8w-gemm-1x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-qc8w-gemm-1x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-1x8s4-relu-wasmsimd.c │ │ ├── f32-qc8w-gemm-1x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-1x8s4-wasmsimd.c │ │ ├── f32-qc8w-gemm-2x16-minmax-avx-broadcast.c │ │ ├── f32-qc8w-gemm-2x16-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-2x16-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-2x16-minmax-fma3-broadcast.c │ │ ├── f32-qc8w-gemm-2x16s4-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-2x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-2x4-minmax-scalar.c │ │ ├── f32-qc8w-gemm-2x4-relu-scalar.c │ │ ├── f32-qc8w-gemm-2x4-scalar.c │ │ ├── f32-qc8w-gemm-3x16-minmax-avx-broadcast.c │ │ ├── f32-qc8w-gemm-3x16-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-3x16-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-3x16-minmax-fma3-broadcast.c │ │ ├── f32-qc8w-gemm-3x16s4-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-3x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-3x8-minmax-sse41-dup.c │ │ ├── f32-qc8w-gemm-3x8-minmax-sse41-load1.c │ │ ├── f32-qc8w-gemm-3x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-3x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-3x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-3x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-qc8w-gemm-3x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-qc8w-gemm-3x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-qc8w-gemm-3x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-qc8w-gemm-3x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-qc8w-gemm-3x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-3x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-3x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-3x8-relu-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-3x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-3x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-3x8-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-3x8-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-3x8s4-minmax-sse41.c │ │ ├── f32-qc8w-gemm-3x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-3x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-qc8w-gemm-3x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-qc8w-gemm-3x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-qc8w-gemm-3x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-3x8s4-relu-wasmsimd.c │ │ ├── f32-qc8w-gemm-3x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-3x8s4-wasmsimd.c │ │ ├── f32-qc8w-gemm-4x1-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc8w-gemm-4x1-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc8w-gemm-4x16-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-qc8w-gemm-4x16-minmax-avx-broadcast.c │ │ ├── f32-qc8w-gemm-4x16-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-4x16-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-4x16-minmax-fma3-broadcast.c │ │ ├── f32-qc8w-gemm-4x16s4-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-4x2-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc8w-gemm-4x2-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc8w-gemm-4x2-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc8w-gemm-4x2-minmax-neon-lane-ld64.c │ │ ├── f32-qc8w-gemm-4x2-minmax-scalar.c │ │ ├── f32-qc8w-gemm-4x2-relu-scalar.c │ │ ├── f32-qc8w-gemm-4x2-scalar.c │ │ ├── f32-qc8w-gemm-4x2c4-minmax-sse41.c │ │ ├── f32-qc8w-gemm-4x2c4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-4x2c4-minmax-wasmrelaxedsimd.c │ │ ├── f32-qc8w-gemm-4x2c4-minmax-wasmsimd-arm.c │ │ ├── f32-qc8w-gemm-4x2c4-minmax-wasmsimd-x86.c │ │ ├── f32-qc8w-gemm-4x2c4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-4x2c4-relu-wasmsimd.c │ │ ├── f32-qc8w-gemm-4x2c4-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-4x2c4-wasmsimd.c │ │ ├── f32-qc8w-gemm-4x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-4x4-minmax-scalar.c │ │ ├── f32-qc8w-gemm-4x4-relu-scalar.c │ │ ├── f32-qc8w-gemm-4x4-scalar.c │ │ ├── f32-qc8w-gemm-4x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-qc8w-gemm-4x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc8w-gemm-4x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc8w-gemm-4x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc8w-gemm-4x8-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-4x8-minmax-neon-dup-ld64.c │ │ ├── f32-qc8w-gemm-4x8-minmax-neon-lane-ld64.c │ │ ├── f32-qc8w-gemm-4x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-qc8w-gemm-4x8-minmax-sse41-dup.c │ │ ├── f32-qc8w-gemm-4x8-minmax-sse41-load1.c │ │ ├── f32-qc8w-gemm-4x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-4x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-4x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-4x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-qc8w-gemm-4x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-qc8w-gemm-4x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-qc8w-gemm-4x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-qc8w-gemm-4x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-qc8w-gemm-4x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-4x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-4x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-4x8-relu-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-4x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-4x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-4x8-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-4x8-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-4x8s4-minmax-neonfma.c │ │ ├── f32-qc8w-gemm-4x8s4-minmax-sse41.c │ │ ├── f32-qc8w-gemm-4x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-4x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-qc8w-gemm-4x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-qc8w-gemm-4x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-qc8w-gemm-4x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-4x8s4-relu-wasmsimd.c │ │ ├── f32-qc8w-gemm-4x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-4x8s4-wasmsimd.c │ │ ├── f32-qc8w-gemm-5x16-minmax-avx-broadcast.c │ │ ├── f32-qc8w-gemm-5x16-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-5x16-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-5x16-minmax-fma3-broadcast.c │ │ ├── f32-qc8w-gemm-5x16s4-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-5x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-5x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc8w-gemm-5x8-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-5x8-minmax-neon-lane-ld64.c │ │ ├── f32-qc8w-gemm-5x8-minmax-sse41-dup.c │ │ ├── f32-qc8w-gemm-5x8-minmax-sse41-load1.c │ │ ├── f32-qc8w-gemm-5x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-5x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-5x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-5x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-qc8w-gemm-5x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-qc8w-gemm-5x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-qc8w-gemm-5x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-qc8w-gemm-5x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-qc8w-gemm-5x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-5x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-5x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-5x8-relu-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-5x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-5x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-5x8-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-5x8-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-5x8s4-minmax-sse41.c │ │ ├── f32-qc8w-gemm-5x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-5x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-qc8w-gemm-5x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-qc8w-gemm-5x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-qc8w-gemm-5x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-5x8s4-relu-wasmsimd.c │ │ ├── f32-qc8w-gemm-5x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-5x8s4-wasmsimd.c │ │ ├── f32-qc8w-gemm-6x16-minmax-avx-broadcast.c │ │ ├── f32-qc8w-gemm-6x16-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-6x16-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-6x16-minmax-fma3-broadcast.c │ │ ├── f32-qc8w-gemm-6x16s4-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-6x2-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc8w-gemm-6x2-minmax-neon-lane-ld64.c │ │ ├── f32-qc8w-gemm-6x2c4-minmax-sse41.c │ │ ├── f32-qc8w-gemm-6x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-6x8-minmax-aarch64-neonfma-lane-ld128.c │ │ ├── f32-qc8w-gemm-6x8-minmax-aarch64-neonfma-lane-ld64.c │ │ ├── f32-qc8w-gemm-6x8-minmax-asm-aarch64-neonfma-ld128.S │ │ ├── f32-qc8w-gemm-6x8-minmax-asm-aarch64-neonfma-ld64.S │ │ ├── f32-qc8w-gemm-6x8-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-6x8-minmax-neon-dup-ld64.c │ │ ├── f32-qc8w-gemm-6x8-minmax-neon-lane-ld64.c │ │ ├── f32-qc8w-gemm-6x8-minmax-neonfma-dup-ld64.c │ │ ├── f32-qc8w-gemm-6x8-minmax-sse41-dup.c │ │ ├── f32-qc8w-gemm-6x8-minmax-sse41-load1.c │ │ ├── f32-qc8w-gemm-6x8-minmax-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-6x8-minmax-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-6x8-minmax-wasmrelaxedsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-6x8-minmax-wasmrelaxedsimd-splat.c │ │ ├── f32-qc8w-gemm-6x8-minmax-wasmsimd-arm-loadsplat.c │ │ ├── f32-qc8w-gemm-6x8-minmax-wasmsimd-arm-splat.c │ │ ├── f32-qc8w-gemm-6x8-minmax-wasmsimd-x86-loadsplat.c │ │ ├── f32-qc8w-gemm-6x8-minmax-wasmsimd-x86-splat.c │ │ ├── f32-qc8w-gemm-6x8-relu-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-6x8-relu-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-6x8-relu-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-6x8-relu-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-6x8-wasmrelaxedsimd-fma-loadsplat.c │ │ ├── f32-qc8w-gemm-6x8-wasmrelaxedsimd-fma-splat.c │ │ ├── f32-qc8w-gemm-6x8-wasmsimd-loadsplat.c │ │ ├── f32-qc8w-gemm-6x8-wasmsimd-splat.c │ │ ├── f32-qc8w-gemm-6x8s4-minmax-neonfma.c │ │ ├── f32-qc8w-gemm-6x8s4-minmax-sse41.c │ │ ├── f32-qc8w-gemm-6x8s4-minmax-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-6x8s4-minmax-wasmrelaxedsimd.c │ │ ├── f32-qc8w-gemm-6x8s4-minmax-wasmsimd-arm.c │ │ ├── f32-qc8w-gemm-6x8s4-minmax-wasmsimd-x86.c │ │ ├── f32-qc8w-gemm-6x8s4-relu-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-6x8s4-relu-wasmsimd.c │ │ ├── f32-qc8w-gemm-6x8s4-wasmrelaxedsimd-fma.c │ │ ├── f32-qc8w-gemm-6x8s4-wasmsimd.c │ │ ├── f32-qc8w-gemm-7x16-minmax-avx-broadcast.c │ │ ├── f32-qc8w-gemm-7x16-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-7x16-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-7x16-minmax-fma3-broadcast.c │ │ ├── f32-qc8w-gemm-7x32-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-7x8-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-8x16-minmax-avx-broadcast.c │ │ ├── f32-qc8w-gemm-8x16-minmax-avx2-broadcast.c │ │ ├── f32-qc8w-gemm-8x16-minmax-avx512skx-broadcast.c │ │ ├── f32-qc8w-gemm-8x16-minmax-fma3-broadcast.c │ │ ├── f32-qc8w-gemm-8x32-minmax-avx512skx-broadcast.c │ │ └── f32-qc8w-gemm-8x8-minmax-avx2-broadcast.c ├── f32-qc8w-spmm │ └── gen │ │ ├── f32-qc8w-spmm-1x1-minmax-scalar.c │ │ ├── f32-qc8w-spmm-2x1-minmax-scalar.c │ │ ├── f32-qc8w-spmm-4x1-minmax-scalar.c │ │ ├── f32-qc8w-spmm-8x1-minmax-scalar.c │ │ ├── f32-qc8w-spmm-8x2-minmax-scalar.c │ │ └── f32-qc8w-spmm-8x4-minmax-scalar.c ├── f32-qs8-vcvt │ ├── avx.c.in │ ├── avx2.c.in │ ├── avx512skx.c.in │ ├── f32-qs8-vcvt.inc │ ├── gen │ │ ├── f32-qs8-vcvt-avx-u16.c │ │ ├── f32-qs8-vcvt-avx-u24.c │ │ ├── f32-qs8-vcvt-avx-u32.c │ │ ├── f32-qs8-vcvt-avx-u8.c │ │ ├── f32-qs8-vcvt-avx2-u16.c │ │ ├── f32-qs8-vcvt-avx2-u32.c │ │ ├── f32-qs8-vcvt-avx2-u48.c │ │ ├── f32-qs8-vcvt-avx2-u64.c │ │ ├── f32-qs8-vcvt-avx512skx-u128.c │ │ ├── f32-qs8-vcvt-avx512skx-u32.c │ │ ├── f32-qs8-vcvt-avx512skx-u64.c │ │ ├── f32-qs8-vcvt-avx512skx-u96.c │ │ ├── f32-qs8-vcvt-hvx-u128.c │ │ ├── f32-qs8-vcvt-hvx-u256.c │ │ ├── f32-qs8-vcvt-hvx-u32.c │ │ ├── f32-qs8-vcvt-hvx-u64.c │ │ ├── f32-qs8-vcvt-hvx-u96.c │ │ ├── f32-qs8-vcvt-neon-u16.c │ │ ├── f32-qs8-vcvt-neon-u24.c │ │ ├── f32-qs8-vcvt-neon-u32.c │ │ ├── f32-qs8-vcvt-neon-u8.c │ │ ├── f32-qs8-vcvt-neonv8-u16.c │ │ ├── f32-qs8-vcvt-neonv8-u24.c │ │ ├── f32-qs8-vcvt-neonv8-u32.c │ │ ├── f32-qs8-vcvt-neonv8-u8.c │ │ ├── f32-qs8-vcvt-rvv-u1v.c │ │ ├── f32-qs8-vcvt-rvv-u2v.c │ │ ├── f32-qs8-vcvt-rvv-u4v.c │ │ ├── f32-qs8-vcvt-rvv-u8v.c │ │ ├── f32-qs8-vcvt-scalar-fmagic-u1.c │ │ ├── f32-qs8-vcvt-scalar-fmagic-u2.c │ │ ├── f32-qs8-vcvt-scalar-fmagic-u3.c │ │ ├── f32-qs8-vcvt-scalar-fmagic-u4.c │ │ ├── f32-qs8-vcvt-scalar-imagic-u1.c │ │ ├── f32-qs8-vcvt-scalar-imagic-u2.c │ │ ├── f32-qs8-vcvt-scalar-imagic-u3.c │ │ ├── f32-qs8-vcvt-scalar-imagic-u4.c │ │ ├── f32-qs8-vcvt-scalar-lrintf-u1.c │ │ ├── f32-qs8-vcvt-scalar-lrintf-u2.c │ │ ├── f32-qs8-vcvt-scalar-lrintf-u3.c │ │ ├── f32-qs8-vcvt-scalar-lrintf-u4.c │ │ ├── f32-qs8-vcvt-sse2-u16.c │ │ ├── f32-qs8-vcvt-sse2-u24.c │ │ ├── f32-qs8-vcvt-sse2-u32.c │ │ ├── f32-qs8-vcvt-sse2-u8.c │ │ ├── f32-qs8-vcvt-sse41-u16.c │ │ ├── f32-qs8-vcvt-sse41-u24.c │ │ ├── f32-qs8-vcvt-sse41-u32.c │ │ ├── f32-qs8-vcvt-sse41-u8.c │ │ ├── f32-qs8-vcvt-wasmsimd-cvt-u16.c │ │ ├── f32-qs8-vcvt-wasmsimd-cvt-u24.c │ │ ├── f32-qs8-vcvt-wasmsimd-cvt-u32.c │ │ ├── f32-qs8-vcvt-wasmsimd-cvt-u8.c │ │ ├── f32-qs8-vcvt-wasmsimd-magic-u16.c │ │ ├── f32-qs8-vcvt-wasmsimd-magic-u24.c │ │ ├── f32-qs8-vcvt-wasmsimd-magic-u32.c │ │ └── f32-qs8-vcvt-wasmsimd-magic-u8.c │ ├── hvx.c.in │ ├── neon.c.in │ ├── neonv8.c.in │ ├── rvv.c.in │ ├── scalar-fmagic.c.in │ ├── scalar-imagic.c.in │ ├── scalar-lrintf.c.in │ ├── sse.c.in │ ├── wasmsimd-cvt.c.in │ └── wasmsimd-magic.c.in ├── f32-qu8-vcvt │ ├── f32-qu8-vcvt.inc │ └── gen │ │ ├── f32-qu8-vcvt-avx-u16.c │ │ ├── f32-qu8-vcvt-avx-u24.c │ │ ├── f32-qu8-vcvt-avx-u32.c │ │ ├── f32-qu8-vcvt-avx-u8.c │ │ ├── f32-qu8-vcvt-avx2-u16.c │ │ ├── f32-qu8-vcvt-avx2-u32.c │ │ ├── f32-qu8-vcvt-avx2-u48.c │ │ ├── f32-qu8-vcvt-avx2-u64.c │ │ ├── f32-qu8-vcvt-avx512skx-u128.c │ │ ├── f32-qu8-vcvt-avx512skx-u32.c │ │ ├── f32-qu8-vcvt-avx512skx-u64.c │ │ ├── f32-qu8-vcvt-avx512skx-u96.c │ │ ├── f32-qu8-vcvt-neon-u16.c │ │ ├── f32-qu8-vcvt-neon-u24.c │ │ ├── f32-qu8-vcvt-neon-u32.c │ │ ├── f32-qu8-vcvt-neon-u8.c │ │ ├── f32-qu8-vcvt-neonv8-u16.c │ │ ├── f32-qu8-vcvt-neonv8-u24.c │ │ ├── f32-qu8-vcvt-neonv8-u32.c │ │ ├── f32-qu8-vcvt-neonv8-u8.c │ │ ├── f32-qu8-vcvt-rvv-u1v.c │ │ ├── f32-qu8-vcvt-rvv-u2v.c │ │ ├── f32-qu8-vcvt-rvv-u4v.c │ │ ├── f32-qu8-vcvt-rvv-u8v.c │ │ ├── f32-qu8-vcvt-scalar-fmagic-u1.c │ │ ├── f32-qu8-vcvt-scalar-fmagic-u2.c │ │ ├── f32-qu8-vcvt-scalar-fmagic-u3.c │ │ ├── f32-qu8-vcvt-scalar-fmagic-u4.c │ │ ├── f32-qu8-vcvt-scalar-imagic-u1.c │ │ ├── f32-qu8-vcvt-scalar-imagic-u2.c │ │ ├── f32-qu8-vcvt-scalar-imagic-u3.c │ │ ├── f32-qu8-vcvt-scalar-imagic-u4.c │ │ ├── f32-qu8-vcvt-scalar-lrintf-u1.c │ │ ├── f32-qu8-vcvt-scalar-lrintf-u2.c │ │ ├── f32-qu8-vcvt-scalar-lrintf-u3.c │ │ ├── f32-qu8-vcvt-scalar-lrintf-u4.c │ │ ├── f32-qu8-vcvt-sse2-u16.c │ │ ├── f32-qu8-vcvt-sse2-u24.c │ │ ├── f32-qu8-vcvt-sse2-u32.c │ │ ├── f32-qu8-vcvt-sse2-u8.c │ │ ├── f32-qu8-vcvt-wasmsimd-cvt-u16.c │ │ ├── f32-qu8-vcvt-wasmsimd-cvt-u24.c │ │ ├── f32-qu8-vcvt-wasmsimd-cvt-u32.c │ │ ├── f32-qu8-vcvt-wasmsimd-cvt-u8.c │ │ ├── f32-qu8-vcvt-wasmsimd-magic-u16.c │ │ ├── f32-qu8-vcvt-wasmsimd-magic-u24.c │ │ ├── f32-qu8-vcvt-wasmsimd-magic-u32.c │ │ └── f32-qu8-vcvt-wasmsimd-magic-u8.c ├── f32-raddexpminusmax │ ├── avx2-p5.c.in │ ├── avx512f-p5-scalef.c.in │ └── gen │ │ ├── f32-raddexpminusmax-avx2-p5-u32-acc2.c │ │ ├── f32-raddexpminusmax-avx2-p5-u32-acc4.c │ │ ├── f32-raddexpminusmax-avx2-p5-u32.c │ │ ├── f32-raddexpminusmax-avx2-p5-u64-acc2.c │ │ ├── f32-raddexpminusmax-avx2-p5-u64-acc4.c │ │ ├── f32-raddexpminusmax-avx2-p5-u64.c │ │ ├── f32-raddexpminusmax-avx2-p5-u72-acc3.c │ │ ├── f32-raddexpminusmax-avx2-p5-u72.c │ │ ├── f32-raddexpminusmax-avx2-p5-u80-acc2.c │ │ ├── f32-raddexpminusmax-avx2-p5-u80-acc5.c │ │ ├── f32-raddexpminusmax-avx2-p5-u80.c │ │ ├── f32-raddexpminusmax-avx2-p5-u96-acc2.c │ │ ├── f32-raddexpminusmax-avx2-p5-u96-acc3.c │ │ ├── f32-raddexpminusmax-avx2-p5-u96-acc6.c │ │ ├── f32-raddexpminusmax-avx2-p5-u96.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u128-acc2.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u128-acc4.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u128.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u144-acc3.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u144.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u160-acc2.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u160-acc5.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u160.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u192-acc2.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u192-acc3.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u192-acc6.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u192.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u64-acc2.c │ │ ├── f32-raddexpminusmax-avx512f-p5-scalef-u64-acc4.c │ │ └── f32-raddexpminusmax-avx512f-p5-scalef-u64.c ├── f32-raddextexp │ ├── avx2-p5.c.in │ ├── avx512f-p5-scalef.c.in │ ├── f32-raddextexp.inc │ └── gen │ │ ├── f32-raddextexp-avx2-p5-u32-acc2.c │ │ ├── f32-raddextexp-avx2-p5-u32-acc4.c │ │ ├── f32-raddextexp-avx2-p5-u32.c │ │ ├── f32-raddextexp-avx2-p5-u64-acc2.c │ │ ├── f32-raddextexp-avx2-p5-u64-acc4.c │ │ ├── f32-raddextexp-avx2-p5-u64.c │ │ ├── f32-raddextexp-avx2-p5-u72-acc3.c │ │ ├── f32-raddextexp-avx2-p5-u72.c │ │ ├── f32-raddextexp-avx2-p5-u80-acc2.c │ │ ├── f32-raddextexp-avx2-p5-u80-acc5.c │ │ ├── f32-raddextexp-avx2-p5-u80.c │ │ ├── f32-raddextexp-avx2-p5-u96-acc2.c │ │ ├── f32-raddextexp-avx2-p5-u96-acc3.c │ │ ├── f32-raddextexp-avx2-p5-u96-acc6.c │ │ ├── f32-raddextexp-avx2-p5-u96.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u128-acc2.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u128-acc4.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u128.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u144-acc3.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u144.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u160-acc2.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u160-acc5.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u160.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u192-acc2.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u192-acc3.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u192-acc6.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u192.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u64-acc2.c │ │ ├── f32-raddextexp-avx512f-p5-scalef-u64-acc4.c │ │ └── f32-raddextexp-avx512f-p5-scalef-u64.c ├── f32-raddstoreexpminusmax │ ├── avx2-rr1-p5.c.in │ ├── avx2-rr2-p5.c.in │ ├── avx512f-rr1-p5-scalef.c.in │ ├── avx512f-rr2-p5.c.in │ ├── gen │ │ ├── f32-raddstoreexpminusmax-avx2-rr1-p5-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx2-rr1-p5-u32-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx2-rr1-p5-u32-acc4.c │ │ ├── f32-raddstoreexpminusmax-avx2-rr1-p5-u8.c │ │ ├── f32-raddstoreexpminusmax-avx2-rr2-p5-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx2-rr2-p5-u32-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx2-rr2-p5-u32-acc4.c │ │ ├── f32-raddstoreexpminusmax-avx2-rr2-p5-u8.c │ │ ├── f32-raddstoreexpminusmax-avx256skx-rr2-p5-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx256skx-rr2-p5-u32-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx256skx-rr2-p5-u32-acc4.c │ │ ├── f32-raddstoreexpminusmax-avx256skx-rr2-p5-u8.c │ │ ├── f32-raddstoreexpminusmax-avx512f-rr1-p5-scalef-u16.c │ │ ├── f32-raddstoreexpminusmax-avx512f-rr1-p5-scalef-u32-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx512f-rr1-p5-scalef-u64-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx512f-rr1-p5-scalef-u64-acc4.c │ │ ├── f32-raddstoreexpminusmax-avx512f-rr2-p5-u16.c │ │ ├── f32-raddstoreexpminusmax-avx512f-rr2-p5-u32-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx512f-rr2-p5-u64-acc2.c │ │ ├── f32-raddstoreexpminusmax-avx512f-rr2-p5-u64-acc4.c │ │ ├── f32-raddstoreexpminusmax-hvx-rr2-p5-u128-acc2.c │ │ ├── f32-raddstoreexpminusmax-hvx-rr2-p5-u128-acc4.c │ │ ├── f32-raddstoreexpminusmax-hvx-rr2-p5-u32.c │ │ ├── f32-raddstoreexpminusmax-hvx-rr2-p5-u64-acc2.c │ │ ├── f32-raddstoreexpminusmax-neon-rr2-lut64-p2-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-neon-rr2-lut64-p2-u16-acc4.c │ │ ├── f32-raddstoreexpminusmax-neon-rr2-lut64-p2-u4.c │ │ ├── f32-raddstoreexpminusmax-neon-rr2-lut64-p2-u8-acc2.c │ │ ├── f32-raddstoreexpminusmax-neon-rr2-p5-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-neon-rr2-p5-u16-acc4.c │ │ ├── f32-raddstoreexpminusmax-neon-rr2-p5-u4.c │ │ ├── f32-raddstoreexpminusmax-neon-rr2-p5-u8-acc2.c │ │ ├── f32-raddstoreexpminusmax-neonfma-rr1-lut64-p2-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-neonfma-rr1-lut64-p2-u16-acc4.c │ │ ├── f32-raddstoreexpminusmax-neonfma-rr1-lut64-p2-u4.c │ │ ├── f32-raddstoreexpminusmax-neonfma-rr1-lut64-p2-u8-acc2.c │ │ ├── f32-raddstoreexpminusmax-neonfma-rr1-p5-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-neonfma-rr1-p5-u16-acc4.c │ │ ├── f32-raddstoreexpminusmax-neonfma-rr1-p5-u4.c │ │ ├── f32-raddstoreexpminusmax-neonfma-rr1-p5-u8-acc2.c │ │ ├── f32-raddstoreexpminusmax-rvv-rr2-p6-u2v.c │ │ ├── f32-raddstoreexpminusmax-rvv-rr2-p6-u4v.c │ │ ├── f32-raddstoreexpminusmax-scalar-rr2-lut64-p2-u1.c │ │ ├── f32-raddstoreexpminusmax-scalar-rr2-lut64-p2-u2-acc2.c │ │ ├── f32-raddstoreexpminusmax-scalar-rr2-lut64-p2-u4-acc2.c │ │ ├── f32-raddstoreexpminusmax-scalar-rr2-lut64-p2-u4-acc4.c │ │ ├── f32-raddstoreexpminusmax-scalar-rr2-p5-u1.c │ │ ├── f32-raddstoreexpminusmax-scalar-rr2-p5-u2-acc2.c │ │ ├── f32-raddstoreexpminusmax-scalar-rr2-p5-u4-acc2.c │ │ ├── f32-raddstoreexpminusmax-scalar-rr2-p5-u4-acc4.c │ │ ├── f32-raddstoreexpminusmax-sse2-rr2-p5-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-sse2-rr2-p5-u16-acc4.c │ │ ├── f32-raddstoreexpminusmax-sse2-rr2-p5-u4.c │ │ ├── f32-raddstoreexpminusmax-sse2-rr2-p5-u8-acc2.c │ │ ├── f32-raddstoreexpminusmax-wasmrelaxedsimd-rr2-p5-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-wasmrelaxedsimd-rr2-p5-u16-acc4.c │ │ ├── f32-raddstoreexpminusmax-wasmrelaxedsimd-rr2-p5-u4.c │ │ ├── f32-raddstoreexpminusmax-wasmrelaxedsimd-rr2-p5-u8-acc2.c │ │ ├── f32-raddstoreexpminusmax-wasmsimd-rr2-p5-u16-acc2.c │ │ ├── f32-raddstoreexpminusmax-wasmsimd-rr2-p5-u16-acc4.c │ │ ├── f32-raddstoreexpminusmax-wasmsimd-rr2-p5-u4.c │ │ └── f32-raddstoreexpminusmax-wasmsimd-rr2-p5-u8-acc2.c │ ├── hvx-rr2-p5.c.in │ ├── neon-lut64-p2.c.in │ ├── neon-p5.c.in │ ├── rvv-rr2-p6.c.in │ ├── scalar-rr2-lut64-p2.c.in │ ├── scalar-rr2-p5.c.in │ ├── sse2-rr2-p5.c.in │ └── wasmsimd-rr2-p5.c.in ├── f32-rdminmax │ ├── f32-rdmax.inc │ ├── f32-rdmin.inc │ ├── gen │ │ ├── f32-rdmax-2p2x-avx-u32.c │ │ ├── f32-rdmax-2p2x-avx512f-u32.c │ │ ├── f32-rdmax-2p2x-hvx-u32.c │ │ ├── f32-rdmax-2p2x-neon-u32.c │ │ ├── f32-rdmax-2p2x-scalar-u2.c │ │ ├── f32-rdmax-2p2x-sse2-u32.c │ │ ├── f32-rdmax-2p2x-wasmsimd-u32.c │ │ ├── f32-rdmin-2p2x-avx-u32.c │ │ ├── f32-rdmin-2p2x-avx512f-u32.c │ │ ├── f32-rdmin-2p2x-hvx-u32.c │ │ ├── f32-rdmin-2p2x-neon-u32.c │ │ ├── f32-rdmin-2p2x-scalar-u2.c │ │ ├── f32-rdmin-2p2x-sse2-u32.c │ │ └── f32-rdmin-2p2x-wasmsimd-u32.c │ └── simd.c.in ├── f32-rdsum │ ├── f32-rdsum.inc │ ├── gen │ │ ├── f32-rdsum-7p7x-minmax-avx-u16.c │ │ ├── f32-rdsum-7p7x-minmax-avx-u32.c │ │ ├── f32-rdsum-7p7x-minmax-avx-u64.c │ │ ├── f32-rdsum-7p7x-minmax-avx512f-u128.c │ │ ├── f32-rdsum-7p7x-minmax-avx512f-u16.c │ │ ├── f32-rdsum-7p7x-minmax-avx512f-u32.c │ │ ├── f32-rdsum-7p7x-minmax-avx512f-u64.c │ │ ├── f32-rdsum-7p7x-minmax-hvx-u128.c │ │ ├── f32-rdsum-7p7x-minmax-hvx-u32.c │ │ ├── f32-rdsum-7p7x-minmax-hvx-u64.c │ │ ├── f32-rdsum-7p7x-minmax-neon-u16.c │ │ ├── f32-rdsum-7p7x-minmax-neon-u32.c │ │ ├── f32-rdsum-7p7x-minmax-neon-u64.c │ │ ├── f32-rdsum-7p7x-minmax-rvv-u1v.c │ │ ├── f32-rdsum-7p7x-minmax-rvv-u2v.c │ │ ├── f32-rdsum-7p7x-minmax-rvv-u4v.c │ │ ├── f32-rdsum-7p7x-minmax-scalar.c │ │ ├── f32-rdsum-7p7x-minmax-sse2-u16.c │ │ ├── f32-rdsum-7p7x-minmax-sse2-u32.c │ │ ├── f32-rdsum-7p7x-minmax-sse2-u64.c │ │ ├── f32-rdsum-7p7x-minmax-wasmsimd-u16.c │ │ ├── f32-rdsum-7p7x-minmax-wasmsimd-u32.c │ │ └── f32-rdsum-7p7x-minmax-wasmsimd-u64.c │ ├── rvv.c.in │ └── simd.c.in ├── f32-rdsum2 │ ├── f32-rdsum2.inc │ ├── gen │ │ ├── f32-rdsum2-7p7x-minmax-avx.c │ │ ├── f32-rdsum2-7p7x-minmax-avx512f.c │ │ ├── f32-rdsum2-7p7x-minmax-hvx.c │ │ ├── f32-rdsum2-7p7x-minmax-neon.c │ │ ├── f32-rdsum2-7p7x-minmax-scalar.c │ │ ├── f32-rdsum2-7p7x-minmax-sse2.c │ │ └── f32-rdsum2-7p7x-minmax-wasmsimd.c │ └── simd.c.in ├── f32-rminmax │ ├── f32-rmax.inc │ ├── f32-rmin.inc │ ├── f32-rminmax.inc │ ├── gen │ │ ├── f32-rmax-avx-u16-acc2.c │ │ ├── f32-rmax-avx-u24-acc3.c │ │ ├── f32-rmax-avx-u32-acc2.c │ │ ├── f32-rmax-avx-u32-acc4.c │ │ ├── f32-rmax-avx-u8.c │ │ ├── f32-rmax-avx512f-u16.c │ │ ├── f32-rmax-avx512f-u32-acc2.c │ │ ├── f32-rmax-avx512f-u48-acc3.c │ │ ├── f32-rmax-avx512f-u64-acc2.c │ │ ├── f32-rmax-avx512f-u64-acc4.c │ │ ├── f32-rmax-hvx-u128-acc2.c │ │ ├── f32-rmax-hvx-u128-acc4.c │ │ ├── f32-rmax-hvx-u32.c │ │ ├── f32-rmax-hvx-u64-acc2.c │ │ ├── f32-rmax-hvx-u96-acc3.c │ │ ├── f32-rmax-neon-u12-acc3.c │ │ ├── f32-rmax-neon-u16-acc2.c │ │ ├── f32-rmax-neon-u16-acc4.c │ │ ├── f32-rmax-neon-u4.c │ │ ├── f32-rmax-neon-u8-acc2.c │ │ ├── f32-rmax-rvv-u1v.c │ │ ├── f32-rmax-rvv-u2v.c │ │ ├── f32-rmax-rvv-u4v.c │ │ ├── f32-rmax-rvv-u8v.c │ │ ├── f32-rmax-scalar-u1.c │ │ ├── f32-rmax-scalar-u2-acc2.c │ │ ├── f32-rmax-scalar-u3-acc3.c │ │ ├── f32-rmax-scalar-u4-acc2.c │ │ ├── f32-rmax-scalar-u4-acc4.c │ │ ├── f32-rmax-sse-u12-acc3.c │ │ ├── f32-rmax-sse-u16-acc2.c │ │ ├── f32-rmax-sse-u16-acc4.c │ │ ├── f32-rmax-sse-u4.c │ │ ├── f32-rmax-sse-u8-acc2.c │ │ ├── f32-rmax-wasmsimd-minmax-u12-acc3.c │ │ ├── f32-rmax-wasmsimd-minmax-u16-acc2.c │ │ ├── f32-rmax-wasmsimd-minmax-u16-acc4.c │ │ ├── f32-rmax-wasmsimd-minmax-u4.c │ │ ├── f32-rmax-wasmsimd-minmax-u8-acc2.c │ │ ├── f32-rmax-wasmsimd-pminmax-u12-acc3.c │ │ ├── f32-rmax-wasmsimd-pminmax-u16-acc2.c │ │ ├── f32-rmax-wasmsimd-pminmax-u16-acc4.c │ │ ├── f32-rmax-wasmsimd-pminmax-u4.c │ │ ├── f32-rmax-wasmsimd-pminmax-u8-acc2.c │ │ ├── f32-rmin-avx-u16-acc2.c │ │ ├── f32-rmin-avx-u24-acc3.c │ │ ├── f32-rmin-avx-u32-acc2.c │ │ ├── f32-rmin-avx-u32-acc4.c │ │ ├── f32-rmin-avx-u8.c │ │ ├── f32-rmin-avx512f-u16.c │ │ ├── f32-rmin-avx512f-u32-acc2.c │ │ ├── f32-rmin-avx512f-u48-acc3.c │ │ ├── f32-rmin-avx512f-u64-acc2.c │ │ ├── f32-rmin-avx512f-u64-acc4.c │ │ ├── f32-rmin-hvx-u128-acc2.c │ │ ├── f32-rmin-hvx-u128-acc4.c │ │ ├── f32-rmin-hvx-u32.c │ │ ├── f32-rmin-hvx-u64-acc2.c │ │ ├── f32-rmin-hvx-u96-acc3.c │ │ ├── f32-rmin-neon-u12-acc3.c │ │ ├── f32-rmin-neon-u16-acc2.c │ │ ├── f32-rmin-neon-u16-acc4.c │ │ ├── f32-rmin-neon-u4.c │ │ ├── f32-rmin-neon-u8-acc2.c │ │ ├── f32-rmin-rvv-u1v.c │ │ ├── f32-rmin-rvv-u2v.c │ │ ├── f32-rmin-rvv-u4v.c │ │ ├── f32-rmin-rvv-u8v.c │ │ ├── f32-rmin-scalar-u1.c │ │ ├── f32-rmin-scalar-u2-acc2.c │ │ ├── f32-rmin-scalar-u3-acc3.c │ │ ├── f32-rmin-scalar-u4-acc2.c │ │ ├── f32-rmin-scalar-u4-acc4.c │ │ ├── f32-rmin-sse-u12-acc3.c │ │ ├── f32-rmin-sse-u16-acc2.c │ │ ├── f32-rmin-sse-u16-acc4.c │ │ ├── f32-rmin-sse-u4.c │ │ ├── f32-rmin-sse-u8-acc2.c │ │ ├── f32-rmin-wasmsimd-minmax-u12-acc3.c │ │ ├── f32-rmin-wasmsimd-minmax-u16-acc2.c │ │ ├── f32-rmin-wasmsimd-minmax-u16-acc4.c │ │ ├── f32-rmin-wasmsimd-minmax-u4.c │ │ ├── f32-rmin-wasmsimd-minmax-u8-acc2.c │ │ ├── f32-rmin-wasmsimd-pminmax-u12-acc3.c │ │ ├── f32-rmin-wasmsimd-pminmax-u16-acc2.c │ │ ├── f32-rmin-wasmsimd-pminmax-u16-acc4.c │ │ ├── f32-rmin-wasmsimd-pminmax-u4.c │ │ ├── f32-rmin-wasmsimd-pminmax-u8-acc2.c │ │ ├── f32-rminmax-avx-u16-acc2.c │ │ ├── f32-rminmax-avx-u24-acc3.c │ │ ├── f32-rminmax-avx-u32-acc2.c │ │ ├── f32-rminmax-avx-u32-acc4.c │ │ ├── f32-rminmax-avx-u8.c │ │ ├── f32-rminmax-avx512f-u16.c │ │ ├── f32-rminmax-avx512f-u32-acc2.c │ │ ├── f32-rminmax-avx512f-u48-acc3.c │ │ ├── f32-rminmax-avx512f-u64-acc2.c │ │ ├── f32-rminmax-avx512f-u64-acc4.c │ │ ├── f32-rminmax-hvx-u128-acc2.c │ │ ├── f32-rminmax-hvx-u128-acc4.c │ │ ├── f32-rminmax-hvx-u32.c │ │ ├── f32-rminmax-hvx-u64-acc2.c │ │ ├── f32-rminmax-hvx-u96-acc3.c │ │ ├── f32-rminmax-neon-u12-acc3.c │ │ ├── f32-rminmax-neon-u16-acc2.c │ │ ├── f32-rminmax-neon-u16-acc4.c │ │ ├── f32-rminmax-neon-u4.c │ │ ├── f32-rminmax-neon-u8-acc2.c │ │ ├── f32-rminmax-rvv-u1v.c │ │ ├── f32-rminmax-rvv-u2v.c │ │ ├── f32-rminmax-rvv-u4v.c │ │ ├── f32-rminmax-rvv-u8v.c │ │ ├── f32-rminmax-scalar-u1.c │ │ ├── f32-rminmax-scalar-u2-acc2.c │ │ ├── f32-rminmax-scalar-u3-acc3.c │ │ ├── f32-rminmax-scalar-u4-acc2.c │ │ ├── f32-rminmax-scalar-u4-acc4.c │ │ ├── f32-rminmax-sse-u12-acc3.c │ │ ├── f32-rminmax-sse-u16-acc2.c │ │ ├── f32-rminmax-sse-u16-acc4.c │ │ ├── f32-rminmax-sse-u4.c │ │ ├── f32-rminmax-sse-u8-acc2.c │ │ ├── f32-rminmax-wasmsimd-minmax-u12-acc3.c │ │ ├── f32-rminmax-wasmsimd-minmax-u16-acc2.c │ │ ├── f32-rminmax-wasmsimd-minmax-u16-acc4.c │ │ ├── f32-rminmax-wasmsimd-minmax-u4.c │ │ ├── f32-rminmax-wasmsimd-minmax-u8-acc2.c │ │ ├── f32-rminmax-wasmsimd-pminmax-u12-acc3.c │ │ ├── f32-rminmax-wasmsimd-pminmax-u16-acc2.c │ │ ├── f32-rminmax-wasmsimd-pminmax-u16-acc4.c │ │ ├── f32-rminmax-wasmsimd-pminmax-u4.c │ │ └── f32-rminmax-wasmsimd-pminmax-u8-acc2.c │ ├── rvv.c.in │ ├── simd.c.in │ └── wasmsimd.c.in ├── f32-rsum │ ├── f32-rsum-rvv-u1v.c │ ├── f32-rsum.inc │ ├── gen │ │ ├── f32-rsum-avx-u16-acc2.c │ │ ├── f32-rsum-avx-u24-acc3.c │ │ ├── f32-rsum-avx-u32-acc2.c │ │ ├── f32-rsum-avx-u32-acc4.c │ │ ├── f32-rsum-avx-u8.c │ │ ├── f32-rsum-avx512f-u16.c │ │ ├── f32-rsum-avx512f-u32-acc2.c │ │ ├── f32-rsum-avx512f-u48-acc3.c │ │ ├── f32-rsum-avx512f-u64-acc2.c │ │ ├── f32-rsum-avx512f-u64-acc4.c │ │ ├── f32-rsum-hvx-u128-acc2.c │ │ ├── f32-rsum-hvx-u128-acc4.c │ │ ├── f32-rsum-hvx-u32.c │ │ ├── f32-rsum-hvx-u64-acc2.c │ │ ├── f32-rsum-hvx-u96-acc3.c │ │ ├── f32-rsum-neon-u12-acc3.c │ │ ├── f32-rsum-neon-u16-acc2.c │ │ ├── f32-rsum-neon-u16-acc4.c │ │ ├── f32-rsum-neon-u4.c │ │ ├── f32-rsum-neon-u8-acc2.c │ │ ├── f32-rsum-scalar-u1.c │ │ ├── f32-rsum-scalar-u2-acc2.c │ │ ├── f32-rsum-scalar-u3-acc3.c │ │ ├── f32-rsum-scalar-u4-acc2.c │ │ ├── f32-rsum-scalar-u4-acc4.c │ │ ├── f32-rsum-sse2-u12-acc3.c │ │ ├── f32-rsum-sse2-u16-acc2.c │ │ ├── f32-rsum-sse2-u16-acc4.c │ │ ├── f32-rsum-sse2-u4.c │ │ ├── f32-rsum-sse2-u8-acc2.c │ │ ├── f32-rsum-wasmsimd-u12-acc3.c │ │ ├── f32-rsum-wasmsimd-u16-acc2.c │ │ ├── f32-rsum-wasmsimd-u16-acc4.c │ │ ├── f32-rsum-wasmsimd-u4.c │ │ └── f32-rsum-wasmsimd-u8-acc2.c │ ├── hvx.c.in │ └── simd.c.in ├── f32-rsum2 │ ├── f32-rsum2.inc │ ├── gen │ │ ├── f32-rsum2-avx-u8.c │ │ ├── f32-rsum2-avx512f-u16.c │ │ ├── f32-rsum2-neon.c │ │ ├── f32-rsum2-scalar-u1.c │ │ ├── f32-rsum2-sse2-u4.c │ │ └── f32-rsum2-wasmsimd-u4.c │ └── simd.c.in ├── f32-spmm │ ├── f32-spmm-minmax.inc │ ├── gen │ │ ├── f32-spmm-128x1-minmax-hvx-pipelined-u2.c │ │ ├── f32-spmm-128x1-minmax-hvx-pipelined-u4.c │ │ ├── f32-spmm-128x1-minmax-hvx-pipelined.c │ │ ├── f32-spmm-128x1-minmax-hvx-u2.c │ │ ├── f32-spmm-128x1-minmax-hvx-u4.c │ │ ├── f32-spmm-128x1-minmax-hvx.c │ │ ├── f32-spmm-12x1-minmax-neon.c │ │ ├── f32-spmm-12x1-minmax-neonfma.c │ │ ├── f32-spmm-12x2-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-12x4-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-16x1-minmax-neon-pipelined.c │ │ ├── f32-spmm-16x1-minmax-neon-u2.c │ │ ├── f32-spmm-16x1-minmax-neon.c │ │ ├── f32-spmm-16x1-minmax-neonfma-pipelined.c │ │ ├── f32-spmm-16x1-minmax-neonfma-u2.c │ │ ├── f32-spmm-16x1-minmax-neonfma.c │ │ ├── f32-spmm-16x1-minmax-sse.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-arm-pipelined-u2.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-arm-pipelined.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-arm-u2.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-arm-u4.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-arm.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-x86-pipelined-u2.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-x86-pipelined.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-x86-u2.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-x86-u4.c │ │ ├── f32-spmm-16x1-minmax-wasmrelaxedsimd-x86.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-arm-pipelined-u2.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-arm-pipelined.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-arm-u2.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-arm-u4.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-arm.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-x86-pipelined-u2.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-x86-pipelined.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-x86-u2.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-x86-u4.c │ │ ├── f32-spmm-16x1-minmax-wasmsimd-x86.c │ │ ├── f32-spmm-16x2-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-16x4-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-1vx1-minmax-rvv.c │ │ ├── f32-spmm-1vx2-minmax-rvv.c │ │ ├── f32-spmm-1vx4-minmax-rvv.c │ │ ├── f32-spmm-1x1-minmax-scalar-pipelined.c │ │ ├── f32-spmm-1x1-minmax-scalar.c │ │ ├── f32-spmm-2vx1-minmax-rvv.c │ │ ├── f32-spmm-2vx2-minmax-rvv.c │ │ ├── f32-spmm-2vx4-minmax-rvv.c │ │ ├── f32-spmm-2x1-minmax-scalar-pipelined.c │ │ ├── f32-spmm-2x1-minmax-scalar.c │ │ ├── f32-spmm-32x1-minmax-hvx-pipelined-u2.c │ │ ├── f32-spmm-32x1-minmax-hvx-pipelined-u4.c │ │ ├── f32-spmm-32x1-minmax-hvx-pipelined.c │ │ ├── f32-spmm-32x1-minmax-hvx-u2.c │ │ ├── f32-spmm-32x1-minmax-hvx-u4.c │ │ ├── f32-spmm-32x1-minmax-hvx.c │ │ ├── f32-spmm-32x1-minmax-neon-pipelined.c │ │ ├── f32-spmm-32x1-minmax-neon-u2.c │ │ ├── f32-spmm-32x1-minmax-neon.c │ │ ├── f32-spmm-32x1-minmax-neonfma-pipelined.c │ │ ├── f32-spmm-32x1-minmax-neonfma-u2.c │ │ ├── f32-spmm-32x1-minmax-neonfma.c │ │ ├── f32-spmm-32x1-minmax-sse.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-arm-pipelined-u2.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-arm-pipelined.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-arm-u2.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-arm-u4.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-arm.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-x86-pipelined-u2.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-x86-pipelined.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-x86-u2.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-x86-u4.c │ │ ├── f32-spmm-32x1-minmax-wasmrelaxedsimd-x86.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-arm-pipelined-u2.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-arm-pipelined.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-arm-u2.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-arm-u4.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-arm.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-x86-pipelined-u2.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-x86-pipelined.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-x86-u2.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-x86-u4.c │ │ ├── f32-spmm-32x1-minmax-wasmsimd-x86.c │ │ ├── f32-spmm-32x2-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-32x4-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-4vx1-minmax-rvv.c │ │ ├── f32-spmm-4vx2-minmax-rvv.c │ │ ├── f32-spmm-4vx4-minmax-rvv.c │ │ ├── f32-spmm-4x1-minmax-neon-pipelined.c │ │ ├── f32-spmm-4x1-minmax-neon-u2.c │ │ ├── f32-spmm-4x1-minmax-neon.c │ │ ├── f32-spmm-4x1-minmax-neonfma-pipelined.c │ │ ├── f32-spmm-4x1-minmax-neonfma-u2.c │ │ ├── f32-spmm-4x1-minmax-neonfma.c │ │ ├── f32-spmm-4x1-minmax-scalar-pipelined.c │ │ ├── f32-spmm-4x1-minmax-scalar.c │ │ ├── f32-spmm-4x1-minmax-sse.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-arm-pipelined-u2.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-arm-pipelined.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-arm-u2.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-arm-u4.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-arm.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-x86-pipelined-u2.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-x86-pipelined.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-x86-u2.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-x86-u4.c │ │ ├── f32-spmm-4x1-minmax-wasmrelaxedsimd-x86.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-arm-pipelined-u2.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-arm-pipelined.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-arm-u2.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-arm-u4.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-arm.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-x86-pipelined-u2.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-x86-pipelined.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-x86-u2.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-x86-u4.c │ │ ├── f32-spmm-4x1-minmax-wasmsimd-x86.c │ │ ├── f32-spmm-4x2-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-4x4-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-64x1-minmax-hvx-pipelined-u2.c │ │ ├── f32-spmm-64x1-minmax-hvx-pipelined-u4.c │ │ ├── f32-spmm-64x1-minmax-hvx-pipelined.c │ │ ├── f32-spmm-64x1-minmax-hvx-u2.c │ │ ├── f32-spmm-64x1-minmax-hvx-u4.c │ │ ├── f32-spmm-64x1-minmax-hvx.c │ │ ├── f32-spmm-8vx1-minmax-rvv.c │ │ ├── f32-spmm-8vx2-minmax-rvv.c │ │ ├── f32-spmm-8vx4-minmax-rvv.c │ │ ├── f32-spmm-8x1-minmax-neon-pipelined.c │ │ ├── f32-spmm-8x1-minmax-neon-u2.c │ │ ├── f32-spmm-8x1-minmax-neon.c │ │ ├── f32-spmm-8x1-minmax-neonfma-pipelined.c │ │ ├── f32-spmm-8x1-minmax-neonfma-u2.c │ │ ├── f32-spmm-8x1-minmax-neonfma.c │ │ ├── f32-spmm-8x1-minmax-scalar-pipelined.c │ │ ├── f32-spmm-8x1-minmax-scalar.c │ │ ├── f32-spmm-8x1-minmax-sse.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-arm-pipelined-u2.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-arm-pipelined.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-arm-u2.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-arm-u4.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-arm.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-x86-pipelined-u2.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-x86-pipelined.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-x86-u2.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-x86-u4.c │ │ ├── f32-spmm-8x1-minmax-wasmrelaxedsimd-x86.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-arm-pipelined-u2.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-arm-pipelined.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-arm-u2.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-arm-u4.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-arm.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-x86-pipelined-u2.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-x86-pipelined.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-x86-u2.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-x86-u4.c │ │ ├── f32-spmm-8x1-minmax-wasmsimd-x86.c │ │ ├── f32-spmm-8x2-minmax-aarch64-neonfma.c │ │ ├── f32-spmm-8x2-minmax-scalar.c │ │ ├── f32-spmm-8x4-minmax-aarch64-neonfma.c │ │ └── f32-spmm-8x4-minmax-scalar.c │ ├── hvx-pipelined.c.in │ ├── hvx.c.in │ ├── neon-blocked.c.in │ ├── neon-pipelined.c.in │ ├── neon.c.in │ ├── rvv.c.in │ ├── scalar-pipelined.c.in │ ├── scalar.c.in │ ├── sse.c.in │ ├── wasmsimd-pipelined.c.in │ └── wasmsimd.c.in ├── f32-vabs │ └── f32-vabs.inc ├── f32-vapproxgelu │ ├── f32-vapproxgelu.inc │ ├── gen │ │ ├── f32-vapproxgelu-avx-rational-12-10-div.c │ │ ├── f32-vapproxgelu-avx512f-rational-12-10-div.c │ │ ├── f32-vapproxgelu-avx512f-rational-12-10-nr.c │ │ ├── f32-vapproxgelu-fma3-rational-12-10-div.c │ │ ├── f32-vapproxgelu-hvx-rational-12-10-div.c │ │ ├── f32-vapproxgelu-neon-rational-12-10-div.c │ │ ├── f32-vapproxgelu-scalar-rational-12-10-div.c │ │ ├── f32-vapproxgelu-scalar.c │ │ ├── f32-vapproxgelu-sse2-rational-12-10-div.c │ │ ├── f32-vapproxgelu-sse2fma-rational-12-10-div.c │ │ └── f32-vapproxgelu-wasmsimd-rational-12-10-div.c │ ├── rational-12-10.c.in │ └── scalar.c.in ├── f32-vbinary │ ├── f32-vadd.inc │ ├── f32-vaddc.inc │ ├── f32-vcmul.inc │ ├── f32-vcopysign.inc │ ├── f32-vcopysignc.inc │ ├── f32-vdiv.inc │ ├── f32-vdivc.inc │ ├── f32-vmax.inc │ ├── f32-vmaxc.inc │ ├── f32-vmin.inc │ ├── f32-vminc.inc │ ├── f32-vmul.inc │ ├── f32-vmulc.inc │ ├── f32-vprelu.inc │ ├── f32-vpreluc.inc │ ├── f32-vrcopysignc.inc │ ├── f32-vrdivc.inc │ ├── f32-vrpreluc.inc │ ├── f32-vrsubc.inc │ ├── f32-vsqrdiff.inc │ ├── f32-vsqrdiffc.inc │ ├── f32-vsub.inc │ ├── f32-vsubc.inc │ ├── gen │ │ ├── f32-vadd-avx-u16.c │ │ ├── f32-vadd-avx-u8.c │ │ ├── f32-vadd-avx512f-u16.c │ │ ├── f32-vadd-avx512f-u32.c │ │ ├── f32-vadd-hvx-u128.c │ │ ├── f32-vadd-hvx-u32.c │ │ ├── f32-vadd-hvx-u64.c │ │ ├── f32-vadd-neon-u4.c │ │ ├── f32-vadd-neon-u8.c │ │ ├── f32-vadd-rvv-u4v.c │ │ ├── f32-vadd-rvv-u8v.c │ │ ├── f32-vadd-scalar-u1.c │ │ ├── f32-vadd-scalar-u2.c │ │ ├── f32-vadd-scalar-u4.c │ │ ├── f32-vadd-scalar-u8.c │ │ ├── f32-vadd-sse-u4.c │ │ ├── f32-vadd-sse-u8.c │ │ ├── f32-vadd-wasmsimd-u16.c │ │ ├── f32-vadd-wasmsimd-u4.c │ │ ├── f32-vadd-wasmsimd-u8.c │ │ ├── f32-vaddc-avx-u16.c │ │ ├── f32-vaddc-avx-u8.c │ │ ├── f32-vaddc-avx512f-u16.c │ │ ├── f32-vaddc-avx512f-u32.c │ │ ├── f32-vaddc-hvx-u128.c │ │ ├── f32-vaddc-hvx-u32.c │ │ ├── f32-vaddc-hvx-u64.c │ │ ├── f32-vaddc-neon-u4.c │ │ ├── f32-vaddc-neon-u8.c │ │ ├── f32-vaddc-rvv-u4v.c │ │ ├── f32-vaddc-rvv-u8v.c │ │ ├── f32-vaddc-scalar-u1.c │ │ ├── f32-vaddc-scalar-u2.c │ │ ├── f32-vaddc-scalar-u4.c │ │ ├── f32-vaddc-scalar-u8.c │ │ ├── f32-vaddc-sse-u4.c │ │ ├── f32-vaddc-sse-u8.c │ │ ├── f32-vaddc-wasmsimd-u16.c │ │ ├── f32-vaddc-wasmsimd-u4.c │ │ ├── f32-vaddc-wasmsimd-u8.c │ │ ├── f32-vdiv-aarch64-neon-u4.c │ │ ├── f32-vdiv-aarch64-neon-u8.c │ │ ├── f32-vdiv-avx-u16.c │ │ ├── f32-vdiv-avx-u8.c │ │ ├── f32-vdiv-avx512f-u16.c │ │ ├── f32-vdiv-avx512f-u32.c │ │ ├── f32-vdiv-hvx-u128.c │ │ ├── f32-vdiv-hvx-u32.c │ │ ├── f32-vdiv-hvx-u64.c │ │ ├── f32-vdiv-rvv-u4v.c │ │ ├── f32-vdiv-rvv-u8v.c │ │ ├── f32-vdiv-scalar-u1.c │ │ ├── f32-vdiv-scalar-u2.c │ │ ├── f32-vdiv-scalar-u4.c │ │ ├── f32-vdiv-scalar-u8.c │ │ ├── f32-vdiv-sse-u4.c │ │ ├── f32-vdiv-sse-u8.c │ │ ├── f32-vdiv-wasmsimd-u16.c │ │ ├── f32-vdiv-wasmsimd-u4.c │ │ ├── f32-vdiv-wasmsimd-u8.c │ │ ├── f32-vdivc-aarch64-neon-u4.c │ │ ├── f32-vdivc-aarch64-neon-u8.c │ │ ├── f32-vdivc-avx-u16.c │ │ ├── f32-vdivc-avx-u8.c │ │ ├── f32-vdivc-avx512f-u16.c │ │ ├── f32-vdivc-avx512f-u32.c │ │ ├── f32-vdivc-hvx-u128.c │ │ ├── f32-vdivc-hvx-u32.c │ │ ├── f32-vdivc-hvx-u64.c │ │ ├── f32-vdivc-rvv-u4v.c │ │ ├── f32-vdivc-rvv-u8v.c │ │ ├── f32-vdivc-scalar-u1.c │ │ ├── f32-vdivc-scalar-u2.c │ │ ├── f32-vdivc-scalar-u4.c │ │ ├── f32-vdivc-scalar-u8.c │ │ ├── f32-vdivc-sse-u4.c │ │ ├── f32-vdivc-sse-u8.c │ │ ├── f32-vdivc-wasmsimd-u16.c │ │ ├── f32-vdivc-wasmsimd-u4.c │ │ ├── f32-vdivc-wasmsimd-u8.c │ │ ├── f32-vmax-avx-u16.c │ │ ├── f32-vmax-avx-u8.c │ │ ├── f32-vmax-avx512f-u16.c │ │ ├── f32-vmax-avx512f-u32.c │ │ ├── f32-vmax-hvx-u128.c │ │ ├── f32-vmax-hvx-u32.c │ │ ├── f32-vmax-hvx-u64.c │ │ ├── f32-vmax-neon-u4.c │ │ ├── f32-vmax-neon-u8.c │ │ ├── f32-vmax-rvv-u4v.c │ │ ├── f32-vmax-rvv-u8v.c │ │ ├── f32-vmax-scalar-u1.c │ │ ├── f32-vmax-scalar-u2.c │ │ ├── f32-vmax-scalar-u4.c │ │ ├── f32-vmax-scalar-u8.c │ │ ├── f32-vmax-sse-u4.c │ │ ├── f32-vmax-sse-u8.c │ │ ├── f32-vmax-wasmsimd-arm-u16.c │ │ ├── f32-vmax-wasmsimd-arm-u4.c │ │ ├── f32-vmax-wasmsimd-arm-u8.c │ │ ├── f32-vmax-wasmsimd-x86-u16.c │ │ ├── f32-vmax-wasmsimd-x86-u4.c │ │ ├── f32-vmax-wasmsimd-x86-u8.c │ │ ├── f32-vmaxc-avx-u16.c │ │ ├── f32-vmaxc-avx-u8.c │ │ ├── f32-vmaxc-avx512f-u16.c │ │ ├── f32-vmaxc-avx512f-u32.c │ │ ├── f32-vmaxc-hvx-u128.c │ │ ├── f32-vmaxc-hvx-u32.c │ │ ├── f32-vmaxc-hvx-u64.c │ │ ├── f32-vmaxc-neon-u4.c │ │ ├── f32-vmaxc-neon-u8.c │ │ ├── f32-vmaxc-rvv-u4v.c │ │ ├── f32-vmaxc-rvv-u8v.c │ │ ├── f32-vmaxc-scalar-u1.c │ │ ├── f32-vmaxc-scalar-u2.c │ │ ├── f32-vmaxc-scalar-u4.c │ │ ├── f32-vmaxc-scalar-u8.c │ │ ├── f32-vmaxc-sse-u4.c │ │ ├── f32-vmaxc-sse-u8.c │ │ ├── f32-vmaxc-wasmsimd-arm-u16.c │ │ ├── f32-vmaxc-wasmsimd-arm-u4.c │ │ ├── f32-vmaxc-wasmsimd-arm-u8.c │ │ ├── f32-vmaxc-wasmsimd-x86-u16.c │ │ ├── f32-vmaxc-wasmsimd-x86-u4.c │ │ ├── f32-vmaxc-wasmsimd-x86-u8.c │ │ ├── f32-vmin-avx-u16.c │ │ ├── f32-vmin-avx-u8.c │ │ ├── f32-vmin-avx512f-u16.c │ │ ├── f32-vmin-avx512f-u32.c │ │ ├── f32-vmin-hvx-u128.c │ │ ├── f32-vmin-hvx-u32.c │ │ ├── f32-vmin-hvx-u64.c │ │ ├── f32-vmin-neon-u4.c │ │ ├── f32-vmin-neon-u8.c │ │ ├── f32-vmin-rvv-u4v.c │ │ ├── f32-vmin-rvv-u8v.c │ │ ├── f32-vmin-scalar-u1.c │ │ ├── f32-vmin-scalar-u2.c │ │ ├── f32-vmin-scalar-u4.c │ │ ├── f32-vmin-scalar-u8.c │ │ ├── f32-vmin-sse-u4.c │ │ ├── f32-vmin-sse-u8.c │ │ ├── f32-vmin-wasmsimd-arm-u16.c │ │ ├── f32-vmin-wasmsimd-arm-u4.c │ │ ├── f32-vmin-wasmsimd-arm-u8.c │ │ ├── f32-vmin-wasmsimd-x86-u16.c │ │ ├── f32-vmin-wasmsimd-x86-u4.c │ │ ├── f32-vmin-wasmsimd-x86-u8.c │ │ ├── f32-vminc-avx-u16.c │ │ ├── f32-vminc-avx-u8.c │ │ ├── f32-vminc-avx512f-u16.c │ │ ├── f32-vminc-avx512f-u32.c │ │ ├── f32-vminc-hvx-u128.c │ │ ├── f32-vminc-hvx-u32.c │ │ ├── f32-vminc-hvx-u64.c │ │ ├── f32-vminc-neon-u4.c │ │ ├── f32-vminc-neon-u8.c │ │ ├── f32-vminc-rvv-u4v.c │ │ ├── f32-vminc-rvv-u8v.c │ │ ├── f32-vminc-scalar-u1.c │ │ ├── f32-vminc-scalar-u2.c │ │ ├── f32-vminc-scalar-u4.c │ │ ├── f32-vminc-scalar-u8.c │ │ ├── f32-vminc-sse-u4.c │ │ ├── f32-vminc-sse-u8.c │ │ ├── f32-vminc-wasmsimd-arm-u16.c │ │ ├── f32-vminc-wasmsimd-arm-u4.c │ │ ├── f32-vminc-wasmsimd-arm-u8.c │ │ ├── f32-vminc-wasmsimd-x86-u16.c │ │ ├── f32-vminc-wasmsimd-x86-u4.c │ │ ├── f32-vminc-wasmsimd-x86-u8.c │ │ ├── f32-vmul-avx-u16.c │ │ ├── f32-vmul-avx-u8.c │ │ ├── f32-vmul-avx512f-u16.c │ │ ├── f32-vmul-avx512f-u32.c │ │ ├── f32-vmul-hvx-u128.c │ │ ├── f32-vmul-hvx-u32.c │ │ ├── f32-vmul-hvx-u64.c │ │ ├── f32-vmul-neon-u4.c │ │ ├── f32-vmul-neon-u8.c │ │ ├── f32-vmul-rvv-u4v.c │ │ ├── f32-vmul-rvv-u8v.c │ │ ├── f32-vmul-scalar-u1.c │ │ ├── f32-vmul-scalar-u2.c │ │ ├── f32-vmul-scalar-u4.c │ │ ├── f32-vmul-scalar-u8.c │ │ ├── f32-vmul-sse-u4.c │ │ ├── f32-vmul-sse-u8.c │ │ ├── f32-vmul-wasmsimd-u16.c │ │ ├── f32-vmul-wasmsimd-u4.c │ │ ├── f32-vmul-wasmsimd-u8.c │ │ ├── f32-vmulc-avx-u16.c │ │ ├── f32-vmulc-avx-u8.c │ │ ├── f32-vmulc-avx512f-u16.c │ │ ├── f32-vmulc-avx512f-u32.c │ │ ├── f32-vmulc-hvx-u128.c │ │ ├── f32-vmulc-hvx-u32.c │ │ ├── f32-vmulc-hvx-u64.c │ │ ├── f32-vmulc-neon-u4.c │ │ ├── f32-vmulc-neon-u8.c │ │ ├── f32-vmulc-rvv-u4v.c │ │ ├── f32-vmulc-rvv-u8v.c │ │ ├── f32-vmulc-scalar-u1.c │ │ ├── f32-vmulc-scalar-u2.c │ │ ├── f32-vmulc-scalar-u4.c │ │ ├── f32-vmulc-scalar-u8.c │ │ ├── f32-vmulc-sse-u4.c │ │ ├── f32-vmulc-sse-u8.c │ │ ├── f32-vmulc-wasmsimd-u16.c │ │ ├── f32-vmulc-wasmsimd-u4.c │ │ ├── f32-vmulc-wasmsimd-u8.c │ │ ├── f32-vprelu-avx-u16.c │ │ ├── f32-vprelu-avx-u8.c │ │ ├── f32-vprelu-avx512f-u16.c │ │ ├── f32-vprelu-avx512f-u32.c │ │ ├── f32-vprelu-hvx-u128.c │ │ ├── f32-vprelu-hvx-u32.c │ │ ├── f32-vprelu-hvx-u64.c │ │ ├── f32-vprelu-neon-u4.c │ │ ├── f32-vprelu-neon-u8.c │ │ ├── f32-vprelu-scalar-u1.c │ │ ├── f32-vprelu-scalar-u2.c │ │ ├── f32-vprelu-scalar-u4.c │ │ ├── f32-vprelu-scalar-u8.c │ │ ├── f32-vprelu-sse2-u4.c │ │ ├── f32-vprelu-sse2-u8.c │ │ ├── f32-vprelu-sse41-u4.c │ │ ├── f32-vprelu-sse41-u8.c │ │ ├── f32-vprelu-wasmrelaxedsimd-u16.c │ │ ├── f32-vprelu-wasmrelaxedsimd-u4.c │ │ ├── f32-vprelu-wasmrelaxedsimd-u8.c │ │ ├── f32-vprelu-wasmsimd-u16.c │ │ ├── f32-vprelu-wasmsimd-u4.c │ │ ├── f32-vprelu-wasmsimd-u8.c │ │ ├── f32-vpreluc-avx-u16.c │ │ ├── f32-vpreluc-avx-u8.c │ │ ├── f32-vpreluc-avx512f-u16.c │ │ ├── f32-vpreluc-avx512f-u32.c │ │ ├── f32-vpreluc-hvx-u128.c │ │ ├── f32-vpreluc-hvx-u32.c │ │ ├── f32-vpreluc-hvx-u64.c │ │ ├── f32-vpreluc-neon-u4.c │ │ ├── f32-vpreluc-neon-u8.c │ │ ├── f32-vpreluc-scalar-u1.c │ │ ├── f32-vpreluc-scalar-u2.c │ │ ├── f32-vpreluc-scalar-u4.c │ │ ├── f32-vpreluc-scalar-u8.c │ │ ├── f32-vpreluc-sse2-u4.c │ │ ├── f32-vpreluc-sse2-u8.c │ │ ├── f32-vpreluc-sse41-u4.c │ │ ├── f32-vpreluc-sse41-u8.c │ │ ├── f32-vpreluc-wasmrelaxedsimd-u16.c │ │ ├── f32-vpreluc-wasmrelaxedsimd-u4.c │ │ ├── f32-vpreluc-wasmrelaxedsimd-u8.c │ │ ├── f32-vpreluc-wasmsimd-u16.c │ │ ├── f32-vpreluc-wasmsimd-u4.c │ │ ├── f32-vpreluc-wasmsimd-u8.c │ │ ├── f32-vrdivc-aarch64-neon-u4.c │ │ ├── f32-vrdivc-aarch64-neon-u8.c │ │ ├── f32-vrdivc-avx-u16.c │ │ ├── f32-vrdivc-avx-u8.c │ │ ├── f32-vrdivc-avx512f-u16.c │ │ ├── f32-vrdivc-avx512f-u32.c │ │ ├── f32-vrdivc-hvx-u128.c │ │ ├── f32-vrdivc-hvx-u32.c │ │ ├── f32-vrdivc-hvx-u64.c │ │ ├── f32-vrdivc-rvv-u4v.c │ │ ├── f32-vrdivc-rvv-u8v.c │ │ ├── f32-vrdivc-scalar-u1.c │ │ ├── f32-vrdivc-scalar-u2.c │ │ ├── f32-vrdivc-scalar-u4.c │ │ ├── f32-vrdivc-scalar-u8.c │ │ ├── f32-vrdivc-sse-u4.c │ │ ├── f32-vrdivc-sse-u8.c │ │ ├── f32-vrdivc-wasmsimd-u16.c │ │ ├── f32-vrdivc-wasmsimd-u4.c │ │ ├── f32-vrdivc-wasmsimd-u8.c │ │ ├── f32-vrpreluc-avx-u16.c │ │ ├── f32-vrpreluc-avx-u8.c │ │ ├── f32-vrpreluc-avx512f-u16.c │ │ ├── f32-vrpreluc-avx512f-u32.c │ │ ├── f32-vrpreluc-hvx-u128.c │ │ ├── f32-vrpreluc-hvx-u32.c │ │ ├── f32-vrpreluc-hvx-u64.c │ │ ├── f32-vrpreluc-neon-u4.c │ │ ├── f32-vrpreluc-neon-u8.c │ │ ├── f32-vrpreluc-scalar-u1.c │ │ ├── f32-vrpreluc-scalar-u2.c │ │ ├── f32-vrpreluc-scalar-u4.c │ │ ├── f32-vrpreluc-scalar-u8.c │ │ ├── f32-vrpreluc-sse2-u4.c │ │ ├── f32-vrpreluc-sse2-u8.c │ │ ├── f32-vrpreluc-sse41-u4.c │ │ ├── f32-vrpreluc-sse41-u8.c │ │ ├── f32-vrpreluc-wasmrelaxedsimd-u16.c │ │ ├── f32-vrpreluc-wasmrelaxedsimd-u4.c │ │ ├── f32-vrpreluc-wasmrelaxedsimd-u8.c │ │ ├── f32-vrpreluc-wasmsimd-u16.c │ │ ├── f32-vrpreluc-wasmsimd-u4.c │ │ ├── f32-vrpreluc-wasmsimd-u8.c │ │ ├── f32-vrsubc-avx-u16.c │ │ ├── f32-vrsubc-avx-u8.c │ │ ├── f32-vrsubc-avx512f-u16.c │ │ ├── f32-vrsubc-avx512f-u32.c │ │ ├── f32-vrsubc-hvx-u128.c │ │ ├── f32-vrsubc-hvx-u32.c │ │ ├── f32-vrsubc-hvx-u64.c │ │ ├── f32-vrsubc-neon-u4.c │ │ ├── f32-vrsubc-neon-u8.c │ │ ├── f32-vrsubc-rvv-u4v.c │ │ ├── f32-vrsubc-rvv-u8v.c │ │ ├── f32-vrsubc-scalar-u1.c │ │ ├── f32-vrsubc-scalar-u2.c │ │ ├── f32-vrsubc-scalar-u4.c │ │ ├── f32-vrsubc-scalar-u8.c │ │ ├── f32-vrsubc-sse-u4.c │ │ ├── f32-vrsubc-sse-u8.c │ │ ├── f32-vrsubc-wasmsimd-u16.c │ │ ├── f32-vrsubc-wasmsimd-u4.c │ │ ├── f32-vrsubc-wasmsimd-u8.c │ │ ├── f32-vsqrdiff-avx-u16.c │ │ ├── f32-vsqrdiff-avx-u8.c │ │ ├── f32-vsqrdiff-avx512f-u16.c │ │ ├── f32-vsqrdiff-avx512f-u32.c │ │ ├── f32-vsqrdiff-hvx-u128.c │ │ ├── f32-vsqrdiff-hvx-u32.c │ │ ├── f32-vsqrdiff-hvx-u64.c │ │ ├── f32-vsqrdiff-neon-u4.c │ │ ├── f32-vsqrdiff-neon-u8.c │ │ ├── f32-vsqrdiff-rvv-u4v.c │ │ ├── f32-vsqrdiff-rvv-u8v.c │ │ ├── f32-vsqrdiff-scalar-u1.c │ │ ├── f32-vsqrdiff-scalar-u2.c │ │ ├── f32-vsqrdiff-scalar-u4.c │ │ ├── f32-vsqrdiff-scalar-u8.c │ │ ├── f32-vsqrdiff-sse-u4.c │ │ ├── f32-vsqrdiff-sse-u8.c │ │ ├── f32-vsqrdiff-wasmsimd-u16.c │ │ ├── f32-vsqrdiff-wasmsimd-u4.c │ │ ├── f32-vsqrdiff-wasmsimd-u8.c │ │ ├── f32-vsqrdiffc-avx-u16.c │ │ ├── f32-vsqrdiffc-avx-u8.c │ │ ├── f32-vsqrdiffc-avx512f-u16.c │ │ ├── f32-vsqrdiffc-avx512f-u32.c │ │ ├── f32-vsqrdiffc-hvx-u128.c │ │ ├── f32-vsqrdiffc-hvx-u32.c │ │ ├── f32-vsqrdiffc-hvx-u64.c │ │ ├── f32-vsqrdiffc-neon-u4.c │ │ ├── f32-vsqrdiffc-neon-u8.c │ │ ├── f32-vsqrdiffc-rvv-u4v.c │ │ ├── f32-vsqrdiffc-rvv-u8v.c │ │ ├── f32-vsqrdiffc-scalar-u1.c │ │ ├── f32-vsqrdiffc-scalar-u2.c │ │ ├── f32-vsqrdiffc-scalar-u4.c │ │ ├── f32-vsqrdiffc-scalar-u8.c │ │ ├── f32-vsqrdiffc-sse-u4.c │ │ ├── f32-vsqrdiffc-sse-u8.c │ │ ├── f32-vsqrdiffc-wasmsimd-u16.c │ │ ├── f32-vsqrdiffc-wasmsimd-u4.c │ │ ├── f32-vsqrdiffc-wasmsimd-u8.c │ │ ├── f32-vsub-avx-u16.c │ │ ├── f32-vsub-avx-u8.c │ │ ├── f32-vsub-avx512f-u16.c │ │ ├── f32-vsub-avx512f-u32.c │ │ ├── f32-vsub-hvx-u128.c │ │ ├── f32-vsub-hvx-u32.c │ │ ├── f32-vsub-hvx-u64.c │ │ ├── f32-vsub-neon-u4.c │ │ ├── f32-vsub-neon-u8.c │ │ ├── f32-vsub-rvv-u4v.c │ │ ├── f32-vsub-rvv-u8v.c │ │ ├── f32-vsub-scalar-u1.c │ │ ├── f32-vsub-scalar-u2.c │ │ ├── f32-vsub-scalar-u4.c │ │ ├── f32-vsub-scalar-u8.c │ │ ├── f32-vsub-sse-u4.c │ │ ├── f32-vsub-sse-u8.c │ │ ├── f32-vsub-wasmsimd-u16.c │ │ ├── f32-vsub-wasmsimd-u4.c │ │ ├── f32-vsub-wasmsimd-u8.c │ │ ├── f32-vsubc-avx-u16.c │ │ ├── f32-vsubc-avx-u8.c │ │ ├── f32-vsubc-avx512f-u16.c │ │ ├── f32-vsubc-avx512f-u32.c │ │ ├── f32-vsubc-hvx-u128.c │ │ ├── f32-vsubc-hvx-u32.c │ │ ├── f32-vsubc-hvx-u64.c │ │ ├── f32-vsubc-neon-u4.c │ │ ├── f32-vsubc-neon-u8.c │ │ ├── f32-vsubc-rvv-u4v.c │ │ ├── f32-vsubc-rvv-u8v.c │ │ ├── f32-vsubc-scalar-u1.c │ │ ├── f32-vsubc-scalar-u2.c │ │ ├── f32-vsubc-scalar-u4.c │ │ ├── f32-vsubc-scalar-u8.c │ │ ├── f32-vsubc-sse-u4.c │ │ ├── f32-vsubc-sse-u8.c │ │ ├── f32-vsubc-wasmsimd-u16.c │ │ ├── f32-vsubc-wasmsimd-u4.c │ │ └── f32-vsubc-wasmsimd-u8.c │ ├── vop-avx.c.in │ ├── vop-avx512f.c.in │ ├── vop-hvx.c.in │ ├── vop-neon.c.in │ ├── vop-rvv.c.in │ ├── vop-scalar.c.in │ ├── vop-sse.c.in │ ├── vop-wasmsimd.c.in │ ├── vopc-avx.c.in │ ├── vopc-avx512f.c.in │ ├── vopc-hvx.c.in │ ├── vopc-neon.c.in │ ├── vopc-rvv.c.in │ ├── vopc-scalar.c.in │ ├── vopc-sse.c.in │ └── vopc-wasmsimd.c.in ├── f32-vclamp │ ├── avx.c.in │ ├── avx512f.c.in │ ├── f32-vclamp.inc │ ├── gen │ │ ├── f32-vclamp-avx.c │ │ ├── f32-vclamp-avx512f.c │ │ ├── f32-vclamp-hvx.c │ │ ├── f32-vclamp-neon.c │ │ ├── f32-vclamp-rvv-u1v.c │ │ ├── f32-vclamp-rvv-u2v.c │ │ ├── f32-vclamp-rvv-u4v.c │ │ ├── f32-vclamp-rvv-u8v.c │ │ ├── f32-vclamp-scalar.c │ │ ├── f32-vclamp-sse2.c │ │ ├── f32-vclamp-wasmrelaxedsimd.c │ │ └── f32-vclamp-wasmsimd.c │ ├── neon.c.in │ ├── rvv.c.in │ ├── scalar.c.in │ ├── simd.c.in │ ├── sse.c.in │ └── wasmsimd.c.in ├── f32-vcmul │ ├── avx512f.c.in │ ├── gen │ │ ├── f32-vcmul-avx512f-u128.c │ │ ├── f32-vcmul-avx512f-u16.c │ │ ├── f32-vcmul-avx512f-u32.c │ │ ├── f32-vcmul-avx512f-u64.c │ │ ├── f32-vcmul-fma3-u16.c │ │ ├── f32-vcmul-fma3-u32.c │ │ ├── f32-vcmul-fma3-u64.c │ │ ├── f32-vcmul-fma3-u8.c │ │ ├── f32-vcmul-neon-u12.c │ │ ├── f32-vcmul-neon-u16.c │ │ ├── f32-vcmul-neon-u4.c │ │ ├── f32-vcmul-neon-u8.c │ │ ├── f32-vcmul-rvv-u1v.c │ │ ├── f32-vcmul-rvv-u2v.c │ │ ├── f32-vcmul-rvv-u4v.c │ │ ├── f32-vcmul-scalar-u1.c │ │ ├── f32-vcmul-scalar-u2.c │ │ ├── f32-vcmul-scalar-u4.c │ │ ├── f32-vcmul-scalar-u8.c │ │ ├── f32-vcmul-sse-u12.c │ │ ├── f32-vcmul-sse-u16.c │ │ ├── f32-vcmul-sse-u4.c │ │ ├── f32-vcmul-sse-u8.c │ │ ├── f32-vcmul-wasmsimd-u12.c │ │ ├── f32-vcmul-wasmsimd-u16.c │ │ ├── f32-vcmul-wasmsimd-u4.c │ │ └── f32-vcmul-wasmsimd-u8.c │ ├── neon.c.in │ ├── rvv.c.in │ ├── scalar.c.in │ └── wasmsimd.c.in ├── f32-vcopysign │ ├── copysign.c.in │ ├── copysignc.c.in │ ├── gen │ │ ├── f32-vcopysign-avx.c │ │ ├── f32-vcopysign-avx512f.c │ │ ├── f32-vcopysign-hvx.c │ │ ├── f32-vcopysign-neon.c │ │ ├── f32-vcopysign-scalar.c │ │ ├── f32-vcopysign-sse2.c │ │ ├── f32-vcopysign-wasmsimd.c │ │ ├── f32-vcopysignc-avx.c │ │ ├── f32-vcopysignc-avx512f.c │ │ ├── f32-vcopysignc-hvx.c │ │ ├── f32-vcopysignc-neon.c │ │ ├── f32-vcopysignc-scalar.c │ │ ├── f32-vcopysignc-sse2.c │ │ ├── f32-vcopysignc-wasmsimd.c │ │ ├── f32-vrcopysignc-avx.c │ │ ├── f32-vrcopysignc-avx512f.c │ │ ├── f32-vrcopysignc-hvx.c │ │ ├── f32-vrcopysignc-neon.c │ │ ├── f32-vrcopysignc-scalar.c │ │ ├── f32-vrcopysignc-sse2.c │ │ └── f32-vrcopysignc-wasmsimd.c │ └── rcopysignc.c.in ├── f32-vcos │ ├── f32-vcos.inc │ └── gen │ │ ├── f32-vcos-avx-rational-5-4-div.c │ │ ├── f32-vcos-avx512f-rational-5-4-div.c │ │ ├── f32-vcos-avx512f-rational-5-4-nr.c │ │ ├── f32-vcos-fma3-rational-5-4-div.c │ │ ├── f32-vcos-hvx-rational-5-4-div.c │ │ ├── f32-vcos-neon-rational-5-4-div.c │ │ ├── f32-vcos-neon-rational-5-4-nr.c │ │ ├── f32-vcos-scalar-rational-5-4-div.c │ │ ├── f32-vcos-sse2-rational-5-4-div.c │ │ ├── f32-vcos-sse2fma-rational-5-4-div.c │ │ └── f32-vcos-wasmsimd-rational-5-4-div.c ├── f32-velu │ ├── avx-rr2-lut16-p3.c.in │ ├── avx-rr2-lut4-p4-perm.c.in │ ├── avx-rr2-p6.c.in │ ├── avx2-rr1-lut16-p3-gather.c.in │ ├── avx2-rr1-lut4-p4-perm.c.in │ ├── avx2-rr1-lut8-p4-perm.c.in │ ├── avx2-rr1-p6.c.in │ ├── avx512f-rr1-lut16-p3-perm.c.in │ ├── avx512f-rr1-p6.c.in │ ├── f32-velu.inc │ ├── gen │ │ ├── f32-velu-avx-rr2-lut16-p3-u16.c │ │ ├── f32-velu-avx-rr2-lut16-p3-u24.c │ │ ├── f32-velu-avx-rr2-lut16-p3-u32.c │ │ ├── f32-velu-avx-rr2-lut16-p3-u8.c │ │ ├── f32-velu-avx-rr2-lut4-p4-perm-u16.c │ │ ├── f32-velu-avx-rr2-lut4-p4-perm-u24.c │ │ ├── f32-velu-avx-rr2-lut4-p4-perm-u32.c │ │ ├── f32-velu-avx-rr2-lut4-p4-perm-u8.c │ │ ├── f32-velu-avx-rr2-p6-u16.c │ │ ├── f32-velu-avx-rr2-p6-u24.c │ │ ├── f32-velu-avx-rr2-p6-u32.c │ │ ├── f32-velu-avx-rr2-p6-u8.c │ │ ├── f32-velu-avx2-rr1-lut16-p3-gather-u16.c │ │ ├── f32-velu-avx2-rr1-lut16-p3-gather-u24.c │ │ ├── f32-velu-avx2-rr1-lut16-p3-gather-u32.c │ │ ├── f32-velu-avx2-rr1-lut16-p3-gather-u8.c │ │ ├── f32-velu-avx2-rr1-lut4-p4-perm-u16.c │ │ ├── f32-velu-avx2-rr1-lut4-p4-perm-u24.c │ │ ├── f32-velu-avx2-rr1-lut4-p4-perm-u32.c │ │ ├── f32-velu-avx2-rr1-lut4-p4-perm-u8.c │ │ ├── f32-velu-avx2-rr1-lut8-p4-perm-u16.c │ │ ├── f32-velu-avx2-rr1-lut8-p4-perm-u24.c │ │ ├── f32-velu-avx2-rr1-lut8-p4-perm-u32.c │ │ ├── f32-velu-avx2-rr1-lut8-p4-perm-u8.c │ │ ├── f32-velu-avx2-rr1-p6-u16.c │ │ ├── f32-velu-avx2-rr1-p6-u24.c │ │ ├── f32-velu-avx2-rr1-p6-u32.c │ │ ├── f32-velu-avx2-rr1-p6-u8.c │ │ ├── f32-velu-avx512f-rr1-lut16-p3-perm-u16.c │ │ ├── f32-velu-avx512f-rr1-lut16-p3-perm-u32.c │ │ ├── f32-velu-avx512f-rr1-lut16-p3-perm-u48.c │ │ ├── f32-velu-avx512f-rr1-lut16-p3-perm-u64.c │ │ ├── f32-velu-avx512f-rr1-p6-u16.c │ │ ├── f32-velu-avx512f-rr1-p6-u32.c │ │ ├── f32-velu-avx512f-rr1-p6-u48.c │ │ ├── f32-velu-avx512f-rr1-p6-u64.c │ │ ├── f32-velu-neon-rr2-lut16-p3-u12.c │ │ ├── f32-velu-neon-rr2-lut16-p3-u16.c │ │ ├── f32-velu-neon-rr2-lut16-p3-u4.c │ │ ├── f32-velu-neon-rr2-lut16-p3-u8.c │ │ ├── f32-velu-neon-rr2-p6-u12.c │ │ ├── f32-velu-neon-rr2-p6-u16.c │ │ ├── f32-velu-neon-rr2-p6-u4.c │ │ ├── f32-velu-neon-rr2-p6-u8.c │ │ ├── f32-velu-neonfma-rr1-lut16-p3-u12.c │ │ ├── f32-velu-neonfma-rr1-lut16-p3-u16.c │ │ ├── f32-velu-neonfma-rr1-lut16-p3-u4.c │ │ ├── f32-velu-neonfma-rr1-lut16-p3-u8.c │ │ ├── f32-velu-neonfma-rr1-p6-u12.c │ │ ├── f32-velu-neonfma-rr1-p6-u16.c │ │ ├── f32-velu-neonfma-rr1-p6-u4.c │ │ ├── f32-velu-neonfma-rr1-p6-u8.c │ │ ├── f32-velu-scalar-rr2-lut16-p3-u1.c │ │ ├── f32-velu-scalar-rr2-lut16-p3-u2.c │ │ ├── f32-velu-scalar-rr2-lut16-p3-u3.c │ │ ├── f32-velu-scalar-rr2-lut16-p3-u4.c │ │ ├── f32-velu-scalar-rr2-lut16-p3-u5.c │ │ ├── f32-velu-scalar-rr2-lut16-p3-u6.c │ │ ├── f32-velu-scalar-rr2-p6-u1.c │ │ ├── f32-velu-scalar-rr2-p6-u2.c │ │ ├── f32-velu-scalar-rr2-p6-u3.c │ │ ├── f32-velu-scalar-rr2-p6-u4.c │ │ ├── f32-velu-scalar-rr2-p6-u5.c │ │ ├── f32-velu-scalar-rr2-p6-u6.c │ │ ├── f32-velu-sse2-rr2-lut16-p3-u12.c │ │ ├── f32-velu-sse2-rr2-lut16-p3-u16.c │ │ ├── f32-velu-sse2-rr2-lut16-p3-u4.c │ │ ├── f32-velu-sse2-rr2-lut16-p3-u8.c │ │ ├── f32-velu-sse2-rr2-p6-u12.c │ │ ├── f32-velu-sse2-rr2-p6-u16.c │ │ ├── f32-velu-sse2-rr2-p6-u4.c │ │ ├── f32-velu-sse2-rr2-p6-u8.c │ │ ├── f32-velu-sse41-rr2-lut16-p3-u12.c │ │ ├── f32-velu-sse41-rr2-lut16-p3-u16.c │ │ ├── f32-velu-sse41-rr2-lut16-p3-u4.c │ │ ├── f32-velu-sse41-rr2-lut16-p3-u8.c │ │ ├── f32-velu-sse41-rr2-p6-u12.c │ │ ├── f32-velu-sse41-rr2-p6-u16.c │ │ ├── f32-velu-sse41-rr2-p6-u4.c │ │ ├── f32-velu-sse41-rr2-p6-u8.c │ │ ├── f32-velu-wasmrelaxedsimd-fma-rr2-lut16-p3-u12.c │ │ ├── f32-velu-wasmrelaxedsimd-fma-rr2-lut16-p3-u16.c │ │ ├── f32-velu-wasmrelaxedsimd-fma-rr2-lut16-p3-u4.c │ │ ├── f32-velu-wasmrelaxedsimd-fma-rr2-lut16-p3-u8.c │ │ ├── f32-velu-wasmrelaxedsimd-fma-rr2-p6-u12.c │ │ ├── f32-velu-wasmrelaxedsimd-fma-rr2-p6-u16.c │ │ ├── f32-velu-wasmrelaxedsimd-fma-rr2-p6-u4.c │ │ ├── f32-velu-wasmrelaxedsimd-fma-rr2-p6-u8.c │ │ ├── f32-velu-wasmrelaxedsimd-rr2-lut16-p3-u12.c │ │ ├── f32-velu-wasmrelaxedsimd-rr2-lut16-p3-u16.c │ │ ├── f32-velu-wasmrelaxedsimd-rr2-lut16-p3-u4.c │ │ ├── f32-velu-wasmrelaxedsimd-rr2-lut16-p3-u8.c │ │ ├── f32-velu-wasmrelaxedsimd-rr2-p6-u12.c │ │ ├── f32-velu-wasmrelaxedsimd-rr2-p6-u16.c │ │ ├── f32-velu-wasmrelaxedsimd-rr2-p6-u4.c │ │ ├── f32-velu-wasmrelaxedsimd-rr2-p6-u8.c │ │ ├── f32-velu-wasmsimd-arm-rr2-lut16-p3-u12.c │ │ ├── f32-velu-wasmsimd-arm-rr2-lut16-p3-u16.c │ │ ├── f32-velu-wasmsimd-arm-rr2-lut16-p3-u4.c │ │ ├── f32-velu-wasmsimd-arm-rr2-lut16-p3-u8.c │ │ ├── f32-velu-wasmsimd-arm-rr2-p6-u12.c │ │ ├── f32-velu-wasmsimd-arm-rr2-p6-u16.c │ │ ├── f32-velu-wasmsimd-arm-rr2-p6-u4.c │ │ ├── f32-velu-wasmsimd-arm-rr2-p6-u8.c │ │ ├── f32-velu-wasmsimd-x86-rr2-lut16-p3-u12.c │ │ ├── f32-velu-wasmsimd-x86-rr2-lut16-p3-u16.c │ │ ├── f32-velu-wasmsimd-x86-rr2-lut16-p3-u4.c │ │ ├── f32-velu-wasmsimd-x86-rr2-lut16-p3-u8.c │ │ ├── f32-velu-wasmsimd-x86-rr2-p6-u12.c │ │ ├── f32-velu-wasmsimd-x86-rr2-p6-u16.c │ │ ├── f32-velu-wasmsimd-x86-rr2-p6-u4.c │ │ └── f32-velu-wasmsimd-x86-rr2-p6-u8.c │ ├── neon-lut16-p3.c.in │ ├── neon-p6.c.in │ ├── scalar-rr2-lut16-p3.c.in │ ├── scalar-rr2-p6.c.in │ ├── sse-rr2-lut16-p3.c.in │ ├── sse-rr2-p6.c.in │ ├── wasmsimd-rr2-lut16-p3.c.in │ └── wasmsimd-rr2-p6.c.in ├── f32-vexp │ ├── f32-vexp.inc │ ├── gen │ │ ├── f32-vexp-avx-rational-3-2-div.c │ │ ├── f32-vexp-avx512f-rational-3-2-div.c │ │ ├── f32-vexp-avx512f-rational-3-2-nr.c │ │ ├── f32-vexp-fma3-rational-3-2-div.c │ │ ├── f32-vexp-hvx-rational-3-2-div.c │ │ ├── f32-vexp-neon-rational-3-2-div.c │ │ ├── f32-vexp-scalar-exp.c │ │ ├── f32-vexp-scalar-rational-3-2-div.c │ │ ├── f32-vexp-sse2-rational-3-2-div.c │ │ ├── f32-vexp-sse2fma-rational-3-2-div.c │ │ └── f32-vexp-wasmsimd-rational-3-2-div.c │ ├── rational-3-2.c.in │ └── scalar-exp.c.in ├── f32-vgelu │ ├── f32-vgelu.inc │ ├── gen │ │ ├── f32-vgelu-avx-rational-12-10-div.c │ │ ├── f32-vgelu-avx512f-rational-12-10-div.c │ │ ├── f32-vgelu-avx512f-rational-12-10-nr.c │ │ ├── f32-vgelu-fma3-rational-12-10-div.c │ │ ├── f32-vgelu-hvx-rational-12-10-div.c │ │ ├── f32-vgelu-hvx-rational-12-10-nr.c │ │ ├── f32-vgelu-neon-rational-12-10-div.c │ │ ├── f32-vgelu-scalar-rational-12-10-div.c │ │ ├── f32-vgelu-scalar.c │ │ ├── f32-vgelu-sse2-rational-12-10-div.c │ │ ├── f32-vgelu-sse2fma-rational-12-10-div.c │ │ └── f32-vgelu-wasmsimd-rational-12-10-div.c │ ├── rational-12-10.c.in │ └── scalar.c.in ├── f32-vhswish │ ├── f32-vhswish.inc │ ├── gen │ │ ├── f32-vhswish-avx.c │ │ ├── f32-vhswish-avx512f.c │ │ ├── f32-vhswish-fma3.c │ │ ├── f32-vhswish-hvx.c │ │ ├── f32-vhswish-neon.c │ │ ├── f32-vhswish-rvv-u1v.c │ │ ├── f32-vhswish-rvv-u2v.c │ │ ├── f32-vhswish-rvv-u4v.c │ │ ├── f32-vhswish-rvv-u8v.c │ │ ├── f32-vhswish-scalar.c │ │ ├── f32-vhswish-sse2.c │ │ ├── f32-vhswish-sse2fma.c │ │ ├── f32-vhswish-wasmrelaxedsimd.c │ │ └── f32-vhswish-wasmsimd.c │ ├── rvv.c.in │ └── simd.c.in ├── f32-vlog │ ├── f32-vlog.inc │ ├── gen │ │ ├── f32-vlog-avx2-rational-3-3-div.c │ │ ├── f32-vlog-avx512f-rational-3-3-div.c │ │ ├── f32-vlog-avx512f-rational-3-3-nr.c │ │ ├── f32-vlog-fma3-rational-3-3-div.c │ │ ├── f32-vlog-fma3-rational-3-3-nr.c │ │ ├── f32-vlog-hvx-rational-3-3-div.c │ │ ├── f32-vlog-neon-rational-3-3-div.c │ │ ├── f32-vlog-scalar-log.c │ │ ├── f32-vlog-scalar-rational-3-3-div.c │ │ ├── f32-vlog-sse2-rational-3-3-div.c │ │ ├── f32-vlog-sse2fma-rational-3-3-div.c │ │ └── f32-vlog-wasmsimd-rational-3-3-div.c │ ├── rational-3-3.c.in │ └── scalar-log.c.in ├── f32-vlrelu │ ├── avx.c.in │ ├── avx512f.c.in │ ├── f32-vlrelu.inc │ ├── gen │ │ ├── f32-vlrelu-avx-u16.c │ │ ├── f32-vlrelu-avx-u8.c │ │ ├── f32-vlrelu-avx512f-u16.c │ │ ├── f32-vlrelu-avx512f-u32.c │ │ ├── f32-vlrelu-neon-u4.c │ │ ├── f32-vlrelu-neon-u8.c │ │ ├── f32-vlrelu-rvv-u1v.c │ │ ├── f32-vlrelu-rvv-u2v.c │ │ ├── f32-vlrelu-rvv-u4v.c │ │ ├── f32-vlrelu-rvv-u8v.c │ │ ├── f32-vlrelu-scalar-u1.c │ │ ├── f32-vlrelu-scalar-u2.c │ │ ├── f32-vlrelu-scalar-u4.c │ │ ├── f32-vlrelu-sse-u4.c │ │ ├── f32-vlrelu-sse-u8.c │ │ ├── f32-vlrelu-sse2-u4.c │ │ ├── f32-vlrelu-sse2-u8.c │ │ ├── f32-vlrelu-sse41-u4.c │ │ ├── f32-vlrelu-sse41-u8.c │ │ ├── f32-vlrelu-wasmrelaxedsimd-iminmax-u4.c │ │ ├── f32-vlrelu-wasmrelaxedsimd-iminmax-u8.c │ │ ├── f32-vlrelu-wasmrelaxedsimd-laneselect-u4.c │ │ ├── f32-vlrelu-wasmrelaxedsimd-laneselect-u8.c │ │ ├── f32-vlrelu-wasmsimd-iminmax-u4.c │ │ ├── f32-vlrelu-wasmsimd-iminmax-u8.c │ │ ├── f32-vlrelu-wasmsimd-laneselect-u4.c │ │ └── f32-vlrelu-wasmsimd-laneselect-u8.c │ ├── neon.c.in │ ├── rvv.c.in │ ├── scalar.c.in │ ├── sse.c.in │ ├── wasm.c.in │ ├── wasmsimd-iminmax.c.in │ └── wasmsimd-laneselect.c.in ├── f32-vmulcaddc │ ├── f32-vmulcaddc.inc │ ├── gen │ │ ├── f32-vmulcaddc-c1-minmax-scalar-2x.c │ │ ├── f32-vmulcaddc-c2-minmax-scalar-2x.c │ │ ├── f32-vmulcaddc-c4-minmax-neon-2x.c │ │ ├── f32-vmulcaddc-c4-minmax-neonfma-2x.c │ │ ├── f32-vmulcaddc-c4-minmax-scalar-2x.c │ │ ├── f32-vmulcaddc-c4-minmax-sse-2x.c │ │ ├── f32-vmulcaddc-c4-minmax-wasmrelaxedsimd-2x.c │ │ ├── f32-vmulcaddc-c4-minmax-wasmrelaxedsimd-fma-2x.c │ │ ├── f32-vmulcaddc-c4-minmax-wasmsimd-arm-2x.c │ │ ├── f32-vmulcaddc-c4-minmax-wasmsimd-x86-2x.c │ │ ├── f32-vmulcaddc-c8-minmax-neon-2x.c │ │ ├── f32-vmulcaddc-c8-minmax-neonfma-2x.c │ │ ├── f32-vmulcaddc-c8-minmax-sse-2x.c │ │ ├── f32-vmulcaddc-c8-minmax-wasmrelaxedsimd-2x.c │ │ ├── f32-vmulcaddc-c8-minmax-wasmrelaxedsimd-fma-2x.c │ │ ├── f32-vmulcaddc-c8-minmax-wasmsimd-arm-2x.c │ │ └── f32-vmulcaddc-c8-minmax-wasmsimd-x86-2x.c │ ├── neon.c.in │ ├── scalar.c.in │ ├── sse.c.in │ └── wasmsimd.c.in ├── f32-vneg │ └── f32-vneg.inc ├── f32-vrnd │ ├── avx.c.in │ ├── avx512f.c.in │ ├── f32-vrndd.inc │ ├── f32-vrndne.inc │ ├── f32-vrndu.inc │ ├── f32-vrndz.inc │ ├── gen │ │ ├── f32-vrndd-avx-u16.c │ │ ├── f32-vrndd-avx-u8.c │ │ ├── f32-vrndd-avx512f-u16.c │ │ ├── f32-vrndd-avx512f-u32.c │ │ ├── f32-vrndd-hvx-u128.c │ │ ├── f32-vrndd-hvx-u32.c │ │ ├── f32-vrndd-hvx-u64.c │ │ ├── f32-vrndd-neon-u4.c │ │ ├── f32-vrndd-neon-u8.c │ │ ├── f32-vrndd-neonv8-u4.c │ │ ├── f32-vrndd-neonv8-u8.c │ │ ├── f32-vrndd-rvv-u1v.c │ │ ├── f32-vrndd-rvv-u2v.c │ │ ├── f32-vrndd-rvv-u4v.c │ │ ├── f32-vrndd-rvv-u8v.c │ │ ├── f32-vrndd-scalar-libm-u1.c │ │ ├── f32-vrndd-scalar-libm-u2.c │ │ ├── f32-vrndd-scalar-libm-u4.c │ │ ├── f32-vrndd-sse2-u4.c │ │ ├── f32-vrndd-sse2-u8.c │ │ ├── f32-vrndd-sse41-u4.c │ │ ├── f32-vrndd-sse41-u8.c │ │ ├── f32-vrndd-wasmsimd-u4.c │ │ ├── f32-vrndd-wasmsimd-u8.c │ │ ├── f32-vrndne-avx-u16.c │ │ ├── f32-vrndne-avx-u8.c │ │ ├── f32-vrndne-avx512f-u16.c │ │ ├── f32-vrndne-avx512f-u32.c │ │ ├── f32-vrndne-hvx-u128.c │ │ ├── f32-vrndne-hvx-u32.c │ │ ├── f32-vrndne-hvx-u64.c │ │ ├── f32-vrndne-neon-u4.c │ │ ├── f32-vrndne-neon-u8.c │ │ ├── f32-vrndne-neonv8-u4.c │ │ ├── f32-vrndne-neonv8-u8.c │ │ ├── f32-vrndne-rvv-u1v.c │ │ ├── f32-vrndne-rvv-u2v.c │ │ ├── f32-vrndne-rvv-u4v.c │ │ ├── f32-vrndne-rvv-u8v.c │ │ ├── f32-vrndne-scalar-libm-u1.c │ │ ├── f32-vrndne-scalar-libm-u2.c │ │ ├── f32-vrndne-scalar-libm-u4.c │ │ ├── f32-vrndne-sse2-u4.c │ │ ├── f32-vrndne-sse2-u8.c │ │ ├── f32-vrndne-sse41-u4.c │ │ ├── f32-vrndne-sse41-u8.c │ │ ├── f32-vrndne-wasmsimd-u4.c │ │ ├── f32-vrndne-wasmsimd-u8.c │ │ ├── f32-vrndu-avx-u16.c │ │ ├── f32-vrndu-avx-u8.c │ │ ├── f32-vrndu-avx512f-u16.c │ │ ├── f32-vrndu-avx512f-u32.c │ │ ├── f32-vrndu-hvx-u128.c │ │ ├── f32-vrndu-hvx-u32.c │ │ ├── f32-vrndu-hvx-u64.c │ │ ├── f32-vrndu-neon-u4.c │ │ ├── f32-vrndu-neon-u8.c │ │ ├── f32-vrndu-neonv8-u4.c │ │ ├── f32-vrndu-neonv8-u8.c │ │ ├── f32-vrndu-rvv-u1v.c │ │ ├── f32-vrndu-rvv-u2v.c │ │ ├── f32-vrndu-rvv-u4v.c │ │ ├── f32-vrndu-rvv-u8v.c │ │ ├── f32-vrndu-scalar-libm-u1.c │ │ ├── f32-vrndu-scalar-libm-u2.c │ │ ├── f32-vrndu-scalar-libm-u4.c │ │ ├── f32-vrndu-sse2-u4.c │ │ ├── f32-vrndu-sse2-u8.c │ │ ├── f32-vrndu-sse41-u4.c │ │ ├── f32-vrndu-sse41-u8.c │ │ ├── f32-vrndu-wasmsimd-u4.c │ │ ├── f32-vrndu-wasmsimd-u8.c │ │ ├── f32-vrndz-avx-u16.c │ │ ├── f32-vrndz-avx-u8.c │ │ ├── f32-vrndz-avx512f-u16.c │ │ ├── f32-vrndz-avx512f-u32.c │ │ ├── f32-vrndz-hvx-u128.c │ │ ├── f32-vrndz-hvx-u32.c │ │ ├── f32-vrndz-hvx-u64.c │ │ ├── f32-vrndz-neon-u4.c │ │ ├── f32-vrndz-neon-u8.c │ │ ├── f32-vrndz-neonv8-u4.c │ │ ├── f32-vrndz-neonv8-u8.c │ │ ├── f32-vrndz-rvv-u1v.c │ │ ├── f32-vrndz-rvv-u2v.c │ │ ├── f32-vrndz-rvv-u4v.c │ │ ├── f32-vrndz-rvv-u8v.c │ │ ├── f32-vrndz-scalar-libm-u1.c │ │ ├── f32-vrndz-scalar-libm-u2.c │ │ ├── f32-vrndz-scalar-libm-u4.c │ │ ├── f32-vrndz-sse2-u4.c │ │ ├── f32-vrndz-sse2-u8.c │ │ ├── f32-vrndz-sse41-u4.c │ │ ├── f32-vrndz-sse41-u8.c │ │ ├── f32-vrndz-wasmsimd-u4.c │ │ └── f32-vrndz-wasmsimd-u8.c │ ├── neonv8.c.in │ ├── rvv.c.in │ ├── scalar-libm.c.in │ ├── simd.c.in │ ├── sse41.c.in │ ├── vrndd-neon.c.in │ ├── vrndd-sse2.c.in │ ├── vrndne-neon.c.in │ ├── vrndne-sse2.c.in │ ├── vrndu-neon.c.in │ ├── vrndu-sse2.c.in │ ├── vrndz-neon.c.in │ ├── vrndz-sse2.c.in │ └── wasmsimd.c.in ├── f32-vrsqrt │ ├── f32-vrsqrt.inc │ ├── gen │ │ ├── f32-vrsqrt-avx-rsqrt.c │ │ ├── f32-vrsqrt-avx-sqrt.c │ │ ├── f32-vrsqrt-avx512f-rsqrt.c │ │ ├── f32-vrsqrt-avx512f-sqrt.c │ │ ├── f32-vrsqrt-neon-rsqrt.c │ │ ├── f32-vrsqrt-rvv-rsqrt-u1v.c │ │ ├── f32-vrsqrt-rvv-rsqrt-u2v.c │ │ ├── f32-vrsqrt-rvv-rsqrt-u4v.c │ │ ├── f32-vrsqrt-scalar-rsqrt-u1.c │ │ ├── f32-vrsqrt-scalar-rsqrt-u2.c │ │ ├── f32-vrsqrt-scalar-rsqrt-u4.c │ │ ├── f32-vrsqrt-scalar-sqrt.c │ │ ├── f32-vrsqrt-sse2-rsqrt.c │ │ └── f32-vrsqrt-sse2-sqrt.c │ ├── rvv.c.in │ ├── scalar-rsqrt.c.in │ ├── simd-rsqrt.c.in │ └── simd-sqrt.c.in ├── f32-vscaleexpminusmax │ ├── avx2-p5.c.in │ ├── avx512f-p5-scalef.c.in │ ├── f32-vscaleexpminusmax.inc │ └── gen │ │ ├── f32-vscaleexpminusmax-avx2-p5-u16.c │ │ ├── f32-vscaleexpminusmax-avx2-p5-u24.c │ │ ├── f32-vscaleexpminusmax-avx2-p5-u32.c │ │ ├── f32-vscaleexpminusmax-avx2-p5-u8.c │ │ ├── f32-vscaleexpminusmax-avx512f-p5-scalef-u16.c │ │ ├── f32-vscaleexpminusmax-avx512f-p5-scalef-u32.c │ │ ├── f32-vscaleexpminusmax-avx512f-p5-scalef-u48.c │ │ └── f32-vscaleexpminusmax-avx512f-p5-scalef-u64.c ├── f32-vscaleextexp │ ├── avx2-p5.c.in │ ├── avx512f-p5-scalef.c.in │ ├── f32-vscaleextexp.inc │ └── gen │ │ ├── f32-vscaleextexp-avx2-p5-u16.c │ │ ├── f32-vscaleextexp-avx2-p5-u24.c │ │ ├── f32-vscaleextexp-avx2-p5-u32.c │ │ ├── f32-vscaleextexp-avx2-p5-u8.c │ │ ├── f32-vscaleextexp-avx512f-p5-scalef-u16.c │ │ ├── f32-vscaleextexp-avx512f-p5-scalef-u32.c │ │ ├── f32-vscaleextexp-avx512f-p5-scalef-u48.c │ │ └── f32-vscaleextexp-avx512f-p5-scalef-u64.c ├── f32-vsigmoid │ ├── avx-rr2-p5.c.in │ ├── avx2-rr1-p5.c.in │ ├── avx512f-rr1-lut16-p3-perm-scalef.c.in │ ├── avx512f-rr1-p5-scalef.c.in │ ├── avx512f-rr2-lut32-p2-perm2-scalef.c.in │ ├── f32-vsigmoid.inc │ ├── gen │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-lut2048-p1-div-u12.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-lut2048-p1-div-u16.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-lut2048-p1-div-u4.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-lut2048-p1-div-u8.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-lut64-p2-div-u12.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-lut64-p2-div-u16.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-lut64-p2-div-u4.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-lut64-p2-div-u8.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-p5-div-u12.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-p5-div-u16.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-p5-div-u4.c │ │ ├── f32-vsigmoid-aarch64-neonfma-rr1-p5-div-u8.c │ │ ├── f32-vsigmoid-avx-rr2-p5-div-u16.c │ │ ├── f32-vsigmoid-avx-rr2-p5-div-u24.c │ │ ├── f32-vsigmoid-avx-rr2-p5-div-u32.c │ │ ├── f32-vsigmoid-avx-rr2-p5-div-u8.c │ │ ├── f32-vsigmoid-avx-rr2-p5-nr2-u16.c │ │ ├── f32-vsigmoid-avx-rr2-p5-nr2-u24.c │ │ ├── f32-vsigmoid-avx-rr2-p5-nr2-u32.c │ │ ├── f32-vsigmoid-avx-rr2-p5-nr2-u8.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-div-u16.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-div-u24.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-div-u32.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-div-u8.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-nr1fma-u16.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-nr1fma-u24.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-nr1fma-u32.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-nr1fma-u8.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-nr2fma-u16.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-nr2fma-u24.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-nr2fma-u32.c │ │ ├── f32-vsigmoid-avx2-rr1-p5-nr2fma-u8.c │ │ ├── f32-vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-u16.c │ │ ├── f32-vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-u32.c │ │ ├── f32-vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-u48.c │ │ ├── f32-vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-u64.c │ │ ├── f32-vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-u16.c │ │ ├── f32-vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-u32.c │ │ ├── f32-vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-u48.c │ │ ├── f32-vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma-u64.c │ │ ├── f32-vsigmoid-avx512f-rr1-p5-scalef-div-u16.c │ │ ├── f32-vsigmoid-avx512f-rr1-p5-scalef-div-u32.c │ │ ├── f32-vsigmoid-avx512f-rr1-p5-scalef-div-u48.c │ │ ├── f32-vsigmoid-avx512f-rr1-p5-scalef-div-u64.c │ │ ├── f32-vsigmoid-avx512f-rr1-p5-scalef-nr1fma-u16.c │ │ ├── f32-vsigmoid-avx512f-rr1-p5-scalef-nr1fma-u32.c │ │ ├── f32-vsigmoid-avx512f-rr1-p5-scalef-nr1fma-u48.c │ │ ├── f32-vsigmoid-avx512f-rr1-p5-scalef-nr1fma-u64.c │ │ ├── f32-vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-u16.c │ │ ├── f32-vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-u32.c │ │ ├── f32-vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-u48.c │ │ ├── f32-vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-u64.c │ │ ├── f32-vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-u16.c │ │ ├── f32-vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-u32.c │ │ ├── f32-vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-u48.c │ │ ├── f32-vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-u64.c │ │ ├── f32-vsigmoid-neon-rr2-lut2048-p1-nr2recps-u12.c │ │ ├── f32-vsigmoid-neon-rr2-lut2048-p1-nr2recps-u16.c │ │ ├── f32-vsigmoid-neon-rr2-lut2048-p1-nr2recps-u4.c │ │ ├── f32-vsigmoid-neon-rr2-lut2048-p1-nr2recps-u8.c │ │ ├── f32-vsigmoid-neon-rr2-lut64-p2-nr2recps-u12.c │ │ ├── f32-vsigmoid-neon-rr2-lut64-p2-nr2recps-u16.c │ │ ├── f32-vsigmoid-neon-rr2-lut64-p2-nr2recps-u4.c │ │ ├── f32-vsigmoid-neon-rr2-lut64-p2-nr2recps-u8.c │ │ ├── f32-vsigmoid-neon-rr2-p5-nr2recps-u12.c │ │ ├── f32-vsigmoid-neon-rr2-p5-nr2recps-u16.c │ │ ├── f32-vsigmoid-neon-rr2-p5-nr2recps-u4.c │ │ ├── f32-vsigmoid-neon-rr2-p5-nr2recps-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr2fma-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr2fma-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr2fma-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr2fma-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr2recps-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr2recps-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr2recps-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-lut64-p2-nr2recps-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr1recps1fma-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr1recps1fma-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr1recps1fma-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr1recps1fma-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr2fma-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr2fma-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr2fma-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr2fma-u8.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr2recps-u12.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr2recps-u16.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr2recps-u4.c │ │ ├── f32-vsigmoid-neonfma-rr1-p5-nr2recps-u8.c │ │ ├── f32-vsigmoid-scalar-rr2-lut2048-p1-div-u1.c │ │ ├── f32-vsigmoid-scalar-rr2-lut2048-p1-div-u2.c │ │ ├── f32-vsigmoid-scalar-rr2-lut2048-p1-div-u4.c │ │ ├── f32-vsigmoid-scalar-rr2-lut64-p2-div-u1.c │ │ ├── f32-vsigmoid-scalar-rr2-lut64-p2-div-u2.c │ │ ├── f32-vsigmoid-scalar-rr2-lut64-p2-div-u4.c │ │ ├── f32-vsigmoid-scalar-rr2-p5-div-u1.c │ │ ├── f32-vsigmoid-scalar-rr2-p5-div-u2.c │ │ ├── f32-vsigmoid-scalar-rr2-p5-div-u4.c │ │ ├── f32-vsigmoid-sse2-rr2-lut64-p2-div-u12.c │ │ ├── f32-vsigmoid-sse2-rr2-lut64-p2-div-u16.c │ │ ├── f32-vsigmoid-sse2-rr2-lut64-p2-div-u4.c │ │ ├── f32-vsigmoid-sse2-rr2-lut64-p2-div-u8.c │ │ ├── f32-vsigmoid-sse2-rr2-p5-div-u12.c │ │ ├── f32-vsigmoid-sse2-rr2-p5-div-u16.c │ │ ├── f32-vsigmoid-sse2-rr2-p5-div-u4.c │ │ ├── f32-vsigmoid-sse2-rr2-p5-div-u8.c │ │ ├── f32-vsigmoid-sse41-rr2-lut64-p2-div-u12.c │ │ ├── f32-vsigmoid-sse41-rr2-lut64-p2-div-u16.c │ │ ├── f32-vsigmoid-sse41-rr2-lut64-p2-div-u4.c │ │ ├── f32-vsigmoid-sse41-rr2-lut64-p2-div-u8.c │ │ ├── f32-vsigmoid-sse41-rr2-p5-div-u12.c │ │ ├── f32-vsigmoid-sse41-rr2-p5-div-u16.c │ │ ├── f32-vsigmoid-sse41-rr2-p5-div-u4.c │ │ ├── f32-vsigmoid-sse41-rr2-p5-div-u8.c │ │ ├── f32-vsigmoid-wasmblendvps-fma-rr2-p5-div-u12.c │ │ ├── f32-vsigmoid-wasmblendvps-fma-rr2-p5-div-u16.c │ │ ├── f32-vsigmoid-wasmblendvps-fma-rr2-p5-div-u4.c │ │ ├── f32-vsigmoid-wasmblendvps-fma-rr2-p5-div-u8.c │ │ ├── f32-vsigmoid-wasmblendvps-rr2-p5-div-u12.c │ │ ├── f32-vsigmoid-wasmblendvps-rr2-p5-div-u16.c │ │ ├── f32-vsigmoid-wasmblendvps-rr2-p5-div-u4.c │ │ ├── f32-vsigmoid-wasmblendvps-rr2-p5-div-u8.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-fma-rr2-lut64-p2-div-u12.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-fma-rr2-lut64-p2-div-u16.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-fma-rr2-lut64-p2-div-u4.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-fma-rr2-lut64-p2-div-u8.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-fma-rr2-p5-div-u12.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-fma-rr2-p5-div-u16.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-fma-rr2-p5-div-u4.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-fma-rr2-p5-div-u8.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-rr2-lut64-p2-div-u12.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-rr2-lut64-p2-div-u16.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-rr2-lut64-p2-div-u4.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-rr2-lut64-p2-div-u8.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-rr2-p5-div-u12.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-rr2-p5-div-u16.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-rr2-p5-div-u4.c │ │ ├── f32-vsigmoid-wasmrelaxedsimd-rr2-p5-div-u8.c │ │ ├── f32-vsigmoid-wasmsimd-rr2-lut64-p2-div-u12.c │ │ ├── f32-vsigmoid-wasmsimd-rr2-lut64-p2-div-u16.c │ │ ├── f32-vsigmoid-wasmsimd-rr2-lut64-p2-div-u4.c │ │ ├── f32-vsigmoid-wasmsimd-rr2-lut64-p2-div-u8.c │ │ ├── f32-vsigmoid-wasmsimd-rr2-p5-div-u12.c │ │ ├── f32-vsigmoid-wasmsimd-rr2-p5-div-u16.c │ │ ├── f32-vsigmoid-wasmsimd-rr2-p5-div-u4.c │ │ └── f32-vsigmoid-wasmsimd-rr2-p5-div-u8.c │ ├── neon-lut2048-p1.c.in │ ├── neon-lut64-p2.c.in │ ├── neon-p5.c.in │ ├── scalar-rr2-lut2048-p1-div.c.in │ ├── scalar-rr2-lut64-p2-div.c.in │ ├── scalar-rr2-p5-div.c.in │ ├── sse-rr2-lut64-p2-div.c.in │ ├── sse-rr2-p5-div.c.in │ ├── wasmsimd-rr2-lut64-p2-div.c.in │ └── wasmsimd-rr2-p5-div.c.in ├── f32-vsin │ ├── f32-vsin.inc │ ├── gen │ │ ├── f32-vsin-avx-rational-5-4-div.c │ │ ├── f32-vsin-avx512f-rational-5-4-div.c │ │ ├── f32-vsin-avx512f-rational-5-4-nr.c │ │ ├── f32-vsin-fma3-rational-5-4-div.c │ │ ├── f32-vsin-hvx-rational-5-4-div.c │ │ ├── f32-vsin-neon-rational-5-4-div.c │ │ ├── f32-vsin-neon-rational-5-4-nr.c │ │ ├── f32-vsin-scalar-rational-5-4-div.c │ │ ├── f32-vsin-sse2-rational-5-4-div.c │ │ ├── f32-vsin-sse2fma-rational-5-4-div.c │ │ └── f32-vsin-wasmsimd-rational-5-4-div.c │ └── rational-5-4.c.in ├── f32-vsqr │ └── f32-vsqr.inc ├── f32-vsqrt │ ├── avx512f-nr1fma1adj.c.in │ ├── f32-vsqrt.inc │ ├── fma3-nr1fma1adj.c.in │ ├── gen │ │ ├── f32-vsqrt-aarch64-neon-sqrt.c │ │ ├── f32-vsqrt-avx-rsqrt.c │ │ ├── f32-vsqrt-avx-sqrt.c │ │ ├── f32-vsqrt-avx512f-rsqrt.c │ │ ├── f32-vsqrt-neon-rsqrt.c │ │ ├── f32-vsqrt-rvv-sqrt-u1v.c │ │ ├── f32-vsqrt-rvv-sqrt-u2v.c │ │ ├── f32-vsqrt-rvv-sqrt-u4v.c │ │ ├── f32-vsqrt-rvv-sqrt-u8v.c │ │ ├── f32-vsqrt-scalar-sqrt.c │ │ ├── f32-vsqrt-sse2-rsqrt.c │ │ ├── f32-vsqrt-sse2-sqrt.c │ │ └── f32-vsqrt-wasmsimd-sqrt.c │ ├── neonfma-nr1rsqrts1fma1adj.c.in │ ├── neonfma-nr2fma1adj.c.in │ ├── rvv-sqrt.c.in │ ├── simd-rsqrt.c.in │ └── simd-sqrt.c.in ├── f32-vtanh │ ├── f32-vtanh.inc │ ├── gen │ │ ├── f32-vtanh-avx-rational-9-8-div.c │ │ ├── f32-vtanh-avx-rational-9-8-nr.c │ │ ├── f32-vtanh-avx512f-rational-9-8-div.c │ │ ├── f32-vtanh-avx512f-rational-9-8-nr.c │ │ ├── f32-vtanh-fma3-rational-9-8-div.c │ │ ├── f32-vtanh-fma3-rational-9-8-nr.c │ │ ├── f32-vtanh-hvx-rational-9-8-div.c │ │ ├── f32-vtanh-hvx-rational-9-8-nr.c │ │ ├── f32-vtanh-neon-rational-9-8-div.c │ │ ├── f32-vtanh-neon-rational-9-8-nr.c │ │ ├── f32-vtanh-scalar-rational-9-8-div.c │ │ ├── f32-vtanh-sse2-rational-9-8-div.c │ │ ├── f32-vtanh-sse2-rational-9-8-nr.c │ │ ├── f32-vtanh-sse2fma-rational-9-8-div.c │ │ └── f32-vtanh-wasmsimd-rational-9-8-div.c │ └── rational-9-8.c.in ├── f32-vunary │ ├── gen │ │ ├── f32-vabs-avx.c │ │ ├── f32-vabs-avx512f.c │ │ ├── f32-vabs-hvx.c │ │ ├── f32-vabs-neon.c │ │ ├── f32-vabs-rvv-u1v.c │ │ ├── f32-vabs-rvv-u2v.c │ │ ├── f32-vabs-rvv-u4v.c │ │ ├── f32-vabs-rvv-u8v.c │ │ ├── f32-vabs-scalar.c │ │ ├── f32-vabs-sse2.c │ │ ├── f32-vabs-wasmsimd.c │ │ ├── f32-vneg-avx.c │ │ ├── f32-vneg-avx512f.c │ │ ├── f32-vneg-hvx.c │ │ ├── f32-vneg-neon.c │ │ ├── f32-vneg-rvv-u1v.c │ │ ├── f32-vneg-rvv-u2v.c │ │ ├── f32-vneg-rvv-u4v.c │ │ ├── f32-vneg-rvv-u8v.c │ │ ├── f32-vneg-scalar.c │ │ ├── f32-vneg-sse2.c │ │ ├── f32-vneg-wasmsimd.c │ │ ├── f32-vsqr-avx.c │ │ ├── f32-vsqr-avx512f.c │ │ ├── f32-vsqr-hvx.c │ │ ├── f32-vsqr-neon.c │ │ ├── f32-vsqr-rvv-u1v.c │ │ ├── f32-vsqr-rvv-u2v.c │ │ ├── f32-vsqr-rvv-u4v.c │ │ ├── f32-vsqr-rvv-u8v.c │ │ ├── f32-vsqr-scalar.c │ │ ├── f32-vsqr-sse2.c │ │ └── f32-vsqr-wasmsimd.c │ ├── rvv.c.in │ └── simd.c.in ├── indirection.c ├── init.c ├── log.c ├── memory-planner.c ├── memory.c ├── microkernel-utils.c ├── microparams-init.c ├── mutex.c ├── normalization.c ├── operator-delete.c ├── operator-run.c ├── operator-utils.c ├── operators │ ├── argmax-pooling-nhwc.c │ ├── average-pooling-nhwc.c │ ├── batch-matrix-multiply-nc.c │ ├── binary-elementwise-nd.c │ ├── constant-pad-nd.c │ ├── convolution-nchw.c │ ├── convolution-nhwc.c │ ├── deconvolution-nhwc.c │ ├── dynamic-fully-connected-nc.c │ ├── fingerprint_cache.c │ ├── fingerprint_cache.h │ ├── fingerprint_id.c │ ├── fingerprint_id.h │ ├── fingerprint_id.h.inc │ ├── fully-connected-nc.c │ ├── max-pooling-nhwc.c │ ├── pack-lh.c │ ├── reduce-nd.c │ ├── resize-bilinear-nchw.c │ ├── resize-bilinear-nhwc.c │ ├── rope-nthc.c │ ├── slice-nd.c │ ├── softmax-nc.c │ ├── transpose-nd.c │ ├── unary-elementwise-nc.c │ └── unpooling-nhwc.c ├── pack-lh.cc ├── params.c ├── pf16-f16-f16-igemm │ ├── pf16-f16-f16-igemm-32x32c2-minmax-neonsme.c │ └── pf16-f16-f16-igemm-32x32c2-minmax-neonsme2.c ├── pf16-gemm │ ├── pf16-gemm-1x32c2-minmax-neonsme.c │ ├── pf16-gemm-1x32c2-minmax-neonsme2.c │ ├── pf16-gemm-32x32c2-minmax-neonsme.c │ └── pf16-gemm-32x32c2-minmax-neonsme2.c ├── pf32-gemm │ ├── pf32-gemm-1x32-minmax-neonsme.c │ ├── pf32-gemm-1x32-minmax-neonsme2.c │ ├── pf32-gemm-32x32-minmax-neonsme.c │ └── pf32-gemm-32x32-minmax-neonsme2.c ├── pf32-igemm │ ├── pf32-igemm-32x32-minmax-neonsme.c │ └── pf32-igemm-32x32-minmax-neonsme2.c ├── pqs8-f32-qc8w-igemm │ └── pqs8-f32-qc8w-igemm-32x32c4-minmax-neonsme2.c ├── pqs8-qc8w-gemm │ ├── pqs8-qc8w-gemm-1x32c4-minmax-neonsme2.c │ └── pqs8-qc8w-gemm-32x32c4-minmax-neonsme2.c ├── qb4-packw │ ├── c4-aarch64-neondot.c.in │ ├── c8-aarch64-neondot.c.in │ ├── gen │ │ ├── qb4-packw-x16c4-gemm-goi-aarch64-neondot.c │ │ ├── qb4-packw-x16c4-gemm-goi-scalar.c │ │ ├── qb4-packw-x16c8-gemm-goi-aarch64-neondot.c │ │ └── qb4-packw-x16c8-gemm-goi-scalar.c │ ├── kr-scalar.c.in │ └── qb4-packw.inc ├── qd8-f16-qb4w-gemm │ └── gen │ │ ├── qd8-f16-qb4w-gemm-1x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qb4w-gemm-1x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qb4w-gemm-1x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-1x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-1x2-minmax-scalar.c │ │ ├── qd8-f16-qb4w-gemm-1x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-1x4-minmax-scalar.c │ │ ├── qd8-f16-qb4w-gemm-1x8-minmax-scalar.c │ │ ├── qd8-f16-qb4w-gemm-1x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-1x8c8-minmax-avx2.c │ │ ├── qd8-f16-qb4w-gemm-1x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-2x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qb4w-gemm-2x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qb4w-gemm-2x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-2x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-2x2-minmax-scalar.c │ │ ├── qd8-f16-qb4w-gemm-2x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-2x4-minmax-scalar.c │ │ ├── qd8-f16-qb4w-gemm-2x8-minmax-scalar.c │ │ ├── qd8-f16-qb4w-gemm-2x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-2x8c8-minmax-avx2.c │ │ ├── qd8-f16-qb4w-gemm-2x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-3x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qb4w-gemm-3x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qb4w-gemm-3x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-3x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-3x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-3x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-3x8c8-minmax-avx2.c │ │ ├── qd8-f16-qb4w-gemm-3x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-4x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qb4w-gemm-4x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qb4w-gemm-4x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-4x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-4x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-4x4-minmax-scalar.c │ │ ├── qd8-f16-qb4w-gemm-4x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-4x8c8-minmax-avx2.c │ │ ├── qd8-f16-qb4w-gemm-4x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-5x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-5x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-5x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-5x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-5x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-6x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qb4w-gemm-6x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qb4w-gemm-6x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-6x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-6x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-6x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qb4w-gemm-6x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-7x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-7x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-7x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-8x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qb4w-gemm-8x32c8-minmax-neoni8mm.c │ │ └── qd8-f16-qb4w-gemm-8x8c8-minmax-neoni8mm.c ├── qd8-f16-qc4w-gemm │ └── gen │ │ ├── qd8-f16-qc4w-gemm-10x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-10x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f16-qc4w-gemm-10x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-10x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc4w-gemm-10x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-10x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f16-qc4w-gemm-12x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-12x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f16-qc4w-gemm-12x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-12x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc4w-gemm-12x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-12x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f16-qc4w-gemm-14x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-14x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f16-qc4w-gemm-14x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-14x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc4w-gemm-14x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-14x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f16-qc4w-gemm-1x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qc4w-gemm-1x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qc4w-gemm-1x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-1x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-1x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-1x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx2-madd.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc4w-gemm-1x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-2x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qc4w-gemm-2x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qc4w-gemm-2x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-2x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-2x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-2x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-2x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-2x8c8-minmax-avx2-madd.c │ │ ├── qd8-f16-qc4w-gemm-2x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc4w-gemm-2x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc4w-gemm-2x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-2x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc4w-gemm-2x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-3x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qc4w-gemm-3x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qc4w-gemm-3x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-3x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-3x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-3x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-3x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-3x8c8-minmax-avx2-madd.c │ │ ├── qd8-f16-qc4w-gemm-3x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc4w-gemm-3x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc4w-gemm-3x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-3x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc4w-gemm-3x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-4x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qc4w-gemm-4x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qc4w-gemm-4x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-4x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-4x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-4x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-4x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-4x8c8-minmax-avx2-madd.c │ │ ├── qd8-f16-qc4w-gemm-4x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc4w-gemm-4x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc4w-gemm-4x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-4x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc4w-gemm-4x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-5x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-5x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-5x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-5x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx2-madd.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc4w-gemm-5x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-6x16-minmax-neonfp16arith-mlal-lane-prfm.c │ │ ├── qd8-f16-qc4w-gemm-6x16-minmax-neonfp16arith-mlal-lane.c │ │ ├── qd8-f16-qc4w-gemm-6x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-6x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-6x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-6x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc4w-gemm-6x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-6x8c8-minmax-avx2-madd.c │ │ ├── qd8-f16-qc4w-gemm-6x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc4w-gemm-6x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc4w-gemm-6x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-6x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc4w-gemm-6x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-7x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-7x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx2-madd.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc4w-gemm-7x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-8x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-8x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx2-madd.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc4w-gemm-8x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc4w-gemm-9x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f16-qc4w-gemm-9x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f16-qc4w-gemm-9x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc4w-gemm-9x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc4w-gemm-9x8c8-minmax-avx256vnnigfni-prfm.c │ │ └── qd8-f16-qc4w-gemm-9x8c8-minmax-avx256vnnigfni.c ├── qd8-f16-qc8w-gemm │ └── gen │ │ ├── qd8-f16-qc8w-gemm-10x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-10x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-gemm-12x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-12x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-gemm-14x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-14x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-gemm-16x64c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f16-qc8w-gemm-16x64c4-minmax-avx512amx.c │ │ ├── qd8-f16-qc8w-gemm-1x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-1x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-1x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-1x64c4-minmax-avx512amx.c │ │ ├── qd8-f16-qc8w-gemm-1x8c2s4-minmax-neonfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-1x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-1x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc8w-gemm-1x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc8w-gemm-1x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-1x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-gemm-1x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-1x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-gemm-1x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-2x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-2x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-2x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-2x8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-2x8c2s4-minmax-neonfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-2x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-2x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc8w-gemm-2x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-2x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-gemm-3x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-3x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-3x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-3x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-3x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc8w-gemm-3x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-3x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-gemm-3x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f16-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondotfp16arith-cortex-a55.S │ │ ├── qd8-f16-qc8w-gemm-4x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-4x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-4x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-4x8c4-minmax-asm-aarch32-neondotfp16arith-cortex-a55.S │ │ ├── qd8-f16-qc8w-gemm-4x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-4x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc8w-gemm-4x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-4x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-gemm-4x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-5x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-5x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-5x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-5x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-5x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc8w-gemm-5x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-5x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-gemm-5x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-5x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-gemm-5x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-6x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-6x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-6x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-6x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-gemm-6x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-6x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-gemm-6x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-7x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-7x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-7x64c4-minmax-avx512amx.c │ │ ├── qd8-f16-qc8w-gemm-7x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc8w-gemm-7x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-7x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-gemm-7x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-7x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-gemm-7x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-8x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-8x32c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-8x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc8w-gemm-8x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-8x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-gemm-8x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-gemm-8x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-gemm-8x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-gemm-9x8c8-minmax-avx256vnni-prfm.c │ │ └── qd8-f16-qc8w-gemm-9x8c8-minmax-avx256vnni.c ├── qd8-f16-qc8w-igemm │ └── gen │ │ ├── qd8-f16-qc8w-igemm-10x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-10x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-igemm-12x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-12x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-igemm-14x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-14x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-igemm-16x64c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f16-qc8w-igemm-16x64c4-minmax-avx512amx.c │ │ ├── qd8-f16-qc8w-igemm-1x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-1x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-1x32c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-1x64c4-minmax-avx512amx.c │ │ ├── qd8-f16-qc8w-igemm-1x8c2s4-minmax-neonfp16arith-mlal.c │ │ ├── qd8-f16-qc8w-igemm-1x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-1x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc8w-igemm-1x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc8w-igemm-1x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-1x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-igemm-1x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-1x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-igemm-1x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-2x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-2x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-2x32c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-2x8c2s4-minmax-neonfp16arith-mlal.c │ │ ├── qd8-f16-qc8w-igemm-2x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-2x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc8w-igemm-2x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-2x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-igemm-2x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-3x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-3x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc8w-igemm-3x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-3x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-igemm-3x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S │ │ ├── qd8-f16-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f16-qc8w-igemm-4x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-4x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-4x32c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-4x8c4-minmax-asm-aarch32-neondotfp16arith-cortex-a55.S │ │ ├── qd8-f16-qc8w-igemm-4x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-4x8c8-minmax-avx2.c │ │ ├── qd8-f16-qc8w-igemm-4x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-4x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-igemm-4x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-5x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc8w-igemm-5x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-5x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-igemm-5x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-5x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-igemm-6x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-6x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-6x32c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-6x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-6x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-6x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-igemm-6x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-7x64c4-minmax-avx512amx.c │ │ ├── qd8-f16-qc8w-igemm-7x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc8w-igemm-7x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-7x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-igemm-7x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-7x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-igemm-8x16c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-8x16c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-8x32c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-8x8c4-minmax-neondotfp16arith.c │ │ ├── qd8-f16-qc8w-igemm-8x8c8-minmax-avx256skx.c │ │ ├── qd8-f16-qc8w-igemm-8x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-8x8c8-minmax-avx256vnni.c │ │ ├── qd8-f16-qc8w-igemm-8x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f16-qc8w-igemm-8x8c8-minmax-avxvnni.c │ │ ├── qd8-f16-qc8w-igemm-8x8c8-minmax-neoni8mm.c │ │ ├── qd8-f16-qc8w-igemm-9x8c8-minmax-avx256vnni-prfm.c │ │ └── qd8-f16-qc8w-igemm-9x8c8-minmax-avx256vnni.c ├── qd8-f32-qb4w-gemm │ └── gen │ │ ├── qd8-f32-qb4w-gemm-10x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-10x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qb4w-gemm-10x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-10x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qb4w-gemm-12x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-12x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qb4w-gemm-12x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-12x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qb4w-gemm-14x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-14x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qb4w-gemm-14x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-14x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qb4w-gemm-1x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qb4w-gemm-1x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qb4w-gemm-1x16c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-1x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-1x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qb4w-gemm-1x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-1x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qb4w-gemm-1x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-1x2-minmax-scalar.c │ │ ├── qd8-f32-qb4w-gemm-1x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-1x4-minmax-scalar.c │ │ ├── qd8-f32-qb4w-gemm-1x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qb4w-gemm-1x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qb4w-gemm-1x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qb4w-gemm-1x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qb4w-gemm-1x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qb4w-gemm-1x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qb4w-gemm-1x8-minmax-scalar.c │ │ ├── qd8-f32-qb4w-gemm-1x8c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-1x8c8-minmax-avx2.c │ │ ├── qd8-f32-qb4w-gemm-1x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-2x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qb4w-gemm-2x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qb4w-gemm-2x16c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-2x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-2x2-minmax-scalar.c │ │ ├── qd8-f32-qb4w-gemm-2x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-2x4-minmax-scalar.c │ │ ├── qd8-f32-qb4w-gemm-2x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qb4w-gemm-2x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qb4w-gemm-2x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qb4w-gemm-2x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qb4w-gemm-2x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qb4w-gemm-2x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qb4w-gemm-2x8-minmax-scalar.c │ │ ├── qd8-f32-qb4w-gemm-2x8c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-2x8c8-minmax-avx2.c │ │ ├── qd8-f32-qb4w-gemm-2x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-3x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qb4w-gemm-3x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qb4w-gemm-3x16c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-3x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-3x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-3x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qb4w-gemm-3x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qb4w-gemm-3x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qb4w-gemm-3x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qb4w-gemm-3x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qb4w-gemm-3x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qb4w-gemm-3x8c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-3x8c8-minmax-avx2.c │ │ ├── qd8-f32-qb4w-gemm-3x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-4x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qb4w-gemm-4x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qb4w-gemm-4x16c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-4x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-4x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-4x4-minmax-scalar.c │ │ ├── qd8-f32-qb4w-gemm-4x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qb4w-gemm-4x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qb4w-gemm-4x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qb4w-gemm-4x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qb4w-gemm-4x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qb4w-gemm-4x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qb4w-gemm-4x8c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-4x8c8-minmax-avx2.c │ │ ├── qd8-f32-qb4w-gemm-4x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-5x16c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-5x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-5x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qb4w-gemm-5x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-5x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qb4w-gemm-5x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-5x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-5x8c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-5x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-6x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qb4w-gemm-6x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qb4w-gemm-6x16c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-6x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-6x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-6x8c4-minmax-neondot.c │ │ ├── qd8-f32-qb4w-gemm-6x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-7x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-7x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qb4w-gemm-7x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-7x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qb4w-gemm-7x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-7x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-7x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-8x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-8x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qb4w-gemm-8x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-8x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qb4w-gemm-8x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-8x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-8x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qb4w-gemm-9x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qb4w-gemm-9x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qb4w-gemm-9x16c8-minmax-avx512vnnigfni-prfm.c │ │ └── qd8-f32-qb4w-gemm-9x16c8-minmax-avx512vnnigfni.c ├── qd8-f32-qc2w-gemm │ ├── qd8-f32-qc2w-gemm-1x16c4-minmax-neondot.c │ └── qd8-f32-qc2w-gemm-4x16c4-minmax-neondot.c ├── qd8-f32-qc4w-gemm │ ├── gen │ │ ├── qd8-f32-qc4w-gemm-10x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-10x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-10x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-10x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-10x16c8-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x16c8-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-10x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-10x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-10x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-10x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-10x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc4w-gemm-10x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-10x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-11x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-11x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-12x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-12x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-12x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-12x16c8-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x16c8-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-12x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-12x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-12x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-12x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc4w-gemm-12x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-12x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-14x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-14x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-14x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-14x16c8-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x16c8-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-14x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-14x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-14x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-14x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc4w-gemm-14x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-14x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-16x16c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc4w-gemm-16x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-16x32c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc4w-gemm-16x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-16x64c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc4w-gemm-16x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-1x1-minmax-scalar.c │ │ ├── qd8-f32-qc4w-gemm-1x16-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-1x16-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-1x16-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-1x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc4w-gemm-1x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-1x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-1x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-1x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-1x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-1x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-1x2-minmax-scalar.c │ │ ├── qd8-f32-qc4w-gemm-1x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-1x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-1x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-1x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-1x4-minmax-scalar.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-ssse3-madd.c │ │ ├── qd8-f32-qc4w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc4w-gemm-1x4v-minmax-rvv.c │ │ ├── qd8-f32-qc4w-gemm-1x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-1x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-1x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-1x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-1x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-1x8-minmax-scalar.c │ │ ├── qd8-f32-qc4w-gemm-1x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx2-madd.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc4w-gemm-1x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-2x16-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-2x16-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-2x16-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-2x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc4w-gemm-2x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc4w-gemm-2x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-2x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-2x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-2x2-minmax-scalar.c │ │ ├── qd8-f32-qc4w-gemm-2x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-2x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-2x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-2x4-minmax-scalar.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-ssse3-madd.c │ │ ├── qd8-f32-qc4w-gemm-2x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc4w-gemm-2x4v-minmax-rvv.c │ │ ├── qd8-f32-qc4w-gemm-2x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-2x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-2x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-2x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-2x8-minmax-scalar.c │ │ ├── qd8-f32-qc4w-gemm-2x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-2x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-2x8c8-minmax-avx2-madd.c │ │ ├── qd8-f32-qc4w-gemm-2x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc4w-gemm-2x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc4w-gemm-2x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-2x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc4w-gemm-2x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-3x16-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-3x16-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-3x16-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-3x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc4w-gemm-3x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc4w-gemm-3x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-3x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-3x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-3x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-3x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-3x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-ssse3-madd.c │ │ ├── qd8-f32-qc4w-gemm-3x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc4w-gemm-3x4v-minmax-rvv.c │ │ ├── qd8-f32-qc4w-gemm-3x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-3x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-3x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-3x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-3x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-3x8c8-minmax-avx2-madd.c │ │ ├── qd8-f32-qc4w-gemm-3x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc4w-gemm-3x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc4w-gemm-3x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-3x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc4w-gemm-3x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-4x16-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-4x16-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-4x16-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-4x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc4w-gemm-4x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc4w-gemm-4x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-4x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-4x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-4x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-4x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-4x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-4x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-4x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-4x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-4x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-4x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-4x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-4x4-minmax-scalar.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-ssse3-madd.c │ │ ├── qd8-f32-qc4w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc4w-gemm-4x4v-minmax-rvv.c │ │ ├── qd8-f32-qc4w-gemm-4x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-4x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-4x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-4x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-4x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-4x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-4x8c8-minmax-avx2-madd.c │ │ ├── qd8-f32-qc4w-gemm-4x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc4w-gemm-4x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc4w-gemm-4x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-4x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc4w-gemm-4x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-5x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-5x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-5x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-5x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-5x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-5x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-5x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-5x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x4c8-minmax-ssse3-madd.c │ │ ├── qd8-f32-qc4w-gemm-5x4v-minmax-rvv.c │ │ ├── qd8-f32-qc4w-gemm-5x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-5x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-5x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-5x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx2-madd.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc4w-gemm-5x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-6x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc4w-gemm-6x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc4w-gemm-6x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-6x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-6x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-6x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-6x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-6x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-6x4c8-minmax-ssse3-madd.c │ │ ├── qd8-f32-qc4w-gemm-6x4v-minmax-rvv.c │ │ ├── qd8-f32-qc4w-gemm-6x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-6x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-6x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-6x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc4w-gemm-6x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-6x8c8-minmax-avx2-madd.c │ │ ├── qd8-f32-qc4w-gemm-6x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc4w-gemm-6x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc4w-gemm-6x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-6x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc4w-gemm-6x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-7x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-7x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-7x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-7x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-7x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-7x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-7x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-7x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-7x4v-minmax-rvv.c │ │ ├── qd8-f32-qc4w-gemm-7x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc4w-gemm-7x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc4w-gemm-7x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc4w-gemm-7x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx2-madd.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc4w-gemm-7x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-8x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-8x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-8x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-8x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-8x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-8x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-8x4v-minmax-rvv.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx2-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx2-madd.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx256vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avx256vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc4w-gemm-8x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc4w-gemm-9x16c4-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-9x16c4-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-9x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-9x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-9x16c4-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-9x16c4-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-9x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-9x16c8-minmax-avx512skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-9x16c8-minmax-avx512skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-9x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-9x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc4w-gemm-9x16c8-minmax-avx512vnnigfni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-9x16c8-minmax-avx512vnnigfni.c │ │ ├── qd8-f32-qc4w-gemm-9x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc4w-gemm-9x8c8-minmax-avx256skx-madd-prfm.c │ │ ├── qd8-f32-qc4w-gemm-9x8c8-minmax-avx256skx-madd.c │ │ ├── qd8-f32-qc4w-gemm-9x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc4w-gemm-9x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc4w-gemm-9x8c8-minmax-avx256vnnigfni-prfm.c │ │ └── qd8-f32-qc4w-gemm-9x8c8-minmax-avx256vnnigfni.c │ ├── qd8-f32-qc4w-gemm-1x8-minmax-asm-aarch32-neonmlal-ld64.S │ └── qd8-f32-qc4w-gemm-4x8-minmax-asm-aarch32-neonmlal-ld64.S ├── qd8-f32-qc8w-gemm │ └── gen │ │ ├── qd8-f16-qc8w-gemm-1x8-minmax-asm-aarch32-neonfp16arith-ld64.S │ │ ├── qd8-f16-qc8w-gemm-2x8-minmax-asm-aarch32-neonfp16arith-ld64.S │ │ ├── qd8-f16-qc8w-gemm-3x8-minmax-asm-aarch32-neonfp16arith-ld64.S │ │ ├── qd8-f16-qc8w-gemm-4x8-minmax-asm-aarch32-neonfp16arith-ld64.S │ │ ├── qd8-f32-qc8w-gemm-10x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-10x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-10x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-10x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-10x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-10x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-10x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-10x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-10x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-gemm-11x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-11x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-11x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-12x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-12x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-12x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-12x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-12x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-12x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-gemm-14x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-14x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-14x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-14x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-14x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-14x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-gemm-16x16c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc8w-gemm-16x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-16x32c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc8w-gemm-16x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-16x64c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc8w-gemm-16x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-1x16-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-1x16-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-1x16-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-1x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-1x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-1x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-1x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-1x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-1x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-1x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-1x16c4-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-1x16c4-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-1x16c8-minmax-aarch64-neondot-ld128.c │ │ ├── qd8-f32-qc8w-gemm-1x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-1x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc8w-gemm-1x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc8w-gemm-1x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-1x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-1x16c8-minmax-neondot-ld64.c │ │ ├── qd8-f32-qc8w-gemm-1x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-1x2-minmax-scalar.c │ │ ├── qd8-f32-qc8w-gemm-1x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-1x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-1x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-1x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-1x4-minmax-scalar.c │ │ ├── qd8-f32-qc8w-gemm-1x4c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-1x4c16-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-1x4c2-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-1x4c2-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-1x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc8w-gemm-1x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc8w-gemm-1x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-1x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-1x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc8w-gemm-1x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc8w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-1x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-gemm-1x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-1x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-1x8-minmax-asm-aarch32-neonmlal-ld64.S │ │ ├── qd8-f32-qc8w-gemm-1x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-1x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-1x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-1x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-1x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-1x8-minmax-scalar.c │ │ ├── qd8-f32-qc8w-gemm-1x8c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-1x8c16-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-1x8c2s4-minmax-neon-mlal.c │ │ ├── qd8-f32-qc8w-gemm-1x8c4-minmax-avxvnni-u2-acc2.c │ │ ├── qd8-f32-qc8w-gemm-1x8c4-minmax-avxvnni-u4-acc4.c │ │ ├── qd8-f32-qc8w-gemm-1x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-aarch64-neondot-ld128.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-neondot-ld64.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-wasmsdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-1x8c8-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-2x16-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-2x16-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-2x16-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-2x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-2x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-2x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-2x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-2x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-2x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-2x2-minmax-scalar.c │ │ ├── qd8-f32-qc8w-gemm-2x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-2x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-2x32c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-2x4-minmax-scalar.c │ │ ├── qd8-f32-qc8w-gemm-2x4c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-2x4c16-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-2x4c2-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-2x4c2-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-2x4c2s4-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-2x4c2s4-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-2x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc8w-gemm-2x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc8w-gemm-2x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-2x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-2x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc8w-gemm-2x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc8w-gemm-2x4c8-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-2x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-2x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-gemm-2x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-2x8-minmax-asm-aarch32-neonmlal-ld64.S │ │ ├── qd8-f32-qc8w-gemm-2x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-2x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-2x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-2x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-2x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-2x8-minmax-scalar.c │ │ ├── qd8-f32-qc8w-gemm-2x8c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-2x8c16-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-2x8c2s4-minmax-neon-mlal.c │ │ ├── qd8-f32-qc8w-gemm-2x8c4-minmax-avxvnni-u2-acc2.c │ │ ├── qd8-f32-qc8w-gemm-2x8c4-minmax-avxvnni-u4-acc4.c │ │ ├── qd8-f32-qc8w-gemm-2x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-2x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc8w-gemm-2x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-2x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-gemm-2x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-2x8c8-minmax-wasmsdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-2x8c8-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-2x8c8-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-2x8c8-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-3x16-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-3x16-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-3x16-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-3x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-3x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-3x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-3x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-3x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-3x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-3x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-3x32-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-3x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-3x4c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-3x4c16-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-3x4c2-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-3x4c2-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-3x4c2s4-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-3x4c2s4-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-3x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc8w-gemm-3x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc8w-gemm-3x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-3x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-3x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc8w-gemm-3x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc8w-gemm-3x4c8-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-3x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-3x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-gemm-3x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-3x8-minmax-asm-aarch32-neonmlal-ld64.S │ │ ├── qd8-f32-qc8w-gemm-3x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-3x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-3x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-3x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-3x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-3x8c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-3x8c16-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-3x8c4-minmax-avxvnni-u2-acc2.c │ │ ├── qd8-f32-qc8w-gemm-3x8c4-minmax-avxvnni-u4-acc4.c │ │ ├── qd8-f32-qc8w-gemm-3x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-3x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc8w-gemm-3x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-3x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-gemm-3x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-3x8c8-minmax-wasmsdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-3x8c8-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-3x8c8-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-3x8c8-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-4x16-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-4x16-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-4x16-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-4x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-4x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-4x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S │ │ ├── qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-4x16c4-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-4x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-4x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-4x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-4x16c4-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-4x16c4-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-4x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-4x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-4x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-4x32-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-4x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-4x4-minmax-scalar.c │ │ ├── qd8-f32-qc8w-gemm-4x4c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-4x4c16-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-4x4c2-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-4x4c2-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-4x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc8w-gemm-4x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc8w-gemm-4x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-4x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-4x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc8w-gemm-4x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc8w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-gemm-4x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-gemm-4x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-4x8-minmax-asm-aarch32-neonmlal-ld64.S │ │ ├── qd8-f32-qc8w-gemm-4x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-4x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-4x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-4x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-4x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-4x8c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-4x8c16-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-4x8c4-minmax-asm-aarch32-neondot-cortex-a55.S │ │ ├── qd8-f32-qc8w-gemm-4x8c4-minmax-avxvnni-u2-acc2.c │ │ ├── qd8-f32-qc8w-gemm-4x8c4-minmax-avxvnni-u4-acc4.c │ │ ├── qd8-f32-qc8w-gemm-4x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-4x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc8w-gemm-4x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-4x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-gemm-4x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-4x8c8-minmax-wasmsdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-4x8c8-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-gemm-4x8c8-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-gemm-4x8c8-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-gemm-5x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-5x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-5x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-5x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-5x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-5x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc8w-gemm-5x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc8w-gemm-5x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-5x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-5x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-5x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-5x32-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-5x32c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-5x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-gemm-5x64-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-5x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-5x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-5x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-5x8c4-minmax-avxvnni-u2-acc2.c │ │ ├── qd8-f32-qc8w-gemm-5x8c4-minmax-avxvnni-u4-acc4.c │ │ ├── qd8-f32-qc8w-gemm-5x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-5x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc8w-gemm-5x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-5x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-gemm-5x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-5x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-gemm-5x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-6x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-6x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-6x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-6x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-6x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-6x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-6x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-6x32-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-6x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-gemm-6x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-6x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-6x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-6x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-gemm-6x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-gemm-6x8c4-minmax-avxvnni-u2-acc2.c │ │ ├── qd8-f32-qc8w-gemm-6x8c4-minmax-avxvnni-u4-acc4.c │ │ ├── qd8-f32-qc8w-gemm-6x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-gemm-6x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-6x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-gemm-6x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-7x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-7x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-7x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-7x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-7x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-7x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc8w-gemm-7x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc8w-gemm-7x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-7x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-7x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-7x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-7x32-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-7x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-7x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-gemm-7x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-gemm-7x8-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-gemm-7x8-minmax-asm-aarch64-neondot-ld32.S │ │ ├── qd8-f32-qc8w-gemm-7x8-minmax-asm-aarch64-neondot-ld64.S │ │ ├── qd8-f32-qc8w-gemm-7x8c4-minmax-avxvnni-u2-acc2.c │ │ ├── qd8-f32-qc8w-gemm-7x8c4-minmax-avxvnni-u4-acc4.c │ │ ├── qd8-f32-qc8w-gemm-7x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc8w-gemm-7x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-7x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-gemm-7x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-7x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-gemm-7x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-8x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-8x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-8x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-8x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-8x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc8w-gemm-8x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc8w-gemm-8x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-8x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-8x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-8x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-8x32-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-8x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-gemm-8x8c4-minmax-avxvnni-u2-acc2.c │ │ ├── qd8-f32-qc8w-gemm-8x8c4-minmax-avxvnni-u4-acc4.c │ │ ├── qd8-f32-qc8w-gemm-8x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc8w-gemm-8x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-8x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-gemm-8x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-8x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-gemm-8x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-gemm-9x16-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-9x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-9x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-9x16c8-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-9x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-gemm-9x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-gemm-9x32-minmax-asm-amd64-avx512vnni.S │ │ ├── qd8-f32-qc8w-gemm-9x8c8-minmax-avx256vnni-prfm.c │ │ └── qd8-f32-qc8w-gemm-9x8c8-minmax-avx256vnni.c ├── qd8-f32-qc8w-igemm │ └── gen │ │ ├── qd8-f32-qc8w-igemm-10x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-10x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-10x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-10x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-10x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-10x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-igemm-12x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-12x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-12x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-12x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-12x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-12x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-igemm-14x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-14x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-14x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-14x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-14x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-14x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-igemm-16x16c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc8w-igemm-16x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-16x32c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc8w-igemm-16x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-16x64c4-minmax-avx512amx-prfm.c │ │ ├── qd8-f32-qc8w-igemm-16x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-1x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-1x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-1x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-1x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-1x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-1x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-1x16c4-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-1x16c4-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-igemm-1x16c8-minmax-aarch64-neondot-ld128.c │ │ ├── qd8-f32-qc8w-igemm-1x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc8w-igemm-1x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc8w-igemm-1x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-1x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-1x16c8-minmax-neondot-ld64.c │ │ ├── qd8-f32-qc8w-igemm-1x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-1x2-minmax-scalar.c │ │ ├── qd8-f32-qc8w-igemm-1x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-1x32c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-1x4-minmax-scalar.c │ │ ├── qd8-f32-qc8w-igemm-1x4c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-igemm-1x4c2-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-1x4c2-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-1x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc8w-igemm-1x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc8w-igemm-1x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-1x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-1x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc8w-igemm-1x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc8w-igemm-1x4c8-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-1x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-igemm-1x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-1x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-1x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-1x8-minmax-scalar.c │ │ ├── qd8-f32-qc8w-igemm-1x8c2s4-minmax-neon-mlal.c │ │ ├── qd8-f32-qc8w-igemm-1x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-aarch64-neondot-ld128.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-neondot-ld64.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-wasmsdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-1x8c8-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-igemm-2x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-2x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-2x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-2x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-2x2-minmax-scalar.c │ │ ├── qd8-f32-qc8w-igemm-2x32c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-2x4-minmax-scalar.c │ │ ├── qd8-f32-qc8w-igemm-2x4c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-igemm-2x4c2-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-2x4c2-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-2x4c2s4-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-2x4c2s4-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-2x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc8w-igemm-2x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc8w-igemm-2x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-2x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-2x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc8w-igemm-2x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc8w-igemm-2x4c8-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-2x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-2x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-igemm-2x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-2x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-2x8-minmax-scalar.c │ │ ├── qd8-f32-qc8w-igemm-2x8c2s4-minmax-neon-mlal.c │ │ ├── qd8-f32-qc8w-igemm-2x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-2x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc8w-igemm-2x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-2x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-igemm-2x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-2x8c8-minmax-wasmsdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-2x8c8-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-igemm-2x8c8-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-2x8c8-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-igemm-3x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-3x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-3x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-3x4c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-igemm-3x4c2-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-3x4c2-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-3x4c2s4-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-3x4c2s4-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-3x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc8w-igemm-3x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc8w-igemm-3x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-3x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-3x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc8w-igemm-3x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc8w-igemm-3x4c8-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-3x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-3x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-igemm-3x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-3x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-3x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc8w-igemm-3x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-3x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-igemm-3x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-3x8c8-minmax-wasmsdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-3x8c8-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-igemm-3x8c8-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-3x8c8-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-igemm-4x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-4x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-cortex-a55.S │ │ ├── qd8-f32-qc8w-igemm-4x16c4-minmax-asm-aarch64-neondot-ld128.S │ │ ├── qd8-f32-qc8w-igemm-4x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-4x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-4x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-4x16c4-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-4x16c4-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-igemm-4x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-4x32c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-4x4-minmax-scalar.c │ │ ├── qd8-f32-qc8w-igemm-4x4c16-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-igemm-4x4c2-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-4x4c2-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-4x4c8-minmax-avx-ld128.c │ │ ├── qd8-f32-qc8w-igemm-4x4c8-minmax-avx-ld64.c │ │ ├── qd8-f32-qc8w-igemm-4x4c8-minmax-sse2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-4x4c8-minmax-sse2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-4x4c8-minmax-sse41-ld128.c │ │ ├── qd8-f32-qc8w-igemm-4x4c8-minmax-sse41-ld64.c │ │ ├── qd8-f32-qc8w-igemm-4x4c8-minmax-wasmsimd-dot16x2-ld128.c │ │ ├── qd8-f32-qc8w-igemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c │ │ ├── qd8-f32-qc8w-igemm-4x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-igemm-4x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-4x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-4x8c4-minmax-asm-aarch32-neondot-cortex-a55.S │ │ ├── qd8-f32-qc8w-igemm-4x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-4x8c8-minmax-avx2.c │ │ ├── qd8-f32-qc8w-igemm-4x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-4x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-igemm-4x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-4x8c8-minmax-wasmsdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-4x8c8-minmax-wasmsdot.c │ │ ├── qd8-f32-qc8w-igemm-4x8c8-minmax-wasmusdot-u2.c │ │ ├── qd8-f32-qc8w-igemm-4x8c8-minmax-wasmusdot.c │ │ ├── qd8-f32-qc8w-igemm-5x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-5x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-5x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc8w-igemm-5x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc8w-igemm-5x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-5x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-5x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-igemm-5x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc8w-igemm-5x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-5x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-igemm-5x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-5x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-igemm-6x16-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-6x16-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-6x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-6x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-6x32c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-6x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-igemm-6x8-minmax-neon-mlal-lane-prfm.c │ │ ├── qd8-f32-qc8w-igemm-6x8-minmax-neon-mlal-lane.c │ │ ├── qd8-f32-qc8w-igemm-6x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-6x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-6x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-igemm-6x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-7x16c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-7x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-7x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-7x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc8w-igemm-7x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc8w-igemm-7x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-7x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-7x32c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-7x4v-minmax-rvv.c │ │ ├── qd8-f32-qc8w-igemm-7x64c4-minmax-avx512amx.c │ │ ├── qd8-f32-qc8w-igemm-7x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc8w-igemm-7x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-7x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-igemm-7x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-7x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-igemm-8x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-8x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-8x16c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-8x16c8-minmax-avx512skx-prfm.c │ │ ├── qd8-f32-qc8w-igemm-8x16c8-minmax-avx512skx.c │ │ ├── qd8-f32-qc8w-igemm-8x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-8x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-8x16c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-8x32c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-8x8c4-minmax-neondot.c │ │ ├── qd8-f32-qc8w-igemm-8x8c8-minmax-avx256skx.c │ │ ├── qd8-f32-qc8w-igemm-8x8c8-minmax-avx256vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-8x8c8-minmax-avx256vnni.c │ │ ├── qd8-f32-qc8w-igemm-8x8c8-minmax-avxvnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-8x8c8-minmax-avxvnni.c │ │ ├── qd8-f32-qc8w-igemm-8x8c8-minmax-neoni8mm.c │ │ ├── qd8-f32-qc8w-igemm-9x16c4-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-9x16c4-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-9x16c8-minmax-avx512vnni-prfm.c │ │ ├── qd8-f32-qc8w-igemm-9x16c8-minmax-avx512vnni.c │ │ ├── qd8-f32-qc8w-igemm-9x8c8-minmax-avx256vnni-prfm.c │ │ └── qd8-f32-qc8w-igemm-9x8c8-minmax-avx256vnni.c ├── qp8-f32-qb4w-gemm │ ├── qp8-f32-qb4w-gemm-minmax-16x4c16s2-mstep4-neoni8mm.c │ ├── qp8-f32-qb4w-gemm-minmax-1x4c16s2-aarch64-neondot.c │ ├── qp8-f32-qb4w-gemm-minmax-1x4c8s2-aarch64-neondot.c │ ├── qp8-f32-qb4w-gemm-minmax-1x8c16s2-aarch64-neondot.c │ ├── qp8-f32-qb4w-gemm-minmax-4x4c8s2-aarch64-neondot.c │ ├── qp8-f32-qb4w-gemm-minmax-4x8c16s2-neoni8mm.c │ └── qp8-f32-qb4w-gemm-minmax-8x4c16s2-mstep2-neoni8mm.c ├── qp8-f32-qc4w-gemm │ ├── qp8-f32-qc4w-gemm-minmax-16x4c8s2-mstep4-aarch64-neondot.c │ ├── qp8-f32-qc4w-gemm-minmax-16x64c4-neonsme2.c │ ├── qp8-f32-qc4w-gemm-minmax-1x4c16s2-aarch64-neondot.c │ ├── qp8-f32-qc4w-gemm-minmax-1x4c8s2-aarch64-neondot.c │ ├── qp8-f32-qc4w-gemm-minmax-1x64c4-neonsme2.c │ ├── qp8-f32-qc4w-gemm-minmax-1x8c16s2-aarch64-neondot.c │ ├── qp8-f32-qc4w-gemm-minmax-4x4c16s2-neoni8mm.c │ ├── qp8-f32-qc4w-gemm-minmax-4x8c16s2-neoni8mm.c │ ├── qp8-f32-qc4w-gemm-minmax-8x4c16s2-mstep2-neoni8mm.c │ └── qp8-f32-qc4w-gemm-minmax-8x8c16s2-mstep2-neoni8mm.c ├── qp8-f32-qc8w-gemm │ ├── qp8-f32-qc8w-gemm-minmax-16x4c4-mstep4-aarch64-neondot.c │ ├── qp8-f32-qc8w-gemm-minmax-16x4c8-mstep4-neoni8mm.c │ ├── qp8-f32-qc8w-gemm-minmax-16x64c4-neonsme.c │ ├── qp8-f32-qc8w-gemm-minmax-16x64c4-neonsme2.c │ ├── qp8-f32-qc8w-gemm-minmax-1x4c4-aarch64-neondot.c │ ├── qp8-f32-qc8w-gemm-minmax-1x4c8-aarch64-neondot.c │ ├── qp8-f32-qc8w-gemm-minmax-1x64c4-neonsme.c │ └── qp8-f32-qc8w-gemm-minmax-1x64c4-neonsme2.c ├── qs8-dwconv │ ├── gen │ │ ├── qs8-dwconv-25p16c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-avx-mul16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-avx-mul32.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-avx2-mul16-add16-vpunpck.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-avx2-mul16-vpmovsx.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-avx2-mul16-vpunpck.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-neon-mul16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-sse2-mul16-add16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-sse41-mul16-add16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-sse41-mul32.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-dwconv-25p16c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qs8-dwconv-25p16c-minmax-rndnu-neon-mla8-ld128.c │ │ ├── qs8-dwconv-25p16c-minmax-rndnu-neon-mla8-ld64.c │ │ ├── qs8-dwconv-25p16c-minmax-rndnu-neon-mul16.c │ │ ├── qs8-dwconv-25p16c-minmax-rndnu-neon-mul8-ld128.c │ │ ├── qs8-dwconv-25p16c-minmax-rndnu-neon-mul8-ld64.c │ │ ├── qs8-dwconv-25p1c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-dwconv-25p1c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-dwconv-25p1c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-dwconv-25p2c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-dwconv-25p2c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-dwconv-25p2c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-dwconv-25p32c-minmax-fp32-avx2-mul16-add16-vpunpck.c │ │ ├── qs8-dwconv-25p32c-minmax-fp32-avx2-mul16-vpmovsx.c │ │ ├── qs8-dwconv-25p32c-minmax-fp32-avx2-mul16-vpunpck.c │ │ ├── qs8-dwconv-25p32c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-dwconv-25p32c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-dwconv-25p32c-minmax-fp32-neon-mul16.c │ │ ├── qs8-dwconv-25p32c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-dwconv-25p32c-minmax-rndnu-neon-mul16.c │ │ ├── qs8-dwconv-25p4c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-dwconv-25p4c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-dwconv-25p4c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-avx-mul16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-avx-mul32.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-neon-mul16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-sse2-mul16-add16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-sse41-mul16-add16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-sse41-mul32.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-dwconv-25p8c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qs8-dwconv-25p8c-minmax-rndnu-neon-mla8-ld64.c │ │ ├── qs8-dwconv-25p8c-minmax-rndnu-neon-mul16.c │ │ ├── qs8-dwconv-25p8c-minmax-rndnu-neon-mul8-ld64.c │ │ ├── qs8-dwconv-25p8vc-minmax-fp32-rvv.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-avx-mul16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-avx-mul32.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-avx2-mul16-add16-vpunpck.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-avx2-mul16-vpmovsx.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-avx2-mul16-vpunpck.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-neon-mul16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-sse2-mul16-add16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-sse41-mul16-add16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-sse41-mul32.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-dwconv-9p16c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qs8-dwconv-9p16c-minmax-rndnu-neon-mla8-ld128.c │ │ ├── qs8-dwconv-9p16c-minmax-rndnu-neon-mla8-ld64.c │ │ ├── qs8-dwconv-9p16c-minmax-rndnu-neon-mul16.c │ │ ├── qs8-dwconv-9p16c-minmax-rndnu-neon-mul8-ld128.c │ │ ├── qs8-dwconv-9p16c-minmax-rndnu-neon-mul8-ld64.c │ │ ├── qs8-dwconv-9p1c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-dwconv-9p1c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-dwconv-9p1c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-dwconv-9p1c-minmax-rndnu-scalar.c │ │ ├── qs8-dwconv-9p2c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-dwconv-9p2c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-dwconv-9p2c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-dwconv-9p2c-minmax-rndnu-scalar.c │ │ ├── qs8-dwconv-9p32c-minmax-fp32-avx2-mul16-add16-vpunpck.c │ │ ├── qs8-dwconv-9p32c-minmax-fp32-avx2-mul16-vpmovsx.c │ │ ├── qs8-dwconv-9p32c-minmax-fp32-avx2-mul16-vpunpck.c │ │ ├── qs8-dwconv-9p32c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-dwconv-9p32c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-dwconv-9p32c-minmax-fp32-neon-mul16.c │ │ ├── qs8-dwconv-9p32c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-dwconv-9p32c-minmax-rndnu-neon-mul16.c │ │ ├── qs8-dwconv-9p4c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-dwconv-9p4c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-dwconv-9p4c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-dwconv-9p4c-minmax-rndnu-scalar.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-avx-mul16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-avx-mul32.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-neon-mul16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-sse2-mul16-add16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-sse41-mul16-add16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-sse41-mul32.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-dwconv-9p8c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qs8-dwconv-9p8c-minmax-rndnu-neon-mla8-ld64.c │ │ ├── qs8-dwconv-9p8c-minmax-rndnu-neon-mul16.c │ │ ├── qs8-dwconv-9p8c-minmax-rndnu-neon-mul8-ld64.c │ │ └── qs8-dwconv-9p8vc-minmax-fp32-rvv.c │ ├── qs8-dwconv-minmax-fp32.inc │ ├── qs8-dwconv-minmax-rndnu.inc │ ├── unipass-avx2-mul16-vpmovsx.c.in │ ├── unipass-avx2-mul16-vpunpck.c.in │ ├── unipass-avx2-mul32.c.in │ ├── unipass-avx512skx-mul32.c.in │ ├── unipass-neon-mul16.c.in │ ├── unipass-neon-mul8.c.in │ ├── unipass-rvv.c.in │ ├── unipass-scalar.c.in │ ├── unipass-sse-mul16.c.in │ ├── unipass-sse-mul32.c.in │ └── unipass-wasmsimd-mul16.c.in ├── qs8-f16-vcvt │ ├── avx2.c.in │ ├── gen │ │ ├── qs8-f16-vcvt-avx2-u16.c │ │ ├── qs8-f16-vcvt-avx2-u24.c │ │ ├── qs8-f16-vcvt-avx2-u32.c │ │ ├── qs8-f16-vcvt-avx2-u64.c │ │ ├── qs8-f16-vcvt-neonfp16arith-u16.c │ │ ├── qs8-f16-vcvt-neonfp16arith-u24.c │ │ ├── qs8-f16-vcvt-neonfp16arith-u32.c │ │ └── qs8-f16-vcvt-neonfp16arith-u8.c │ ├── neon.c.in │ └── qs8-f16-vcvt.inc ├── qs8-f32-vcvt │ ├── avx.c.in │ ├── avx2.c.in │ ├── avx512skx.c.in │ ├── gen │ │ ├── qs8-f32-vcvt-avx-u16.c │ │ ├── qs8-f32-vcvt-avx-u24.c │ │ ├── qs8-f32-vcvt-avx-u32.c │ │ ├── qs8-f32-vcvt-avx-u8.c │ │ ├── qs8-f32-vcvt-avx2-u16.c │ │ ├── qs8-f32-vcvt-avx2-u24.c │ │ ├── qs8-f32-vcvt-avx2-u32.c │ │ ├── qs8-f32-vcvt-avx2-u8.c │ │ ├── qs8-f32-vcvt-avx512skx-u16.c │ │ ├── qs8-f32-vcvt-avx512skx-u32.c │ │ ├── qs8-f32-vcvt-avx512skx-u48.c │ │ ├── qs8-f32-vcvt-avx512skx-u64.c │ │ ├── qs8-f32-vcvt-neon-u16.c │ │ ├── qs8-f32-vcvt-neon-u24.c │ │ ├── qs8-f32-vcvt-neon-u32.c │ │ ├── qs8-f32-vcvt-neon-u8.c │ │ ├── qs8-f32-vcvt-rvv-u1v.c │ │ ├── qs8-f32-vcvt-rvv-u2v.c │ │ ├── qs8-f32-vcvt-scalar-u1.c │ │ ├── qs8-f32-vcvt-scalar-u2.c │ │ ├── qs8-f32-vcvt-scalar-u3.c │ │ ├── qs8-f32-vcvt-scalar-u4.c │ │ ├── qs8-f32-vcvt-sse2-u16.c │ │ ├── qs8-f32-vcvt-sse2-u24.c │ │ ├── qs8-f32-vcvt-sse2-u32.c │ │ ├── qs8-f32-vcvt-sse2-u8.c │ │ ├── qs8-f32-vcvt-sse41-u16.c │ │ ├── qs8-f32-vcvt-sse41-u24.c │ │ ├── qs8-f32-vcvt-sse41-u32.c │ │ ├── qs8-f32-vcvt-sse41-u8.c │ │ ├── qs8-f32-vcvt-wasmsimd-u16.c │ │ ├── qs8-f32-vcvt-wasmsimd-u24.c │ │ ├── qs8-f32-vcvt-wasmsimd-u32.c │ │ └── qs8-f32-vcvt-wasmsimd-u8.c │ ├── neon.c.in │ ├── qs8-f32-vcvt.inc │ ├── rvv.c.in │ ├── scalar.c.in │ ├── sse2.c.in │ ├── sse4.c.in │ └── wasmsimd.c.in ├── qs8-gemm │ ├── 1x16c4-aarch64-neondot-ld32.S.in │ ├── 1x16c4-aarch64-neondot-ld64.S.in │ ├── 1x8-aarch32-neon-mlal-lane-cortex-a7.S.in │ ├── 1x8c8-aarch64-neon-mlal-cortex-a53.S.in │ ├── 1x8c8-aarch64-neon-mlal.S.in │ ├── 2x8c16-aarch64-neon-mlal.S.in │ ├── 2x8c8-aarch64-neon-mlal-cortex-a53.S.in │ ├── 2x8c8-aarch64-neon-mlal.S.in │ ├── 2x8c8-aarch64-neon-mull.S.in │ ├── 4x16-aarch64-neon-mlal-lane-cortex-a53.S.in │ ├── 4x16-aarch64-neon-mlal-lane-cortex-a75.S.in │ ├── 4x16-aarch64-neon-mlal-lane-ld64.S.in │ ├── 4x16c4-aarch64-neondot-cortex-a55.S.in │ ├── 4x16c4-aarch64-neondot-ld128.S.in │ ├── 4x16c4-aarch64-neondot-ld32.S.in │ ├── 4x16c4-aarch64-neondot-ld64.S.in │ ├── 4x8-aarch32-neon-mlal-lane-cortex-a53.S.in │ ├── 4x8-aarch32-neon-mlal-lane-cortex-a7.S.in │ ├── 4x8-aarch32-neon-mlal-lane-ld64.S.in │ ├── 4x8-aarch64-neon-mlal-lane-ld64.S.in │ ├── 4x8c4-aarch32-neondot-cortex-a55.S.in │ ├── 4x8c4-aarch32-neondot-ld64.S.in │ ├── MRx16c2-wasmsimd-dot16x2.c.in │ ├── MRx16c2s2-wasmsimd-dot16x2.c.in │ ├── MRx16c4-avx512vnni.c.in │ ├── MRx16c4-wasmdot.c.in │ ├── MRx16c8-avx512skx.c.in │ ├── MRx16c8-avx512vnni.c.in │ ├── MRx4c16-wasmdot.c.in │ ├── MRx4c2-sse.c.in │ ├── MRx4c2-wasmsimd-dot16x2.c.in │ ├── MRx4c2s4-sse.c.in │ ├── MRx4c2s4-wasmsimd-dot16x2.c.in │ ├── MRx4c8-sse.c.in │ ├── MRx4c8-ssevnni.c.in │ ├── MRx4c8-wasmsimd-dot16x2.c.in │ ├── MRx8c4-avxvnni.c.in │ ├── MRx8c8-avx2.c.in │ ├── MRx8c8-avx512vnni.c.in │ ├── MRx8c8-avxvnni.c.in │ ├── c16-neon-mlal.c.in │ ├── c2-neon-mull-dup.c.in │ ├── c2-neon-mull-shuffle.c.in │ ├── c4-armsimd32.c.in │ ├── c4-avx512amx.c.in │ ├── c4-hvx.c.in │ ├── c4-neon-mull-dup.c.in │ ├── c4-neon-mull-shuffle.c.in │ ├── c4-neondot.c.in │ ├── c8-neon-mull.c.in │ ├── c8-neondot.c.in │ ├── c8-neoni8mm.c.in │ ├── c8-wasmdot.c.in │ ├── neon-mlal-lane.c.in │ ├── neon-mull-addw-dup.c.in │ ├── rvv.c.in │ └── scalar.c.in ├── qs8-igemm │ ├── 1x8-aarch32-neon-mlal-lane-cortex-a7.S.in │ ├── 1x8c8-aarch64-neon-mlal-cortex-a53.S.in │ ├── 1x8c8-aarch64-neon-mlal.S.in │ ├── 2x8c16-aarch64-neon-mlal.S.in │ ├── 2x8c8-aarch64-neon-mlal-cortex-a53.S.in │ ├── 2x8c8-aarch64-neon-mlal.S.in │ ├── 4x16-aarch64-neon-mlal-lane-cortex-a53.S.in │ ├── 4x16-aarch64-neon-mlal-lane-cortex-a75.S.in │ ├── 4x16-aarch64-neon-mlal-lane-ld64.S.in │ ├── 4x16c4-aarch64-neondot-cortex-a55.S.in │ ├── 4x16c4-aarch64-neondot-ld128.S.in │ ├── 4x16c4-aarch64-neondot-ld64.S.in │ ├── 4x8-aarch32-neon-mlal-lane-cortex-a53.S.in │ ├── 4x8-aarch32-neon-mlal-lane-cortex-a7.S.in │ ├── 4x8-aarch32-neon-mlal-lane-ld64.S.in │ ├── 4x8-aarch64-neon-mlal-lane-ld64.S.in │ ├── 4x8c4-aarch32-neondot-cortex-a55.S.in │ ├── 4x8c4-aarch32-neondot-ld64.S.in │ ├── MRx16c2-wasmsimd-dot16x2.c.in │ ├── MRx16c2s2-wasmsimd-dot16x2.c.in │ ├── MRx16c4-avx512vnni.c.in │ ├── MRx16c4-wasmdot.c.in │ ├── MRx16c8-avx512skx.c.in │ ├── MRx16c8-avx512vnni.c.in │ ├── MRx4c16-wasmdot.c.in │ ├── MRx4c2-sse.c.in │ ├── MRx4c2-wasmsimd-dot16x2.c.in │ ├── MRx4c2s4-sse.c.in │ ├── MRx4c2s4-wasmsimd-dot16x2.c.in │ ├── MRx4c8-sse.c.in │ ├── MRx4c8-wasmsimd-dot16x2.c.in │ ├── MRx8c8-avx2.c.in │ ├── MRx8c8-avxvnni.c.in │ ├── c16-neon-mlal.c.in │ ├── c2-neon-mull-dup.c.in │ ├── c2-neon-mull-shuffle.c.in │ ├── c4-armsimd32.c.in │ ├── c4-avx512amx.c.in │ ├── c4-hvx.c.in │ ├── c4-neon-mull-dup.c.in │ ├── c4-neon-mull-shuffle.c.in │ ├── c4-neondot.c.in │ ├── c8-neon-mull.c.in │ ├── c8-neondot.c.in │ ├── c8-neoni8mm.c.in │ ├── c8-wasmdot.c.in │ ├── neon-mlal-lane.c.in │ ├── neon-mull-addw-dup.c.in │ ├── rvv.c.in │ └── scalar.c.in ├── qs8-packw │ ├── gen │ │ ├── qs8-packw-x128c4-gemm-gio-hvx.c │ │ ├── qs8-packw-x128c4-gemm-goi-hvx.c │ │ ├── qs8-packw-x16c4-gemm-gio-scalar.c │ │ ├── qs8-packw-x16c4-gemm-goi-scalar.c │ │ ├── qs8-packw-x16c8-gemm-gio-scalar.c │ │ ├── qs8-packw-x16c8-gemm-goi-avx256vnni-prfm.c │ │ ├── qs8-packw-x16c8-gemm-goi-avx256vnni.c │ │ ├── qs8-packw-x16c8-gemm-goi-avxvnni-prfm.c │ │ ├── qs8-packw-x16c8-gemm-goi-avxvnni.c │ │ ├── qs8-packw-x16c8-gemm-goi-scalar.c │ │ ├── qs8-packw-x32c4-gemm-gio-scalar.c │ │ ├── qs8-packw-x32c4-gemm-goi-scalar.c │ │ ├── qs8-packw-x4c8-gemm-gio-scalar.c │ │ ├── qs8-packw-x4c8-gemm-goi-scalar.c │ │ ├── qs8-packw-x64c4-gemm-gio-scalar.c │ │ ├── qs8-packw-x64c4-gemm-goi-avx256vnni-prfm.c │ │ ├── qs8-packw-x64c4-gemm-goi-avx256vnni.c │ │ ├── qs8-packw-x64c4-gemm-goi-scalar.c │ │ ├── qs8-packw-x8c4-gemm-gio-scalar.c │ │ ├── qs8-packw-x8c4-gemm-goi-scalar.c │ │ ├── qs8-packw-x8c8-gemm-gio-avxvnni-prfm.c │ │ ├── qs8-packw-x8c8-gemm-gio-avxvnni.c │ │ ├── qs8-packw-x8c8-gemm-gio-scalar.c │ │ ├── qs8-packw-x8c8-gemm-goi-avx2-madd.c │ │ ├── qs8-packw-x8c8-gemm-goi-avx256vnni-prfm.c │ │ ├── qs8-packw-x8c8-gemm-goi-avx256vnni.c │ │ ├── qs8-packw-x8c8-gemm-goi-avxvnni-prfm.c │ │ ├── qs8-packw-x8c8-gemm-goi-avxvnni.c │ │ ├── qs8-packw-x8c8-gemm-goi-scalar.c │ │ ├── qs8-packw-x8c8-gemm-goi-wasmrelaxedsimd.c │ │ ├── qs8-packw-x96c4-gemm-gio-hvx.c │ │ └── qs8-packw-x96c4-gemm-goi-hvx.c │ └── qs8-packw.inc ├── qs8-qc4w-gemm │ └── gen │ │ ├── qs8-qc4w-gemm-10x16c8-minmax-fp32-avx512skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-10x16c8-minmax-fp32-avx512skx-madd.c │ │ ├── qs8-qc4w-gemm-10x16c8-minmax-fp32-avx512vnnigfni-prfm.c │ │ ├── qs8-qc4w-gemm-10x16c8-minmax-fp32-avx512vnnigfni.c │ │ ├── qs8-qc4w-gemm-12x16c8-minmax-fp32-avx512skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-12x16c8-minmax-fp32-avx512skx-madd.c │ │ ├── qs8-qc4w-gemm-12x16c8-minmax-fp32-avx512vnnigfni-prfm.c │ │ ├── qs8-qc4w-gemm-12x16c8-minmax-fp32-avx512vnnigfni.c │ │ ├── qs8-qc4w-gemm-14x16c8-minmax-fp32-avx512skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-14x16c8-minmax-fp32-avx512skx-madd.c │ │ ├── qs8-qc4w-gemm-14x16c8-minmax-fp32-avx512vnnigfni-prfm.c │ │ ├── qs8-qc4w-gemm-14x16c8-minmax-fp32-avx512vnnigfni.c │ │ ├── qs8-qc4w-gemm-1x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc4w-gemm-1x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc4w-gemm-1x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc4w-gemm-1x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-1x16c8-minmax-fp32-avx512skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-1x16c8-minmax-fp32-avx512skx-madd.c │ │ ├── qs8-qc4w-gemm-1x16c8-minmax-fp32-avx512vnnigfni-prfm.c │ │ ├── qs8-qc4w-gemm-1x16c8-minmax-fp32-avx512vnnigfni.c │ │ ├── qs8-qc4w-gemm-1x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc4w-gemm-1x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc4w-gemm-1x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc4w-gemm-1x4c8-minmax-avx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-1x4c8-minmax-avx-madd.c │ │ ├── qs8-qc4w-gemm-1x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qs8-qc4w-gemm-1x4c8-minmax-ssse3-madd.c │ │ ├── qs8-qc4w-gemm-1x8c8-minmax-avx2-madd-prfm.c │ │ ├── qs8-qc4w-gemm-1x8c8-minmax-avx2-madd.c │ │ ├── qs8-qc4w-gemm-1x8c8-minmax-avxvnni-prfm.c │ │ ├── qs8-qc4w-gemm-1x8c8-minmax-avxvnni.c │ │ ├── qs8-qc4w-gemm-1x8c8-minmax-fp32-avx256skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-1x8c8-minmax-fp32-avx256skx-madd.c │ │ ├── qs8-qc4w-gemm-1x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc4w-gemm-1x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc4w-gemm-2x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc4w-gemm-2x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc4w-gemm-2x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc4w-gemm-2x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-2x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc4w-gemm-2x4c8-minmax-avx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-2x4c8-minmax-avx-madd.c │ │ ├── qs8-qc4w-gemm-2x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qs8-qc4w-gemm-2x4c8-minmax-ssse3-madd.c │ │ ├── qs8-qc4w-gemm-2x8c8-minmax-avx2-madd-prfm.c │ │ ├── qs8-qc4w-gemm-2x8c8-minmax-avx2-madd.c │ │ ├── qs8-qc4w-gemm-2x8c8-minmax-avxvnni-prfm.c │ │ ├── qs8-qc4w-gemm-2x8c8-minmax-avxvnni.c │ │ ├── qs8-qc4w-gemm-2x8c8-minmax-fp32-avx256skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-2x8c8-minmax-fp32-avx256skx-madd.c │ │ ├── qs8-qc4w-gemm-2x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc4w-gemm-2x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc4w-gemm-3x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc4w-gemm-3x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc4w-gemm-3x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc4w-gemm-3x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-3x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc4w-gemm-3x4c8-minmax-avx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-3x4c8-minmax-avx-madd.c │ │ ├── qs8-qc4w-gemm-3x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qs8-qc4w-gemm-3x4c8-minmax-ssse3-madd.c │ │ ├── qs8-qc4w-gemm-3x8c8-minmax-avx2-madd-prfm.c │ │ ├── qs8-qc4w-gemm-3x8c8-minmax-avx2-madd.c │ │ ├── qs8-qc4w-gemm-3x8c8-minmax-avxvnni-prfm.c │ │ ├── qs8-qc4w-gemm-3x8c8-minmax-avxvnni.c │ │ ├── qs8-qc4w-gemm-3x8c8-minmax-fp32-avx256skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-3x8c8-minmax-fp32-avx256skx-madd.c │ │ ├── qs8-qc4w-gemm-3x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc4w-gemm-3x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc4w-gemm-4x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc4w-gemm-4x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc4w-gemm-4x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc4w-gemm-4x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-4x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc4w-gemm-4x4c8-minmax-avx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-4x4c8-minmax-avx-madd.c │ │ ├── qs8-qc4w-gemm-4x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qs8-qc4w-gemm-4x4c8-minmax-ssse3-madd.c │ │ ├── qs8-qc4w-gemm-4x8c8-minmax-avx2-madd-prfm.c │ │ ├── qs8-qc4w-gemm-4x8c8-minmax-avx2-madd.c │ │ ├── qs8-qc4w-gemm-4x8c8-minmax-avxvnni-prfm.c │ │ ├── qs8-qc4w-gemm-4x8c8-minmax-avxvnni.c │ │ ├── qs8-qc4w-gemm-4x8c8-minmax-fp32-avx256skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-4x8c8-minmax-fp32-avx256skx-madd.c │ │ ├── qs8-qc4w-gemm-4x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc4w-gemm-4x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc4w-gemm-5x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc4w-gemm-5x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc4w-gemm-5x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc4w-gemm-5x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-5x16c8-minmax-fp32-avx512skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-5x16c8-minmax-fp32-avx512skx-madd.c │ │ ├── qs8-qc4w-gemm-5x16c8-minmax-fp32-avx512vnnigfni-prfm.c │ │ ├── qs8-qc4w-gemm-5x16c8-minmax-fp32-avx512vnnigfni.c │ │ ├── qs8-qc4w-gemm-5x4c8-minmax-avx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-5x4c8-minmax-avx-madd.c │ │ ├── qs8-qc4w-gemm-5x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qs8-qc4w-gemm-5x4c8-minmax-ssse3-madd.c │ │ ├── qs8-qc4w-gemm-5x8c8-minmax-avx2-madd-prfm.c │ │ ├── qs8-qc4w-gemm-5x8c8-minmax-avx2-madd.c │ │ ├── qs8-qc4w-gemm-5x8c8-minmax-avxvnni-prfm.c │ │ ├── qs8-qc4w-gemm-5x8c8-minmax-avxvnni.c │ │ ├── qs8-qc4w-gemm-5x8c8-minmax-fp32-avx256skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-5x8c8-minmax-fp32-avx256skx-madd.c │ │ ├── qs8-qc4w-gemm-5x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc4w-gemm-5x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc4w-gemm-6x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-6x4c8-minmax-avx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-6x4c8-minmax-avx-madd.c │ │ ├── qs8-qc4w-gemm-6x4c8-minmax-ssse3-madd-prfm.c │ │ ├── qs8-qc4w-gemm-6x4c8-minmax-ssse3-madd.c │ │ ├── qs8-qc4w-gemm-6x8c8-minmax-avx2-madd-prfm.c │ │ ├── qs8-qc4w-gemm-6x8c8-minmax-avx2-madd.c │ │ ├── qs8-qc4w-gemm-6x8c8-minmax-avxvnni-prfm.c │ │ ├── qs8-qc4w-gemm-6x8c8-minmax-avxvnni.c │ │ ├── qs8-qc4w-gemm-6x8c8-minmax-fp32-avx256skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-6x8c8-minmax-fp32-avx256skx-madd.c │ │ ├── qs8-qc4w-gemm-6x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc4w-gemm-6x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc4w-gemm-7x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-7x16c8-minmax-fp32-avx512skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-7x16c8-minmax-fp32-avx512skx-madd.c │ │ ├── qs8-qc4w-gemm-7x16c8-minmax-fp32-avx512vnnigfni-prfm.c │ │ ├── qs8-qc4w-gemm-7x16c8-minmax-fp32-avx512vnnigfni.c │ │ ├── qs8-qc4w-gemm-7x8c8-minmax-avx2-madd-prfm.c │ │ ├── qs8-qc4w-gemm-7x8c8-minmax-avx2-madd.c │ │ ├── qs8-qc4w-gemm-7x8c8-minmax-avxvnni-prfm.c │ │ ├── qs8-qc4w-gemm-7x8c8-minmax-avxvnni.c │ │ ├── qs8-qc4w-gemm-7x8c8-minmax-fp32-avx256skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-7x8c8-minmax-fp32-avx256skx-madd.c │ │ ├── qs8-qc4w-gemm-7x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc4w-gemm-7x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc4w-gemm-8x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-8x16c8-minmax-fp32-avx512skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-8x16c8-minmax-fp32-avx512skx-madd.c │ │ ├── qs8-qc4w-gemm-8x16c8-minmax-fp32-avx512vnnigfni-prfm.c │ │ ├── qs8-qc4w-gemm-8x16c8-minmax-fp32-avx512vnnigfni.c │ │ ├── qs8-qc4w-gemm-8x8c8-minmax-avx2-madd-prfm.c │ │ ├── qs8-qc4w-gemm-8x8c8-minmax-avx2-madd.c │ │ ├── qs8-qc4w-gemm-8x8c8-minmax-avxvnni-prfm.c │ │ ├── qs8-qc4w-gemm-8x8c8-minmax-avxvnni.c │ │ ├── qs8-qc4w-gemm-8x8c8-minmax-fp32-avx256skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-8x8c8-minmax-fp32-avx256skx-madd.c │ │ ├── qs8-qc4w-gemm-8x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc4w-gemm-8x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc4w-gemm-9x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc4w-gemm-9x16c8-minmax-fp32-avx512skx-madd-prfm.c │ │ ├── qs8-qc4w-gemm-9x16c8-minmax-fp32-avx512skx-madd.c │ │ ├── qs8-qc4w-gemm-9x16c8-minmax-fp32-avx512vnnigfni-prfm.c │ │ └── qs8-qc4w-gemm-9x16c8-minmax-fp32-avx512vnnigfni.c ├── qs8-qc4w-packw │ ├── gen │ │ ├── qs8-qc4w-packw-x16c8-gemm-goi-avx256vnni-prfm.c │ │ ├── qs8-qc4w-packw-x16c8-gemm-goi-avx256vnni.c │ │ ├── qs8-qc4w-packw-x16c8-gemm-goi-avxvnni-prfm.c │ │ ├── qs8-qc4w-packw-x16c8-gemm-goi-avxvnni.c │ │ ├── qs8-qc4w-packw-x16c8-gemm-goi-scalar.c │ │ ├── qs8-qc4w-packw-x32c8-gemm-goi-scalar.c │ │ ├── qs8-qc4w-packw-x8c8-gemm-goi-avx256vnni-prfm.c │ │ ├── qs8-qc4w-packw-x8c8-gemm-goi-avx256vnni.c │ │ ├── qs8-qc4w-packw-x8c8-gemm-goi-avxvnni-prfm.c │ │ ├── qs8-qc4w-packw-x8c8-gemm-goi-avxvnni.c │ │ └── qs8-qc4w-packw-x8c8-gemm-goi-scalar.c │ └── qs8-qc4w-packw.inc ├── qs8-qc8w-dwconv │ ├── gen │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-avx-mul16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-avx-mul32.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-avx2-mul16-add16-vpunpck.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-avx2-mul16-vpmovsx.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-avx2-mul16-vpunpck.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neon-mla8-ld128.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neon-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neon-mul16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neon-mul8-ld128.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neon-mul8-ld64.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neonv8-mla8-ld128.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neonv8-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neonv8-mul8-ld128.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-neonv8-mul8-ld64.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-sse2-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-sse41-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-sse41-mul32.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-25p16c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qs8-qc8w-dwconv-25p1c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-dwconv-25p1c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-dwconv-25p1c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-dwconv-25p2c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-dwconv-25p2c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-dwconv-25p2c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-dwconv-25p32c-minmax-fp32-avx2-mul16-add16-vpunpck.c │ │ ├── qs8-qc8w-dwconv-25p32c-minmax-fp32-avx2-mul16-vpmovsx.c │ │ ├── qs8-qc8w-dwconv-25p32c-minmax-fp32-avx2-mul16-vpunpck.c │ │ ├── qs8-qc8w-dwconv-25p32c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-qc8w-dwconv-25p32c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-qc8w-dwconv-25p32c-minmax-fp32-neon-mul16.c │ │ ├── qs8-qc8w-dwconv-25p32c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-qc8w-dwconv-25p4c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-dwconv-25p4c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-dwconv-25p4c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-avx-mul16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-avx-mul32.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-neon-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-neon-mul16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-neon-mul8-ld64.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-neonv8-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-neonv8-mul8-ld64.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-sse2-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-sse41-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-sse41-mul32.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-25p8c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qs8-qc8w-dwconv-25p8vc-minmax-fp32-rvv.c │ │ ├── qs8-qc8w-dwconv-3p16c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-3p16c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-qc8w-dwconv-3p16c-minmax-fp32-neon-mla8-ld128.c │ │ ├── qs8-qc8w-dwconv-3p16c-minmax-fp32-neon-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-3p16c-minmax-fp32-neonv8-mla8-ld128.c │ │ ├── qs8-qc8w-dwconv-3p16c-minmax-fp32-neonv8-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-3p16c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-3p1c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-dwconv-3p2c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-dwconv-3p2c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-dwconv-3p32c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-qc8w-dwconv-3p8c-minmax-fp32-neon-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-3p8c-minmax-fp32-neonv8-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-3p8c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-qc8w-dwconv-3p8c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-qc8w-dwconv-3p8vc-minmax-fp32-rvv.c │ │ ├── qs8-qc8w-dwconv-4p2c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-dwconv-4p8c-minmax-fp32-neon-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-avx-mul16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-avx-mul32.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-avx2-mul16-add16-vpunpck.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-avx2-mul16-vpmovsx.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-avx2-mul16-vpunpck.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neon-mla8-ld128.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neon-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neon-mul16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neon-mul8-ld128.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neon-mul8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neonv8-mla8-ld128.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neonv8-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neonv8-mul8-ld128.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-neonv8-mul8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-sse2-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-sse41-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-sse41-mul32.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-9p16c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qs8-qc8w-dwconv-9p1c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-dwconv-9p1c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-dwconv-9p1c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-dwconv-9p2c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-dwconv-9p2c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-dwconv-9p2c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-dwconv-9p32c-minmax-fp32-avx2-mul16-add16-vpunpck.c │ │ ├── qs8-qc8w-dwconv-9p32c-minmax-fp32-avx2-mul16-vpmovsx.c │ │ ├── qs8-qc8w-dwconv-9p32c-minmax-fp32-avx2-mul16-vpunpck.c │ │ ├── qs8-qc8w-dwconv-9p32c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-qc8w-dwconv-9p32c-minmax-fp32-avx512skx-mul32.c │ │ ├── qs8-qc8w-dwconv-9p32c-minmax-fp32-neon-mul16.c │ │ ├── qs8-qc8w-dwconv-9p32c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-qc8w-dwconv-9p4c-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-dwconv-9p4c-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-dwconv-9p4c-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-avx-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-avx-mul16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-avx-mul32.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-avx2-mul32.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-neon-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-neon-mul16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-neon-mul8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-neonv8-mla8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-neonv8-mul16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-neonv8-mul8-ld64.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-sse2-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-sse2-mul16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-sse41-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-sse41-mul16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-sse41-mul32.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-wasmsimd-mul16-add16.c │ │ ├── qs8-qc8w-dwconv-9p8c-minmax-fp32-wasmsimd-mul16.c │ │ └── qs8-qc8w-dwconv-9p8vc-minmax-fp32-rvv.c │ └── qs8-qc8w-dwconv-minmax-fp32.inc ├── qs8-qc8w-gemm │ └── gen │ │ ├── qs8-qc8w-gemm-10x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-10x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-10x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-10x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-10x16c8-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-10x16c8-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-10x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-gemm-10x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-gemm-11x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-11x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-12x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-12x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-12x16c8-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-12x16c8-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-12x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-gemm-12x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-gemm-14x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-14x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-14x16c8-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-14x16c8-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-14x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-gemm-14x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-gemm-16x16c4-minmax-fp32-avx512amx-prfm.c │ │ ├── qs8-qc8w-gemm-16x16c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-16x32c4-minmax-fp32-avx512amx-prfm.c │ │ ├── qs8-qc8w-gemm-16x32c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-16x64c4-minmax-fp32-avx512amx-prfm.c │ │ ├── qs8-qc8w-gemm-16x64c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-1x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-1x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-1x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc8w-gemm-1x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc8w-gemm-1x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc8w-gemm-1x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-1x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-1x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-1x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-1x16c2-minmax-fp32-wasmsimd-dot16x2.c │ │ ├── qs8-qc8w-gemm-1x16c2s2-minmax-fp32-wasmsimd-dot16x2.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-wasmsdot-u2-acc2.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-wasmusdot-u2-acc2.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-gemm-1x16c4-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-1x16c8-minmax-fp32-aarch64-neondot-ld128.c │ │ ├── qs8-qc8w-gemm-1x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-1x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qs8-qc8w-gemm-1x16c8-minmax-fp32-avx512skx.c │ │ ├── qs8-qc8w-gemm-1x16c8-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-1x16c8-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-1x16c8-minmax-fp32-neondot-ld64.c │ │ ├── qs8-qc8w-gemm-1x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-1x1c4-minmax-fp32-armsimd32.c │ │ ├── qs8-qc8w-gemm-1x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-gemm-1x2-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-gemm-1x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-gemm-1x2c4-minmax-fp32-armsimd32.c │ │ ├── qs8-qc8w-gemm-1x32c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-1x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-1x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-gemm-1x4-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-gemm-1x4-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-gemm-1x4c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-1x4c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-1x4c2-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c2-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c8-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c8-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-1x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-1x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-1x4v-minmax-fp32-rvv.c │ │ ├── qs8-qc8w-gemm-1x64c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-1x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-1x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a7-prfm.S │ │ ├── qs8-qc8w-gemm-1x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a7.S │ │ ├── qs8-qc8w-gemm-1x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a35-prfm.S │ │ ├── qs8-qc8w-gemm-1x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a35.S │ │ ├── qs8-qc8w-gemm-1x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-1x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-1x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-1x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-1x8c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-1x8c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-1x8c2-minmax-fp32-neon-mlal-dup.c │ │ ├── qs8-qc8w-gemm-1x8c2-minmax-fp32-neon-mlal-ld1r.c │ │ ├── qs8-qc8w-gemm-1x8c2-minmax-fp32-neon-mlal-ld2r.c │ │ ├── qs8-qc8w-gemm-1x8c2-minmax-fp32-neon-mlal-ld4r.c │ │ ├── qs8-qc8w-gemm-1x8c2-minmax-fp32-neonv8-mlal-dup.c │ │ ├── qs8-qc8w-gemm-1x8c2-minmax-fp32-neonv8-mlal-ld1r.c │ │ ├── qs8-qc8w-gemm-1x8c2-minmax-fp32-neonv8-mlal-ld2r.c │ │ ├── qs8-qc8w-gemm-1x8c2-minmax-fp32-neonv8-mlal-ld4r.c │ │ ├── qs8-qc8w-gemm-1x8c2s4-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-gemm-1x8c2s4-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-gemm-1x8c4-minmax-fp32-neon-mlal-dup.c │ │ ├── qs8-qc8w-gemm-1x8c4-minmax-fp32-neon-mlal-ld1r.c │ │ ├── qs8-qc8w-gemm-1x8c4-minmax-fp32-neon-mlal-ld2r.c │ │ ├── qs8-qc8w-gemm-1x8c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-gemm-1x8c4-minmax-fp32-neonv8-mlal-dup.c │ │ ├── qs8-qc8w-gemm-1x8c4-minmax-fp32-neonv8-mlal-ld1r.c │ │ ├── qs8-qc8w-gemm-1x8c4-minmax-fp32-neonv8-mlal-ld2r.c │ │ ├── qs8-qc8w-gemm-1x8c4s2-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-gemm-1x8c4s2-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-aarch64-neondot-ld128.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53-prfm.S │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53.S │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-prfm.S │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal.S │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-avx2.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-avx256skx.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-avxvnniint8-prfm.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-neondot-ld64.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-gemm-1x8c8-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-2x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-2x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-2x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc8w-gemm-2x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc8w-gemm-2x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc8w-gemm-2x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-2x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-2x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-2x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-2x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-2x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-2x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-2x1c4-minmax-fp32-armsimd32.c │ │ ├── qs8-qc8w-gemm-2x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-gemm-2x2-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-gemm-2x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-gemm-2x2c4-minmax-fp32-armsimd32.c │ │ ├── qs8-qc8w-gemm-2x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-2x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-gemm-2x4-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-gemm-2x4-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-gemm-2x4c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-2x4c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-2x4c2-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c2-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c8-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c8-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-2x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-2x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-2x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-2x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-2x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-2x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-2x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-2x8c16-minmax-fp32-asm-aarch64-neon-mlal.S │ │ ├── qs8-qc8w-gemm-2x8c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-2x8c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-2x8c2-minmax-fp32-neon-mlal-dup.c │ │ ├── qs8-qc8w-gemm-2x8c2-minmax-fp32-neon-mlal-ld1r.c │ │ ├── qs8-qc8w-gemm-2x8c2-minmax-fp32-neon-mlal-ld2r.c │ │ ├── qs8-qc8w-gemm-2x8c2-minmax-fp32-neon-mlal-ld4r.c │ │ ├── qs8-qc8w-gemm-2x8c2-minmax-fp32-neonv8-mlal-dup.c │ │ ├── qs8-qc8w-gemm-2x8c2-minmax-fp32-neonv8-mlal-ld1r.c │ │ ├── qs8-qc8w-gemm-2x8c2-minmax-fp32-neonv8-mlal-ld2r.c │ │ ├── qs8-qc8w-gemm-2x8c2-minmax-fp32-neonv8-mlal-ld4r.c │ │ ├── qs8-qc8w-gemm-2x8c2s4-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-gemm-2x8c2s4-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-gemm-2x8c4-minmax-fp32-neon-mlal-dup.c │ │ ├── qs8-qc8w-gemm-2x8c4-minmax-fp32-neon-mlal-ld1r.c │ │ ├── qs8-qc8w-gemm-2x8c4-minmax-fp32-neon-mlal-ld2r.c │ │ ├── qs8-qc8w-gemm-2x8c4-minmax-fp32-neonv8-mlal-dup.c │ │ ├── qs8-qc8w-gemm-2x8c4-minmax-fp32-neonv8-mlal-ld1r.c │ │ ├── qs8-qc8w-gemm-2x8c4-minmax-fp32-neonv8-mlal-ld2r.c │ │ ├── qs8-qc8w-gemm-2x8c4s2-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-gemm-2x8c4s2-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53-prfm.S │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53.S │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-asm-aarch64-neon-mlal-prfm.S │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-asm-aarch64-neon-mlal.S │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-asm-aarch64-neon-mull.S │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-avx2.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-avx256skx.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-gemm-2x8c8-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-3x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-3x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-3x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc8w-gemm-3x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc8w-gemm-3x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc8w-gemm-3x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-3x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-3x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-3x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-3x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-3x16c4-minmax-fp32-wasmsdot-u2-acc2.c │ │ ├── qs8-qc8w-gemm-3x16c4-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-gemm-3x16c4-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-3x16c4-minmax-fp32-wasmusdot-u2-acc2.c │ │ ├── qs8-qc8w-gemm-3x16c4-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-gemm-3x16c4-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-3x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-3x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-3x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-gemm-3x2-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-gemm-3x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-gemm-3x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-3x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-gemm-3x4-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-gemm-3x4-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-gemm-3x4c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-3x4c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-3x4c2-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c2-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c8-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c8-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-3x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-3x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-3x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-3x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-3x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-3x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-3x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-3x8c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-3x8c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-avx2.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-avx256skx.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-gemm-3x8c8-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-4x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-4x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-asm-aarch64-neon-mlal-lane-cortex-a53.S │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-asm-aarch64-neon-mlal-lane-ld64-prfm.S │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-asm-aarch64-neon-mlal-lane-ld64.S │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-4x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-4x16c2-minmax-fp32-wasmsimd-dot16x2.c │ │ ├── qs8-qc8w-gemm-4x16c2s2-minmax-fp32-wasmsimd-dot16x2.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-wasmsdot-u2-acc2.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-wasmusdot-u2-acc2.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-gemm-4x16c4-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-4x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-4x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-4x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-gemm-4x2-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-gemm-4x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-gemm-4x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-4x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-gemm-4x4-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-gemm-4x4-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-gemm-4x4c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-4x4c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-4x4c2-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c2-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-4x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-4x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-4x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-4x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-gemm-4x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-gemm-4x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-gemm-4x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-4x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-gemm-4x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-gemm-4x4v-minmax-fp32-rvv.c │ │ ├── qs8-qc8w-gemm-4x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a53.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a7-prfm.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a7.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-ld64-prfm.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-ld64.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a35-prfm.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a35.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a53-prfm.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a53.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-ld64-prfm.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-ld64.S │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-4x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-4x8c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-4x8c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-4x8c4-minmax-fp32-asm-aarch32-neondot-cortex-a55.S │ │ ├── qs8-qc8w-gemm-4x8c4-minmax-fp32-asm-aarch32-neondot-ld64.S │ │ ├── qs8-qc8w-gemm-4x8c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-avx2.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-avx256skx.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-gemm-5x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-5x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-5x16-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc8w-gemm-5x16-minmax-fp32-asm-aarch64-neondot-ld32.S │ │ ├── qs8-qc8w-gemm-5x16-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc8w-gemm-5x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-5x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-5x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-5x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-5x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qs8-qc8w-gemm-5x16c8-minmax-fp32-avx512skx.c │ │ ├── qs8-qc8w-gemm-5x16c8-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-5x16c8-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-5x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-5x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-5x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-gemm-5x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-gemm-5x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-gemm-5x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-gemm-5x8c8-minmax-fp32-avxvnniint8-prfm.c │ │ ├── qs8-qc8w-gemm-6x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-6x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-6x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-6x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-6x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-6x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-6x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-6x16c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-gemm-6x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-6x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-6x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-6x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-6x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-6x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-gemm-6x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-gemm-6x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-gemm-6x8c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-gemm-6x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-gemm-6x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-gemm-6x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-7x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-7x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-7x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-7x16c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-7x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-7x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-7x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-7x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qs8-qc8w-gemm-7x16c8-minmax-fp32-avx512skx.c │ │ ├── qs8-qc8w-gemm-7x16c8-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-7x16c8-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-7x32c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-7x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-7x4v-minmax-fp32-rvv.c │ │ ├── qs8-qc8w-gemm-7x64c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-gemm-7x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-7x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-gemm-7x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-gemm-7x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-gemm-7x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-gemm-8x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-8x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-8x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-8x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-8x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-8x16c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-gemm-8x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-8x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qs8-qc8w-gemm-8x16c8-minmax-fp32-avx512skx.c │ │ ├── qs8-qc8w-gemm-8x16c8-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-8x16c8-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-8x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-8x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-8x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-gemm-8x8c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-gemm-8x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-gemm-8x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-gemm-8x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-gemm-8x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-gemm-8x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-gemm-9x128c4-minmax-fp32-hvx-prfm.c │ │ ├── qs8-qc8w-gemm-9x16c4-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-9x16c4-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-9x16c4-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-9x16c8-minmax-fp32-asm-amd64-avx512vnni.S │ │ ├── qs8-qc8w-gemm-9x16c8-minmax-fp32-avx512vnni-prfm.c │ │ ├── qs8-qc8w-gemm-9x16c8-minmax-fp32-avx512vnni.c │ │ ├── qs8-qc8w-gemm-9x8c8-minmax-fp32-avx256vnni-prfm.c │ │ └── qs8-qc8w-gemm-9x8c8-minmax-fp32-avx256vnni.c ├── qs8-qc8w-igemm │ └── gen │ │ ├── qs8-qc8w-igemm-10x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-10x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-10x16c8-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-10x16c8-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-10x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-igemm-10x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-igemm-12x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-12x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-12x16c8-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-12x16c8-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-12x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-igemm-12x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-igemm-14x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-14x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-14x16c8-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-14x16c8-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-14x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-igemm-14x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-igemm-16x16c4-minmax-fp32-avx512amx-prfm.c │ │ ├── qs8-qc8w-igemm-16x16c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-16x32c4-minmax-fp32-avx512amx-prfm.c │ │ ├── qs8-qc8w-igemm-16x32c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-16x64c4-minmax-fp32-avx512amx-prfm.c │ │ ├── qs8-qc8w-igemm-16x64c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-1x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-1x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-1x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-1x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-1x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-1x16c2-minmax-fp32-wasmsimd-dot16x2.c │ │ ├── qs8-qc8w-igemm-1x16c2s2-minmax-fp32-wasmsimd-dot16x2.c │ │ ├── qs8-qc8w-igemm-1x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-1x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-1x16c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-1x16c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-igemm-1x16c4-minmax-fp32-wasmsdot-u2-acc2.c │ │ ├── qs8-qc8w-igemm-1x16c4-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-igemm-1x16c4-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-1x16c4-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-igemm-1x16c8-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-1x16c8-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-1x16c8-minmax-fp32-aarch64-neondot-ld128.c │ │ ├── qs8-qc8w-igemm-1x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qs8-qc8w-igemm-1x16c8-minmax-fp32-avx512skx.c │ │ ├── qs8-qc8w-igemm-1x16c8-minmax-fp32-neondot-ld64.c │ │ ├── qs8-qc8w-igemm-1x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-1x1c4-minmax-fp32-armsimd32.c │ │ ├── qs8-qc8w-igemm-1x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-igemm-1x2-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-igemm-1x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-igemm-1x2c4-minmax-fp32-armsimd32.c │ │ ├── qs8-qc8w-igemm-1x32c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-1x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-1x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-igemm-1x4-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-igemm-1x4-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-1x4c2-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c2-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c8-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c8-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-1x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-1x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-1x4v-minmax-fp32-rvv.c │ │ ├── qs8-qc8w-igemm-1x64c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-1x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-1x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a7-prfm.S │ │ ├── qs8-qc8w-igemm-1x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a7.S │ │ ├── qs8-qc8w-igemm-1x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a35-prfm.S │ │ ├── qs8-qc8w-igemm-1x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a35.S │ │ ├── qs8-qc8w-igemm-1x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-1x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-1x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-1x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-1x8c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-1x8c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-1x8c2-minmax-fp32-neon-mlal-dup.c │ │ ├── qs8-qc8w-igemm-1x8c2-minmax-fp32-neon-mlal-ld1r.c │ │ ├── qs8-qc8w-igemm-1x8c2-minmax-fp32-neon-mlal-ld2r.c │ │ ├── qs8-qc8w-igemm-1x8c2-minmax-fp32-neon-mlal-ld4r.c │ │ ├── qs8-qc8w-igemm-1x8c2-minmax-fp32-neonv8-mlal-dup.c │ │ ├── qs8-qc8w-igemm-1x8c2-minmax-fp32-neonv8-mlal-ld1r.c │ │ ├── qs8-qc8w-igemm-1x8c2-minmax-fp32-neonv8-mlal-ld2r.c │ │ ├── qs8-qc8w-igemm-1x8c2-minmax-fp32-neonv8-mlal-ld4r.c │ │ ├── qs8-qc8w-igemm-1x8c2s4-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-igemm-1x8c2s4-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-igemm-1x8c4-minmax-fp32-neon-mlal-dup.c │ │ ├── qs8-qc8w-igemm-1x8c4-minmax-fp32-neon-mlal-ld1r.c │ │ ├── qs8-qc8w-igemm-1x8c4-minmax-fp32-neon-mlal-ld2r.c │ │ ├── qs8-qc8w-igemm-1x8c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-igemm-1x8c4-minmax-fp32-neonv8-mlal-dup.c │ │ ├── qs8-qc8w-igemm-1x8c4-minmax-fp32-neonv8-mlal-ld1r.c │ │ ├── qs8-qc8w-igemm-1x8c4-minmax-fp32-neonv8-mlal-ld2r.c │ │ ├── qs8-qc8w-igemm-1x8c4s2-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-igemm-1x8c4s2-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-aarch64-neondot-ld128.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53-prfm.S │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53.S │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal-prfm.S │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-asm-aarch64-neon-mlal.S │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-avx2.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-avx256skx.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-avxvnniint8-prfm.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-neondot-ld64.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-igemm-1x8c8-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-2x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-2x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-2x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-2x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-2x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-2x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-2x1c4-minmax-fp32-armsimd32.c │ │ ├── qs8-qc8w-igemm-2x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-igemm-2x2-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-igemm-2x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-igemm-2x2c4-minmax-fp32-armsimd32.c │ │ ├── qs8-qc8w-igemm-2x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-2x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-igemm-2x4-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-igemm-2x4-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-igemm-2x4c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-2x4c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-2x4c2-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c2-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c8-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c8-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-2x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-2x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-2x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-2x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-2x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-2x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-2x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-2x8c16-minmax-fp32-asm-aarch64-neon-mlal.S │ │ ├── qs8-qc8w-igemm-2x8c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-2x8c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-2x8c2-minmax-fp32-neon-mlal-dup.c │ │ ├── qs8-qc8w-igemm-2x8c2-minmax-fp32-neon-mlal-ld1r.c │ │ ├── qs8-qc8w-igemm-2x8c2-minmax-fp32-neon-mlal-ld2r.c │ │ ├── qs8-qc8w-igemm-2x8c2-minmax-fp32-neon-mlal-ld4r.c │ │ ├── qs8-qc8w-igemm-2x8c2-minmax-fp32-neonv8-mlal-dup.c │ │ ├── qs8-qc8w-igemm-2x8c2-minmax-fp32-neonv8-mlal-ld1r.c │ │ ├── qs8-qc8w-igemm-2x8c2-minmax-fp32-neonv8-mlal-ld2r.c │ │ ├── qs8-qc8w-igemm-2x8c2-minmax-fp32-neonv8-mlal-ld4r.c │ │ ├── qs8-qc8w-igemm-2x8c2s4-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-igemm-2x8c2s4-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-igemm-2x8c4-minmax-fp32-neon-mlal-dup.c │ │ ├── qs8-qc8w-igemm-2x8c4-minmax-fp32-neon-mlal-ld1r.c │ │ ├── qs8-qc8w-igemm-2x8c4-minmax-fp32-neon-mlal-ld2r.c │ │ ├── qs8-qc8w-igemm-2x8c4-minmax-fp32-neonv8-mlal-dup.c │ │ ├── qs8-qc8w-igemm-2x8c4-minmax-fp32-neonv8-mlal-ld1r.c │ │ ├── qs8-qc8w-igemm-2x8c4-minmax-fp32-neonv8-mlal-ld2r.c │ │ ├── qs8-qc8w-igemm-2x8c4s2-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-igemm-2x8c4s2-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53-prfm.S │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-asm-aarch64-neon-mlal-cortex-a53.S │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-asm-aarch64-neon-mlal-prfm.S │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-asm-aarch64-neon-mlal.S │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-avx2.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-avx256skx.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-neon-mlal.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-neonv8-mlal.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-igemm-2x8c8-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-3x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-3x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-3x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-3x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-3x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-3x16c4-minmax-fp32-wasmsdot-u2-acc2.c │ │ ├── qs8-qc8w-igemm-3x16c4-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-igemm-3x16c4-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-3x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-3x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-igemm-3x2-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-igemm-3x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-igemm-3x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-3x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-igemm-3x4-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-igemm-3x4-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-igemm-3x4c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-3x4c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-3x4c2-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c2-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c8-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c8-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-3x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-3x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-3x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-3x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-3x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-3x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-3x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-3x8c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-3x8c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-avx2.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-avx256skx.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-igemm-3x8c8-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-4x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-4x16-minmax-fp32-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qs8-qc8w-igemm-4x16-minmax-fp32-asm-aarch64-neon-mlal-lane-cortex-a53.S │ │ ├── qs8-qc8w-igemm-4x16-minmax-fp32-asm-aarch64-neon-mlal-lane-ld64-prfm.S │ │ ├── qs8-qc8w-igemm-4x16-minmax-fp32-asm-aarch64-neon-mlal-lane-ld64.S │ │ ├── qs8-qc8w-igemm-4x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-4x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-4x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-4x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-4x16c2-minmax-fp32-wasmsimd-dot16x2.c │ │ ├── qs8-qc8w-igemm-4x16c2s2-minmax-fp32-wasmsimd-dot16x2.c │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-fp32-wasmsdot-u2-acc2.c │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-4x16c4-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-igemm-4x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-4x2-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-igemm-4x2-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-igemm-4x2-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-igemm-4x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-4x4-minmax-fp32-scalar-fmagic.c │ │ ├── qs8-qc8w-igemm-4x4-minmax-fp32-scalar-imagic.c │ │ ├── qs8-qc8w-igemm-4x4-minmax-fp32-scalar-lrintf.c │ │ ├── qs8-qc8w-igemm-4x4c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-4x4c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-4x4c2-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c2-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-4x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-4x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-4x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-4x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qs8-qc8w-igemm-4x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qs8-qc8w-igemm-4x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qs8-qc8w-igemm-4x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-4x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qs8-qc8w-igemm-4x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qs8-qc8w-igemm-4x4v-minmax-fp32-rvv.c │ │ ├── qs8-qc8w-igemm-4x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a53.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a7-prfm.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-cortex-a7.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-ld64-prfm.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neon-mlal-lane-ld64.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a35-prfm.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a35.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a53-prfm.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-cortex-a53.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-ld64-prfm.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-asm-aarch32-neonv8-mlal-lane-ld64.S │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-4x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-4x8c16-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-4x8c16-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-4x8c4-minmax-fp32-asm-aarch32-neondot-cortex-a55.S │ │ ├── qs8-qc8w-igemm-4x8c4-minmax-fp32-asm-aarch32-neondot-ld64.S │ │ ├── qs8-qc8w-igemm-4x8c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-avx2.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-avx256skx.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-wasmsdot-u2.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-wasmsdot.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-wasmusdot-u2.c │ │ ├── qs8-qc8w-igemm-4x8c8-minmax-fp32-wasmusdot.c │ │ ├── qs8-qc8w-igemm-5x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-5x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-5x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-5x16c8-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-5x16c8-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-5x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qs8-qc8w-igemm-5x16c8-minmax-fp32-avx512skx.c │ │ ├── qs8-qc8w-igemm-5x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-5x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-5x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-igemm-5x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-igemm-5x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-igemm-5x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-igemm-5x8c8-minmax-fp32-avxvnniint8-prfm.c │ │ ├── qs8-qc8w-igemm-6x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-6x16-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-6x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-6x16-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-6x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-6x16c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-igemm-6x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-6x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-6x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-6x8-minmax-fp32-neon-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-6x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qs8-qc8w-igemm-6x8-minmax-fp32-neonv8-mlal-lane-prfm.c │ │ ├── qs8-qc8w-igemm-6x8-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qs8-qc8w-igemm-6x8c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-igemm-6x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-igemm-6x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-igemm-6x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-7x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-7x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-7x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-7x16c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-7x16c8-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-7x16c8-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-7x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qs8-qc8w-igemm-7x16c8-minmax-fp32-avx512skx.c │ │ ├── qs8-qc8w-igemm-7x32c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-7x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-7x4v-minmax-fp32-rvv.c │ │ ├── qs8-qc8w-igemm-7x64c4-minmax-fp32-avx512amx.c │ │ ├── qs8-qc8w-igemm-7x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-7x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-igemm-7x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-igemm-7x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-igemm-7x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-igemm-8x128c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-8x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-8x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-8x16c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-igemm-8x16c8-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-8x16c8-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-8x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qs8-qc8w-igemm-8x16c8-minmax-fp32-avx512skx.c │ │ ├── qs8-qc8w-igemm-8x16c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-8x32c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-8x64c4-minmax-fp32-hvx.c │ │ ├── qs8-qc8w-igemm-8x8c4-minmax-fp32-neondot.c │ │ ├── qs8-qc8w-igemm-8x8c8-minmax-fp32-avx256vnni-prfm.c │ │ ├── qs8-qc8w-igemm-8x8c8-minmax-fp32-avx256vnni.c │ │ ├── qs8-qc8w-igemm-8x8c8-minmax-fp32-avxvnni-prfm.c │ │ ├── qs8-qc8w-igemm-8x8c8-minmax-fp32-avxvnni.c │ │ ├── qs8-qc8w-igemm-8x8c8-minmax-fp32-neoni8mm.c │ │ ├── qs8-qc8w-igemm-9x16c4-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-9x16c4-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-9x16c8-minmax-avx512vnni-prfm.c │ │ ├── qs8-qc8w-igemm-9x16c8-minmax-avx512vnni.c │ │ ├── qs8-qc8w-igemm-9x8c8-minmax-fp32-avx256vnni-prfm.c │ │ └── qs8-qc8w-igemm-9x8c8-minmax-fp32-avx256vnni.c ├── qs8-qu8-packw │ └── gen │ │ ├── qs8-qu8-packw-x16c8-gemm-gio-scalar.c │ │ ├── qs8-qu8-packw-x16c8-gemm-goi-avx256vnni-prfm.c │ │ ├── qs8-qu8-packw-x16c8-gemm-goi-avx256vnni.c │ │ ├── qs8-qu8-packw-x16c8-gemm-goi-avxvnni-prfm.c │ │ ├── qs8-qu8-packw-x16c8-gemm-goi-avxvnni.c │ │ ├── qs8-qu8-packw-x16c8-gemm-goi-scalar.c │ │ ├── qs8-qu8-packw-x4c8-gemm-gio-scalar.c │ │ ├── qs8-qu8-packw-x4c8-gemm-goi-scalar.c │ │ ├── qs8-qu8-packw-x8c8-gemm-gio-scalar.c │ │ ├── qs8-qu8-packw-x8c8-gemm-goi-avx256vnni-prfm.c │ │ ├── qs8-qu8-packw-x8c8-gemm-goi-avx256vnni.c │ │ ├── qs8-qu8-packw-x8c8-gemm-goi-avxvnni-prfm.c │ │ ├── qs8-qu8-packw-x8c8-gemm-goi-avxvnni.c │ │ ├── qs8-qu8-packw-x8c8-gemm-goi-scalar.c │ │ └── qs8-qu8-packw-x8c8-gemm-goi-wasmrelaxedsimd.c ├── qs8-rdsum │ ├── avx2.c.in │ ├── avx512skx.c.in │ ├── gen │ │ ├── qs8-rdsum-7p7x-minmax-fp32-avx2-u32.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-avx2-u64.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-avx512skx-u128.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-avx512skx-u64.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-neon-u16.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-neon-u32.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-neon-u64.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-sse41-u16.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-sse41-u32.c │ │ ├── qs8-rdsum-7p7x-minmax-fp32-sse41-u64.c │ │ ├── qs8-rdsum-7p7x-rvv-u1v.c │ │ ├── qs8-rdsum-7p7x-rvv-u2v.c │ │ ├── qs8-rdsum-7p7x-wasmsimd-u16.c │ │ ├── qs8-rdsum-7p7x-wasmsimd-u32.c │ │ ├── qs8-rdsum-7p7x-wasmsimd-u64.c │ │ └── qs8-rdsum-minmax-fp32-scalar-u1-acc1.c │ ├── neon.c.in │ ├── qs8-rdsum-minmax-fp32.inc │ ├── rvv.c.in │ ├── scalar.c.in │ ├── sse41.c.in │ └── wasmsimd.c.in ├── qs8-rsum │ ├── avx2.c.in │ ├── avx512skx.c.in │ ├── avx512vnni.c.in │ ├── avxvnni.c.in │ ├── gen │ │ ├── qs8-rsum-avx2-u128-acc2.c │ │ ├── qs8-rsum-avx2-u128-acc4.c │ │ ├── qs8-rsum-avx2-u32.c │ │ ├── qs8-rsum-avx2-u64-acc2.c │ │ ├── qs8-rsum-avx256skx-u128-acc2.c │ │ ├── qs8-rsum-avx256skx-u128-acc4.c │ │ ├── qs8-rsum-avx256skx-u32.c │ │ ├── qs8-rsum-avx256skx-u64-acc2.c │ │ ├── qs8-rsum-avx256vnni-u128-acc2.c │ │ ├── qs8-rsum-avx256vnni-u128-acc4.c │ │ ├── qs8-rsum-avx256vnni-u32.c │ │ ├── qs8-rsum-avx256vnni-u64-acc2.c │ │ ├── qs8-rsum-avx512skx-u128-acc2.c │ │ ├── qs8-rsum-avx512skx-u256-acc2.c │ │ ├── qs8-rsum-avx512skx-u256-acc4.c │ │ ├── qs8-rsum-avx512skx-u64.c │ │ ├── qs8-rsum-avx512vnni-u128-acc2.c │ │ ├── qs8-rsum-avx512vnni-u256-acc2.c │ │ ├── qs8-rsum-avx512vnni-u256-acc4.c │ │ ├── qs8-rsum-avx512vnni-u64.c │ │ ├── qs8-rsum-avxvnni-u128-acc2.c │ │ ├── qs8-rsum-avxvnni-u128-acc4.c │ │ ├── qs8-rsum-avxvnni-u32.c │ │ ├── qs8-rsum-avxvnni-u64-acc2.c │ │ ├── qs8-rsum-neon-u16.c │ │ ├── qs8-rsum-neon-u32-acc2.c │ │ ├── qs8-rsum-neon-u64-acc2.c │ │ ├── qs8-rsum-neon-u64-acc4.c │ │ ├── qs8-rsum-neondot-u16.c │ │ ├── qs8-rsum-neondot-u32-acc2.c │ │ ├── qs8-rsum-neondot-u64-acc2.c │ │ ├── qs8-rsum-neondot-u64-acc4.c │ │ ├── qs8-rsum-rvv-u1v.c │ │ ├── qs8-rsum-rvv-u2v.c │ │ ├── qs8-rsum-scalar-u1.c │ │ ├── qs8-rsum-scalar-u2.c │ │ ├── qs8-rsum-scalar-u4.c │ │ ├── qs8-rsum-ssse3-u16.c │ │ ├── qs8-rsum-ssse3-u32-acc2.c │ │ ├── qs8-rsum-ssse3-u64-acc2.c │ │ ├── qs8-rsum-ssse3-u64-acc4.c │ │ ├── qs8-rsum-wasmrelaxedsimd-u16.c │ │ ├── qs8-rsum-wasmrelaxedsimd-u32-acc2.c │ │ ├── qs8-rsum-wasmrelaxedsimd-u64-acc2.c │ │ ├── qs8-rsum-wasmrelaxedsimd-u64-acc4.c │ │ ├── qs8-rsum-wasmsimd-u16-acc2.c │ │ ├── qs8-rsum-wasmsimd-u32-acc2.c │ │ ├── qs8-rsum-wasmsimd-u32-acc4.c │ │ └── qs8-rsum-wasmsimd-u8.c │ ├── neon.c.in │ ├── neondot.c.in │ ├── qs8-rsum.inc │ ├── rvv.c.in │ ├── scalar.c.in │ ├── ssse3.c.in │ ├── wasmrelaxedsimd.c.in │ └── wasmsimd.c.in ├── qs8-vadd │ ├── avx2-mul32-ld64.c.in │ ├── avx512skx-mul32-ld128.c.in │ ├── gen │ │ ├── qs8-vadd-minmax-avx-mul16-ld64-u16.c │ │ ├── qs8-vadd-minmax-avx-mul16-ld64-u24.c │ │ ├── qs8-vadd-minmax-avx-mul16-ld64-u32.c │ │ ├── qs8-vadd-minmax-avx-mul16-ld64-u8.c │ │ ├── qs8-vadd-minmax-avx-mul32-ld32-u16.c │ │ ├── qs8-vadd-minmax-avx-mul32-ld32-u24.c │ │ ├── qs8-vadd-minmax-avx-mul32-ld32-u32.c │ │ ├── qs8-vadd-minmax-avx-mul32-ld32-u8.c │ │ ├── qs8-vadd-minmax-avx2-mul32-ld64-u16.c │ │ ├── qs8-vadd-minmax-avx2-mul32-ld64-u24.c │ │ ├── qs8-vadd-minmax-avx2-mul32-ld64-u32.c │ │ ├── qs8-vadd-minmax-avx2-mul32-ld64-u8.c │ │ ├── qs8-vadd-minmax-avx512skx-mul32-ld128-u16.c │ │ ├── qs8-vadd-minmax-avx512skx-mul32-ld128-u32.c │ │ ├── qs8-vadd-minmax-hvx-u128.c │ │ ├── qs8-vadd-minmax-hvx-u32.c │ │ ├── qs8-vadd-minmax-hvx-u64.c │ │ ├── qs8-vadd-minmax-hvx-u96.c │ │ ├── qs8-vadd-minmax-neon-ld128-u16.c │ │ ├── qs8-vadd-minmax-neon-ld128-u32.c │ │ ├── qs8-vadd-minmax-neon-ld64-u16.c │ │ ├── qs8-vadd-minmax-neon-ld64-u24.c │ │ ├── qs8-vadd-minmax-neon-ld64-u32.c │ │ ├── qs8-vadd-minmax-neon-ld64-u8.c │ │ ├── qs8-vadd-minmax-rvv-u1v.c │ │ ├── qs8-vadd-minmax-rvv-u2v.c │ │ ├── qs8-vadd-minmax-scalar-u1.c │ │ ├── qs8-vadd-minmax-scalar-u2.c │ │ ├── qs8-vadd-minmax-scalar-u4.c │ │ ├── qs8-vadd-minmax-sse2-mul16-ld64-u16.c │ │ ├── qs8-vadd-minmax-sse2-mul16-ld64-u24.c │ │ ├── qs8-vadd-minmax-sse2-mul16-ld64-u32.c │ │ ├── qs8-vadd-minmax-sse2-mul16-ld64-u8.c │ │ ├── qs8-vadd-minmax-sse41-mul16-ld64-u16.c │ │ ├── qs8-vadd-minmax-sse41-mul16-ld64-u24.c │ │ ├── qs8-vadd-minmax-sse41-mul16-ld64-u32.c │ │ ├── qs8-vadd-minmax-sse41-mul16-ld64-u8.c │ │ ├── qs8-vadd-minmax-sse41-mul32-ld32-u16.c │ │ ├── qs8-vadd-minmax-sse41-mul32-ld32-u24.c │ │ ├── qs8-vadd-minmax-sse41-mul32-ld32-u32.c │ │ ├── qs8-vadd-minmax-sse41-mul32-ld32-u8.c │ │ ├── qs8-vadd-minmax-wasmsimd-u16.c │ │ ├── qs8-vadd-minmax-wasmsimd-u24.c │ │ ├── qs8-vadd-minmax-wasmsimd-u32.c │ │ └── qs8-vadd-minmax-wasmsimd-u8.c │ ├── hvx.c.in │ ├── neon.c.in │ ├── qs8-vadd-minmax.inc │ ├── rvv.c.in │ ├── scalar.c.in │ ├── sse-mul16-ld64.c.in │ ├── sse-mul32-ld32.c.in │ └── wasmsimd.c.in ├── qs8-vaddc │ ├── avx2-mul32-ld64.c.in │ ├── avx512skx-mul32-ld128.c.in │ ├── gen │ │ ├── qs8-vaddc-minmax-avx-mul16-ld64-u16.c │ │ ├── qs8-vaddc-minmax-avx-mul16-ld64-u24.c │ │ ├── qs8-vaddc-minmax-avx-mul16-ld64-u32.c │ │ ├── qs8-vaddc-minmax-avx-mul16-ld64-u8.c │ │ ├── qs8-vaddc-minmax-avx-mul32-ld32-u16.c │ │ ├── qs8-vaddc-minmax-avx-mul32-ld32-u24.c │ │ ├── qs8-vaddc-minmax-avx-mul32-ld32-u32.c │ │ ├── qs8-vaddc-minmax-avx-mul32-ld32-u8.c │ │ ├── qs8-vaddc-minmax-avx2-mul32-ld64-u16.c │ │ ├── qs8-vaddc-minmax-avx2-mul32-ld64-u24.c │ │ ├── qs8-vaddc-minmax-avx2-mul32-ld64-u32.c │ │ ├── qs8-vaddc-minmax-avx2-mul32-ld64-u8.c │ │ ├── qs8-vaddc-minmax-avx512skx-mul32-ld128-u16.c │ │ ├── qs8-vaddc-minmax-avx512skx-mul32-ld128-u32.c │ │ ├── qs8-vaddc-minmax-hvx-u128.c │ │ ├── qs8-vaddc-minmax-hvx-u32.c │ │ ├── qs8-vaddc-minmax-hvx-u64.c │ │ ├── qs8-vaddc-minmax-hvx-u96.c │ │ ├── qs8-vaddc-minmax-neon-ld128-u16.c │ │ ├── qs8-vaddc-minmax-neon-ld128-u32.c │ │ ├── qs8-vaddc-minmax-neon-ld64-u16.c │ │ ├── qs8-vaddc-minmax-neon-ld64-u24.c │ │ ├── qs8-vaddc-minmax-neon-ld64-u32.c │ │ ├── qs8-vaddc-minmax-neon-ld64-u8.c │ │ ├── qs8-vaddc-minmax-rvv-u1v.c │ │ ├── qs8-vaddc-minmax-rvv-u2v.c │ │ ├── qs8-vaddc-minmax-scalar-u1.c │ │ ├── qs8-vaddc-minmax-scalar-u2.c │ │ ├── qs8-vaddc-minmax-scalar-u4.c │ │ ├── qs8-vaddc-minmax-sse2-mul16-ld64-u16.c │ │ ├── qs8-vaddc-minmax-sse2-mul16-ld64-u24.c │ │ ├── qs8-vaddc-minmax-sse2-mul16-ld64-u32.c │ │ ├── qs8-vaddc-minmax-sse2-mul16-ld64-u8.c │ │ ├── qs8-vaddc-minmax-sse41-mul16-ld64-u16.c │ │ ├── qs8-vaddc-minmax-sse41-mul16-ld64-u24.c │ │ ├── qs8-vaddc-minmax-sse41-mul16-ld64-u32.c │ │ ├── qs8-vaddc-minmax-sse41-mul16-ld64-u8.c │ │ ├── qs8-vaddc-minmax-sse41-mul32-ld32-u16.c │ │ ├── qs8-vaddc-minmax-sse41-mul32-ld32-u24.c │ │ ├── qs8-vaddc-minmax-sse41-mul32-ld32-u32.c │ │ ├── qs8-vaddc-minmax-sse41-mul32-ld32-u8.c │ │ ├── qs8-vaddc-minmax-wasmsimd-u16.c │ │ ├── qs8-vaddc-minmax-wasmsimd-u24.c │ │ ├── qs8-vaddc-minmax-wasmsimd-u32.c │ │ └── qs8-vaddc-minmax-wasmsimd-u8.c │ ├── hvx.c.in │ ├── neon.c.in │ ├── qs8-vaddc-minmax.inc │ ├── rvv.c.in │ ├── scalar.c.in │ ├── sse-mul16-ld64.c.in │ ├── sse-mul32-ld32.c.in │ └── wasmsimd.c.in ├── qs8-vcvt │ ├── armsimd32.c.in │ ├── avx2.c.in │ ├── gen │ │ ├── qs8-vcvt-armsimd32-u4.c │ │ ├── qs8-vcvt-armsimd32-u8.c │ │ ├── qs8-vcvt-avx-u16.c │ │ ├── qs8-vcvt-avx-u32.c │ │ ├── qs8-vcvt-avx-u8.c │ │ ├── qs8-vcvt-avx2-u16.c │ │ ├── qs8-vcvt-avx2-u32.c │ │ ├── qs8-vcvt-avx2-u64.c │ │ ├── qs8-vcvt-neon-u16.c │ │ ├── qs8-vcvt-neon-u32.c │ │ ├── qs8-vcvt-neon-u8.c │ │ ├── qs8-vcvt-scalar-u1.c │ │ ├── qs8-vcvt-scalar-u2.c │ │ ├── qs8-vcvt-scalar-u4.c │ │ ├── qs8-vcvt-sse2-u16.c │ │ ├── qs8-vcvt-sse2-u32.c │ │ ├── qs8-vcvt-sse41-u16.c │ │ ├── qs8-vcvt-sse41-u32.c │ │ ├── qs8-vcvt-sse41-u8.c │ │ ├── qs8-vcvt-ssse3-u16.c │ │ ├── qs8-vcvt-ssse3-u32.c │ │ ├── qs8-vcvt-wasmrelaxedsimd-u16.c │ │ ├── qs8-vcvt-wasmrelaxedsimd-u32.c │ │ ├── qs8-vcvt-wasmrelaxedsimd-u8.c │ │ ├── qs8-vcvt-wasmsimd-u16.c │ │ ├── qs8-vcvt-wasmsimd-u32.c │ │ └── qs8-vcvt-wasmsimd-u8.c │ ├── neon.c.in │ ├── qs8-vcvt.inc │ ├── scalar.c.in │ ├── sse2.c.in │ ├── sse4.c.in │ ├── ssse3.c.in │ └── wasmsimd.c.in ├── qs8-vlrelu │ ├── armsimd32.c.in │ ├── avx2.c.in │ ├── gen │ │ ├── qs8-vlrelu-armsimd32-u4.c │ │ ├── qs8-vlrelu-armsimd32-u8.c │ │ ├── qs8-vlrelu-avx-u16.c │ │ ├── qs8-vlrelu-avx-u32.c │ │ ├── qs8-vlrelu-avx-u8.c │ │ ├── qs8-vlrelu-avx2-u16.c │ │ ├── qs8-vlrelu-avx2-u32.c │ │ ├── qs8-vlrelu-avx2-u64.c │ │ ├── qs8-vlrelu-neon-u16.c │ │ ├── qs8-vlrelu-neon-u32.c │ │ ├── qs8-vlrelu-neon-u8.c │ │ ├── qs8-vlrelu-rvv-u1v.c │ │ ├── qs8-vlrelu-rvv-u2v.c │ │ ├── qs8-vlrelu-scalar-andxor-u1.c │ │ ├── qs8-vlrelu-scalar-andxor-u2.c │ │ ├── qs8-vlrelu-scalar-andxor-u4.c │ │ ├── qs8-vlrelu-scalar-select-u1.c │ │ ├── qs8-vlrelu-scalar-select-u2.c │ │ ├── qs8-vlrelu-scalar-select-u4.c │ │ ├── qs8-vlrelu-sse2-u16.c │ │ ├── qs8-vlrelu-sse2-u32.c │ │ ├── qs8-vlrelu-sse41-u16.c │ │ ├── qs8-vlrelu-sse41-u32.c │ │ ├── qs8-vlrelu-sse41-u8.c │ │ ├── qs8-vlrelu-ssse3-u16.c │ │ ├── qs8-vlrelu-ssse3-u32.c │ │ ├── qs8-vlrelu-wasmrelaxedsimd-arm-u16.c │ │ ├── qs8-vlrelu-wasmrelaxedsimd-arm-u32.c │ │ ├── qs8-vlrelu-wasmrelaxedsimd-x86-u16.c │ │ ├── qs8-vlrelu-wasmrelaxedsimd-x86-u32.c │ │ ├── qs8-vlrelu-wasmrelaxedsimd-x86-u8.c │ │ ├── qs8-vlrelu-wasmsimd-arm-u16.c │ │ ├── qs8-vlrelu-wasmsimd-arm-u32.c │ │ ├── qs8-vlrelu-wasmsimd-x86-u16.c │ │ ├── qs8-vlrelu-wasmsimd-x86-u32.c │ │ └── qs8-vlrelu-wasmsimd-x86-u8.c │ ├── neon.c.in │ ├── qs8-vlrelu.inc │ ├── rvv.c.in │ ├── scalar-andxor.c.in │ ├── scalar-select.c.in │ ├── sse2.c.in │ ├── sse4.c.in │ ├── ssse3.c.in │ ├── wasmsimd-arm.c.in │ └── wasmsimd-x86.c.in ├── qs8-vmul │ ├── gen │ │ ├── qs8-vmul-minmax-f32-rvv-u1v.c │ │ ├── qs8-vmul-minmax-f32-rvv-u2v.c │ │ ├── qs8-vmul-minmax-fp32-avx-mul16-ld64-u16.c │ │ ├── qs8-vmul-minmax-fp32-avx-mul16-ld64-u8.c │ │ ├── qs8-vmul-minmax-fp32-neon-ld128-u16.c │ │ ├── qs8-vmul-minmax-fp32-neon-ld64-u16.c │ │ ├── qs8-vmul-minmax-fp32-neon-ld64-u8.c │ │ ├── qs8-vmul-minmax-fp32-neonv8-ld128-u16.c │ │ ├── qs8-vmul-minmax-fp32-neonv8-ld64-u16.c │ │ ├── qs8-vmul-minmax-fp32-neonv8-ld64-u8.c │ │ ├── qs8-vmul-minmax-fp32-scalar-u1.c │ │ ├── qs8-vmul-minmax-fp32-scalar-u2.c │ │ ├── qs8-vmul-minmax-fp32-scalar-u4.c │ │ ├── qs8-vmul-minmax-fp32-sse2-mul16-ld64-u16.c │ │ ├── qs8-vmul-minmax-fp32-sse2-mul16-ld64-u8.c │ │ ├── qs8-vmul-minmax-fp32-sse41-mul16-ld64-u16.c │ │ ├── qs8-vmul-minmax-fp32-sse41-mul16-ld64-u8.c │ │ ├── qs8-vmul-minmax-fp32-wasmsimd-mul32-ld64-u16.c │ │ ├── qs8-vmul-minmax-fp32-wasmsimd-mul32-ld64-u8.c │ │ ├── qs8-vmul-minmax-rndnu-neon-ld128-u16.c │ │ ├── qs8-vmul-minmax-rndnu-neon-ld64-u16.c │ │ └── qs8-vmul-minmax-rndnu-neon-ld64-u8.c │ ├── neon.c.in │ ├── qs8-vmul-minmax-fp32.inc │ ├── qs8-vmul-minmax-rndnu.inc │ ├── rvv.c.in │ ├── scalar.c.in │ ├── sse-mul16-ld64.c.in │ └── wasmsimd-mul32-ld64.c.in ├── qs8-vmulc │ ├── gen │ │ ├── qs8-vmulc-minmax-f32-rvv-u1v.c │ │ ├── qs8-vmulc-minmax-f32-rvv-u2v.c │ │ ├── qs8-vmulc-minmax-fp32-avx-mul16-ld64-u16.c │ │ ├── qs8-vmulc-minmax-fp32-avx-mul16-ld64-u8.c │ │ ├── qs8-vmulc-minmax-fp32-neon-ld128-u16.c │ │ ├── qs8-vmulc-minmax-fp32-neon-ld64-u16.c │ │ ├── qs8-vmulc-minmax-fp32-neon-ld64-u8.c │ │ ├── qs8-vmulc-minmax-fp32-neonv8-ld128-u16.c │ │ ├── qs8-vmulc-minmax-fp32-neonv8-ld64-u16.c │ │ ├── qs8-vmulc-minmax-fp32-neonv8-ld64-u8.c │ │ ├── qs8-vmulc-minmax-fp32-scalar-u1.c │ │ ├── qs8-vmulc-minmax-fp32-scalar-u2.c │ │ ├── qs8-vmulc-minmax-fp32-scalar-u4.c │ │ ├── qs8-vmulc-minmax-fp32-sse2-mul16-ld64-u16.c │ │ ├── qs8-vmulc-minmax-fp32-sse2-mul16-ld64-u8.c │ │ ├── qs8-vmulc-minmax-fp32-sse41-mul16-ld64-u16.c │ │ ├── qs8-vmulc-minmax-fp32-sse41-mul16-ld64-u8.c │ │ ├── qs8-vmulc-minmax-fp32-wasmsimd-mul32-ld64-u16.c │ │ ├── qs8-vmulc-minmax-fp32-wasmsimd-mul32-ld64-u8.c │ │ ├── qs8-vmulc-minmax-rndnu-neon-ld128-u16.c │ │ ├── qs8-vmulc-minmax-rndnu-neon-ld64-u16.c │ │ └── qs8-vmulc-minmax-rndnu-neon-ld64-u8.c │ ├── neon.c.in │ ├── qs8-vmulc-minmax-fp32.inc │ ├── qs8-vmulc-minmax-rndnu.inc │ ├── rvv.c.in │ ├── scalar.c.in │ ├── sse-mul16-ld64.c.in │ └── wasmsimd-mul32-ld64.c.in ├── qs8-vprelu │ ├── avx2.c.in │ ├── gen │ │ ├── qs8-vprelu-avx2-u16.c │ │ ├── qs8-vprelu-scalar-u1.c │ │ ├── qs8-vprelu-scalar-u2.c │ │ ├── qs8-vprelu-scalar-u4.c │ │ └── qs8-vprelu-scalar-u8.c │ ├── qs8-vprelu.inc │ └── scalar.c.in ├── qs8-vpreluc │ ├── avx2.c.in │ ├── gen │ │ ├── qs8-vpreluc-avx2-u16.c │ │ ├── qs8-vpreluc-scalar-u1.c │ │ ├── qs8-vpreluc-scalar-u2.c │ │ ├── qs8-vpreluc-scalar-u4.c │ │ └── qs8-vpreluc-scalar-u8.c │ ├── qs8-vpreluc.inc │ └── scalar.c.in ├── qs8-vrpreluc │ ├── avx2.c.in │ ├── gen │ │ ├── qs8-vrpreluc-avx2-u16.c │ │ ├── qs8-vrpreluc-scalar-u1.c │ │ ├── qs8-vrpreluc-scalar-u2.c │ │ ├── qs8-vrpreluc-scalar-u4.c │ │ └── qs8-vrpreluc-scalar-u8.c │ ├── qs8-vrpreluc.inc │ └── scalar.c.in ├── qu8-dwconv │ ├── gen │ │ ├── qu8-dwconv-25p16c-minmax-fp32-avx-mul16.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-avx-mul32.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-avx2-mul32.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-avx512skx-mul32.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-neon-mul16.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-neonv8-mul16.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-sse2-mul16.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-sse41-mul16.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-sse41-mul32.c │ │ ├── qu8-dwconv-25p16c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qu8-dwconv-25p16c-minmax-rndnu-neon-mul16.c │ │ ├── qu8-dwconv-25p16c-minmax-rndnu-neon-mul8.c │ │ ├── qu8-dwconv-25p1c-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-dwconv-25p1c-minmax-fp32-scalar-imagic.c │ │ ├── qu8-dwconv-25p1c-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-dwconv-25p2c-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-dwconv-25p2c-minmax-fp32-scalar-imagic.c │ │ ├── qu8-dwconv-25p2c-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-dwconv-25p32c-minmax-fp32-avx2-mul32.c │ │ ├── qu8-dwconv-25p32c-minmax-fp32-avx512skx-mul32.c │ │ ├── qu8-dwconv-25p32c-minmax-fp32-neon-mul16.c │ │ ├── qu8-dwconv-25p32c-minmax-fp32-neonv8-mul16.c │ │ ├── qu8-dwconv-25p32c-minmax-rndnu-neon-mul16.c │ │ ├── qu8-dwconv-25p32c-minmax-rndnu-neon-mul8.c │ │ ├── qu8-dwconv-25p4c-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-dwconv-25p4c-minmax-fp32-scalar-imagic.c │ │ ├── qu8-dwconv-25p4c-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-avx-mul16.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-avx-mul32.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-avx2-mul32.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-neon-mul16.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-neonv8-mul16.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-sse2-mul16.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-sse41-mul16.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-sse41-mul32.c │ │ ├── qu8-dwconv-25p8c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qu8-dwconv-25p8c-minmax-rndnu-neon-mul16.c │ │ ├── qu8-dwconv-25p8c-minmax-rndnu-neon-mul8.c │ │ ├── qu8-dwconv-25p8vc-minmax-fp32-rvv.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-avx-mul16.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-avx-mul32.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-avx2-mul32.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-avx512skx-mul32.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-neon-mul16.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-neonv8-mul16.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-sse2-mul16.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-sse41-mul16.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-sse41-mul32.c │ │ ├── qu8-dwconv-9p16c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qu8-dwconv-9p16c-minmax-rndnu-neon-mul16.c │ │ ├── qu8-dwconv-9p16c-minmax-rndnu-neon-mul8.c │ │ ├── qu8-dwconv-9p1c-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-dwconv-9p1c-minmax-fp32-scalar-imagic.c │ │ ├── qu8-dwconv-9p1c-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-dwconv-9p1c-minmax-rndnu-scalar.c │ │ ├── qu8-dwconv-9p2c-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-dwconv-9p2c-minmax-fp32-scalar-imagic.c │ │ ├── qu8-dwconv-9p2c-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-dwconv-9p2c-minmax-rndnu-scalar.c │ │ ├── qu8-dwconv-9p32c-minmax-fp32-avx2-mul32.c │ │ ├── qu8-dwconv-9p32c-minmax-fp32-avx512skx-mul32.c │ │ ├── qu8-dwconv-9p32c-minmax-fp32-neon-mul16.c │ │ ├── qu8-dwconv-9p32c-minmax-fp32-neonv8-mul16.c │ │ ├── qu8-dwconv-9p32c-minmax-rndnu-neon-mul16.c │ │ ├── qu8-dwconv-9p32c-minmax-rndnu-neon-mul8.c │ │ ├── qu8-dwconv-9p4c-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-dwconv-9p4c-minmax-fp32-scalar-imagic.c │ │ ├── qu8-dwconv-9p4c-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-dwconv-9p4c-minmax-rndnu-scalar.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-avx-mul16.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-avx-mul32.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-avx2-mul32.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-neon-mul16.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-neonv8-mul16.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-sse2-mul16.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-sse41-mul16.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-sse41-mul32.c │ │ ├── qu8-dwconv-9p8c-minmax-fp32-wasmsimd-mul16.c │ │ ├── qu8-dwconv-9p8c-minmax-rndnu-neon-mul16.c │ │ ├── qu8-dwconv-9p8c-minmax-rndnu-neon-mul8.c │ │ └── qu8-dwconv-9p8vc-minmax-fp32-rvv.c │ ├── qu8-dwconv-minmax-fp32.inc │ ├── qu8-dwconv-minmax-rndnu.inc │ └── unipass-neon-mul8.c.in ├── qu8-f32-vcvt │ ├── gen │ │ ├── qu8-f32-vcvt-avx-u16.c │ │ ├── qu8-f32-vcvt-avx-u24.c │ │ ├── qu8-f32-vcvt-avx-u32.c │ │ ├── qu8-f32-vcvt-avx-u8.c │ │ ├── qu8-f32-vcvt-avx2-u16.c │ │ ├── qu8-f32-vcvt-avx2-u24.c │ │ ├── qu8-f32-vcvt-avx2-u32.c │ │ ├── qu8-f32-vcvt-avx2-u8.c │ │ ├── qu8-f32-vcvt-avx512skx-u16.c │ │ ├── qu8-f32-vcvt-avx512skx-u32.c │ │ ├── qu8-f32-vcvt-avx512skx-u48.c │ │ ├── qu8-f32-vcvt-avx512skx-u64.c │ │ ├── qu8-f32-vcvt-neon-u16.c │ │ ├── qu8-f32-vcvt-neon-u24.c │ │ ├── qu8-f32-vcvt-neon-u32.c │ │ ├── qu8-f32-vcvt-neon-u8.c │ │ ├── qu8-f32-vcvt-rvv-u1v.c │ │ ├── qu8-f32-vcvt-rvv-u2v.c │ │ ├── qu8-f32-vcvt-scalar-u1.c │ │ ├── qu8-f32-vcvt-scalar-u2.c │ │ ├── qu8-f32-vcvt-scalar-u3.c │ │ ├── qu8-f32-vcvt-scalar-u4.c │ │ ├── qu8-f32-vcvt-sse2-u16.c │ │ ├── qu8-f32-vcvt-sse2-u24.c │ │ ├── qu8-f32-vcvt-sse2-u32.c │ │ ├── qu8-f32-vcvt-sse2-u8.c │ │ ├── qu8-f32-vcvt-sse41-u16.c │ │ ├── qu8-f32-vcvt-sse41-u24.c │ │ ├── qu8-f32-vcvt-sse41-u32.c │ │ ├── qu8-f32-vcvt-sse41-u8.c │ │ ├── qu8-f32-vcvt-wasmsimd-u16.c │ │ ├── qu8-f32-vcvt-wasmsimd-u24.c │ │ ├── qu8-f32-vcvt-wasmsimd-u32.c │ │ └── qu8-f32-vcvt-wasmsimd-u8.c │ └── qu8-f32-vcvt.inc ├── qu8-gemm │ └── gen │ │ ├── qu8-gemm-1x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qu8-gemm-1x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qu8-gemm-1x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-1x16-minmax-rndnu16-neon-mlal-lane.c │ │ ├── qu8-gemm-1x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qu8-gemm-1x16c8-minmax-fp32-avx512skx.c │ │ ├── qu8-gemm-1x1c4-minmax-fp32-armsimd32.c │ │ ├── qu8-gemm-1x2-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-gemm-1x2-minmax-fp32-scalar-imagic.c │ │ ├── qu8-gemm-1x2-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-gemm-1x2-minmax-rndnu-scalar.c │ │ ├── qu8-gemm-1x2c4-minmax-fp32-armsimd32.c │ │ ├── qu8-gemm-1x4-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-gemm-1x4-minmax-fp32-scalar-imagic.c │ │ ├── qu8-gemm-1x4-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-gemm-1x4-minmax-rndnu-scalar.c │ │ ├── qu8-gemm-1x4c2-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-1x4c2-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-1x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-1x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-1x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-1x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-1x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-1x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-1x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-1x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-1x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-1x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-1x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-1x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-1x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-1x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-1x4c8-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-1x4c8-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-1x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-1x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-1x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-1x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-1x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-1x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-1x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qu8-gemm-1x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a7-prfm.S │ │ ├── qu8-gemm-1x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a7.S │ │ ├── qu8-gemm-1x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-1x8c8-minmax-fp32-avx2.c │ │ ├── qu8-gemm-1x8c8-minmax-fp32-avx256skx.c │ │ ├── qu8-gemm-2x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-2x1c4-minmax-fp32-armsimd32.c │ │ ├── qu8-gemm-2x2-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-gemm-2x2-minmax-fp32-scalar-imagic.c │ │ ├── qu8-gemm-2x2-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-gemm-2x2-minmax-rndnu-scalar.c │ │ ├── qu8-gemm-2x2c4-minmax-fp32-armsimd32.c │ │ ├── qu8-gemm-2x4-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-gemm-2x4-minmax-fp32-scalar-imagic.c │ │ ├── qu8-gemm-2x4-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-gemm-2x4-minmax-rndnu-scalar.c │ │ ├── qu8-gemm-2x4c2-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-2x4c2-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-2x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-2x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-2x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-2x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-2x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-2x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-2x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-2x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-2x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-2x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-2x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-2x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-2x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-2x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-2x4c8-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-2x4c8-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-2x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-2x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-2x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-2x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-2x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-2x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-2x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-2x8c8-minmax-fp32-avx2.c │ │ ├── qu8-gemm-3x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-3x2-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-gemm-3x2-minmax-fp32-scalar-imagic.c │ │ ├── qu8-gemm-3x2-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-gemm-3x2-minmax-rndnu-scalar.c │ │ ├── qu8-gemm-3x4-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-gemm-3x4-minmax-fp32-scalar-imagic.c │ │ ├── qu8-gemm-3x4-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-gemm-3x4-minmax-rndnu-scalar.c │ │ ├── qu8-gemm-3x4c2-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-3x4c2-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-3x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-3x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-3x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-3x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-3x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-3x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-3x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-3x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-3x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-3x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-3x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-3x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-3x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-3x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-3x4c8-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-3x4c8-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-3x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-3x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-3x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-3x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-3x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-3x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-3x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-3x8c8-minmax-fp32-avx2.c │ │ ├── qu8-gemm-4x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qu8-gemm-4x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S │ │ ├── qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S │ │ ├── qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S │ │ ├── qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S │ │ ├── qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S │ │ ├── qu8-gemm-4x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-4x16-minmax-rndnu16-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qu8-gemm-4x16-minmax-rndnu16-asm-aarch64-neon-mlal-lane-cortex-a53.S │ │ ├── qu8-gemm-4x2-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-gemm-4x2-minmax-fp32-scalar-imagic.c │ │ ├── qu8-gemm-4x2-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-gemm-4x2-minmax-rndnu-scalar.c │ │ ├── qu8-gemm-4x4-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-gemm-4x4-minmax-fp32-scalar-imagic.c │ │ ├── qu8-gemm-4x4-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-gemm-4x4-minmax-rndnu-scalar.c │ │ ├── qu8-gemm-4x4c2-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-4x4c2-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-4x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-4x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-4x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-4x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-4x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-4x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-4x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qu8-gemm-4x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qu8-gemm-4x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qu8-gemm-4x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qu8-gemm-4x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qu8-gemm-4x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qu8-gemm-4x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-4x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-4x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-gemm-4x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-gemm-4x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qu8-gemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qu8-gemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a53.S │ │ ├── qu8-gemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a7-prfm.S │ │ ├── qu8-gemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a7.S │ │ ├── qu8-gemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-ld64-prfm.S │ │ ├── qu8-gemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-ld64.S │ │ ├── qu8-gemm-4x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-4x8c8-minmax-fp32-avx2.c │ │ ├── qu8-gemm-5x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qu8-gemm-5x16c8-minmax-fp32-avx512skx.c │ │ ├── qu8-gemm-6x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-6x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-gemm-7x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qu8-gemm-7x16c8-minmax-fp32-avx512skx.c │ │ ├── qu8-gemm-8x16c8-minmax-fp32-avx512skx-prfm.c │ │ └── qu8-gemm-8x16c8-minmax-fp32-avx512skx.c ├── qu8-igemm │ └── gen │ │ ├── qu8-igemm-1x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qu8-igemm-1x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qu8-igemm-1x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-1x16-minmax-rndnu16-neon-mlal-lane.c │ │ ├── qu8-igemm-1x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qu8-igemm-1x16c8-minmax-fp32-avx512skx.c │ │ ├── qu8-igemm-1x1c4-minmax-fp32-armsimd32.c │ │ ├── qu8-igemm-1x2-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-igemm-1x2-minmax-fp32-scalar-imagic.c │ │ ├── qu8-igemm-1x2-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-igemm-1x2-minmax-rndnu-scalar.c │ │ ├── qu8-igemm-1x2c4-minmax-fp32-armsimd32.c │ │ ├── qu8-igemm-1x4-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-igemm-1x4-minmax-fp32-scalar-imagic.c │ │ ├── qu8-igemm-1x4-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-igemm-1x4-minmax-rndnu-scalar.c │ │ ├── qu8-igemm-1x4c2-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-1x4c2-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-1x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-1x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-1x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-1x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-1x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-1x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-1x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-1x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-1x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-1x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-1x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-1x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-1x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-1x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-1x4c8-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-1x4c8-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-1x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-1x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-1x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-1x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-1x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-1x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-1x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qu8-igemm-1x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a7-prfm.S │ │ ├── qu8-igemm-1x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a7.S │ │ ├── qu8-igemm-1x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-1x8c8-minmax-fp32-avx2.c │ │ ├── qu8-igemm-1x8c8-minmax-fp32-avx256skx.c │ │ ├── qu8-igemm-2x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-2x1c4-minmax-fp32-armsimd32.c │ │ ├── qu8-igemm-2x2-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-igemm-2x2-minmax-fp32-scalar-imagic.c │ │ ├── qu8-igemm-2x2-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-igemm-2x2-minmax-rndnu-scalar.c │ │ ├── qu8-igemm-2x2c4-minmax-fp32-armsimd32.c │ │ ├── qu8-igemm-2x4-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-igemm-2x4-minmax-fp32-scalar-imagic.c │ │ ├── qu8-igemm-2x4-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-igemm-2x4-minmax-rndnu-scalar.c │ │ ├── qu8-igemm-2x4c2-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-2x4c2-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-2x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-2x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-2x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-2x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-2x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-2x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-2x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-2x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-2x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-2x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-2x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-2x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-2x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-2x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-2x4c8-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-2x4c8-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-2x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-2x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-2x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-2x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-2x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-2x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-2x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-2x8c8-minmax-fp32-avx2.c │ │ ├── qu8-igemm-3x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-3x2-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-igemm-3x2-minmax-fp32-scalar-imagic.c │ │ ├── qu8-igemm-3x2-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-igemm-3x2-minmax-rndnu-scalar.c │ │ ├── qu8-igemm-3x4-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-igemm-3x4-minmax-fp32-scalar-imagic.c │ │ ├── qu8-igemm-3x4-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-igemm-3x4-minmax-rndnu-scalar.c │ │ ├── qu8-igemm-3x4c2-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-3x4c2-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-3x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-3x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-3x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-3x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-3x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-3x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-3x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-3x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-3x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-3x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-3x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-3x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-3x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-3x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-3x4c8-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-3x4c8-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-3x4c8-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-3x4c8-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-3x4c8-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-3x4c8-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-3x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-3x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-3x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-3x8c8-minmax-fp32-avx2.c │ │ ├── qu8-igemm-4x16-minmax-fp32-neon-mlal-lane.c │ │ ├── qu8-igemm-4x16-minmax-fp32-neonv8-mlal-lane.c │ │ ├── qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S │ │ ├── qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S │ │ ├── qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S │ │ ├── qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S │ │ ├── qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S │ │ ├── qu8-igemm-4x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-4x16-minmax-rndnu16-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qu8-igemm-4x16-minmax-rndnu16-asm-aarch64-neon-mlal-lane-cortex-a53.S │ │ ├── qu8-igemm-4x2-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-igemm-4x2-minmax-fp32-scalar-imagic.c │ │ ├── qu8-igemm-4x2-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-igemm-4x2-minmax-rndnu-scalar.c │ │ ├── qu8-igemm-4x4-minmax-fp32-scalar-fmagic.c │ │ ├── qu8-igemm-4x4-minmax-fp32-scalar-imagic.c │ │ ├── qu8-igemm-4x4-minmax-fp32-scalar-lrintf.c │ │ ├── qu8-igemm-4x4-minmax-rndnu-scalar.c │ │ ├── qu8-igemm-4x4c2-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-4x4c2-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-4x4c2-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-4x4c2-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-4x4c2-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-4x4c2-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-4x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-4x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-4x4c2s4-minmax-fp32-avx-ld128.c │ │ ├── qu8-igemm-4x4c2s4-minmax-fp32-avx-ld64.c │ │ ├── qu8-igemm-4x4c2s4-minmax-fp32-sse2-ld128.c │ │ ├── qu8-igemm-4x4c2s4-minmax-fp32-sse2-ld64.c │ │ ├── qu8-igemm-4x4c2s4-minmax-fp32-sse41-ld128.c │ │ ├── qu8-igemm-4x4c2s4-minmax-fp32-sse41-ld64.c │ │ ├── qu8-igemm-4x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-4x4c2s4-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-4x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c │ │ ├── qu8-igemm-4x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c │ │ ├── qu8-igemm-4x8-minmax-fp32-neon-mlal-lane.c │ │ ├── qu8-igemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a53-prfm.S │ │ ├── qu8-igemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a53.S │ │ ├── qu8-igemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a7-prfm.S │ │ ├── qu8-igemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-cortex-a7.S │ │ ├── qu8-igemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-ld64-prfm.S │ │ ├── qu8-igemm-4x8-minmax-rndnu-asm-aarch32-neon-mlal-lane-ld64.S │ │ ├── qu8-igemm-4x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-4x8c8-minmax-fp32-avx2.c │ │ ├── qu8-igemm-5x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qu8-igemm-5x16c8-minmax-fp32-avx512skx.c │ │ ├── qu8-igemm-6x16-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-6x8-minmax-rndnu-neon-mlal-lane.c │ │ ├── qu8-igemm-7x16c8-minmax-fp32-avx512skx-prfm.c │ │ ├── qu8-igemm-7x16c8-minmax-fp32-avx512skx.c │ │ ├── qu8-igemm-8x16c8-minmax-fp32-avx512skx-prfm.c │ │ └── qu8-igemm-8x16c8-minmax-fp32-avx512skx.c ├── qu8-rdsum │ ├── gen │ │ ├── qu8-rdsum-7p7x-neon-u16.c │ │ ├── qu8-rdsum-7p7x-neon-u32.c │ │ ├── qu8-rdsum-7p7x-neon-u64.c │ │ ├── qu8-rdsum-7p7x-rvv-u1v.c │ │ ├── qu8-rdsum-7p7x-rvv-u2v.c │ │ ├── qu8-rdsum-7p7x-ssse3-u16.c │ │ ├── qu8-rdsum-7p7x-ssse3-u32.c │ │ ├── qu8-rdsum-7p7x-ssse3-u64.c │ │ ├── qu8-rdsum-7p7x-wasmsimd-u16.c │ │ ├── qu8-rdsum-7p7x-wasmsimd-u32.c │ │ ├── qu8-rdsum-7p7x-wasmsimd-u64.c │ │ └── qu8-rdsum-scalar.c │ ├── neon.c.in │ ├── qu8-rdsum.inc │ ├── rvv.c.in │ ├── scalar.c.in │ └── ssse3.c.in ├── qu8-rsum │ ├── avx2.c.in │ ├── gen │ │ ├── qu8-rsum-avx2-u128-acc2.c │ │ ├── qu8-rsum-avx2-u128-acc4.c │ │ ├── qu8-rsum-avx2-u32.c │ │ ├── qu8-rsum-avx2-u64-acc2.c │ │ ├── qu8-rsum-neon-u16.c │ │ ├── qu8-rsum-neon-u32-acc2.c │ │ ├── qu8-rsum-neon-u64-acc2.c │ │ ├── qu8-rsum-neon-u64-acc4.c │ │ ├── qu8-rsum-rvv-u1v.c │ │ ├── qu8-rsum-rvv-u2v.c │ │ ├── qu8-rsum-scalar-u1.c │ │ ├── qu8-rsum-scalar-u2.c │ │ ├── qu8-rsum-scalar-u4.c │ │ ├── qu8-rsum-sse2-u16.c │ │ ├── qu8-rsum-sse2-u32-acc2.c │ │ ├── qu8-rsum-sse2-u64-acc2.c │ │ ├── qu8-rsum-sse2-u64-acc4.c │ │ ├── qu8-rsum-wasmsimd-u16-acc2.c │ │ ├── qu8-rsum-wasmsimd-u32-acc2.c │ │ ├── qu8-rsum-wasmsimd-u32-acc4.c │ │ └── qu8-rsum-wasmsimd-u8.c │ ├── qu8-rsum.inc │ ├── rvv.c.in │ ├── scalar.c.in │ └── sse2.c.in ├── qu8-vadd │ ├── gen │ │ ├── qu8-vadd-minmax-avx-mul16-ld64-u16.c │ │ ├── qu8-vadd-minmax-avx-mul16-ld64-u8.c │ │ ├── qu8-vadd-minmax-avx-mul32-ld32-u16.c │ │ ├── qu8-vadd-minmax-avx-mul32-ld32-u8.c │ │ ├── qu8-vadd-minmax-avx2-mul32-ld64-u16.c │ │ ├── qu8-vadd-minmax-avx2-mul32-ld64-u8.c │ │ ├── qu8-vadd-minmax-avx512skx-mul32-ld128-u16.c │ │ ├── qu8-vadd-minmax-avx512skx-mul32-ld128-u32.c │ │ ├── qu8-vadd-minmax-neon-ld128-u16.c │ │ ├── qu8-vadd-minmax-neon-ld64-u16.c │ │ ├── qu8-vadd-minmax-neon-ld64-u32.c │ │ ├── qu8-vadd-minmax-neon-ld64-u8.c │ │ ├── qu8-vadd-minmax-rvv-u1v.c │ │ ├── qu8-vadd-minmax-rvv-u2v.c │ │ ├── qu8-vadd-minmax-scalar-u1.c │ │ ├── qu8-vadd-minmax-scalar-u2.c │ │ ├── qu8-vadd-minmax-scalar-u4.c │ │ ├── qu8-vadd-minmax-sse2-mul16-ld64-u16.c │ │ ├── qu8-vadd-minmax-sse2-mul16-ld64-u8.c │ │ ├── qu8-vadd-minmax-sse41-mul16-ld64-u16.c │ │ ├── qu8-vadd-minmax-sse41-mul16-ld64-u8.c │ │ ├── qu8-vadd-minmax-sse41-mul32-ld32-u16.c │ │ ├── qu8-vadd-minmax-sse41-mul32-ld32-u8.c │ │ ├── qu8-vadd-minmax-wasmsimd-u16.c │ │ ├── qu8-vadd-minmax-wasmsimd-u32.c │ │ └── qu8-vadd-minmax-wasmsimd-u8.c │ └── qu8-vadd-minmax.inc ├── qu8-vaddc │ ├── gen │ │ ├── qu8-vaddc-minmax-avx-mul16-ld64-u16.c │ │ ├── qu8-vaddc-minmax-avx-mul16-ld64-u8.c │ │ ├── qu8-vaddc-minmax-avx-mul32-ld32-u16.c │ │ ├── qu8-vaddc-minmax-avx-mul32-ld32-u8.c │ │ ├── qu8-vaddc-minmax-avx2-mul32-ld64-u16.c │ │ ├── qu8-vaddc-minmax-avx2-mul32-ld64-u8.c │ │ ├── qu8-vaddc-minmax-avx512skx-mul32-ld128-u16.c │ │ ├── qu8-vaddc-minmax-avx512skx-mul32-ld128-u32.c │ │ ├── qu8-vaddc-minmax-neon-ld128-u16.c │ │ ├── qu8-vaddc-minmax-neon-ld64-u16.c │ │ ├── qu8-vaddc-minmax-neon-ld64-u32.c │ │ ├── qu8-vaddc-minmax-neon-ld64-u8.c │ │ ├── qu8-vaddc-minmax-rvv-u1v.c │ │ ├── qu8-vaddc-minmax-rvv-u2v.c │ │ ├── qu8-vaddc-minmax-scalar-u1.c │ │ ├── qu8-vaddc-minmax-scalar-u2.c │ │ ├── qu8-vaddc-minmax-scalar-u4.c │ │ ├── qu8-vaddc-minmax-sse2-mul16-ld64-u16.c │ │ ├── qu8-vaddc-minmax-sse2-mul16-ld64-u8.c │ │ ├── qu8-vaddc-minmax-sse41-mul16-ld64-u16.c │ │ ├── qu8-vaddc-minmax-sse41-mul16-ld64-u8.c │ │ ├── qu8-vaddc-minmax-sse41-mul32-ld32-u16.c │ │ ├── qu8-vaddc-minmax-sse41-mul32-ld32-u8.c │ │ ├── qu8-vaddc-minmax-wasmsimd-u16.c │ │ ├── qu8-vaddc-minmax-wasmsimd-u32.c │ │ └── qu8-vaddc-minmax-wasmsimd-u8.c │ └── qu8-vaddc-minmax.inc ├── qu8-vcvt │ ├── gen │ │ ├── qu8-vcvt-armsimd32-u4.c │ │ ├── qu8-vcvt-armsimd32-u8.c │ │ ├── qu8-vcvt-avx-u16.c │ │ ├── qu8-vcvt-avx-u32.c │ │ ├── qu8-vcvt-avx-u8.c │ │ ├── qu8-vcvt-avx2-u16.c │ │ ├── qu8-vcvt-avx2-u32.c │ │ ├── qu8-vcvt-avx2-u64.c │ │ ├── qu8-vcvt-neon-u16.c │ │ ├── qu8-vcvt-neon-u32.c │ │ ├── qu8-vcvt-neon-u8.c │ │ ├── qu8-vcvt-scalar-u1.c │ │ ├── qu8-vcvt-scalar-u2.c │ │ ├── qu8-vcvt-scalar-u4.c │ │ ├── qu8-vcvt-sse2-u16.c │ │ ├── qu8-vcvt-sse2-u32.c │ │ ├── qu8-vcvt-sse41-u16.c │ │ ├── qu8-vcvt-sse41-u32.c │ │ ├── qu8-vcvt-sse41-u8.c │ │ ├── qu8-vcvt-ssse3-u16.c │ │ ├── qu8-vcvt-ssse3-u32.c │ │ ├── qu8-vcvt-wasmrelaxedsimd-u16.c │ │ ├── qu8-vcvt-wasmrelaxedsimd-u32.c │ │ ├── qu8-vcvt-wasmrelaxedsimd-u8.c │ │ ├── qu8-vcvt-wasmsimd-u16.c │ │ ├── qu8-vcvt-wasmsimd-u32.c │ │ └── qu8-vcvt-wasmsimd-u8.c │ └── qu8-vcvt.inc ├── qu8-vlrelu │ ├── gen │ │ ├── qu8-vlrelu-armsimd32-u4.c │ │ ├── qu8-vlrelu-armsimd32-u8.c │ │ ├── qu8-vlrelu-avx-u16.c │ │ ├── qu8-vlrelu-avx-u32.c │ │ ├── qu8-vlrelu-avx-u8.c │ │ ├── qu8-vlrelu-avx2-u16.c │ │ ├── qu8-vlrelu-avx2-u32.c │ │ ├── qu8-vlrelu-avx2-u64.c │ │ ├── qu8-vlrelu-neon-u16.c │ │ ├── qu8-vlrelu-neon-u32.c │ │ ├── qu8-vlrelu-neon-u8.c │ │ ├── qu8-vlrelu-rvv-u1v.c │ │ ├── qu8-vlrelu-rvv-u2v.c │ │ ├── qu8-vlrelu-scalar-andxor-u1.c │ │ ├── qu8-vlrelu-scalar-andxor-u2.c │ │ ├── qu8-vlrelu-scalar-andxor-u4.c │ │ ├── qu8-vlrelu-scalar-select-u1.c │ │ ├── qu8-vlrelu-scalar-select-u2.c │ │ ├── qu8-vlrelu-scalar-select-u4.c │ │ ├── qu8-vlrelu-sse2-u16.c │ │ ├── qu8-vlrelu-sse2-u32.c │ │ ├── qu8-vlrelu-sse41-u16.c │ │ ├── qu8-vlrelu-sse41-u32.c │ │ ├── qu8-vlrelu-sse41-u8.c │ │ ├── qu8-vlrelu-ssse3-u16.c │ │ ├── qu8-vlrelu-ssse3-u32.c │ │ ├── qu8-vlrelu-wasmrelaxedsimd-arm-u16.c │ │ ├── qu8-vlrelu-wasmrelaxedsimd-arm-u32.c │ │ ├── qu8-vlrelu-wasmrelaxedsimd-x86-u16.c │ │ ├── qu8-vlrelu-wasmrelaxedsimd-x86-u32.c │ │ ├── qu8-vlrelu-wasmrelaxedsimd-x86-u8.c │ │ ├── qu8-vlrelu-wasmsimd-arm-u16.c │ │ ├── qu8-vlrelu-wasmsimd-arm-u32.c │ │ ├── qu8-vlrelu-wasmsimd-x86-u16.c │ │ ├── qu8-vlrelu-wasmsimd-x86-u32.c │ │ └── qu8-vlrelu-wasmsimd-x86-u8.c │ └── qu8-vlrelu.inc ├── qu8-vmul │ ├── gen │ │ ├── qu8-vmul-minmax-f32-rvv-u1v.c │ │ ├── qu8-vmul-minmax-f32-rvv-u2v.c │ │ ├── qu8-vmul-minmax-fp32-avx-mul16-ld64-u16.c │ │ ├── qu8-vmul-minmax-fp32-avx-mul16-ld64-u8.c │ │ ├── qu8-vmul-minmax-fp32-neon-ld128-u16.c │ │ ├── qu8-vmul-minmax-fp32-neon-ld64-u16.c │ │ ├── qu8-vmul-minmax-fp32-neon-ld64-u8.c │ │ ├── qu8-vmul-minmax-fp32-neonv8-ld128-u16.c │ │ ├── qu8-vmul-minmax-fp32-neonv8-ld64-u16.c │ │ ├── qu8-vmul-minmax-fp32-neonv8-ld64-u8.c │ │ ├── qu8-vmul-minmax-fp32-scalar-u1.c │ │ ├── qu8-vmul-minmax-fp32-scalar-u2.c │ │ ├── qu8-vmul-minmax-fp32-scalar-u4.c │ │ ├── qu8-vmul-minmax-fp32-sse2-mul16-ld64-u16.c │ │ ├── qu8-vmul-minmax-fp32-sse2-mul16-ld64-u8.c │ │ ├── qu8-vmul-minmax-fp32-sse41-mul16-ld64-u16.c │ │ ├── qu8-vmul-minmax-fp32-sse41-mul16-ld64-u8.c │ │ ├── qu8-vmul-minmax-fp32-wasmsimd-mul32-ld64-u16.c │ │ ├── qu8-vmul-minmax-fp32-wasmsimd-mul32-ld64-u8.c │ │ ├── qu8-vmul-minmax-rndnu-neon-ld128-u16.c │ │ ├── qu8-vmul-minmax-rndnu-neon-ld64-u16.c │ │ └── qu8-vmul-minmax-rndnu-neon-ld64-u8.c │ ├── qu8-vmul-minmax-fp32.inc │ └── qu8-vmul-minmax-rndnu.inc ├── qu8-vmulc │ ├── gen │ │ ├── qu8-vmulc-minmax-f32-rvv-u1v.c │ │ ├── qu8-vmulc-minmax-f32-rvv-u2v.c │ │ ├── qu8-vmulc-minmax-fp32-avx-mul16-ld64-u16.c │ │ ├── qu8-vmulc-minmax-fp32-avx-mul16-ld64-u8.c │ │ ├── qu8-vmulc-minmax-fp32-neon-ld128-u16.c │ │ ├── qu8-vmulc-minmax-fp32-neon-ld64-u16.c │ │ ├── qu8-vmulc-minmax-fp32-neon-ld64-u8.c │ │ ├── qu8-vmulc-minmax-fp32-neonv8-ld128-u16.c │ │ ├── qu8-vmulc-minmax-fp32-neonv8-ld64-u16.c │ │ ├── qu8-vmulc-minmax-fp32-neonv8-ld64-u8.c │ │ ├── qu8-vmulc-minmax-fp32-scalar-u1.c │ │ ├── qu8-vmulc-minmax-fp32-scalar-u2.c │ │ ├── qu8-vmulc-minmax-fp32-scalar-u4.c │ │ ├── qu8-vmulc-minmax-fp32-sse2-mul16-ld64-u16.c │ │ ├── qu8-vmulc-minmax-fp32-sse2-mul16-ld64-u8.c │ │ ├── qu8-vmulc-minmax-fp32-sse41-mul16-ld64-u16.c │ │ ├── qu8-vmulc-minmax-fp32-sse41-mul16-ld64-u8.c │ │ ├── qu8-vmulc-minmax-fp32-wasmsimd-mul32-ld64-u16.c │ │ ├── qu8-vmulc-minmax-fp32-wasmsimd-mul32-ld64-u8.c │ │ ├── qu8-vmulc-minmax-rndnu-neon-ld128-u16.c │ │ ├── qu8-vmulc-minmax-rndnu-neon-ld64-u16.c │ │ └── qu8-vmulc-minmax-rndnu-neon-ld64-u8.c │ ├── qu8-vmulc-minmax-fp32.inc │ └── qu8-vmulc-minmax-rndnu.inc ├── qu8-vprelu │ ├── gen │ │ ├── qu8-vprelu-avx2-u16.c │ │ ├── qu8-vprelu-scalar-u1.c │ │ ├── qu8-vprelu-scalar-u2.c │ │ ├── qu8-vprelu-scalar-u4.c │ │ └── qu8-vprelu-scalar-u8.c │ └── qu8-vprelu.inc ├── qu8-vpreluc │ ├── gen │ │ ├── qu8-vpreluc-avx2-u16.c │ │ ├── qu8-vpreluc-scalar-u1.c │ │ ├── qu8-vpreluc-scalar-u2.c │ │ ├── qu8-vpreluc-scalar-u4.c │ │ └── qu8-vpreluc-scalar-u8.c │ └── qu8-vpreluc.inc ├── qu8-vrpreluc │ ├── gen │ │ ├── qu8-vrpreluc-avx2-u16.c │ │ ├── qu8-vrpreluc-scalar-u1.c │ │ ├── qu8-vrpreluc-scalar-u2.c │ │ ├── qu8-vrpreluc-scalar-u4.c │ │ └── qu8-vrpreluc-scalar-u8.c │ └── qu8-vrpreluc.inc ├── reference │ ├── binary-elementwise.cc │ ├── packing.cc │ └── unary-elementwise.cc ├── runtime.c ├── s8-ibilinear │ ├── gen │ │ ├── s8-ibilinear-neon-u16.c │ │ ├── s8-ibilinear-neon-u8.c │ │ ├── s8-ibilinear-scalar-u1.c │ │ ├── s8-ibilinear-scalar-u2.c │ │ ├── s8-ibilinear-scalar-u4.c │ │ ├── s8-ibilinear-sse2-u16.c │ │ ├── s8-ibilinear-sse2-u8.c │ │ ├── s8-ibilinear-sse41-u16.c │ │ ├── s8-ibilinear-sse41-u8.c │ │ ├── s8-ibilinear-wasmsimd-dot16x2-u16.c │ │ ├── s8-ibilinear-wasmsimd-dot16x2-u8.c │ │ ├── s8-ibilinear-wasmsimd-mul32-u16.c │ │ └── s8-ibilinear-wasmsimd-mul32-u8.c │ ├── neon.c.in │ ├── scalar.c.in │ ├── sse.c.in │ ├── wasmsimd-dot16x2.c.in │ └── wasmsimd-mul32.c.in ├── s8-maxpool │ ├── gen │ │ ├── s8-maxpool-9p-minmax-neon-u16.c │ │ ├── s8-maxpool-9p-minmax-scalar-u1.c │ │ ├── s8-maxpool-9p-minmax-sse41-u16.c │ │ └── s8-maxpool-9p-minmax-wasmsimd-u16.c │ └── s8-maxpool-minmax.inc ├── s8-rdminmax │ ├── gen │ │ ├── s8-rdmax-2p2x-hvx-u128.c │ │ ├── s8-rdmax-2p2x-neon-u32.c │ │ ├── s8-rdmax-2p2x-scalar-u2.c │ │ ├── s8-rdmax-2p2x-sse41-u32.c │ │ ├── s8-rdmax-2p2x-wasmsimd-u32.c │ │ ├── s8-rdmin-2p2x-hvx-u128.c │ │ ├── s8-rdmin-2p2x-neon-u32.c │ │ ├── s8-rdmin-2p2x-scalar-u2.c │ │ ├── s8-rdmin-2p2x-sse41-u32.c │ │ └── s8-rdmin-2p2x-wasmsimd-u32.c │ ├── s8-rdmax.inc │ ├── s8-rdmin.inc │ └── simd.c.in ├── s8-rminmax │ ├── gen │ │ ├── s8-rmax-hvx-u256-acc2.c │ │ ├── s8-rmax-neon-u16.c │ │ ├── s8-rmax-neon-u32-acc2.c │ │ ├── s8-rmax-neon-u48-acc3.c │ │ ├── s8-rmax-neon-u64-acc2.c │ │ ├── s8-rmax-neon-u64-acc4.c │ │ ├── s8-rmax-scalar-u1.c │ │ ├── s8-rmax-scalar-u2-acc2.c │ │ ├── s8-rmax-scalar-u3-acc3.c │ │ ├── s8-rmax-scalar-u4-acc2.c │ │ ├── s8-rmax-scalar-u4-acc4.c │ │ ├── s8-rmax-sse41-u16.c │ │ ├── s8-rmax-sse41-u32-acc2.c │ │ ├── s8-rmax-sse41-u48-acc3.c │ │ ├── s8-rmax-sse41-u64-acc2.c │ │ ├── s8-rmax-sse41-u64-acc4.c │ │ ├── s8-rmax-wasmsimd-u16.c │ │ ├── s8-rmax-wasmsimd-u32-acc2.c │ │ ├── s8-rmax-wasmsimd-u48-acc3.c │ │ ├── s8-rmax-wasmsimd-u64-acc2.c │ │ ├── s8-rmax-wasmsimd-u64-acc4.c │ │ ├── s8-rmin-hvx-u256-acc2.c │ │ ├── s8-rmin-neon-u16.c │ │ ├── s8-rmin-neon-u32-acc2.c │ │ ├── s8-rmin-neon-u48-acc3.c │ │ ├── s8-rmin-neon-u64-acc2.c │ │ ├── s8-rmin-neon-u64-acc4.c │ │ ├── s8-rmin-scalar-u1.c │ │ ├── s8-rmin-scalar-u2-acc2.c │ │ ├── s8-rmin-scalar-u3-acc3.c │ │ ├── s8-rmin-scalar-u4-acc2.c │ │ ├── s8-rmin-scalar-u4-acc4.c │ │ ├── s8-rmin-sse41-u16.c │ │ ├── s8-rmin-sse41-u32-acc2.c │ │ ├── s8-rmin-sse41-u48-acc3.c │ │ ├── s8-rmin-sse41-u64-acc2.c │ │ ├── s8-rmin-sse41-u64-acc4.c │ │ ├── s8-rmin-wasmsimd-u16.c │ │ ├── s8-rmin-wasmsimd-u32-acc2.c │ │ ├── s8-rmin-wasmsimd-u48-acc3.c │ │ ├── s8-rmin-wasmsimd-u64-acc2.c │ │ ├── s8-rmin-wasmsimd-u64-acc4.c │ │ ├── s8-rminmax-hvx-u256-acc2.c │ │ ├── s8-rminmax-neon-u16.c │ │ ├── s8-rminmax-neon-u32-acc2.c │ │ ├── s8-rminmax-neon-u48-acc3.c │ │ ├── s8-rminmax-neon-u64-acc2.c │ │ ├── s8-rminmax-neon-u64-acc4.c │ │ ├── s8-rminmax-scalar-u1.c │ │ ├── s8-rminmax-scalar-u2-acc2.c │ │ ├── s8-rminmax-scalar-u3-acc3.c │ │ ├── s8-rminmax-scalar-u4-acc2.c │ │ ├── s8-rminmax-scalar-u4-acc4.c │ │ ├── s8-rminmax-sse41-u16.c │ │ ├── s8-rminmax-sse41-u32-acc2.c │ │ ├── s8-rminmax-sse41-u48-acc3.c │ │ ├── s8-rminmax-sse41-u64-acc2.c │ │ ├── s8-rminmax-sse41-u64-acc4.c │ │ ├── s8-rminmax-wasmsimd-u16.c │ │ ├── s8-rminmax-wasmsimd-u32-acc2.c │ │ ├── s8-rminmax-wasmsimd-u48-acc3.c │ │ ├── s8-rminmax-wasmsimd-u64-acc2.c │ │ └── s8-rminmax-wasmsimd-u64-acc4.c │ ├── s8-rmax.inc │ ├── s8-rmin.inc │ ├── s8-rminmax.inc │ └── simd.c.in ├── s8-vclamp │ ├── gen │ │ ├── s8-vclamp-rvv-u1v.c │ │ ├── s8-vclamp-rvv-u2v.c │ │ ├── s8-vclamp-rvv-u4v.c │ │ └── s8-vclamp-rvv-u8v.c │ ├── rvv.c.in │ ├── s8-vclamp-avx2-u128.c │ ├── s8-vclamp-avx512skx-u256.c │ ├── s8-vclamp-neon-u64.c │ ├── s8-vclamp-scalar-u4.c │ ├── s8-vclamp-sse2-u64.c │ ├── s8-vclamp-sse41-u64.c │ ├── s8-vclamp-wasmsimd-u64.c │ └── s8-vclamp.inc ├── sanitizers.c ├── subgraph.c ├── subgraph │ ├── argmax-pooling-2d.c │ ├── average-pooling-2d.c │ ├── batch-matrix-multiply.c │ ├── binary.c │ ├── concatenate.c │ ├── convolution-2d.c │ ├── copy.c │ ├── deconvolution-2d.c │ ├── deprecated.c │ ├── depth-to-space-2d.c │ ├── depthwise-convolution-2d.c │ ├── even-split.c │ ├── fully-connected-sparse.c │ ├── fully-connected.c │ ├── max-pooling-2d.c │ ├── pack-lh.c │ ├── reshape-helpers.c │ ├── rope.c │ ├── softmax.c │ ├── space-to-depth-2d.c │ ├── static-constant-pad.c │ ├── static-reduce.c │ ├── static-resize-bilinear-2d.c │ ├── static-slice.c │ ├── static-transpose.c │ ├── subgraph-utils.c │ ├── subgraph-utils.h │ ├── unary.c │ ├── unpooling-2d.c │ └── validation.c ├── tables │ ├── exp2-k-over-2048.c │ ├── exp2-k-over-64.c │ ├── exp2minus-k-over-16.c │ ├── exp2minus-k-over-2048.c │ ├── exp2minus-k-over-32.c │ ├── exp2minus-k-over-4.c │ ├── exp2minus-k-over-64.c │ ├── exp2minus-k-over-8.c │ └── vlog.c ├── tensor.c ├── u8-ibilinear │ └── gen │ │ ├── u8-ibilinear-neon-u16.c │ │ ├── u8-ibilinear-neon-u8.c │ │ ├── u8-ibilinear-scalar-u1.c │ │ ├── u8-ibilinear-scalar-u2.c │ │ ├── u8-ibilinear-scalar-u4.c │ │ ├── u8-ibilinear-sse2-u16.c │ │ ├── u8-ibilinear-sse2-u8.c │ │ ├── u8-ibilinear-sse41-u16.c │ │ ├── u8-ibilinear-sse41-u8.c │ │ ├── u8-ibilinear-wasmsimd-dot16x2-u16.c │ │ ├── u8-ibilinear-wasmsimd-dot16x2-u8.c │ │ ├── u8-ibilinear-wasmsimd-mul32-u16.c │ │ └── u8-ibilinear-wasmsimd-mul32-u8.c ├── u8-lut32norm │ ├── u8-lut32norm-scalar.c │ └── u8-lut32norm.inc ├── u8-maxpool │ ├── gen │ │ ├── u8-maxpool-9p-minmax-neon-u16.c │ │ ├── u8-maxpool-9p-minmax-scalar-u1.c │ │ ├── u8-maxpool-9p-minmax-sse2-u16.c │ │ └── u8-maxpool-9p-minmax-wasmsimd-u16.c │ └── u8-maxpool-minmax.inc ├── u8-rdminmax │ ├── gen │ │ ├── u8-rdmax-2p2x-hvx-u128.c │ │ ├── u8-rdmax-2p2x-neon-u32.c │ │ ├── u8-rdmax-2p2x-scalar-u2.c │ │ ├── u8-rdmax-2p2x-sse2-u32.c │ │ ├── u8-rdmax-2p2x-wasmsimd-u32.c │ │ ├── u8-rdmin-2p2x-hvx-u128.c │ │ ├── u8-rdmin-2p2x-neon-u32.c │ │ ├── u8-rdmin-2p2x-scalar-u2.c │ │ ├── u8-rdmin-2p2x-sse2-u32.c │ │ └── u8-rdmin-2p2x-wasmsimd-u32.c │ ├── u8-rdmax.inc │ └── u8-rdmin.inc ├── u8-rminmax │ ├── gen │ │ ├── u8-rmax-hvx-u256-acc2.c │ │ ├── u8-rmax-neon-u16.c │ │ ├── u8-rmax-neon-u32-acc2.c │ │ ├── u8-rmax-neon-u48-acc3.c │ │ ├── u8-rmax-neon-u64-acc2.c │ │ ├── u8-rmax-neon-u64-acc4.c │ │ ├── u8-rmax-scalar-u1.c │ │ ├── u8-rmax-scalar-u2-acc2.c │ │ ├── u8-rmax-scalar-u3-acc3.c │ │ ├── u8-rmax-scalar-u4-acc2.c │ │ ├── u8-rmax-scalar-u4-acc4.c │ │ ├── u8-rmax-sse2-u16.c │ │ ├── u8-rmax-sse2-u32-acc2.c │ │ ├── u8-rmax-sse2-u48-acc3.c │ │ ├── u8-rmax-sse2-u64-acc2.c │ │ ├── u8-rmax-sse2-u64-acc4.c │ │ ├── u8-rmax-wasmsimd-u32-acc2.c │ │ ├── u8-rmin-hvx-u256-acc2.c │ │ ├── u8-rmin-neon-u16.c │ │ ├── u8-rmin-neon-u32-acc2.c │ │ ├── u8-rmin-neon-u48-acc3.c │ │ ├── u8-rmin-neon-u64-acc2.c │ │ ├── u8-rmin-neon-u64-acc4.c │ │ ├── u8-rmin-scalar-u1.c │ │ ├── u8-rmin-scalar-u2-acc2.c │ │ ├── u8-rmin-scalar-u3-acc3.c │ │ ├── u8-rmin-scalar-u4-acc2.c │ │ ├── u8-rmin-scalar-u4-acc4.c │ │ ├── u8-rmin-sse2-u16.c │ │ ├── u8-rmin-sse2-u32-acc2.c │ │ ├── u8-rmin-sse2-u48-acc3.c │ │ ├── u8-rmin-sse2-u64-acc2.c │ │ ├── u8-rmin-sse2-u64-acc4.c │ │ ├── u8-rmin-wasmsimd-u32-acc2.c │ │ ├── u8-rminmax-hvx-u256-acc2.c │ │ ├── u8-rminmax-neon-u16.c │ │ ├── u8-rminmax-neon-u32-acc2.c │ │ ├── u8-rminmax-neon-u48-acc3.c │ │ ├── u8-rminmax-neon-u64-acc2.c │ │ ├── u8-rminmax-neon-u64-acc4.c │ │ ├── u8-rminmax-scalar-u1.c │ │ ├── u8-rminmax-scalar-u2-acc2.c │ │ ├── u8-rminmax-scalar-u3-acc3.c │ │ ├── u8-rminmax-scalar-u4-acc2.c │ │ ├── u8-rminmax-scalar-u4-acc4.c │ │ ├── u8-rminmax-sse2-u16.c │ │ ├── u8-rminmax-sse2-u32-acc2.c │ │ ├── u8-rminmax-sse2-u48-acc3.c │ │ ├── u8-rminmax-sse2-u64-acc2.c │ │ ├── u8-rminmax-sse2-u64-acc4.c │ │ └── u8-rminmax-wasmsimd-u32-acc2.c │ ├── u8-rmax.inc │ ├── u8-rmin.inc │ └── u8-rminmax.inc ├── u8-vclamp │ ├── gen │ │ ├── u8-vclamp-rvv-u1v.c │ │ ├── u8-vclamp-rvv-u2v.c │ │ ├── u8-vclamp-rvv-u4v.c │ │ └── u8-vclamp-rvv-u8v.c │ ├── u8-vclamp-avx2-u128.c │ ├── u8-vclamp-avx512skx-u256.c │ ├── u8-vclamp-neon-u64.c │ ├── u8-vclamp-scalar-u4.c │ ├── u8-vclamp-sse2-u64.c │ ├── u8-vclamp-wasmsimd-u64.c │ └── u8-vclamp.inc ├── x16-pack-lh │ ├── x16-pack-lh-igemm.inc │ ├── x16-pack-lh.inc │ ├── x16-packlh-igemm-neonsme.c │ ├── x16-packlh-igemm-neonsme2.c │ ├── x16-packlh-neonsme.c │ └── x16-packlh-neonsme2.c ├── x16-packw │ ├── avx.c.in │ ├── gen │ │ ├── x16-packw-x16-gemm-goi-avx2-u16-prfm.c │ │ ├── x16-packw-x16-gemm-goi-avx2-u16.c │ │ ├── x16-packw-x16-gemm-goi-neon-ld4lane-u12-prfm.c │ │ ├── x16-packw-x16-gemm-goi-neon-ld4lane-u12.c │ │ ├── x16-packw-x16-gemm-goi-neon-ld4lane-u16-prfm.c │ │ ├── x16-packw-x16-gemm-goi-neon-ld4lane-u16.c │ │ ├── x16-packw-x16-gemm-goi-neon-ld4lane-u4-prfm.c │ │ ├── x16-packw-x16-gemm-goi-neon-ld4lane-u4.c │ │ ├── x16-packw-x16-gemm-goi-neon-ld4lane-u8-prfm.c │ │ ├── x16-packw-x16-gemm-goi-neon-ld4lane-u8.c │ │ ├── x16-packw-x16-gemm-goi-scalar-int-u4.c │ │ ├── x16-packw-x32-gemm-goi-scalar-int-u4.c │ │ ├── x16-packw-x64-gemm-goi-scalar-int-u4.c │ │ ├── x16-packw-x8-gemm-goi-avx2-u16-prfm.c │ │ ├── x16-packw-x8-gemm-goi-avx2-u16.c │ │ ├── x16-packw-x8-gemm-goi-neon-ld4lane-u12-prfm.c │ │ ├── x16-packw-x8-gemm-goi-neon-ld4lane-u12.c │ │ ├── x16-packw-x8-gemm-goi-neon-ld4lane-u16-prfm.c │ │ ├── x16-packw-x8-gemm-goi-neon-ld4lane-u16.c │ │ ├── x16-packw-x8-gemm-goi-neon-ld4lane-u4-prfm.c │ │ ├── x16-packw-x8-gemm-goi-neon-ld4lane-u4.c │ │ ├── x16-packw-x8-gemm-goi-neon-ld4lane-u8-prfm.c │ │ ├── x16-packw-x8-gemm-goi-neon-ld4lane-u8.c │ │ └── x16-packw-x8-gemm-goi-scalar-int-u4.c │ ├── neon.c.in │ └── x16-packw.inc ├── x16-transposec │ ├── gen │ │ ├── x16-transposec-16x16-reuse-mov-avx2.c │ │ ├── x16-transposec-16x16-reuse-switch-avx2.c │ │ ├── x16-transposec-1x2-scalar-int.c │ │ ├── x16-transposec-1x4-scalar-int.c │ │ ├── x16-transposec-2x1-scalar-int.c │ │ ├── x16-transposec-2x2-scalar-int.c │ │ ├── x16-transposec-2x4-scalar-int.c │ │ ├── x16-transposec-4x1-scalar-int.c │ │ ├── x16-transposec-4x2-scalar-int.c │ │ ├── x16-transposec-4x4-multi-dec-zip-neon.c │ │ ├── x16-transposec-4x4-multi-mov-zip-neon.c │ │ ├── x16-transposec-4x4-multi-multi-zip-neon.c │ │ ├── x16-transposec-4x4-multi-switch-zip-neon.c │ │ ├── x16-transposec-4x4-reuse-dec-zip-neon.c │ │ ├── x16-transposec-4x4-reuse-mov-zip-neon.c │ │ ├── x16-transposec-4x4-reuse-multi-zip-neon.c │ │ ├── x16-transposec-4x4-reuse-switch-zip-neon.c │ │ ├── x16-transposec-4x4-scalar-int.c │ │ ├── x16-transposec-8x8-multi-dec-zip-neon.c │ │ ├── x16-transposec-8x8-multi-mov-sse2.c │ │ ├── x16-transposec-8x8-multi-mov-wasmsimd.c │ │ ├── x16-transposec-8x8-multi-mov-zip-neon.c │ │ ├── x16-transposec-8x8-multi-switch-sse2.c │ │ ├── x16-transposec-8x8-multi-switch-wasmsimd.c │ │ ├── x16-transposec-8x8-multi-switch-zip-neon.c │ │ ├── x16-transposec-8x8-reuse-dec-zip-neon.c │ │ ├── x16-transposec-8x8-reuse-mov-sse2.c │ │ ├── x16-transposec-8x8-reuse-mov-wasmsimd.c │ │ ├── x16-transposec-8x8-reuse-mov-zip-neon.c │ │ ├── x16-transposec-8x8-reuse-multi-sse2.c │ │ ├── x16-transposec-8x8-reuse-multi-wasmsimd.c │ │ ├── x16-transposec-8x8-reuse-multi-zip-neon.c │ │ ├── x16-transposec-8x8-reuse-switch-sse2.c │ │ ├── x16-transposec-8x8-reuse-switch-wasmsimd.c │ │ └── x16-transposec-8x8-reuse-switch-zip-neon.c │ ├── x16-transposec-4x8-sse2.c │ └── x16-transposec.inc ├── x16-x32-packw │ ├── gen │ │ ├── x16-x32-packw-x32c2-gemm-gio-scalar.c │ │ └── x16-x32-packw-x32c2-gemm-goi-scalar.c │ ├── kr-gio-scalar.c.in │ ├── kr-scalar.c.in │ └── x16-x32-packw.inc ├── x24-transposec │ ├── gen │ │ ├── x24-transposec-1x2-scalar.c │ │ ├── x24-transposec-1x4-scalar.c │ │ ├── x24-transposec-2x1-scalar.c │ │ ├── x24-transposec-2x2-scalar.c │ │ ├── x24-transposec-2x4-scalar.c │ │ ├── x24-transposec-4x1-scalar.c │ │ ├── x24-transposec-4x2-scalar.c │ │ └── x24-transposec-4x4-scalar.c │ ├── scalar.c.in │ ├── x24-transposec-2x2-neon-tbl64.c │ ├── x24-transposec-4x4-aarch64-neon-tbl128.c │ ├── x24-transposec-4x4-ssse3.c │ └── x24-transposec.inc ├── x32-pack-lh │ ├── x32-pack-lh-igemm.inc │ ├── x32-pack-lh.inc │ ├── x32-packlh-igemm-neonsme.c │ ├── x32-packlh-igemm-neonsme2.c │ ├── x32-packlh-neonsme.c │ └── x32-packlh-neonsme2.c ├── x32-packb │ ├── gen │ │ ├── x32-packb-2c1s1r-gemm-scalar-float.c │ │ ├── x32-packb-2c1s1r-gemm-scalar-int.c │ │ ├── x32-packb-2c2s1r-gemm-scalar-float.c │ │ ├── x32-packb-2c2s1r-gemm-scalar-int.c │ │ ├── x32-packb-4c1s1r-gemm-scalar-float.c │ │ ├── x32-packb-4c1s1r-gemm-scalar-int.c │ │ ├── x32-packb-4c4s1r-gemm-scalar-float.c │ │ └── x32-packb-4c4s1r-gemm-scalar-int.c │ ├── scalar.c.in │ └── x32-packb.inc ├── x32-packw │ ├── NR2-neon.c.in │ ├── avx.c.in │ ├── avx512.c.in │ ├── avx512c2.c.in │ ├── c4-sse2.c.in │ ├── c4-wasmsimd.c.in │ ├── gen │ │ ├── x32-packw-gio-hvx-u2.c │ │ ├── x32-packw-gio-neon-u2.c │ │ ├── x32-packw-gio-sse41-u2.c │ │ ├── x32-packw-gio-wasmsimd-u2.c │ │ ├── x32-packw-x12-gemm-goi-neon-ld4lane-u4-prfm.c │ │ ├── x32-packw-x12-gemm-goi-neon-ld4lane-u4.c │ │ ├── x32-packw-x12-gemm-goi-neon-ld4lane-u8-prfm.c │ │ ├── x32-packw-x12-gemm-goi-neon-ld4lane-u8.c │ │ ├── x32-packw-x16-gemm-gio-avx-u1-prfm.c │ │ ├── x32-packw-x16-gemm-gio-avx-u1.c │ │ ├── x32-packw-x16-gemm-gio-avx-u8-prfm.c │ │ ├── x32-packw-x16-gemm-gio-avx-u8.c │ │ ├── x32-packw-x16-gemm-gio-avx512f-u1-prfm.c │ │ ├── x32-packw-x16-gemm-gio-avx512f-u1.c │ │ ├── x32-packw-x16-gemm-gio-avx512f-u8-prfm.c │ │ ├── x32-packw-x16-gemm-gio-avx512f-u8.c │ │ ├── x32-packw-x16-gemm-gio-scalar.c │ │ ├── x32-packw-x16-gemm-goi-avx-u4-prfm.c │ │ ├── x32-packw-x16-gemm-goi-avx-u4.c │ │ ├── x32-packw-x16-gemm-goi-avx512f-u4-prfm.c │ │ ├── x32-packw-x16-gemm-goi-avx512f-u4.c │ │ ├── x32-packw-x16-gemm-goi-neon-ld4lane-u4-prfm.c │ │ ├── x32-packw-x16-gemm-goi-neon-ld4lane-u4.c │ │ ├── x32-packw-x16-gemm-goi-neon-ld4lane-u8-prfm.c │ │ ├── x32-packw-x16-gemm-goi-neon-ld4lane-u8.c │ │ ├── x32-packw-x16-gemm-goi-scalar-float-u4.c │ │ ├── x32-packw-x16-gemm-goi-scalar-int-u4.c │ │ ├── x32-packw-x16-gemm-goi-sse2-u4-prfm.c │ │ ├── x32-packw-x16-gemm-goi-sse2-u4.c │ │ ├── x32-packw-x16-gemm-goi-sse2-u8-prfm.c │ │ ├── x32-packw-x16-gemm-goi-sse2-u8.c │ │ ├── x32-packw-x16s4-gemm-goi-avx-u4-prfm.c │ │ ├── x32-packw-x16s4-gemm-goi-avx-u4.c │ │ ├── x32-packw-x16s4-gemm-goi-sse2-u4-prfm.c │ │ ├── x32-packw-x16s4-gemm-goi-sse2-u4.c │ │ ├── x32-packw-x16s4-gemm-goi-sse2-u8-prfm.c │ │ ├── x32-packw-x16s4-gemm-goi-sse2-u8.c │ │ ├── x32-packw-x1v-gemm-goi-rvv-u2.c │ │ ├── x32-packw-x1v-gemm-goi-rvv-u4.c │ │ ├── x32-packw-x1v-gemm-goi-rvv-u8.c │ │ ├── x32-packw-x2-gemm-gio-scalar.c │ │ ├── x32-packw-x2-gemm-goi-neon-ld2lane-u2-prfm.c │ │ ├── x32-packw-x2-gemm-goi-neon-ld2lane-u2.c │ │ ├── x32-packw-x2-gemm-goi-scalar-float-u4.c │ │ ├── x32-packw-x2-gemm-goi-scalar-int-u4.c │ │ ├── x32-packw-x2c4-gemm-goi-sse2-u4-prfm.c │ │ ├── x32-packw-x2c4-gemm-goi-sse2-u4.c │ │ ├── x32-packw-x2c4-gemm-goi-wasmsimd-u4.c │ │ ├── x32-packw-x2v-gemm-goi-rvv-u2.c │ │ ├── x32-packw-x2v-gemm-goi-rvv-u4.c │ │ ├── x32-packw-x2v-gemm-goi-rvv-u8.c │ │ ├── x32-packw-x3-gemm-goi-scalar-float-u4.c │ │ ├── x32-packw-x3-gemm-goi-scalar-int-u4.c │ │ ├── x32-packw-x32-gemm-gio-avx-u1-prfm.c │ │ ├── x32-packw-x32-gemm-gio-avx-u1.c │ │ ├── x32-packw-x32-gemm-gio-avx-u8-prfm.c │ │ ├── x32-packw-x32-gemm-gio-avx-u8.c │ │ ├── x32-packw-x32-gemm-gio-avx512f-u1-prfm.c │ │ ├── x32-packw-x32-gemm-gio-avx512f-u1.c │ │ ├── x32-packw-x32-gemm-gio-avx512f-u8-prfm.c │ │ ├── x32-packw-x32-gemm-gio-avx512f-u8.c │ │ ├── x32-packw-x32-gemm-gio-scalar.c │ │ ├── x32-packw-x32-gemm-goi-avx512f-u4-prfm.c │ │ ├── x32-packw-x32-gemm-goi-avx512f-u4.c │ │ ├── x32-packw-x32-gemm-goi-scalar-float-u2.c │ │ ├── x32-packw-x32-gemm-goi-scalar-int-u2.c │ │ ├── x32-packw-x32c2-gemm-goi-avx512f-u4-prfm.c │ │ ├── x32-packw-x32c2-gemm-goi-avx512f-u4.c │ │ ├── x32-packw-x4-gemm-gio-scalar.c │ │ ├── x32-packw-x4-gemm-goi-scalar-float-u4.c │ │ ├── x32-packw-x4-gemm-goi-scalar-int-u4.c │ │ ├── x32-packw-x4v-gemm-goi-rvv-u2.c │ │ ├── x32-packw-x4v-gemm-goi-rvv-u4.c │ │ ├── x32-packw-x4v-gemm-goi-rvv-u8.c │ │ ├── x32-packw-x64-gemm-gio-scalar.c │ │ ├── x32-packw-x64-gemm-goi-scalar-float-u2.c │ │ ├── x32-packw-x64-gemm-goi-scalar-int-u2.c │ │ ├── x32-packw-x8-gemm-gio-avx-u1-prfm.c │ │ ├── x32-packw-x8-gemm-gio-avx-u1.c │ │ ├── x32-packw-x8-gemm-gio-avx-u8-prfm.c │ │ ├── x32-packw-x8-gemm-gio-avx-u8.c │ │ ├── x32-packw-x8-gemm-gio-scalar.c │ │ ├── x32-packw-x8-gemm-goi-avx-u4-prfm.c │ │ ├── x32-packw-x8-gemm-goi-avx-u4.c │ │ ├── x32-packw-x8-gemm-goi-neon-ld4lane-u4-prfm.c │ │ ├── x32-packw-x8-gemm-goi-neon-ld4lane-u4.c │ │ ├── x32-packw-x8-gemm-goi-neon-ld4lane-u8-prfm.c │ │ ├── x32-packw-x8-gemm-goi-neon-ld4lane-u8.c │ │ ├── x32-packw-x8-gemm-goi-scalar-float-u4.c │ │ ├── x32-packw-x8-gemm-goi-scalar-int-u4.c │ │ ├── x32-packw-x8-gemm-goi-sse2-u4-prfm.c │ │ ├── x32-packw-x8-gemm-goi-sse2-u4.c │ │ ├── x32-packw-x8-gemm-goi-sse2-u8-prfm.c │ │ ├── x32-packw-x8-gemm-goi-sse2-u8.c │ │ ├── x32-packw-x8-gemm-goi-wasmsimd-u4.c │ │ ├── x32-packw-x8s4-gemm-goi-avx-u4-prfm.c │ │ ├── x32-packw-x8s4-gemm-goi-avx-u4.c │ │ ├── x32-packw-x8s4-gemm-goi-neon-ld4lane-u4-prfm.c │ │ ├── x32-packw-x8s4-gemm-goi-neon-ld4lane-u4.c │ │ ├── x32-packw-x8s4-gemm-goi-neon-ld4lane-u8-prfm.c │ │ ├── x32-packw-x8s4-gemm-goi-neon-ld4lane-u8.c │ │ ├── x32-packw-x8s4-gemm-goi-sse2-u4-prfm.c │ │ ├── x32-packw-x8s4-gemm-goi-sse2-u4.c │ │ ├── x32-packw-x8s4-gemm-goi-sse2-u8-prfm.c │ │ ├── x32-packw-x8s4-gemm-goi-sse2-u8.c │ │ ├── x32-packw-x8s4-gemm-goi-wasmsimd-u4.c │ │ ├── x32-packw-x8v-gemm-goi-rvv-u2.c │ │ ├── x32-packw-x8v-gemm-goi-rvv-u4.c │ │ └── x32-packw-x8v-gemm-goi-rvv-u8.c │ ├── gio-avx.c.in │ ├── gio-avx512.c.in │ ├── gio-scalar.c.in │ ├── gio-simd.c.in │ ├── neon.c.in │ ├── rvv.c.in │ ├── s4-avx.c.in │ ├── s4-sse2.c.in │ ├── s4-wasmsimd.c.in │ ├── scalar.c.in │ ├── sse2.c.in │ ├── wasmsimd.c.in │ └── x32-packw.inc ├── x32-packx │ ├── gen │ │ ├── x32-packx-4x-neon-st4-u4-prfm.c │ │ ├── x32-packx-4x-neon-st4-u4.c │ │ ├── x32-packx-4x-neon-st4-u8-prfm.c │ │ ├── x32-packx-4x-neon-st4-u8.c │ │ ├── x32-packx-8x-neon-st4-u4-prfm.c │ │ ├── x32-packx-8x-neon-st4-u4.c │ │ ├── x32-packx-8x-neon-st4-u8-prfm.c │ │ └── x32-packx-8x-neon-st4-u8.c │ ├── neon.c.in │ ├── x32-packx-2x-scalar.c │ ├── x32-packx-3x-scalar.c │ ├── x32-packx-4x-scalar.c │ ├── x32-packx-4x-sse.c │ ├── x32-packx-4x-wasmsimd.c │ └── x32-packx.inc ├── x32-transposec │ ├── avx.c.in │ ├── avx2.c.in │ ├── gen │ │ ├── x32-transposec-16x8-rvv.c │ │ ├── x32-transposec-1x2-scalar-float.c │ │ ├── x32-transposec-1x2-scalar-int.c │ │ ├── x32-transposec-1x4-scalar-float.c │ │ ├── x32-transposec-1x4-scalar-int.c │ │ ├── x32-transposec-2x1-scalar-float.c │ │ ├── x32-transposec-2x1-scalar-int.c │ │ ├── x32-transposec-2x2-multi-dec-zip-neon.c │ │ ├── x32-transposec-2x2-multi-mov-zip-neon.c │ │ ├── x32-transposec-2x2-multi-multi-zip-neon.c │ │ ├── x32-transposec-2x2-multi-switch-zip-neon.c │ │ ├── x32-transposec-2x2-reuse-dec-zip-neon.c │ │ ├── x32-transposec-2x2-reuse-mov-zip-neon.c │ │ ├── x32-transposec-2x2-reuse-multi-zip-neon.c │ │ ├── x32-transposec-2x2-reuse-switch-zip-neon.c │ │ ├── x32-transposec-2x2-scalar-float.c │ │ ├── x32-transposec-2x2-scalar-int.c │ │ ├── x32-transposec-2x4-scalar-float.c │ │ ├── x32-transposec-2x4-scalar-int.c │ │ ├── x32-transposec-32x32-multi-multi-hvx.c │ │ ├── x32-transposec-32x8-rvv.c │ │ ├── x32-transposec-4x1-scalar-float.c │ │ ├── x32-transposec-4x1-scalar-int.c │ │ ├── x32-transposec-4x2-scalar-float.c │ │ ├── x32-transposec-4x2-scalar-int.c │ │ ├── x32-transposec-4x4-multi-dec-zip-neon.c │ │ ├── x32-transposec-4x4-multi-mov-sse2.c │ │ ├── x32-transposec-4x4-multi-mov-wasmsimd.c │ │ ├── x32-transposec-4x4-multi-mov-zip-neon.c │ │ ├── x32-transposec-4x4-multi-multi-sse2.c │ │ ├── x32-transposec-4x4-multi-multi-wasmsimd.c │ │ ├── x32-transposec-4x4-multi-multi-zip-neon.c │ │ ├── x32-transposec-4x4-multi-switch-sse2.c │ │ ├── x32-transposec-4x4-multi-switch-wasmsimd.c │ │ ├── x32-transposec-4x4-multi-switch-zip-neon.c │ │ ├── x32-transposec-4x4-reuse-dec-zip-neon.c │ │ ├── x32-transposec-4x4-reuse-mov-sse2.c │ │ ├── x32-transposec-4x4-reuse-mov-wasmsimd.c │ │ ├── x32-transposec-4x4-reuse-mov-zip-neon.c │ │ ├── x32-transposec-4x4-reuse-multi-sse2.c │ │ ├── x32-transposec-4x4-reuse-multi-wasmsimd.c │ │ ├── x32-transposec-4x4-reuse-multi-zip-neon.c │ │ ├── x32-transposec-4x4-reuse-switch-sse2.c │ │ ├── x32-transposec-4x4-reuse-switch-wasmsimd.c │ │ ├── x32-transposec-4x4-reuse-switch-zip-neon.c │ │ ├── x32-transposec-4x4-rvv.c │ │ ├── x32-transposec-4x4-scalar-float.c │ │ ├── x32-transposec-4x4-scalar-int.c │ │ ├── x32-transposec-8x8-multi-mov-avx.c │ │ ├── x32-transposec-8x8-multi-switch-avx.c │ │ ├── x32-transposec-8x8-reuse-mov-avx.c │ │ ├── x32-transposec-8x8-reuse-multi-avx.c │ │ ├── x32-transposec-8x8-reuse-switch-avx.c │ │ └── x32-transposec-8x8-rvv.c │ ├── hvx.c.in │ ├── neon-zip.c.in │ ├── rvv.c.in │ ├── scalar.c.in │ ├── sse2.c.in │ ├── wasmsimd.c.in │ ├── x32-transposec-4x4-aarch64-neon-tbl128.c │ ├── x32-transposec-4x4-sse.c │ └── x32-transposec.inc ├── x32-unpool │ ├── x32-unpool-neon.c │ ├── x32-unpool-scalar.c │ ├── x32-unpool-sse2.c │ └── x32-unpool-wasmsimd.c ├── x4-packw │ └── scalar.c.in ├── x64-transposec │ ├── gen │ │ ├── x64-transposec-1x2-scalar-float.c │ │ ├── x64-transposec-1x2-scalar-int.c │ │ ├── x64-transposec-2x1-scalar-float.c │ │ ├── x64-transposec-2x1-scalar-int.c │ │ ├── x64-transposec-2x2-multi-dec-zip-neon.c │ │ ├── x64-transposec-2x2-multi-mov-sse2.c │ │ ├── x64-transposec-2x2-multi-mov-zip-neon.c │ │ ├── x64-transposec-2x2-multi-multi-sse2.c │ │ ├── x64-transposec-2x2-multi-multi-zip-neon.c │ │ ├── x64-transposec-2x2-multi-switch-sse2.c │ │ ├── x64-transposec-2x2-multi-switch-zip-neon.c │ │ ├── x64-transposec-2x2-reuse-dec-zip-neon.c │ │ ├── x64-transposec-2x2-reuse-mov-sse2.c │ │ ├── x64-transposec-2x2-reuse-mov-zip-neon.c │ │ ├── x64-transposec-2x2-reuse-multi-sse2.c │ │ ├── x64-transposec-2x2-reuse-multi-zip-neon.c │ │ ├── x64-transposec-2x2-reuse-switch-sse2.c │ │ ├── x64-transposec-2x2-reuse-switch-zip-neon.c │ │ ├── x64-transposec-2x2-scalar-float.c │ │ ├── x64-transposec-2x2-scalar-int.c │ │ ├── x64-transposec-4x1-scalar-float.c │ │ ├── x64-transposec-4x1-scalar-int.c │ │ ├── x64-transposec-4x2-scalar-float.c │ │ ├── x64-transposec-4x2-scalar-int.c │ │ ├── x64-transposec-4x4-multi-mov-avx.c │ │ ├── x64-transposec-4x4-multi-multi-avx.c │ │ ├── x64-transposec-4x4-multi-switch-avx.c │ │ ├── x64-transposec-4x4-reuse-mov-avx.c │ │ ├── x64-transposec-4x4-reuse-multi-avx.c │ │ └── x64-transposec-4x4-reuse-switch-avx.c │ └── x64-transposec.inc ├── x8-lut │ ├── avx2.c.in │ ├── avx512skx-vpshufb.c.in │ ├── avx512vbmi-vpermx2b.c.in │ ├── gen │ │ ├── x8-lut-aarch64-neon-tbx128x4-u16.c │ │ ├── x8-lut-aarch64-neon-tbx128x4-u32.c │ │ ├── x8-lut-aarch64-neon-tbx128x4-u48.c │ │ ├── x8-lut-aarch64-neon-tbx128x4-u64.c │ │ ├── x8-lut-avx-u16.c │ │ ├── x8-lut-avx-u32.c │ │ ├── x8-lut-avx-u48.c │ │ ├── x8-lut-avx-u64.c │ │ ├── x8-lut-avx2-u128.c │ │ ├── x8-lut-avx2-u32.c │ │ ├── x8-lut-avx2-u64.c │ │ ├── x8-lut-avx2-u96.c │ │ ├── x8-lut-avx512skx-vpshufb-u128.c │ │ ├── x8-lut-avx512skx-vpshufb-u192.c │ │ ├── x8-lut-avx512skx-vpshufb-u256.c │ │ ├── x8-lut-avx512skx-vpshufb-u64.c │ │ ├── x8-lut-avx512vbmi-vpermx2b-u128.c │ │ ├── x8-lut-avx512vbmi-vpermx2b-u192.c │ │ ├── x8-lut-avx512vbmi-vpermx2b-u256.c │ │ ├── x8-lut-avx512vbmi-vpermx2b-u64.c │ │ ├── x8-lut-scalar-u1.c │ │ ├── x8-lut-scalar-u16.c │ │ ├── x8-lut-scalar-u2.c │ │ ├── x8-lut-scalar-u4.c │ │ ├── x8-lut-scalar-u8.c │ │ ├── x8-lut-ssse3-u16.c │ │ ├── x8-lut-ssse3-u32.c │ │ ├── x8-lut-wasmpshufb-u16.c │ │ ├── x8-lut-wasmpshufb-u32.c │ │ ├── x8-lut-wasmpshufb-u48.c │ │ ├── x8-lut-wasmpshufb-u64.c │ │ ├── x8-lut-wasmsimd-u16.c │ │ ├── x8-lut-wasmsimd-u32.c │ │ ├── x8-lut-wasmsimd-u48.c │ │ └── x8-lut-wasmsimd-u64.c │ ├── neon-tbx128x4.c.in │ ├── scalar.c.in │ ├── ssse3.c.in │ ├── wasmpshufb.c.in │ └── wasmsimd.c.in ├── x8-pack-lh │ ├── x8-pack-lh-igemm.inc │ ├── x8-pack-lh.inc │ ├── x8-packlh-igemm-neonsme2.c │ └── x8-packlh-neonsme2.c ├── x8-packq │ ├── x8-packq-aarch64-neon-f32qp8-u2.c │ ├── x8-packq-scalar-f32qp8-u1.c │ └── x8-packq.inc ├── x8-packw │ ├── c4-avxvnni.c.in │ ├── gen │ │ ├── x8-packw-x16-gemm-goi-scalar-u2.c │ │ ├── x8-packw-x16-gemm-goi-scalar-u4.c │ │ ├── x8-packw-x16c8-gemm-goi-avx2-prfm.c │ │ ├── x8-packw-x16c8-gemm-goi-avx2.c │ │ ├── x8-packw-x16c8-gemm-goi-avx256skx-prfm.c │ │ ├── x8-packw-x16c8-gemm-goi-avx256skx.c │ │ ├── x8-packw-x2-gemm-goi-scalar-u2.c │ │ ├── x8-packw-x2-gemm-goi-scalar-u4.c │ │ ├── x8-packw-x32-gemm-goi-scalar-u2.c │ │ ├── x8-packw-x32-gemm-goi-scalar-u4.c │ │ ├── x8-packw-x4-gemm-goi-scalar-u2.c │ │ ├── x8-packw-x4-gemm-goi-scalar-u4.c │ │ ├── x8-packw-x8-gemm-goi-scalar-u2.c │ │ ├── x8-packw-x8-gemm-goi-scalar-u4.c │ │ ├── x8-packw-x8c8-gemm-gio-scalar.c │ │ ├── x8-packw-x8c8-gemm-goi-avx2-prfm.c │ │ ├── x8-packw-x8c8-gemm-goi-avx2.c │ │ ├── x8-packw-x8c8-gemm-goi-avx256skx-prfm.c │ │ └── x8-packw-x8c8-gemm-goi-avx256skx.c │ ├── kr-avxvnni.c.in │ ├── kr-gio-avxvnni.c.in │ ├── kr-gio-scalar.c.in │ ├── kr-scalar.c.in │ ├── kr-wasmdot.c.in │ ├── scalar.c.in │ └── x8-packw.inc ├── x8-transposec │ ├── gen │ │ ├── x8-transposec-16x16-reuse-dec-zip-neon.c │ │ ├── x8-transposec-16x16-reuse-mov-sse2.c │ │ ├── x8-transposec-16x16-reuse-mov-wasmsimd.c │ │ ├── x8-transposec-16x16-reuse-mov-zip-neon.c │ │ ├── x8-transposec-16x16-reuse-switch-sse2.c │ │ ├── x8-transposec-16x16-reuse-switch-wasmsimd.c │ │ ├── x8-transposec-16x16-reuse-switch-zip-neon.c │ │ ├── x8-transposec-1x2-scalar-int.c │ │ ├── x8-transposec-1x4-scalar-int.c │ │ ├── x8-transposec-2x1-scalar-int.c │ │ ├── x8-transposec-2x2-scalar-int.c │ │ ├── x8-transposec-2x4-scalar-int.c │ │ ├── x8-transposec-32x32-reuse-mov-avx2.c │ │ ├── x8-transposec-32x32-reuse-switch-avx2.c │ │ ├── x8-transposec-4x1-scalar-int.c │ │ ├── x8-transposec-4x2-scalar-int.c │ │ ├── x8-transposec-4x4-scalar-int.c │ │ ├── x8-transposec-8x8-multi-dec-zip-neon.c │ │ ├── x8-transposec-8x8-multi-mov-zip-neon.c │ │ ├── x8-transposec-8x8-multi-switch-zip-neon.c │ │ ├── x8-transposec-8x8-reuse-dec-zip-neon.c │ │ ├── x8-transposec-8x8-reuse-mov-zip-neon.c │ │ ├── x8-transposec-8x8-reuse-multi-zip-neon.c │ │ └── x8-transposec-8x8-reuse-switch-zip-neon.c │ └── x8-transposec.inc ├── xnnpack │ ├── aligned-allocator.h │ ├── allocation-type-defs.inc │ ├── allocation-type.h │ ├── allocator.h │ ├── argmaxpool.h │ ├── assembly.h │ ├── avgpool.h │ ├── buffer.h │ ├── cache.h │ ├── common.h │ ├── compute.h │ ├── config-types.h │ ├── config.h │ ├── conv.h │ ├── datatype.h │ ├── dwconv.h │ ├── fill.h │ ├── fingerprint_check.c │ ├── fp16.h │ ├── gemm.h │ ├── hardware-config.h │ ├── ibilinear.h │ ├── igemm.h │ ├── indirection.h │ ├── init-once.h │ ├── internal.h │ ├── intrinsics-polyfill.h │ ├── isa-checks.h │ ├── log.h │ ├── lut.h │ ├── math.h │ ├── maxpool.h │ ├── memory-planner.h │ ├── memory.h │ ├── microfnptr.h │ ├── microkernel-type-defs.inc │ ├── microkernel-type.h │ ├── microkernel-utils.h │ ├── microparams-init.h │ ├── microparams.h │ ├── mutex.h │ ├── node-type-defs.inc │ ├── node-type.h │ ├── normalization.h │ ├── operator-type-defs.inc │ ├── operator-type.h │ ├── operator-utils.h │ ├── operator.h │ ├── pack-lh.h │ ├── pack.h │ ├── packb.h │ ├── packq.h │ ├── packw.h │ ├── packx.h │ ├── pad.h │ ├── params.h │ ├── ppmm.h │ ├── prefetch.h │ ├── quantization.h │ ├── raddexpminusmax.h │ ├── raddextexp.h │ ├── raddstoreexpminusmax.h │ ├── reduce.h │ ├── reference-config.h │ ├── reference-utils.h │ ├── requantization-stubs.h │ ├── requantization.h │ ├── reshape-helpers.h │ ├── simd │ │ ├── f16-avx512fp16.h │ │ ├── f16-neonfp16arith.h │ │ ├── f16-scalar.h │ │ ├── f32-avx-base.h │ │ ├── f32-avx.h │ │ ├── f32-avx2.h │ │ ├── f32-avx512f.h │ │ ├── f32-fma3.h │ │ ├── f32-hvx.h │ │ ├── f32-neon.h │ │ ├── f32-scalar.h │ │ ├── f32-sse2-base.h │ │ ├── f32-sse2.h │ │ ├── f32-sse2fma.h │ │ ├── f32-wasmrelaxedsimd.h │ │ ├── f32-wasmsimd-base.h │ │ ├── f32-wasmsimd.h │ │ ├── s16-avx2.h │ │ ├── s16-avx512skx.h │ │ ├── s16-neon.h │ │ ├── s16-scalar.h │ │ ├── s16-sse41.h │ │ ├── s16-wasmsimd.h │ │ ├── s32-avx2.h │ │ ├── s32-avx512f.h │ │ ├── s32-hvx.h │ │ ├── s32-neon.h │ │ ├── s32-scalar.h │ │ ├── s32-sse41.h │ │ ├── s32-wasmsimd.h │ │ ├── s8-hvx.h │ │ ├── s8-neon.h │ │ ├── s8-scalar.h │ │ ├── s8-sse41.h │ │ ├── s8-wasmsimd.h │ │ ├── u8-hvx.h │ │ ├── u8-neon.h │ │ ├── u8-scalar.h │ │ ├── u8-sse2.h │ │ └── u8-wasmsimd.h │ ├── spmm.h │ ├── subgraph-validation.h │ ├── subgraph.h │ ├── transpose.h │ ├── unaligned.h │ ├── unpool.h │ ├── vbinary.h │ ├── vcvt.h │ ├── vmulcaddc.h │ ├── vscaleexpminusmax.h │ ├── vscaleextexp.h │ └── vunary.h ├── xx-copy │ └── xx-copy-scalar-memcpy.c ├── xx-fill │ ├── xx-fill-neon-u64.c │ ├── xx-fill-scalar-u16.c │ ├── xx-fill-sse2-u64.c │ ├── xx-fill-wasmsimd-u64.c │ └── xx-fill.inc ├── xx-pad │ ├── xx-pad-p16-neon-u16.c │ ├── xx-pad-p16-sse2-u16.c │ ├── xx-pad-p16-wasmsimd-u16.c │ ├── xx-pad-p4-scalar-u16.c │ └── xx-pad.inc └── xx-transposev │ ├── xx-transposev-1x1-scalar-memcpy.c │ └── xx-transposev.inc ├── test ├── BUILD.bazel ├── CMakeLists.txt ├── argmaxpool-microkernel-tester.h ├── avgpool-minmax.cc ├── bf16-f32-gemm-minmax.cc ├── bf16-f32-gemm-minmax.yaml ├── bf16-gemm-minmax.cc ├── bf16-gemm-minmax.yaml ├── buffer.cc ├── build-identifier.cc ├── conv-hwc-microkernel-tester.h ├── conv-hwc2chw-microkernel-tester.h ├── dwconv-microkernel-tester.cc ├── dwconv-microkernel-tester.h ├── dwconv2d-microkernel-tester.h ├── f16-conv-hwc2chw.cc ├── f16-conv-hwc2chw.yaml ├── f16-dwconv-minmax.cc ├── f16-dwconv2d-chw.cc ├── f16-dwconv2d-chw.yaml ├── f16-f32-vcvt.cc ├── f16-f32acc-gemm-minmax.cc ├── f16-f32acc-gemm-minmax.yaml ├── f16-f32acc-igemm-minmax.cc ├── f16-f32acc-igemm-minmax.yaml ├── f16-gemm-minmax.cc ├── f16-gemm-minmax.yaml ├── f16-ibilinear-chw.cc ├── f16-ibilinear-chw.yaml ├── f16-ibilinear.cc ├── f16-ibilinear.yaml ├── f16-igemm-minmax.cc ├── f16-igemm-minmax.yaml ├── f16-qs8-vcvt.cc ├── f16-qu8-vcvt.cc ├── f16-raddstoreexpminusmax.cc ├── f16-raddstoreexpminusmax.yaml ├── f16-vabs.cc ├── f16-vadd.cc ├── f16-vaddc.cc ├── f16-vapproxgelu.cc ├── f16-vclamp.cc ├── f16-vcmul.cc ├── f16-vcos.cc ├── f16-vdiv.cc ├── f16-vdivc.cc ├── f16-velu.cc ├── f16-vexp.cc ├── f16-vgelu.cc ├── f16-vhswish.cc ├── f16-vlrelu.cc ├── f16-vmax.cc ├── f16-vmaxc.cc ├── f16-vmin.cc ├── f16-vminc.cc ├── f16-vmul.cc ├── f16-vmulc.cc ├── f16-vmulcaddc-minmax.cc ├── f16-vneg.cc ├── f16-vprelu.cc ├── f16-vpreluc.cc ├── f16-vrdivc.cc ├── f16-vrndd.cc ├── f16-vrndne.cc ├── f16-vrndu.cc ├── f16-vrndz.cc ├── f16-vrpreluc.cc ├── f16-vrsqrt.cc ├── f16-vrsubc.cc ├── f16-vsigmoid.cc ├── f16-vsin.cc ├── f16-vsqr.cc ├── f16-vsqrdiff.cc ├── f16-vsqrdiffc.cc ├── f16-vsqrt.cc ├── f16-vsub.cc ├── f16-vsubc.cc ├── f16-vtanh.cc ├── f32-argmaxpool.cc ├── f32-argmaxpool.yaml ├── f32-conv-hwc.cc ├── f32-conv-hwc2chw.cc ├── f32-conv-hwc2chw.yaml ├── f32-dwconv-minmax.cc ├── f32-dwconv.cc ├── f32-dwconv2d-chw.cc ├── f32-dwconv2d-chw.yaml ├── f32-f16-vcvt.cc ├── f32-gemm-2.cc ├── f32-gemm-minmax-2.cc ├── f32-gemm-minmax.cc ├── f32-gemm-minmax.yaml ├── f32-gemm-relu-2.cc ├── f32-gemm-relu.cc ├── f32-gemm-relu.yaml ├── f32-gemm.cc ├── f32-gemm.yaml ├── f32-ibilinear-chw.cc ├── f32-ibilinear-chw.yaml ├── f32-ibilinear.cc ├── f32-ibilinear.yaml ├── f32-igemm-2.cc ├── f32-igemm-minmax-2.cc ├── f32-igemm-minmax.cc ├── f32-igemm-minmax.yaml ├── f32-igemm-relu-2.cc ├── f32-igemm-relu.cc ├── f32-igemm-relu.yaml ├── f32-igemm.cc ├── f32-igemm.yaml ├── f32-ppmm-minmax.cc ├── f32-ppmm-minmax.yaml ├── f32-qc4w-gemm-minmax.cc ├── f32-qc4w-gemm-minmax.yaml ├── f32-qc8w-gemm-minmax.cc ├── f32-qc8w-gemm-minmax.yaml ├── f32-qc8w-gemm-relu.cc ├── f32-qc8w-gemm-relu.yaml ├── f32-qc8w-gemm.cc ├── f32-qc8w-gemm.yaml ├── f32-qs8-vcvt.cc ├── f32-qu8-vcvt.cc ├── f32-raddexpminusmax.cc ├── f32-raddexpminusmax.yaml ├── f32-raddextexp.cc ├── f32-raddstoreexpminusmax.cc ├── f32-raddstoreexpminusmax.yaml ├── f32-vabs.cc ├── f32-vadd.cc ├── f32-vaddc.cc ├── f32-vapproxgelu.cc ├── f32-vclamp.cc ├── f32-vcmul.cc ├── f32-vcopysign.cc ├── f32-vcopysignc.cc ├── f32-vcos.cc ├── f32-vdiv.cc ├── f32-vdivc.cc ├── f32-velu.cc ├── f32-vexp.cc ├── f32-vgelu.cc ├── f32-vhswish.cc ├── f32-vlog.cc ├── f32-vlrelu.cc ├── f32-vmax.cc ├── f32-vmaxc.cc ├── f32-vmin.cc ├── f32-vminc.cc ├── f32-vmul.cc ├── f32-vmulc.cc ├── f32-vmulcaddc-minmax.cc ├── f32-vneg.cc ├── f32-vprelu.cc ├── f32-vpreluc.cc ├── f32-vrcopysignc.cc ├── f32-vrdivc.cc ├── f32-vrndd.cc ├── f32-vrndne.cc ├── f32-vrndu.cc ├── f32-vrndz.cc ├── f32-vrpreluc.cc ├── f32-vrsqrt.cc ├── f32-vrsubc.cc ├── f32-vscaleexpminusmax.cc ├── f32-vscaleextexp.cc ├── f32-vsigmoid.cc ├── f32-vsin.cc ├── f32-vsqr.cc ├── f32-vsqrdiff.cc ├── f32-vsqrdiffc.cc ├── f32-vsqrt.cc ├── f32-vsub.cc ├── f32-vsubc.cc ├── f32-vtanh.cc ├── fingerprint.cc ├── fingerprint_cache.cc ├── gemm-microkernel-tester.cc ├── gemm-microkernel-tester.h ├── ibilinear-microkernel-tester.h ├── indirection.cc ├── lut-microkernel-tester.h ├── maxpool-microkernel-tester.h ├── maxpool-minmax.cc ├── microkernel-utils.cc ├── microkernel_lists_test.sh ├── mutex.cc ├── next_prime.cc ├── next_prime.h ├── operators │ ├── BUILD │ ├── CMakeLists.txt │ ├── argmax-pooling-nhwc.cc │ ├── argmax-pooling-operator-tester.h │ ├── average-pooling-nhwc.cc │ ├── average-pooling-operator-tester.h │ ├── batch-matrix-multiply-nc.cc │ ├── batch-matrix-multiply-operator-tester.h │ ├── binary-elementwise-nd.cc │ ├── constant-pad-nd-eager.cc │ ├── constant-pad-nd.cc │ ├── constant-pad-operator-tester.h │ ├── convert-nc.cc │ ├── convolution-nchw.cc │ ├── convolution-nhwc.cc │ ├── convolution-operator-tester.h │ ├── convolution-test-helpers.cc │ ├── convolution-test-helpers.h │ ├── copy-nc-eager.cc │ ├── copy-nc.cc │ ├── copy-operator-tester.h │ ├── deconvolution-nhwc-qd8-f32-qc8w.cc │ ├── deconvolution-nhwc.cc │ ├── deconvolution-operator-tester.h │ ├── depth-to-space-nchw2nhwc.cc │ ├── depth-to-space-nhwc.cc │ ├── depth-to-space-operator-tester.h │ ├── dynamic-fully-connected-nc.cc │ ├── dynamic-fully-connected-operator-tester.h │ ├── fully-connected-nc.cc │ ├── fully-connected-operator-tester.h │ ├── max-pooling-nhwc.cc │ ├── max-pooling-operator-tester.h │ ├── operator-test-utils.h │ ├── operator-utils.cc │ ├── reduce-nd.cc │ ├── reduce-normalization-tester.h │ ├── reduce-normalization.cc │ ├── resize-bilinear-nchw.cc │ ├── resize-bilinear-nhwc.cc │ ├── resize-bilinear-operator-tester.h │ ├── rope-nthc.cc │ ├── rope-operator-tester.h │ ├── slice-nd-eager.cc │ ├── slice-nd.cc │ ├── slice-normalization-tester.h │ ├── slice-normalization.cc │ ├── slice-operator-tester.h │ ├── softmax-nc.cc │ ├── softmax-operator-tester.h │ ├── space-to-depth-nhwc.cc │ ├── space-to-depth-operator-tester.h │ ├── transpose-nd-eager.cc │ ├── transpose-nd.cc │ ├── transpose-normalization-tester.h │ ├── transpose-normalization.cc │ ├── transpose-operator-tester.h │ ├── unary-elementwise-nc.cc │ ├── unpooling-nhwc.cc │ └── unpooling-operator-tester.h ├── pack-microkernel-tester.h ├── packb-microkernel-tester.h ├── packing.cc ├── packq-microkernel-tester.cc ├── packq-microkernel-tester.h ├── packw-microkernel-tester.h ├── pf16-f16-igemm-minmax.cc ├── pf16-f16-igemm-minmax.yaml ├── pf16-gemm-minmax.cc ├── pf16-gemm-minmax.yaml ├── pf32-gemm-minmax.cc ├── pf32-gemm-minmax.yaml ├── pqs8-qc8w-gemm-minmax.cc ├── pqs8-qc8w-gemm-minmax.yaml ├── qb4-packw.cc ├── qd8-f16-qb4w-gemm-minmax.cc ├── qd8-f16-qb4w-gemm-minmax.yaml ├── qd8-f16-qc4w-gemm-minmax-2.cc ├── qd8-f16-qc4w-gemm-minmax-3.cc ├── qd8-f16-qc4w-gemm-minmax-4.cc ├── qd8-f16-qc4w-gemm-minmax.cc ├── qd8-f16-qc4w-gemm-minmax.yaml ├── qd8-f16-qc8w-gemm-minmax-2.cc ├── qd8-f16-qc8w-gemm-minmax-3.cc ├── qd8-f16-qc8w-gemm-minmax-4.cc ├── qd8-f16-qc8w-gemm-minmax.cc ├── qd8-f16-qc8w-gemm-minmax.yaml ├── qd8-f16-qc8w-igemm-minmax-2.cc ├── qd8-f16-qc8w-igemm-minmax-3.cc ├── qd8-f16-qc8w-igemm-minmax-4.cc ├── qd8-f16-qc8w-igemm-minmax.cc ├── qd8-f16-qc8w-igemm-minmax.yaml ├── qd8-f32-qb4w-gemm-minmax.cc ├── qd8-f32-qb4w-gemm-minmax.yaml ├── qd8-f32-qc2w-gemm-minmax.cc ├── qd8-f32-qc2w-gemm-minmax.yaml ├── qd8-f32-qc4w-gemm-minmax-2.cc ├── qd8-f32-qc4w-gemm-minmax-3.cc ├── qd8-f32-qc4w-gemm-minmax-4.cc ├── qd8-f32-qc4w-gemm-minmax.cc ├── qd8-f32-qc4w-gemm-minmax.yaml ├── qd8-f32-qc8w-gemm-minmax-2.cc ├── qd8-f32-qc8w-gemm-minmax-3.cc ├── qd8-f32-qc8w-gemm-minmax-4.cc ├── qd8-f32-qc8w-gemm-minmax.cc ├── qd8-f32-qc8w-gemm-minmax.yaml ├── qd8-f32-qc8w-igemm-minmax-2.cc ├── qd8-f32-qc8w-igemm-minmax-3.cc ├── qd8-f32-qc8w-igemm-minmax.cc ├── qd8-f32-qc8w-igemm-minmax.yaml ├── qp8-f32-qb4w-gemm-minmax.cc ├── qp8-f32-qb4w-gemm-minmax.yaml ├── qp8-f32-qc4w-gemm-minmax.cc ├── qp8-f32-qc4w-gemm-minmax.yaml ├── qp8-f32-qc8w-gemm-minmax.cc ├── qp8-f32-qc8w-gemm-minmax.yaml ├── qs8-dwconv-minmax-fp32.cc ├── qs8-dwconv-minmax-rndnu.cc ├── qs8-f16-vcvt.cc ├── qs8-f32-vcvt.cc ├── qs8-packw.cc ├── qs8-qc4w-gemm-minmax-fp32.cc ├── qs8-qc4w-gemm-minmax-fp32.yaml ├── qs8-qc4w-packw.cc ├── qs8-qc8w-dwconv-minmax-fp32.cc ├── qs8-qc8w-gemm-minmax-fp32-2.cc ├── qs8-qc8w-gemm-minmax-fp32-3.cc ├── qs8-qc8w-gemm-minmax-fp32.cc ├── qs8-qc8w-gemm-minmax-fp32.yaml ├── qs8-qc8w-igemm-minmax-fp32-2.cc ├── qs8-qc8w-igemm-minmax-fp32-3.cc ├── qs8-qc8w-igemm-minmax-fp32.cc ├── qs8-qc8w-igemm-minmax-fp32.yaml ├── qs8-vadd-minmax.cc ├── qs8-vaddc-minmax.cc ├── qs8-vcvt.cc ├── qs8-vlrelu.cc ├── qs8-vmul-minmax-fp32.cc ├── qs8-vmul-minmax-rndnu.cc ├── qs8-vmulc-minmax-fp32.cc ├── qs8-vmulc-minmax-rndnu.cc ├── qs8-vprelu.cc ├── qs8-vpreluc.cc ├── qs8-vrpreluc.cc ├── qu8-dwconv-minmax-fp32.cc ├── qu8-dwconv-minmax-rndnu.cc ├── qu8-f32-vcvt.cc ├── qu8-gemm-minmax-fp32-2.cc ├── qu8-gemm-minmax-fp32.cc ├── qu8-gemm-minmax-fp32.yaml ├── qu8-gemm-minmax-rndnu-2.cc ├── qu8-gemm-minmax-rndnu.cc ├── qu8-gemm-minmax-rndnu.yaml ├── qu8-gemm-minmax-rndnu16.cc ├── qu8-igemm-minmax-fp32-2.cc ├── qu8-igemm-minmax-fp32.cc ├── qu8-igemm-minmax-fp32.yaml ├── qu8-igemm-minmax-rndnu-2.cc ├── qu8-igemm-minmax-rndnu.cc ├── qu8-igemm-minmax-rndnu.yaml ├── qu8-vadd-minmax.cc ├── qu8-vaddc-minmax.cc ├── qu8-vcvt.cc ├── qu8-vlrelu.cc ├── qu8-vmul-minmax-fp32.cc ├── qu8-vmul-minmax-rndnu.cc ├── qu8-vmulc-minmax-fp32.cc ├── qu8-vmulc-minmax-rndnu.cc ├── qu8-vprelu.cc ├── qu8-vpreluc.cc ├── qu8-vrpreluc.cc ├── raddexpminusmax-microkernel-tester.h ├── raddstoreexpminusmax-microkernel-tester.h ├── rdminmax.cc ├── rdsum.cc ├── rdsum2.cc ├── replicable_random_device.h ├── rminmax.cc ├── rsum.cc ├── rsum2.cc ├── s8-ibilinear.cc ├── s8-ibilinear.yaml ├── s8-vclamp.cc ├── simd │ ├── BUILD │ ├── f16-simd-avx512fp16.cc │ ├── f16-simd-neonfp16arith.cc │ ├── f16-simd-scalar.cc │ ├── f16-simd.cc.in │ ├── f32-simd-avx.cc │ ├── f32-simd-avx2.cc │ ├── f32-simd-avx512f.cc │ ├── f32-simd-fma3.cc │ ├── f32-simd-hvx.cc │ ├── f32-simd-neon.cc │ ├── f32-simd-scalar.cc │ ├── f32-simd-sse2.cc │ ├── f32-simd-sse2fma.cc │ ├── f32-simd-wasmrelaxedsimd.cc │ ├── f32-simd-wasmsimd.cc │ ├── f32-simd.cc.in │ ├── s16-simd-avx2.cc │ ├── s16-simd-avx512skx.cc │ ├── s16-simd-neon.cc │ ├── s16-simd-scalar.cc │ ├── s16-simd-sse41.cc │ ├── s16-simd-wasmsimd.cc │ ├── s16-simd.cc.in │ ├── s32-simd-avx2.cc │ ├── s32-simd-avx512f.cc │ ├── s32-simd-hvx.cc │ ├── s32-simd-neon.cc │ ├── s32-simd-scalar.cc │ ├── s32-simd-sse41.cc │ ├── s32-simd-wasmsimd.cc │ ├── s32-simd.cc.in │ ├── s8-simd-hvx.cc │ ├── s8-simd-neon.cc │ ├── s8-simd-scalar.cc │ ├── s8-simd-sse41.cc │ ├── s8-simd-wasmsimd.cc │ ├── s8-simd.cc.in │ ├── u8-simd-hvx.cc │ ├── u8-simd-neon.cc │ ├── u8-simd-scalar.cc │ ├── u8-simd-sse2.cc │ ├── u8-simd-wasmsimd.cc │ └── u8-simd.cc.in ├── spmm-minmax.cc ├── subgraph │ ├── BUILD │ ├── CMakeLists.txt │ ├── argmax-pooling-2d.cc │ ├── average-pooling-2d.cc │ ├── batch-matrix-multiply.cc │ ├── binary.cc │ ├── broadcast.cc │ ├── concatenate.cc │ ├── convolution-2d.cc │ ├── copy.cc │ ├── deconvolution-2d.cc │ ├── depth-to-space-2d.cc │ ├── depthwise-convolution-2d.cc │ ├── even-split.cc │ ├── fully-connected.cc │ ├── fusion.cc │ ├── input-output.cc │ ├── max-pooling-2d.cc │ ├── memory-planner.cc │ ├── mock-allocator.h │ ├── quantization-helpers.h │ ├── rewrites.cc │ ├── rope.cc │ ├── runtime-flags.cc │ ├── runtime-flags.h │ ├── runtime-tester.h │ ├── runtime.cc │ ├── softmax.cc │ ├── space-to-depth-2d.cc │ ├── split-fuse.cc │ ├── static-constant-pad.cc │ ├── static-expand-dims.cc │ ├── static-reduce.cc │ ├── static-reshape.cc │ ├── static-resize-bilinear-2d.cc │ ├── static-slice.cc │ ├── static-transpose.cc │ ├── stencil.cc │ ├── stencil.h │ ├── subgraph-fp16.cc │ ├── subgraph-nchw.cc │ ├── subgraph-size.c │ ├── subgraph-tester.cc │ ├── subgraph-tester.h │ ├── subgraph.cc │ ├── unary.cc │ ├── unpooling-2d.cc │ └── workspace.cc ├── u8-ibilinear.cc ├── u8-ibilinear.yaml ├── u8-lut32norm.cc ├── u8-vclamp.cc ├── unary-ops.cc ├── unary-ops.h ├── unpool-microkernel-tester.h ├── vbinary-microkernel-tester.cc ├── vbinary-microkernel-tester.h ├── vcmul-microkernel-tester.h ├── vmulcaddc-microkernel-tester.h ├── vunary-microkernel-tester.h ├── weights-cache.cc ├── x16-packw.cc ├── x16-x32-packw.cc ├── x32-packw.cc ├── x32-packx.cc ├── x32-unpool.cc ├── x8-lut.cc ├── x8-lut.yaml ├── x8-packq.cc ├── x8-packw.cc ├── xN-transpose.cc ├── xx-fill.cc └── xx-pad.cc ├── third_party ├── BUILD └── FP16.BUILD ├── tools ├── BUILD ├── generate-argmaxpool-test.py ├── generate-conv-hwc-test.py ├── generate-conv-hwc2chw-test.py ├── generate-dwconv-test.py ├── generate-dwconv2d-chw-test.py ├── generate-gemm-test.py ├── generate-ibilinear-chw-test.py ├── generate-ibilinear-test.py ├── generate-lut-test.py ├── generate-raddexpminusmax-test.py ├── generate-raddstoreexpminusmax-test.py ├── generate-reduce-discontiguous-test.py ├── generate-reduce-test.py ├── generate-spmm-test.py ├── generate-vbinary-test.py ├── generate-vunary-test.py ├── parse-microkernel-bench.py ├── primes.py ├── update-microkernels.py ├── xngen ├── xngen.py └── xnncommon.py └── ynnpack ├── BUILD ├── README.md ├── base ├── BUILD ├── arch.cc ├── arch.h ├── arithmetic.h ├── base.h ├── bfloat16.h ├── bit_cast.h ├── fp16.h ├── half.h ├── log.h ├── simd │ ├── BUILD │ ├── arm_neon.h │ ├── multi_vec.h │ ├── test │ │ ├── BUILD │ │ ├── arm_neon.cc │ │ ├── generic.h │ │ ├── x86_avx.cc │ │ ├── x86_avx2.cc │ │ ├── x86_avx512bw.cc │ │ ├── x86_avx512f.cc │ │ ├── x86_sse2.cc │ │ └── x86_sse41.cc │ ├── vec.h │ ├── x86_avx.h │ ├── x86_avx2.h │ ├── x86_avx512bw.h │ ├── x86_avx512f.h │ ├── x86_sse2.h │ └── x86_sse41.h ├── test │ ├── BUILD │ ├── buffer.h │ ├── float_test.cc │ ├── fuzz_test.h │ ├── random.h │ ├── random_test.cc │ ├── tensor.h │ ├── tensor_test.cc │ ├── util.cc │ └── util.h ├── to_string.cc ├── to_string.h ├── type.cc └── type.h ├── build_defs.bzl ├── include └── ynnpack.h ├── kernels ├── binary │ ├── BUILD │ ├── bench.cc │ ├── binary.cc │ ├── binary.h │ ├── generator.py │ ├── kernels.inc │ ├── kernels.py │ ├── reference.cc │ ├── reference.h │ └── test.cc ├── dot │ ├── BUILD │ ├── arm64_sme.cc │ ├── arm64_sme.h │ ├── arm64_sme2.cc │ ├── arm64_sme_internal.h │ ├── arm64_sme_stubs.cc │ ├── bench.cc │ ├── consistent_arithmetic_test.cc │ ├── dot.cc │ ├── dot.h │ ├── generator │ │ ├── BUILD │ │ ├── arm.py │ │ ├── arm_bf16_bf16_fp32.py │ │ ├── arm_bf16_bf16_fp32_k2.py │ │ ├── arm_fp32.py │ │ ├── arm_int8_int8_int32.py │ │ ├── dot_base.py │ │ ├── generator.py │ │ ├── x86.py │ │ ├── x86_bf16_bf16_fp32.py │ │ ├── x86_fp16_fp16_fp32.py │ │ ├── x86_fp32.py │ │ ├── x86_fp32_k2.py │ │ ├── x86_fp32_k4.py │ │ ├── x86_int8_int8_int32.py │ │ ├── x86_int8_int8_int32_k16.py │ │ ├── x86_uint8_int8_int32.py │ │ └── x86_uint8_int8_int32_k16.py │ ├── get_dot_kernel_test.cc │ ├── kernels.inc │ ├── pack.cc │ ├── pack.h │ ├── pack_test.cc │ ├── pack_test_tensor.h │ ├── schedule.cc │ ├── schedule.h │ ├── schedule_test.cc │ ├── test.cc │ ├── x86_amx.h │ ├── x86_amxbf16.cc │ ├── x86_amxfp16.cc │ └── x86_amxint8.cc ├── elementwise │ ├── BUILD │ ├── arm.py │ ├── common_rules.py │ ├── compiler.py │ ├── compiler_test.py │ ├── generator.py │ └── x86.py ├── lut │ ├── BUILD │ ├── bench.cc │ ├── kernels.inc │ ├── lut.cc │ ├── lut.h │ └── test.cc ├── reduce │ ├── BUILD │ ├── arm_neon.cc │ ├── arm_neonbf16.cc │ ├── arm_neondot.cc │ ├── arm_neonfp16arith.cc │ ├── bench.cc │ ├── generic.h │ ├── max.inc │ ├── min.inc │ ├── min_max.inc │ ├── min_max_accumulator.h │ ├── reduce.cc │ ├── reduce.h │ ├── sum.inc │ ├── sum_accumulator.h │ ├── sum_squared.inc │ ├── test.cc │ ├── x86_avx2.cc │ ├── x86_avx2_fma3.cc │ ├── x86_avx512bf16.cc │ ├── x86_avx512bw.cc │ ├── x86_avx512f.cc │ ├── x86_f16c.cc │ ├── x86_sse2.cc │ ├── x86_sse41.cc │ └── x86_ssse3.cc ├── ternary │ ├── BUILD │ ├── bench.cc │ ├── convert.py │ ├── generator.py │ ├── kernels.inc │ ├── kernels.py │ ├── reference.cc │ ├── reference.h │ ├── ternary.cc │ ├── ternary.h │ └── test.cc ├── transpose │ ├── BUILD │ ├── arm_neon.cc │ ├── arm_neon.h │ ├── bench.cc │ ├── generic.h │ ├── interleave.cc │ ├── interleave.h │ ├── interleave.inc │ ├── switch_element_size.h │ ├── test.cc │ ├── transpose.cc │ ├── transpose.h │ ├── transpose.inc │ ├── x86_avx.cc │ ├── x86_avx2.cc │ ├── x86_avx2.h │ ├── x86_sse2.cc │ └── x86_sse2.h └── unary │ ├── BUILD │ ├── bench.cc │ ├── convert.py │ ├── exp.py │ ├── generator.py │ ├── kernels.inc │ ├── kernels.py │ ├── reference.cc │ ├── reference.h │ ├── sigmoid.py │ ├── sine_cosine.py │ ├── tanh.py │ ├── test.cc │ ├── unary.cc │ └── unary.h ├── subgraph ├── BUILD ├── broadcast.cc ├── broadcast_like.cc ├── concatenate.cc ├── copy.cc ├── dot.cc ├── elementwise.cc ├── elementwise.h ├── even_split.cc ├── fusion.cc ├── get_tensor_shape.cc ├── perfetto.cc ├── perfetto.h ├── reduce.cc ├── runtime.cc ├── runtime.h ├── slinky.cc ├── slinky.h ├── slinky_thread_pool.cc ├── slinky_thread_pool.h ├── stack.cc ├── static_pad.cc ├── static_slice.cc ├── static_transpose.cc ├── stencil_copy.cc ├── subgraph.cc ├── subgraph.h ├── tensor.cc ├── tensor.h ├── test │ ├── BUILD │ ├── binary.cc │ ├── broadcast_like.cc │ ├── concatenate.cc │ ├── copy.cc │ ├── dot.cc │ ├── errors.cc │ ├── even_split.cc │ ├── fold_constants.cc │ ├── fuse_dim.cc │ ├── fuse_dims.cc │ ├── fusion.cc │ ├── get_tensor_shape.cc │ ├── invalidate_dead_values.cc │ ├── reduce.cc │ ├── runtime.cc │ ├── scheduler.cc │ ├── scheduler.h │ ├── slinky_thread_pool.cc │ ├── split_dim.cc │ ├── stack.cc │ ├── static_broadcast.cc │ ├── static_expand_dims.cc │ ├── static_pad.cc │ ├── static_reshape.cc │ ├── static_slice.cc │ ├── static_transpose.cc │ ├── stencil_copy.cc │ ├── subgraph_builder.cc │ ├── subgraph_builder.h │ └── unary.cc ├── threadpool.cc ├── utils.cc └── utils.h └── xnnpack ├── BUILD ├── deprecated.cc ├── dynamic_quantization.cc ├── dynamic_quantization.h ├── dynamic_quantization_test.cc ├── operator.cc ├── runtime.cc ├── subgraph.cc ├── tensor.cc ├── utils.cc ├── utils.h ├── weights_cache.cc ├── workspace.cc └── xnnpack.h /.bazelrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/.bazelrc -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/.github/workflows/build.yml -------------------------------------------------------------------------------- /.github/workflows/on-push.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/.github/workflows/on-push.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/.gitignore -------------------------------------------------------------------------------- /BUILD.bazel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/BUILD.bazel -------------------------------------------------------------------------------- /BUILD.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/BUILD.md -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /DEPS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/DEPS -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/LICENSE -------------------------------------------------------------------------------- /MODULE.bazel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/MODULE.bazel -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/README.md -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/WORKSPACE -------------------------------------------------------------------------------- /WORKSPACE.bzlmod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/WORKSPACE.bzlmod -------------------------------------------------------------------------------- /bench/BUILD.bazel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/BUILD.bazel -------------------------------------------------------------------------------- /bench/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/CMakeLists.txt -------------------------------------------------------------------------------- /bench/bf16-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/bf16-gemm.cc -------------------------------------------------------------------------------- /bench/bgemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/bgemm.h -------------------------------------------------------------------------------- /bench/conv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/conv.h -------------------------------------------------------------------------------- /bench/dconv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/dconv.h -------------------------------------------------------------------------------- /bench/dwconv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/dwconv.h -------------------------------------------------------------------------------- /bench/f16-conv-hwc2chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-conv-hwc2chw.cc -------------------------------------------------------------------------------- /bench/f16-dwconv.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-dwconv.cc -------------------------------------------------------------------------------- /bench/f16-dwconv2d-chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-dwconv2d-chw.cc -------------------------------------------------------------------------------- /bench/f16-f32acc-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-f32acc-gemm.cc -------------------------------------------------------------------------------- /bench/f16-f32acc-igemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-f32acc-igemm.cc -------------------------------------------------------------------------------- /bench/f16-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-gemm-minmax.cc -------------------------------------------------------------------------------- /bench/f16-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-gemm.cc -------------------------------------------------------------------------------- /bench/f16-igemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-igemm.cc -------------------------------------------------------------------------------- /bench/f16-vcmul.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f16-vcmul.cc -------------------------------------------------------------------------------- /bench/f32-bgemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-bgemm.cc -------------------------------------------------------------------------------- /bench/f32-conv-hwc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-conv-hwc.cc -------------------------------------------------------------------------------- /bench/f32-conv-hwc2chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-conv-hwc2chw.cc -------------------------------------------------------------------------------- /bench/f32-dwconv.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-dwconv.cc -------------------------------------------------------------------------------- /bench/f32-dwconv2d-chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-dwconv2d-chw.cc -------------------------------------------------------------------------------- /bench/f32-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-gemm-minmax.cc -------------------------------------------------------------------------------- /bench/f32-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-gemm.cc -------------------------------------------------------------------------------- /bench/f32-igemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-igemm.cc -------------------------------------------------------------------------------- /bench/f32-qc4w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-qc4w-gemm.cc -------------------------------------------------------------------------------- /bench/f32-qc8w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-qc8w-gemm.cc -------------------------------------------------------------------------------- /bench/f32-raddexpminusmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-raddexpminusmax.cc -------------------------------------------------------------------------------- /bench/f32-raddextexp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-raddextexp.cc -------------------------------------------------------------------------------- /bench/f32-softmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-softmax.cc -------------------------------------------------------------------------------- /bench/f32-vcmul.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-vcmul.cc -------------------------------------------------------------------------------- /bench/f32-vscaleextexp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/f32-vscaleextexp.cc -------------------------------------------------------------------------------- /bench/gemm-benchmark.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/gemm-benchmark.cc -------------------------------------------------------------------------------- /bench/gemm-benchmark.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/gemm-benchmark.h -------------------------------------------------------------------------------- /bench/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/gemm.h -------------------------------------------------------------------------------- /bench/operators/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/operators/BUILD -------------------------------------------------------------------------------- /bench/operators/binary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/operators/binary.cc -------------------------------------------------------------------------------- /bench/operators/prelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/operators/prelu.cc -------------------------------------------------------------------------------- /bench/operators/softmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/operators/softmax.cc -------------------------------------------------------------------------------- /bench/operators/unary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/operators/unary.cc -------------------------------------------------------------------------------- /bench/packq-benchmark.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/packq-benchmark.cc -------------------------------------------------------------------------------- /bench/packq-benchmark.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/packq-benchmark.h -------------------------------------------------------------------------------- /bench/packw-benchmark.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/packw-benchmark.h -------------------------------------------------------------------------------- /bench/pf16-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/pf16-gemm-minmax.cc -------------------------------------------------------------------------------- /bench/pf32-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/pf32-gemm-minmax.cc -------------------------------------------------------------------------------- /bench/qb4-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qb4-packw.cc -------------------------------------------------------------------------------- /bench/qd8-f16-qb4w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qd8-f16-qb4w-gemm.cc -------------------------------------------------------------------------------- /bench/qd8-f16-qc4w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qd8-f16-qc4w-gemm.cc -------------------------------------------------------------------------------- /bench/qd8-f16-qc8w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qd8-f16-qc8w-gemm.cc -------------------------------------------------------------------------------- /bench/qd8-f32-qb4w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qd8-f32-qb4w-gemm.cc -------------------------------------------------------------------------------- /bench/qd8-f32-qc2w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qd8-f32-qc2w-gemm.cc -------------------------------------------------------------------------------- /bench/qd8-f32-qc4w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qd8-f32-qc4w-gemm.cc -------------------------------------------------------------------------------- /bench/qd8-f32-qc8w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qd8-f32-qc8w-gemm.cc -------------------------------------------------------------------------------- /bench/qp8-f32-qb4w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qp8-f32-qb4w-gemm.cc -------------------------------------------------------------------------------- /bench/qp8-f32-qc4w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qp8-f32-qc4w-gemm.cc -------------------------------------------------------------------------------- /bench/qp8-f32-qc8w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qp8-f32-qc8w-gemm.cc -------------------------------------------------------------------------------- /bench/qs8-dwconv.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qs8-dwconv.cc -------------------------------------------------------------------------------- /bench/qs8-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qs8-gemm.cc -------------------------------------------------------------------------------- /bench/qs8-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qs8-packw.cc -------------------------------------------------------------------------------- /bench/qs8-qc4w-gemm-fp32.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qs8-qc4w-gemm-fp32.cc -------------------------------------------------------------------------------- /bench/qs8-qc4w-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qs8-qc4w-packw.cc -------------------------------------------------------------------------------- /bench/qs8-qc8w-gemm-fp32.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qs8-qc8w-gemm-fp32.cc -------------------------------------------------------------------------------- /bench/qu8-gemm-fp32.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qu8-gemm-fp32.cc -------------------------------------------------------------------------------- /bench/qu8-gemm-rndnu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qu8-gemm-rndnu.cc -------------------------------------------------------------------------------- /bench/qu8-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/qu8-gemm.cc -------------------------------------------------------------------------------- /bench/rdminmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/rdminmax.cc -------------------------------------------------------------------------------- /bench/rdsum.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/rdsum.cc -------------------------------------------------------------------------------- /bench/rdsum2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/rdsum2.cc -------------------------------------------------------------------------------- /bench/rminmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/rminmax.cc -------------------------------------------------------------------------------- /bench/rsum.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/rsum.cc -------------------------------------------------------------------------------- /bench/rsum2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/rsum2.cc -------------------------------------------------------------------------------- /bench/spmm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/spmm.cc -------------------------------------------------------------------------------- /bench/spmm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/spmm.h -------------------------------------------------------------------------------- /bench/subgraph/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/BUILD -------------------------------------------------------------------------------- /bench/subgraph/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/CMakeLists.txt -------------------------------------------------------------------------------- /bench/subgraph/attention.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/attention.cc -------------------------------------------------------------------------------- /bench/subgraph/benchmark.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/benchmark.cc -------------------------------------------------------------------------------- /bench/subgraph/benchmark.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/benchmark.h -------------------------------------------------------------------------------- /bench/subgraph/binary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/binary.cc -------------------------------------------------------------------------------- /bench/subgraph/convolution.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/convolution.cc -------------------------------------------------------------------------------- /bench/subgraph/elementwise.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/elementwise.cc -------------------------------------------------------------------------------- /bench/subgraph/l2-norm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/l2-norm.cc -------------------------------------------------------------------------------- /bench/subgraph/layer-norm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/layer-norm.cc -------------------------------------------------------------------------------- /bench/subgraph/mobilenet.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/mobilenet.cc -------------------------------------------------------------------------------- /bench/subgraph/models.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/models.h -------------------------------------------------------------------------------- /bench/subgraph/softmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/softmax.cc -------------------------------------------------------------------------------- /bench/subgraph/transformer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/transformer.cc -------------------------------------------------------------------------------- /bench/subgraph/unary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/subgraph/unary.cc -------------------------------------------------------------------------------- /bench/utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/utils.cc -------------------------------------------------------------------------------- /bench/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/utils.h -------------------------------------------------------------------------------- /bench/vbinary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/vbinary.cc -------------------------------------------------------------------------------- /bench/vunary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/vunary.cc -------------------------------------------------------------------------------- /bench/x16-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/x16-packw.cc -------------------------------------------------------------------------------- /bench/x16-x32-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/x16-x32-packw.cc -------------------------------------------------------------------------------- /bench/x32-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/x32-packw.cc -------------------------------------------------------------------------------- /bench/x8-lut.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/x8-lut.cc -------------------------------------------------------------------------------- /bench/x8-packq.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/x8-packq.cc -------------------------------------------------------------------------------- /bench/x8-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/x8-packw.cc -------------------------------------------------------------------------------- /bench/xN-transposec.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/xN-transposec.cc -------------------------------------------------------------------------------- /bench/xx-transposev.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/bench/xx-transposev.cc -------------------------------------------------------------------------------- /cmake/DownloadCpuinfo.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/DownloadCpuinfo.cmake -------------------------------------------------------------------------------- /cmake/DownloadFXdiv.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/DownloadFXdiv.cmake -------------------------------------------------------------------------------- /cmake/DownloadKleidiAI.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/DownloadKleidiAI.cmake -------------------------------------------------------------------------------- /cmake/aarch64.toolchain: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/aarch64.toolchain -------------------------------------------------------------------------------- /cmake/armhf.toolchain: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/armhf.toolchain -------------------------------------------------------------------------------- /cmake/gen/microkernels.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/gen/microkernels.cmake -------------------------------------------------------------------------------- /cmake/hexagon.toolchain: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/hexagon.toolchain -------------------------------------------------------------------------------- /cmake/hexagon.toolchain.v73: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/hexagon.toolchain.v73 -------------------------------------------------------------------------------- /cmake/riscv64.toolchain: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/riscv64.toolchain -------------------------------------------------------------------------------- /cmake/x64_arm64.toolchain: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/cmake/x64_arm64.toolchain -------------------------------------------------------------------------------- /doc/dwconv.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/doc/dwconv.md -------------------------------------------------------------------------------- /emscripten.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/emscripten.bzl -------------------------------------------------------------------------------- /gemm_compiler/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gemm_compiler/BUILD -------------------------------------------------------------------------------- /gemm_compiler/arm_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gemm_compiler/arm_template.py -------------------------------------------------------------------------------- /gemm_compiler/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gemm_compiler/generate.py -------------------------------------------------------------------------------- /gemm_compiler/x64_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gemm_compiler/x64_template.py -------------------------------------------------------------------------------- /gen/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/BUILD -------------------------------------------------------------------------------- /gen/aarch32_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/aarch32_microkernels.bzl -------------------------------------------------------------------------------- /gen/aarch64_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/aarch64_microkernels.bzl -------------------------------------------------------------------------------- /gen/amd64_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/amd64_microkernels.bzl -------------------------------------------------------------------------------- /gen/avx2_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/avx2_microkernels.bzl -------------------------------------------------------------------------------- /gen/avx512f_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/avx512f_microkernels.bzl -------------------------------------------------------------------------------- /gen/avx_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/avx_microkernels.bzl -------------------------------------------------------------------------------- /gen/avxvnni_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/avxvnni_microkernels.bzl -------------------------------------------------------------------------------- /gen/f16c_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/f16c_microkernels.bzl -------------------------------------------------------------------------------- /gen/fma3_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/fma3_microkernels.bzl -------------------------------------------------------------------------------- /gen/hexagon_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/hexagon_microkernels.bzl -------------------------------------------------------------------------------- /gen/hvx_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/hvx_microkernels.bzl -------------------------------------------------------------------------------- /gen/microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/microkernels.bzl -------------------------------------------------------------------------------- /gen/neon_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neon_microkernels.bzl -------------------------------------------------------------------------------- /gen/neonbf16_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neonbf16_microkernels.bzl -------------------------------------------------------------------------------- /gen/neondot_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neondot_microkernels.bzl -------------------------------------------------------------------------------- /gen/neonfma_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neonfma_microkernels.bzl -------------------------------------------------------------------------------- /gen/neonfp16_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neonfp16_microkernels.bzl -------------------------------------------------------------------------------- /gen/neoni8mm_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neoni8mm_microkernels.bzl -------------------------------------------------------------------------------- /gen/neonsme2_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neonsme2_microkernels.bzl -------------------------------------------------------------------------------- /gen/neonsme_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neonsme_microkernels.bzl -------------------------------------------------------------------------------- /gen/neonv8_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/neonv8_microkernels.bzl -------------------------------------------------------------------------------- /gen/rvv_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/rvv_microkernels.bzl -------------------------------------------------------------------------------- /gen/scalar_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/scalar_microkernels.bzl -------------------------------------------------------------------------------- /gen/sme_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/sme_microkernels.bzl -------------------------------------------------------------------------------- /gen/sse2_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/sse2_microkernels.bzl -------------------------------------------------------------------------------- /gen/sse2fma_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/sse2fma_microkernels.bzl -------------------------------------------------------------------------------- /gen/sse41_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/sse41_microkernels.bzl -------------------------------------------------------------------------------- /gen/sse_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/sse_microkernels.bzl -------------------------------------------------------------------------------- /gen/ssse3_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/ssse3_microkernels.bzl -------------------------------------------------------------------------------- /gen/wasm32_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/wasm32_microkernels.bzl -------------------------------------------------------------------------------- /gen/wasmsimd_microkernels.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/gen/wasmsimd_microkernels.bzl -------------------------------------------------------------------------------- /generated_file.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/generated_file.bzl -------------------------------------------------------------------------------- /include/experimental.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/include/experimental.h -------------------------------------------------------------------------------- /include/xnnpack.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/include/xnnpack.h -------------------------------------------------------------------------------- /preamble.js.lds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/preamble.js.lds -------------------------------------------------------------------------------- /register_extension_info.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/register_extension_info.bzl -------------------------------------------------------------------------------- /scripts/build-android-x86.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/build-android-x86.sh -------------------------------------------------------------------------------- /scripts/build-linux-armhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/build-linux-armhf.sh -------------------------------------------------------------------------------- /scripts/build-local.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/build-local.sh -------------------------------------------------------------------------------- /scripts/build-qurt-v68.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/build-qurt-v68.sh -------------------------------------------------------------------------------- /scripts/build-qurt-v73.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/build-qurt-v73.sh -------------------------------------------------------------------------------- /scripts/build-wasm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/build-wasm.sh -------------------------------------------------------------------------------- /scripts/build-windows-x64.cmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/build-windows-x64.cmd -------------------------------------------------------------------------------- /scripts/build-windows-x86.cmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/build-windows-x86.cmd -------------------------------------------------------------------------------- /scripts/check_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/check_config.py -------------------------------------------------------------------------------- /scripts/generate-bf16-gemm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-bf16-gemm.sh -------------------------------------------------------------------------------- /scripts/generate-f16-gemm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-gemm.sh -------------------------------------------------------------------------------- /scripts/generate-f16-igemm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-igemm.sh -------------------------------------------------------------------------------- /scripts/generate-f16-rsum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-rsum.sh -------------------------------------------------------------------------------- /scripts/generate-f16-spmm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-spmm.sh -------------------------------------------------------------------------------- /scripts/generate-f16-vcos.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-vcos.sh -------------------------------------------------------------------------------- /scripts/generate-f16-velu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-velu.sh -------------------------------------------------------------------------------- /scripts/generate-f16-vexp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-vexp.sh -------------------------------------------------------------------------------- /scripts/generate-f16-vgelu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-vgelu.sh -------------------------------------------------------------------------------- /scripts/generate-f16-vrnd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-vrnd.sh -------------------------------------------------------------------------------- /scripts/generate-f16-vsin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-vsin.sh -------------------------------------------------------------------------------- /scripts/generate-f16-vsqrt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-vsqrt.sh -------------------------------------------------------------------------------- /scripts/generate-f16-vtanh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f16-vtanh.sh -------------------------------------------------------------------------------- /scripts/generate-f32-gemm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-gemm.sh -------------------------------------------------------------------------------- /scripts/generate-f32-igemm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-igemm.sh -------------------------------------------------------------------------------- /scripts/generate-f32-ppmm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-ppmm.sh -------------------------------------------------------------------------------- /scripts/generate-f32-rdsum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-rdsum.sh -------------------------------------------------------------------------------- /scripts/generate-f32-rsum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-rsum.sh -------------------------------------------------------------------------------- /scripts/generate-f32-rsum2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-rsum2.sh -------------------------------------------------------------------------------- /scripts/generate-f32-spmm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-spmm.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vcmul.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vcmul.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vcos.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vcos.sh -------------------------------------------------------------------------------- /scripts/generate-f32-velu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-velu.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vexp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vexp.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vgelu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vgelu.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vlog.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vlog.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vrnd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vrnd.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vsin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vsin.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vsqrt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vsqrt.sh -------------------------------------------------------------------------------- /scripts/generate-f32-vtanh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-f32-vtanh.sh -------------------------------------------------------------------------------- /scripts/generate-qb4-packw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qb4-packw.sh -------------------------------------------------------------------------------- /scripts/generate-qs8-gemm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qs8-gemm.sh -------------------------------------------------------------------------------- /scripts/generate-qs8-igemm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qs8-igemm.sh -------------------------------------------------------------------------------- /scripts/generate-qs8-rdsum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qs8-rdsum.sh -------------------------------------------------------------------------------- /scripts/generate-qs8-rsum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qs8-rsum.sh -------------------------------------------------------------------------------- /scripts/generate-qs8-vadd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qs8-vadd.sh -------------------------------------------------------------------------------- /scripts/generate-qs8-vcvt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qs8-vcvt.sh -------------------------------------------------------------------------------- /scripts/generate-qs8-vmul.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qs8-vmul.sh -------------------------------------------------------------------------------- /scripts/generate-qu8-rdsum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qu8-rdsum.sh -------------------------------------------------------------------------------- /scripts/generate-qu8-rsum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-qu8-rsum.sh -------------------------------------------------------------------------------- /scripts/generate-tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-tests.sh -------------------------------------------------------------------------------- /scripts/generate-x16-packw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-x16-packw.sh -------------------------------------------------------------------------------- /scripts/generate-x32-packb.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-x32-packb.sh -------------------------------------------------------------------------------- /scripts/generate-x32-packw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-x32-packw.sh -------------------------------------------------------------------------------- /scripts/generate-x32-packx.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-x32-packx.sh -------------------------------------------------------------------------------- /scripts/generate-x4-packw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-x4-packw.sh -------------------------------------------------------------------------------- /scripts/generate-x8-lut.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-x8-lut.sh -------------------------------------------------------------------------------- /scripts/generate-x8-packw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-x8-packw.sh -------------------------------------------------------------------------------- /scripts/generate-x8-vclamp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/generate-x8-vclamp.sh -------------------------------------------------------------------------------- /scripts/genxnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/genxnn -------------------------------------------------------------------------------- /scripts/run-on-hexagon-sim.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/run-on-hexagon-sim.sh -------------------------------------------------------------------------------- /scripts/sort-filenames.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/scripts/sort-filenames.py -------------------------------------------------------------------------------- /src/allocator.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/allocator.c -------------------------------------------------------------------------------- /src/bf16-vunary/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/bf16-vunary/neon.c.in -------------------------------------------------------------------------------- /src/cache.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/cache.c -------------------------------------------------------------------------------- /src/configs/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/BUILD -------------------------------------------------------------------------------- /src/configs/avgpool-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/avgpool-config.c -------------------------------------------------------------------------------- /src/configs/cmul-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/cmul-config.c -------------------------------------------------------------------------------- /src/configs/dwconv-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/dwconv-config.c -------------------------------------------------------------------------------- /src/configs/gemm-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/gemm-config.c -------------------------------------------------------------------------------- /src/configs/hardware-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/hardware-config.c -------------------------------------------------------------------------------- /src/configs/maxpool-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/maxpool-config.c -------------------------------------------------------------------------------- /src/configs/pack-lh-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/pack-lh-config.c -------------------------------------------------------------------------------- /src/configs/reduce-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/reduce-config.c -------------------------------------------------------------------------------- /src/configs/spmm-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/spmm-config.c -------------------------------------------------------------------------------- /src/configs/unpool-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/unpool-config.c -------------------------------------------------------------------------------- /src/configs/x8-lut-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/x8-lut-config.c -------------------------------------------------------------------------------- /src/configs/xx-fill-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/xx-fill-config.c -------------------------------------------------------------------------------- /src/configs/xx-pad-config.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/configs/xx-pad-config.c -------------------------------------------------------------------------------- /src/datatype.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/datatype.c -------------------------------------------------------------------------------- /src/enums/allocation-type.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/enums/allocation-type.c -------------------------------------------------------------------------------- /src/enums/datatype-strings.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/enums/datatype-strings.c -------------------------------------------------------------------------------- /src/enums/microkernel-type.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/enums/microkernel-type.c -------------------------------------------------------------------------------- /src/enums/node-type.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/enums/node-type.c -------------------------------------------------------------------------------- /src/enums/operator-type.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/enums/operator-type.c -------------------------------------------------------------------------------- /src/f16-avgpool/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-avgpool/f16c.c.in -------------------------------------------------------------------------------- /src/f16-f32-vcvt/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-f32-vcvt/f16c.c.in -------------------------------------------------------------------------------- /src/f16-f32-vcvt/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-f32-vcvt/scalar.c.in -------------------------------------------------------------------------------- /src/f16-f32acc-rdsum/avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-f32acc-rdsum/avx.c.in -------------------------------------------------------------------------------- /src/f16-f32acc-rsum/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-f32acc-rsum/f16c.c.in -------------------------------------------------------------------------------- /src/f16-ibilinear/fma3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-ibilinear/fma3.c.in -------------------------------------------------------------------------------- /src/f16-rdminmax/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-rdminmax/simd.c.in -------------------------------------------------------------------------------- /src/f16-rminmax/f16-rmax.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-rminmax/f16-rmax.inc -------------------------------------------------------------------------------- /src/f16-rminmax/f16-rmin.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-rminmax/f16-rmin.inc -------------------------------------------------------------------------------- /src/f16-rminmax/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-rminmax/scalar.c.in -------------------------------------------------------------------------------- /src/f16-rsum/avx512fp16.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-rsum/avx512fp16.c.in -------------------------------------------------------------------------------- /src/f16-rsum/f16-rsum.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-rsum/f16-rsum.inc -------------------------------------------------------------------------------- /src/f16-vabs/f16-vabs.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vabs/f16-vabs.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vadd.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vadd.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vaddc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vaddc.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vcmul.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vcmul.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vdiv.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vdiv.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vdivc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vdivc.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vmax.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vmax.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vmaxc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vmaxc.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vmin.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vmin.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vminc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vminc.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vmul.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vmul.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vmulc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vmulc.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vsub.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vsub.inc -------------------------------------------------------------------------------- /src/f16-vbinary/f16-vsubc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/f16-vsubc.inc -------------------------------------------------------------------------------- /src/f16-vbinary/vop-f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vbinary/vop-f16c.c.in -------------------------------------------------------------------------------- /src/f16-vclamp/f16-vclamp.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vclamp/f16-vclamp.inc -------------------------------------------------------------------------------- /src/f16-vclamp/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vclamp/f16c.c.in -------------------------------------------------------------------------------- /src/f16-vcos/f16-vcos.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vcos/f16-vcos.inc -------------------------------------------------------------------------------- /src/f16-velu/avx2-rr1-p3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-velu/avx2-rr1-p3.c.in -------------------------------------------------------------------------------- /src/f16-velu/f16-velu.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-velu/f16-velu.inc -------------------------------------------------------------------------------- /src/f16-vexp/f16-vexp.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vexp/f16-vexp.inc -------------------------------------------------------------------------------- /src/f16-vexp/poly-3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vexp/poly-3.c.in -------------------------------------------------------------------------------- /src/f16-vgelu/f16-vgelu.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vgelu/f16-vgelu.inc -------------------------------------------------------------------------------- /src/f16-vhswish/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vhswish/f16c.c.in -------------------------------------------------------------------------------- /src/f16-vlrelu/f16-vlrelu.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vlrelu/f16-vlrelu.inc -------------------------------------------------------------------------------- /src/f16-vlrelu/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vlrelu/f16c.c.in -------------------------------------------------------------------------------- /src/f16-vmulcaddc/fma3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vmulcaddc/fma3.c.in -------------------------------------------------------------------------------- /src/f16-vneg/f16-vneg.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vneg/f16-vneg.inc -------------------------------------------------------------------------------- /src/f16-vrnd/f16-vrndd.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vrnd/f16-vrndd.inc -------------------------------------------------------------------------------- /src/f16-vrnd/f16-vrndne.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vrnd/f16-vrndne.inc -------------------------------------------------------------------------------- /src/f16-vrnd/f16-vrndu.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vrnd/f16-vrndu.inc -------------------------------------------------------------------------------- /src/f16-vrnd/f16-vrndz.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vrnd/f16-vrndz.inc -------------------------------------------------------------------------------- /src/f16-vrnd/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vrnd/f16c.c.in -------------------------------------------------------------------------------- /src/f16-vrsqrt/f16-vrsqrt.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vrsqrt/f16-vrsqrt.inc -------------------------------------------------------------------------------- /src/f16-vsigmoid/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vsigmoid/avx2.c.in -------------------------------------------------------------------------------- /src/f16-vsin/f16-vsin.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vsin/f16-vsin.inc -------------------------------------------------------------------------------- /src/f16-vsqr/f16-vsqr.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vsqr/f16-vsqr.inc -------------------------------------------------------------------------------- /src/f16-vsqrt/f16-vsqrt.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vsqrt/f16-vsqrt.inc -------------------------------------------------------------------------------- /src/f16-vsqrt/f16c-rsqrt.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vsqrt/f16c-rsqrt.c.in -------------------------------------------------------------------------------- /src/f16-vsqrt/f16c-sqrt.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vsqrt/f16c-sqrt.c.in -------------------------------------------------------------------------------- /src/f16-vtanh/f16-vtanh.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vtanh/f16-vtanh.inc -------------------------------------------------------------------------------- /src/f16-vunary/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vunary/f16c.c.in -------------------------------------------------------------------------------- /src/f16-vunary/sse2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f16-vunary/sse2.c.in -------------------------------------------------------------------------------- /src/f32-avgpool/avgpool.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-avgpool/avgpool.c.in -------------------------------------------------------------------------------- /src/f32-dwconv/f32-dwconv.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-dwconv/f32-dwconv.inc -------------------------------------------------------------------------------- /src/f32-dwconv/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-dwconv/simd.c.in -------------------------------------------------------------------------------- /src/f32-f16-vcvt/f16c.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-f16-vcvt/f16c.c.in -------------------------------------------------------------------------------- /src/f32-f16-vcvt/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-f16-vcvt/neon.c.in -------------------------------------------------------------------------------- /src/f32-f16-vcvt/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-f16-vcvt/sse.c.in -------------------------------------------------------------------------------- /src/f32-gemm/MRx2c4-sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/MRx2c4-sse.c.in -------------------------------------------------------------------------------- /src/f32-gemm/MRxNRv-rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/MRxNRv-rvv.c.in -------------------------------------------------------------------------------- /src/f32-gemm/neon-ld128.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/neon-ld128.c.in -------------------------------------------------------------------------------- /src/f32-gemm/neon-ld64.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/neon-ld64.c.in -------------------------------------------------------------------------------- /src/f32-gemm/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/scalar.c.in -------------------------------------------------------------------------------- /src/f32-gemm/sse-dup.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/sse-dup.c.in -------------------------------------------------------------------------------- /src/f32-gemm/sse-load1.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/sse-load1.c.in -------------------------------------------------------------------------------- /src/f32-gemm/sse-shuffle.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/sse-shuffle.c.in -------------------------------------------------------------------------------- /src/f32-gemm/wasmsimd-s4.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-gemm/wasmsimd-s4.c.in -------------------------------------------------------------------------------- /src/f32-ibilinear/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-ibilinear/neon.c.in -------------------------------------------------------------------------------- /src/f32-ibilinear/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-ibilinear/scalar.c.in -------------------------------------------------------------------------------- /src/f32-ibilinear/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-ibilinear/sse.c.in -------------------------------------------------------------------------------- /src/f32-igemm/MRx2c4-sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-igemm/MRx2c4-sse.c.in -------------------------------------------------------------------------------- /src/f32-igemm/MRxNRv-rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-igemm/MRxNRv-rvv.c.in -------------------------------------------------------------------------------- /src/f32-igemm/neon-ld128.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-igemm/neon-ld128.c.in -------------------------------------------------------------------------------- /src/f32-igemm/neon-ld64.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-igemm/neon-ld64.c.in -------------------------------------------------------------------------------- /src/f32-igemm/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-igemm/scalar.c.in -------------------------------------------------------------------------------- /src/f32-igemm/sse-dup.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-igemm/sse-dup.c.in -------------------------------------------------------------------------------- /src/f32-igemm/sse-load1.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-igemm/sse-load1.c.in -------------------------------------------------------------------------------- /src/f32-maxpool/maxpool.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-maxpool/maxpool.c.in -------------------------------------------------------------------------------- /src/f32-maxpool/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-maxpool/rvv.c.in -------------------------------------------------------------------------------- /src/f32-ppmm/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-ppmm/neon.c.in -------------------------------------------------------------------------------- /src/f32-ppmm/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-ppmm/scalar.c.in -------------------------------------------------------------------------------- /src/f32-ppmm/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-ppmm/sse.c.in -------------------------------------------------------------------------------- /src/f32-qs8-vcvt/avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-qs8-vcvt/avx.c.in -------------------------------------------------------------------------------- /src/f32-qs8-vcvt/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-qs8-vcvt/avx2.c.in -------------------------------------------------------------------------------- /src/f32-qs8-vcvt/hvx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-qs8-vcvt/hvx.c.in -------------------------------------------------------------------------------- /src/f32-qs8-vcvt/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-qs8-vcvt/neon.c.in -------------------------------------------------------------------------------- /src/f32-qs8-vcvt/neonv8.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-qs8-vcvt/neonv8.c.in -------------------------------------------------------------------------------- /src/f32-qs8-vcvt/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-qs8-vcvt/rvv.c.in -------------------------------------------------------------------------------- /src/f32-qs8-vcvt/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-qs8-vcvt/sse.c.in -------------------------------------------------------------------------------- /src/f32-rdminmax/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rdminmax/simd.c.in -------------------------------------------------------------------------------- /src/f32-rdsum/f32-rdsum.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rdsum/f32-rdsum.inc -------------------------------------------------------------------------------- /src/f32-rdsum/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rdsum/rvv.c.in -------------------------------------------------------------------------------- /src/f32-rdsum/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rdsum/simd.c.in -------------------------------------------------------------------------------- /src/f32-rdsum2/f32-rdsum2.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rdsum2/f32-rdsum2.inc -------------------------------------------------------------------------------- /src/f32-rdsum2/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rdsum2/simd.c.in -------------------------------------------------------------------------------- /src/f32-rminmax/f32-rmax.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rminmax/f32-rmax.inc -------------------------------------------------------------------------------- /src/f32-rminmax/f32-rmin.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rminmax/f32-rmin.inc -------------------------------------------------------------------------------- /src/f32-rminmax/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rminmax/rvv.c.in -------------------------------------------------------------------------------- /src/f32-rminmax/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rminmax/simd.c.in -------------------------------------------------------------------------------- /src/f32-rminmax/wasmsimd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rminmax/wasmsimd.c.in -------------------------------------------------------------------------------- /src/f32-rsum/f32-rsum.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rsum/f32-rsum.inc -------------------------------------------------------------------------------- /src/f32-rsum/hvx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rsum/hvx.c.in -------------------------------------------------------------------------------- /src/f32-rsum/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rsum/simd.c.in -------------------------------------------------------------------------------- /src/f32-rsum2/f32-rsum2.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rsum2/f32-rsum2.inc -------------------------------------------------------------------------------- /src/f32-rsum2/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-rsum2/simd.c.in -------------------------------------------------------------------------------- /src/f32-spmm/hvx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-spmm/hvx.c.in -------------------------------------------------------------------------------- /src/f32-spmm/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-spmm/neon.c.in -------------------------------------------------------------------------------- /src/f32-spmm/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-spmm/rvv.c.in -------------------------------------------------------------------------------- /src/f32-spmm/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-spmm/scalar.c.in -------------------------------------------------------------------------------- /src/f32-spmm/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-spmm/sse.c.in -------------------------------------------------------------------------------- /src/f32-spmm/wasmsimd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-spmm/wasmsimd.c.in -------------------------------------------------------------------------------- /src/f32-vabs/f32-vabs.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vabs/f32-vabs.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vadd.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vadd.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vaddc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vaddc.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vcmul.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vcmul.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vdiv.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vdiv.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vdivc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vdivc.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vmax.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vmax.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vmaxc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vmaxc.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vmin.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vmin.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vminc.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vminc.inc -------------------------------------------------------------------------------- /src/f32-vbinary/f32-vmul.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vbinary/f32-vmul.inc -------------------------------------------------------------------------------- /src/f32-vclamp/avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vclamp/avx.c.in -------------------------------------------------------------------------------- /src/f32-vclamp/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vclamp/neon.c.in -------------------------------------------------------------------------------- /src/f32-vclamp/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vclamp/rvv.c.in -------------------------------------------------------------------------------- /src/f32-vclamp/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vclamp/scalar.c.in -------------------------------------------------------------------------------- /src/f32-vclamp/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vclamp/simd.c.in -------------------------------------------------------------------------------- /src/f32-vclamp/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vclamp/sse.c.in -------------------------------------------------------------------------------- /src/f32-vcmul/avx512f.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vcmul/avx512f.c.in -------------------------------------------------------------------------------- /src/f32-vcmul/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vcmul/neon.c.in -------------------------------------------------------------------------------- /src/f32-vcmul/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vcmul/rvv.c.in -------------------------------------------------------------------------------- /src/f32-vcmul/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vcmul/scalar.c.in -------------------------------------------------------------------------------- /src/f32-vcos/f32-vcos.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vcos/f32-vcos.inc -------------------------------------------------------------------------------- /src/f32-velu/f32-velu.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-velu/f32-velu.inc -------------------------------------------------------------------------------- /src/f32-velu/neon-p6.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-velu/neon-p6.c.in -------------------------------------------------------------------------------- /src/f32-vexp/f32-vexp.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vexp/f32-vexp.inc -------------------------------------------------------------------------------- /src/f32-vgelu/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vgelu/scalar.c.in -------------------------------------------------------------------------------- /src/f32-vhswish/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vhswish/rvv.c.in -------------------------------------------------------------------------------- /src/f32-vhswish/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vhswish/simd.c.in -------------------------------------------------------------------------------- /src/f32-vlog/f32-vlog.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vlog/f32-vlog.inc -------------------------------------------------------------------------------- /src/f32-vlrelu/avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vlrelu/avx.c.in -------------------------------------------------------------------------------- /src/f32-vlrelu/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vlrelu/neon.c.in -------------------------------------------------------------------------------- /src/f32-vlrelu/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vlrelu/rvv.c.in -------------------------------------------------------------------------------- /src/f32-vlrelu/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vlrelu/scalar.c.in -------------------------------------------------------------------------------- /src/f32-vlrelu/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vlrelu/sse.c.in -------------------------------------------------------------------------------- /src/f32-vlrelu/wasm.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vlrelu/wasm.c.in -------------------------------------------------------------------------------- /src/f32-vmulcaddc/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vmulcaddc/sse.c.in -------------------------------------------------------------------------------- /src/f32-vneg/f32-vneg.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vneg/f32-vneg.inc -------------------------------------------------------------------------------- /src/f32-vrnd/avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/avx.c.in -------------------------------------------------------------------------------- /src/f32-vrnd/avx512f.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/avx512f.c.in -------------------------------------------------------------------------------- /src/f32-vrnd/f32-vrndd.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/f32-vrndd.inc -------------------------------------------------------------------------------- /src/f32-vrnd/f32-vrndu.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/f32-vrndu.inc -------------------------------------------------------------------------------- /src/f32-vrnd/f32-vrndz.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/f32-vrndz.inc -------------------------------------------------------------------------------- /src/f32-vrnd/neonv8.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/neonv8.c.in -------------------------------------------------------------------------------- /src/f32-vrnd/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/rvv.c.in -------------------------------------------------------------------------------- /src/f32-vrnd/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/simd.c.in -------------------------------------------------------------------------------- /src/f32-vrnd/sse41.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/sse41.c.in -------------------------------------------------------------------------------- /src/f32-vrnd/wasmsimd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrnd/wasmsimd.c.in -------------------------------------------------------------------------------- /src/f32-vrsqrt/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vrsqrt/rvv.c.in -------------------------------------------------------------------------------- /src/f32-vsin/f32-vsin.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vsin/f32-vsin.inc -------------------------------------------------------------------------------- /src/f32-vsqr/f32-vsqr.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vsqr/f32-vsqr.inc -------------------------------------------------------------------------------- /src/f32-vunary/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vunary/rvv.c.in -------------------------------------------------------------------------------- /src/f32-vunary/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/f32-vunary/simd.c.in -------------------------------------------------------------------------------- /src/indirection.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/indirection.c -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/init.c -------------------------------------------------------------------------------- /src/log.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/log.c -------------------------------------------------------------------------------- /src/memory-planner.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/memory-planner.c -------------------------------------------------------------------------------- /src/memory.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/memory.c -------------------------------------------------------------------------------- /src/microkernel-utils.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/microkernel-utils.c -------------------------------------------------------------------------------- /src/microparams-init.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/microparams-init.c -------------------------------------------------------------------------------- /src/mutex.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/mutex.c -------------------------------------------------------------------------------- /src/normalization.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/normalization.c -------------------------------------------------------------------------------- /src/operator-delete.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/operator-delete.c -------------------------------------------------------------------------------- /src/operator-run.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/operator-run.c -------------------------------------------------------------------------------- /src/operator-utils.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/operator-utils.c -------------------------------------------------------------------------------- /src/operators/pack-lh.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/operators/pack-lh.c -------------------------------------------------------------------------------- /src/operators/reduce-nd.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/operators/reduce-nd.c -------------------------------------------------------------------------------- /src/operators/rope-nthc.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/operators/rope-nthc.c -------------------------------------------------------------------------------- /src/operators/slice-nd.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/operators/slice-nd.c -------------------------------------------------------------------------------- /src/operators/softmax-nc.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/operators/softmax-nc.c -------------------------------------------------------------------------------- /src/pack-lh.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/pack-lh.cc -------------------------------------------------------------------------------- /src/params.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/params.c -------------------------------------------------------------------------------- /src/qs8-f16-vcvt/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-f16-vcvt/avx2.c.in -------------------------------------------------------------------------------- /src/qs8-f16-vcvt/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-f16-vcvt/neon.c.in -------------------------------------------------------------------------------- /src/qs8-f32-vcvt/avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-f32-vcvt/avx.c.in -------------------------------------------------------------------------------- /src/qs8-f32-vcvt/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-f32-vcvt/avx2.c.in -------------------------------------------------------------------------------- /src/qs8-f32-vcvt/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-f32-vcvt/neon.c.in -------------------------------------------------------------------------------- /src/qs8-f32-vcvt/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-f32-vcvt/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-f32-vcvt/sse2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-f32-vcvt/sse2.c.in -------------------------------------------------------------------------------- /src/qs8-f32-vcvt/sse4.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-f32-vcvt/sse4.c.in -------------------------------------------------------------------------------- /src/qs8-gemm/c4-hvx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-gemm/c4-hvx.c.in -------------------------------------------------------------------------------- /src/qs8-gemm/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-gemm/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-gemm/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-gemm/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-igemm/c4-hvx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-igemm/c4-hvx.c.in -------------------------------------------------------------------------------- /src/qs8-igemm/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-igemm/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-igemm/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-igemm/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-rdsum/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rdsum/avx2.c.in -------------------------------------------------------------------------------- /src/qs8-rdsum/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rdsum/neon.c.in -------------------------------------------------------------------------------- /src/qs8-rdsum/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rdsum/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-rdsum/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rdsum/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-rdsum/sse41.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rdsum/sse41.c.in -------------------------------------------------------------------------------- /src/qs8-rsum/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/avx2.c.in -------------------------------------------------------------------------------- /src/qs8-rsum/avxvnni.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/avxvnni.c.in -------------------------------------------------------------------------------- /src/qs8-rsum/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/neon.c.in -------------------------------------------------------------------------------- /src/qs8-rsum/neondot.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/neondot.c.in -------------------------------------------------------------------------------- /src/qs8-rsum/qs8-rsum.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/qs8-rsum.inc -------------------------------------------------------------------------------- /src/qs8-rsum/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-rsum/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-rsum/ssse3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/ssse3.c.in -------------------------------------------------------------------------------- /src/qs8-rsum/wasmsimd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-rsum/wasmsimd.c.in -------------------------------------------------------------------------------- /src/qs8-vadd/hvx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vadd/hvx.c.in -------------------------------------------------------------------------------- /src/qs8-vadd/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vadd/neon.c.in -------------------------------------------------------------------------------- /src/qs8-vadd/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vadd/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-vadd/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vadd/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-vadd/wasmsimd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vadd/wasmsimd.c.in -------------------------------------------------------------------------------- /src/qs8-vaddc/hvx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vaddc/hvx.c.in -------------------------------------------------------------------------------- /src/qs8-vaddc/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vaddc/neon.c.in -------------------------------------------------------------------------------- /src/qs8-vaddc/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vaddc/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-vaddc/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vaddc/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-vcvt/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vcvt/avx2.c.in -------------------------------------------------------------------------------- /src/qs8-vcvt/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vcvt/neon.c.in -------------------------------------------------------------------------------- /src/qs8-vcvt/qs8-vcvt.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vcvt/qs8-vcvt.inc -------------------------------------------------------------------------------- /src/qs8-vcvt/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vcvt/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-vcvt/sse2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vcvt/sse2.c.in -------------------------------------------------------------------------------- /src/qs8-vcvt/sse4.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vcvt/sse4.c.in -------------------------------------------------------------------------------- /src/qs8-vcvt/ssse3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vcvt/ssse3.c.in -------------------------------------------------------------------------------- /src/qs8-vcvt/wasmsimd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vcvt/wasmsimd.c.in -------------------------------------------------------------------------------- /src/qs8-vlrelu/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vlrelu/avx2.c.in -------------------------------------------------------------------------------- /src/qs8-vlrelu/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vlrelu/neon.c.in -------------------------------------------------------------------------------- /src/qs8-vlrelu/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vlrelu/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-vlrelu/sse2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vlrelu/sse2.c.in -------------------------------------------------------------------------------- /src/qs8-vlrelu/sse4.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vlrelu/sse4.c.in -------------------------------------------------------------------------------- /src/qs8-vlrelu/ssse3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vlrelu/ssse3.c.in -------------------------------------------------------------------------------- /src/qs8-vmul/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vmul/neon.c.in -------------------------------------------------------------------------------- /src/qs8-vmul/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vmul/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-vmul/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vmul/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-vmulc/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vmulc/neon.c.in -------------------------------------------------------------------------------- /src/qs8-vmulc/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vmulc/rvv.c.in -------------------------------------------------------------------------------- /src/qs8-vmulc/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vmulc/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-vprelu/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vprelu/avx2.c.in -------------------------------------------------------------------------------- /src/qs8-vprelu/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vprelu/scalar.c.in -------------------------------------------------------------------------------- /src/qs8-vpreluc/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vpreluc/avx2.c.in -------------------------------------------------------------------------------- /src/qs8-vrpreluc/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qs8-vrpreluc/avx2.c.in -------------------------------------------------------------------------------- /src/qu8-rdsum/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rdsum/neon.c.in -------------------------------------------------------------------------------- /src/qu8-rdsum/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rdsum/rvv.c.in -------------------------------------------------------------------------------- /src/qu8-rdsum/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rdsum/scalar.c.in -------------------------------------------------------------------------------- /src/qu8-rdsum/ssse3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rdsum/ssse3.c.in -------------------------------------------------------------------------------- /src/qu8-rsum/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rsum/avx2.c.in -------------------------------------------------------------------------------- /src/qu8-rsum/qu8-rsum.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rsum/qu8-rsum.inc -------------------------------------------------------------------------------- /src/qu8-rsum/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rsum/rvv.c.in -------------------------------------------------------------------------------- /src/qu8-rsum/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rsum/scalar.c.in -------------------------------------------------------------------------------- /src/qu8-rsum/sse2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-rsum/sse2.c.in -------------------------------------------------------------------------------- /src/qu8-vcvt/qu8-vcvt.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/qu8-vcvt/qu8-vcvt.inc -------------------------------------------------------------------------------- /src/reference/packing.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/reference/packing.cc -------------------------------------------------------------------------------- /src/runtime.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/runtime.c -------------------------------------------------------------------------------- /src/s8-ibilinear/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/s8-ibilinear/neon.c.in -------------------------------------------------------------------------------- /src/s8-ibilinear/sse.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/s8-ibilinear/sse.c.in -------------------------------------------------------------------------------- /src/s8-rdminmax/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/s8-rdminmax/simd.c.in -------------------------------------------------------------------------------- /src/s8-rminmax/s8-rmax.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/s8-rminmax/s8-rmax.inc -------------------------------------------------------------------------------- /src/s8-rminmax/s8-rmin.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/s8-rminmax/s8-rmin.inc -------------------------------------------------------------------------------- /src/s8-rminmax/simd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/s8-rminmax/simd.c.in -------------------------------------------------------------------------------- /src/s8-vclamp/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/s8-vclamp/rvv.c.in -------------------------------------------------------------------------------- /src/sanitizers.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/sanitizers.c -------------------------------------------------------------------------------- /src/subgraph.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph.c -------------------------------------------------------------------------------- /src/subgraph/binary.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/binary.c -------------------------------------------------------------------------------- /src/subgraph/concatenate.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/concatenate.c -------------------------------------------------------------------------------- /src/subgraph/copy.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/copy.c -------------------------------------------------------------------------------- /src/subgraph/deprecated.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/deprecated.c -------------------------------------------------------------------------------- /src/subgraph/even-split.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/even-split.c -------------------------------------------------------------------------------- /src/subgraph/pack-lh.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/pack-lh.c -------------------------------------------------------------------------------- /src/subgraph/rope.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/rope.c -------------------------------------------------------------------------------- /src/subgraph/softmax.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/softmax.c -------------------------------------------------------------------------------- /src/subgraph/unary.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/unary.c -------------------------------------------------------------------------------- /src/subgraph/validation.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/subgraph/validation.c -------------------------------------------------------------------------------- /src/tables/vlog.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/tables/vlog.c -------------------------------------------------------------------------------- /src/tensor.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/tensor.c -------------------------------------------------------------------------------- /src/u8-rminmax/u8-rmax.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/u8-rminmax/u8-rmax.inc -------------------------------------------------------------------------------- /src/u8-rminmax/u8-rmin.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/u8-rminmax/u8-rmin.inc -------------------------------------------------------------------------------- /src/x16-packw/avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x16-packw/avx.c.in -------------------------------------------------------------------------------- /src/x16-packw/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x16-packw/neon.c.in -------------------------------------------------------------------------------- /src/x32-packb/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packb/scalar.c.in -------------------------------------------------------------------------------- /src/x32-packw/avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/avx.c.in -------------------------------------------------------------------------------- /src/x32-packw/avx512.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/avx512.c.in -------------------------------------------------------------------------------- /src/x32-packw/c4-sse2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/c4-sse2.c.in -------------------------------------------------------------------------------- /src/x32-packw/gio-avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/gio-avx.c.in -------------------------------------------------------------------------------- /src/x32-packw/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/neon.c.in -------------------------------------------------------------------------------- /src/x32-packw/rvv.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/rvv.c.in -------------------------------------------------------------------------------- /src/x32-packw/s4-avx.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/s4-avx.c.in -------------------------------------------------------------------------------- /src/x32-packw/s4-sse2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/s4-sse2.c.in -------------------------------------------------------------------------------- /src/x32-packw/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/scalar.c.in -------------------------------------------------------------------------------- /src/x32-packw/sse2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packw/sse2.c.in -------------------------------------------------------------------------------- /src/x32-packx/neon.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x32-packx/neon.c.in -------------------------------------------------------------------------------- /src/x4-packw/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x4-packw/scalar.c.in -------------------------------------------------------------------------------- /src/x8-lut/avx2.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x8-lut/avx2.c.in -------------------------------------------------------------------------------- /src/x8-lut/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x8-lut/scalar.c.in -------------------------------------------------------------------------------- /src/x8-lut/ssse3.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x8-lut/ssse3.c.in -------------------------------------------------------------------------------- /src/x8-lut/wasmpshufb.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x8-lut/wasmpshufb.c.in -------------------------------------------------------------------------------- /src/x8-lut/wasmsimd.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x8-lut/wasmsimd.c.in -------------------------------------------------------------------------------- /src/x8-packq/x8-packq.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x8-packq/x8-packq.inc -------------------------------------------------------------------------------- /src/x8-packw/scalar.c.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x8-packw/scalar.c.in -------------------------------------------------------------------------------- /src/x8-packw/x8-packw.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/x8-packw/x8-packw.inc -------------------------------------------------------------------------------- /src/xnnpack/allocator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/allocator.h -------------------------------------------------------------------------------- /src/xnnpack/argmaxpool.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/argmaxpool.h -------------------------------------------------------------------------------- /src/xnnpack/assembly.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/assembly.h -------------------------------------------------------------------------------- /src/xnnpack/avgpool.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/avgpool.h -------------------------------------------------------------------------------- /src/xnnpack/buffer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/buffer.h -------------------------------------------------------------------------------- /src/xnnpack/cache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/cache.h -------------------------------------------------------------------------------- /src/xnnpack/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/common.h -------------------------------------------------------------------------------- /src/xnnpack/compute.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/compute.h -------------------------------------------------------------------------------- /src/xnnpack/config-types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/config-types.h -------------------------------------------------------------------------------- /src/xnnpack/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/config.h -------------------------------------------------------------------------------- /src/xnnpack/conv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/conv.h -------------------------------------------------------------------------------- /src/xnnpack/datatype.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/datatype.h -------------------------------------------------------------------------------- /src/xnnpack/dwconv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/dwconv.h -------------------------------------------------------------------------------- /src/xnnpack/fill.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/fill.h -------------------------------------------------------------------------------- /src/xnnpack/fp16.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/fp16.h -------------------------------------------------------------------------------- /src/xnnpack/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/gemm.h -------------------------------------------------------------------------------- /src/xnnpack/ibilinear.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/ibilinear.h -------------------------------------------------------------------------------- /src/xnnpack/igemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/igemm.h -------------------------------------------------------------------------------- /src/xnnpack/indirection.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/indirection.h -------------------------------------------------------------------------------- /src/xnnpack/init-once.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/init-once.h -------------------------------------------------------------------------------- /src/xnnpack/internal.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/internal.h -------------------------------------------------------------------------------- /src/xnnpack/isa-checks.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/isa-checks.h -------------------------------------------------------------------------------- /src/xnnpack/log.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/log.h -------------------------------------------------------------------------------- /src/xnnpack/lut.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/lut.h -------------------------------------------------------------------------------- /src/xnnpack/math.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/math.h -------------------------------------------------------------------------------- /src/xnnpack/maxpool.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/maxpool.h -------------------------------------------------------------------------------- /src/xnnpack/memory.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/memory.h -------------------------------------------------------------------------------- /src/xnnpack/microfnptr.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/microfnptr.h -------------------------------------------------------------------------------- /src/xnnpack/microparams.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/microparams.h -------------------------------------------------------------------------------- /src/xnnpack/mutex.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/mutex.h -------------------------------------------------------------------------------- /src/xnnpack/node-type.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/node-type.h -------------------------------------------------------------------------------- /src/xnnpack/operator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/operator.h -------------------------------------------------------------------------------- /src/xnnpack/pack-lh.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/pack-lh.h -------------------------------------------------------------------------------- /src/xnnpack/pack.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/pack.h -------------------------------------------------------------------------------- /src/xnnpack/packb.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/packb.h -------------------------------------------------------------------------------- /src/xnnpack/packq.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/packq.h -------------------------------------------------------------------------------- /src/xnnpack/packw.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/packw.h -------------------------------------------------------------------------------- /src/xnnpack/packx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/packx.h -------------------------------------------------------------------------------- /src/xnnpack/pad.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/pad.h -------------------------------------------------------------------------------- /src/xnnpack/params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/params.h -------------------------------------------------------------------------------- /src/xnnpack/ppmm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/ppmm.h -------------------------------------------------------------------------------- /src/xnnpack/prefetch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/prefetch.h -------------------------------------------------------------------------------- /src/xnnpack/quantization.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/quantization.h -------------------------------------------------------------------------------- /src/xnnpack/raddextexp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/raddextexp.h -------------------------------------------------------------------------------- /src/xnnpack/reduce.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/reduce.h -------------------------------------------------------------------------------- /src/xnnpack/simd/f32-avx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/simd/f32-avx.h -------------------------------------------------------------------------------- /src/xnnpack/simd/f32-hvx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/simd/f32-hvx.h -------------------------------------------------------------------------------- /src/xnnpack/simd/s32-hvx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/simd/s32-hvx.h -------------------------------------------------------------------------------- /src/xnnpack/simd/s8-hvx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/simd/s8-hvx.h -------------------------------------------------------------------------------- /src/xnnpack/simd/s8-neon.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/simd/s8-neon.h -------------------------------------------------------------------------------- /src/xnnpack/simd/u8-hvx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/simd/u8-hvx.h -------------------------------------------------------------------------------- /src/xnnpack/simd/u8-neon.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/simd/u8-neon.h -------------------------------------------------------------------------------- /src/xnnpack/simd/u8-sse2.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/simd/u8-sse2.h -------------------------------------------------------------------------------- /src/xnnpack/spmm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/spmm.h -------------------------------------------------------------------------------- /src/xnnpack/subgraph.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/subgraph.h -------------------------------------------------------------------------------- /src/xnnpack/transpose.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/transpose.h -------------------------------------------------------------------------------- /src/xnnpack/unaligned.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/unaligned.h -------------------------------------------------------------------------------- /src/xnnpack/unpool.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/unpool.h -------------------------------------------------------------------------------- /src/xnnpack/vbinary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/vbinary.h -------------------------------------------------------------------------------- /src/xnnpack/vcvt.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/vcvt.h -------------------------------------------------------------------------------- /src/xnnpack/vmulcaddc.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/vmulcaddc.h -------------------------------------------------------------------------------- /src/xnnpack/vscaleextexp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/vscaleextexp.h -------------------------------------------------------------------------------- /src/xnnpack/vunary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xnnpack/vunary.h -------------------------------------------------------------------------------- /src/xx-fill/xx-fill.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xx-fill/xx-fill.inc -------------------------------------------------------------------------------- /src/xx-pad/xx-pad.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/src/xx-pad/xx-pad.inc -------------------------------------------------------------------------------- /test/BUILD.bazel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/BUILD.bazel -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/CMakeLists.txt -------------------------------------------------------------------------------- /test/avgpool-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/avgpool-minmax.cc -------------------------------------------------------------------------------- /test/bf16-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/bf16-gemm-minmax.cc -------------------------------------------------------------------------------- /test/bf16-gemm-minmax.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/bf16-gemm-minmax.yaml -------------------------------------------------------------------------------- /test/buffer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/buffer.cc -------------------------------------------------------------------------------- /test/build-identifier.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/build-identifier.cc -------------------------------------------------------------------------------- /test/f16-conv-hwc2chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-conv-hwc2chw.cc -------------------------------------------------------------------------------- /test/f16-conv-hwc2chw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-conv-hwc2chw.yaml -------------------------------------------------------------------------------- /test/f16-dwconv-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-dwconv-minmax.cc -------------------------------------------------------------------------------- /test/f16-dwconv2d-chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-dwconv2d-chw.cc -------------------------------------------------------------------------------- /test/f16-dwconv2d-chw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-dwconv2d-chw.yaml -------------------------------------------------------------------------------- /test/f16-f32-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-f32-vcvt.cc -------------------------------------------------------------------------------- /test/f16-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-gemm-minmax.cc -------------------------------------------------------------------------------- /test/f16-gemm-minmax.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-gemm-minmax.yaml -------------------------------------------------------------------------------- /test/f16-ibilinear-chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-ibilinear-chw.cc -------------------------------------------------------------------------------- /test/f16-ibilinear.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-ibilinear.cc -------------------------------------------------------------------------------- /test/f16-ibilinear.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-ibilinear.yaml -------------------------------------------------------------------------------- /test/f16-igemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-igemm-minmax.cc -------------------------------------------------------------------------------- /test/f16-igemm-minmax.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-igemm-minmax.yaml -------------------------------------------------------------------------------- /test/f16-qs8-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-qs8-vcvt.cc -------------------------------------------------------------------------------- /test/f16-qu8-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-qu8-vcvt.cc -------------------------------------------------------------------------------- /test/f16-vabs.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vabs.cc -------------------------------------------------------------------------------- /test/f16-vadd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vadd.cc -------------------------------------------------------------------------------- /test/f16-vaddc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vaddc.cc -------------------------------------------------------------------------------- /test/f16-vapproxgelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vapproxgelu.cc -------------------------------------------------------------------------------- /test/f16-vclamp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vclamp.cc -------------------------------------------------------------------------------- /test/f16-vcmul.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vcmul.cc -------------------------------------------------------------------------------- /test/f16-vcos.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vcos.cc -------------------------------------------------------------------------------- /test/f16-vdiv.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vdiv.cc -------------------------------------------------------------------------------- /test/f16-vdivc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vdivc.cc -------------------------------------------------------------------------------- /test/f16-velu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-velu.cc -------------------------------------------------------------------------------- /test/f16-vexp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vexp.cc -------------------------------------------------------------------------------- /test/f16-vgelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vgelu.cc -------------------------------------------------------------------------------- /test/f16-vhswish.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vhswish.cc -------------------------------------------------------------------------------- /test/f16-vlrelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vlrelu.cc -------------------------------------------------------------------------------- /test/f16-vmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vmax.cc -------------------------------------------------------------------------------- /test/f16-vmaxc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vmaxc.cc -------------------------------------------------------------------------------- /test/f16-vmin.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vmin.cc -------------------------------------------------------------------------------- /test/f16-vminc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vminc.cc -------------------------------------------------------------------------------- /test/f16-vmul.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vmul.cc -------------------------------------------------------------------------------- /test/f16-vmulc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vmulc.cc -------------------------------------------------------------------------------- /test/f16-vneg.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vneg.cc -------------------------------------------------------------------------------- /test/f16-vprelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vprelu.cc -------------------------------------------------------------------------------- /test/f16-vpreluc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vpreluc.cc -------------------------------------------------------------------------------- /test/f16-vrdivc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vrdivc.cc -------------------------------------------------------------------------------- /test/f16-vrndd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vrndd.cc -------------------------------------------------------------------------------- /test/f16-vrndne.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vrndne.cc -------------------------------------------------------------------------------- /test/f16-vrndu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vrndu.cc -------------------------------------------------------------------------------- /test/f16-vrndz.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vrndz.cc -------------------------------------------------------------------------------- /test/f16-vrpreluc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vrpreluc.cc -------------------------------------------------------------------------------- /test/f16-vrsqrt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vrsqrt.cc -------------------------------------------------------------------------------- /test/f16-vrsubc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vrsubc.cc -------------------------------------------------------------------------------- /test/f16-vsigmoid.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vsigmoid.cc -------------------------------------------------------------------------------- /test/f16-vsin.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vsin.cc -------------------------------------------------------------------------------- /test/f16-vsqr.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vsqr.cc -------------------------------------------------------------------------------- /test/f16-vsqrdiff.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vsqrdiff.cc -------------------------------------------------------------------------------- /test/f16-vsqrdiffc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vsqrdiffc.cc -------------------------------------------------------------------------------- /test/f16-vsqrt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vsqrt.cc -------------------------------------------------------------------------------- /test/f16-vsub.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vsub.cc -------------------------------------------------------------------------------- /test/f16-vsubc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vsubc.cc -------------------------------------------------------------------------------- /test/f16-vtanh.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f16-vtanh.cc -------------------------------------------------------------------------------- /test/f32-argmaxpool.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-argmaxpool.cc -------------------------------------------------------------------------------- /test/f32-argmaxpool.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-argmaxpool.yaml -------------------------------------------------------------------------------- /test/f32-conv-hwc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-conv-hwc.cc -------------------------------------------------------------------------------- /test/f32-conv-hwc2chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-conv-hwc2chw.cc -------------------------------------------------------------------------------- /test/f32-conv-hwc2chw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-conv-hwc2chw.yaml -------------------------------------------------------------------------------- /test/f32-dwconv-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-dwconv-minmax.cc -------------------------------------------------------------------------------- /test/f32-dwconv.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-dwconv.cc -------------------------------------------------------------------------------- /test/f32-dwconv2d-chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-dwconv2d-chw.cc -------------------------------------------------------------------------------- /test/f32-dwconv2d-chw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-dwconv2d-chw.yaml -------------------------------------------------------------------------------- /test/f32-f16-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-f16-vcvt.cc -------------------------------------------------------------------------------- /test/f32-gemm-2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm-2.cc -------------------------------------------------------------------------------- /test/f32-gemm-minmax-2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm-minmax-2.cc -------------------------------------------------------------------------------- /test/f32-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm-minmax.cc -------------------------------------------------------------------------------- /test/f32-gemm-minmax.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm-minmax.yaml -------------------------------------------------------------------------------- /test/f32-gemm-relu-2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm-relu-2.cc -------------------------------------------------------------------------------- /test/f32-gemm-relu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm-relu.cc -------------------------------------------------------------------------------- /test/f32-gemm-relu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm-relu.yaml -------------------------------------------------------------------------------- /test/f32-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm.cc -------------------------------------------------------------------------------- /test/f32-gemm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-gemm.yaml -------------------------------------------------------------------------------- /test/f32-ibilinear-chw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-ibilinear-chw.cc -------------------------------------------------------------------------------- /test/f32-ibilinear.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-ibilinear.cc -------------------------------------------------------------------------------- /test/f32-ibilinear.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-ibilinear.yaml -------------------------------------------------------------------------------- /test/f32-igemm-2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm-2.cc -------------------------------------------------------------------------------- /test/f32-igemm-minmax-2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm-minmax-2.cc -------------------------------------------------------------------------------- /test/f32-igemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm-minmax.cc -------------------------------------------------------------------------------- /test/f32-igemm-minmax.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm-minmax.yaml -------------------------------------------------------------------------------- /test/f32-igemm-relu-2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm-relu-2.cc -------------------------------------------------------------------------------- /test/f32-igemm-relu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm-relu.cc -------------------------------------------------------------------------------- /test/f32-igemm-relu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm-relu.yaml -------------------------------------------------------------------------------- /test/f32-igemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm.cc -------------------------------------------------------------------------------- /test/f32-igemm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-igemm.yaml -------------------------------------------------------------------------------- /test/f32-ppmm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-ppmm-minmax.cc -------------------------------------------------------------------------------- /test/f32-ppmm-minmax.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-ppmm-minmax.yaml -------------------------------------------------------------------------------- /test/f32-qc8w-gemm-relu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-qc8w-gemm-relu.cc -------------------------------------------------------------------------------- /test/f32-qc8w-gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-qc8w-gemm.cc -------------------------------------------------------------------------------- /test/f32-qc8w-gemm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-qc8w-gemm.yaml -------------------------------------------------------------------------------- /test/f32-qs8-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-qs8-vcvt.cc -------------------------------------------------------------------------------- /test/f32-qu8-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-qu8-vcvt.cc -------------------------------------------------------------------------------- /test/f32-raddextexp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-raddextexp.cc -------------------------------------------------------------------------------- /test/f32-vabs.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vabs.cc -------------------------------------------------------------------------------- /test/f32-vadd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vadd.cc -------------------------------------------------------------------------------- /test/f32-vaddc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vaddc.cc -------------------------------------------------------------------------------- /test/f32-vapproxgelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vapproxgelu.cc -------------------------------------------------------------------------------- /test/f32-vclamp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vclamp.cc -------------------------------------------------------------------------------- /test/f32-vcmul.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vcmul.cc -------------------------------------------------------------------------------- /test/f32-vcopysign.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vcopysign.cc -------------------------------------------------------------------------------- /test/f32-vcopysignc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vcopysignc.cc -------------------------------------------------------------------------------- /test/f32-vcos.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vcos.cc -------------------------------------------------------------------------------- /test/f32-vdiv.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vdiv.cc -------------------------------------------------------------------------------- /test/f32-vdivc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vdivc.cc -------------------------------------------------------------------------------- /test/f32-velu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-velu.cc -------------------------------------------------------------------------------- /test/f32-vexp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vexp.cc -------------------------------------------------------------------------------- /test/f32-vgelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vgelu.cc -------------------------------------------------------------------------------- /test/f32-vhswish.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vhswish.cc -------------------------------------------------------------------------------- /test/f32-vlog.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vlog.cc -------------------------------------------------------------------------------- /test/f32-vlrelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vlrelu.cc -------------------------------------------------------------------------------- /test/f32-vmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vmax.cc -------------------------------------------------------------------------------- /test/f32-vmaxc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vmaxc.cc -------------------------------------------------------------------------------- /test/f32-vmin.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vmin.cc -------------------------------------------------------------------------------- /test/f32-vminc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vminc.cc -------------------------------------------------------------------------------- /test/f32-vmul.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vmul.cc -------------------------------------------------------------------------------- /test/f32-vmulc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vmulc.cc -------------------------------------------------------------------------------- /test/f32-vneg.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vneg.cc -------------------------------------------------------------------------------- /test/f32-vprelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vprelu.cc -------------------------------------------------------------------------------- /test/f32-vpreluc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vpreluc.cc -------------------------------------------------------------------------------- /test/f32-vrcopysignc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrcopysignc.cc -------------------------------------------------------------------------------- /test/f32-vrdivc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrdivc.cc -------------------------------------------------------------------------------- /test/f32-vrndd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrndd.cc -------------------------------------------------------------------------------- /test/f32-vrndne.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrndne.cc -------------------------------------------------------------------------------- /test/f32-vrndu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrndu.cc -------------------------------------------------------------------------------- /test/f32-vrndz.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrndz.cc -------------------------------------------------------------------------------- /test/f32-vrpreluc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrpreluc.cc -------------------------------------------------------------------------------- /test/f32-vrsqrt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrsqrt.cc -------------------------------------------------------------------------------- /test/f32-vrsubc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vrsubc.cc -------------------------------------------------------------------------------- /test/f32-vscaleextexp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vscaleextexp.cc -------------------------------------------------------------------------------- /test/f32-vsigmoid.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vsigmoid.cc -------------------------------------------------------------------------------- /test/f32-vsin.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vsin.cc -------------------------------------------------------------------------------- /test/f32-vsqr.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vsqr.cc -------------------------------------------------------------------------------- /test/f32-vsqrdiff.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vsqrdiff.cc -------------------------------------------------------------------------------- /test/f32-vsqrdiffc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vsqrdiffc.cc -------------------------------------------------------------------------------- /test/f32-vsqrt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vsqrt.cc -------------------------------------------------------------------------------- /test/f32-vsub.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vsub.cc -------------------------------------------------------------------------------- /test/f32-vsubc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vsubc.cc -------------------------------------------------------------------------------- /test/f32-vtanh.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/f32-vtanh.cc -------------------------------------------------------------------------------- /test/fingerprint.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/fingerprint.cc -------------------------------------------------------------------------------- /test/fingerprint_cache.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/fingerprint_cache.cc -------------------------------------------------------------------------------- /test/indirection.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/indirection.cc -------------------------------------------------------------------------------- /test/maxpool-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/maxpool-minmax.cc -------------------------------------------------------------------------------- /test/microkernel-utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/microkernel-utils.cc -------------------------------------------------------------------------------- /test/mutex.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/mutex.cc -------------------------------------------------------------------------------- /test/next_prime.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/next_prime.cc -------------------------------------------------------------------------------- /test/next_prime.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/next_prime.h -------------------------------------------------------------------------------- /test/operators/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/operators/BUILD -------------------------------------------------------------------------------- /test/operators/copy-nc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/operators/copy-nc.cc -------------------------------------------------------------------------------- /test/operators/slice-nd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/operators/slice-nd.cc -------------------------------------------------------------------------------- /test/packing.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/packing.cc -------------------------------------------------------------------------------- /test/pf16-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/pf16-gemm-minmax.cc -------------------------------------------------------------------------------- /test/pf16-gemm-minmax.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/pf16-gemm-minmax.yaml -------------------------------------------------------------------------------- /test/pf32-gemm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/pf32-gemm-minmax.cc -------------------------------------------------------------------------------- /test/pf32-gemm-minmax.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/pf32-gemm-minmax.yaml -------------------------------------------------------------------------------- /test/qb4-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qb4-packw.cc -------------------------------------------------------------------------------- /test/qs8-f16-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-f16-vcvt.cc -------------------------------------------------------------------------------- /test/qs8-f32-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-f32-vcvt.cc -------------------------------------------------------------------------------- /test/qs8-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-packw.cc -------------------------------------------------------------------------------- /test/qs8-qc4w-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-qc4w-packw.cc -------------------------------------------------------------------------------- /test/qs8-vadd-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-vadd-minmax.cc -------------------------------------------------------------------------------- /test/qs8-vaddc-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-vaddc-minmax.cc -------------------------------------------------------------------------------- /test/qs8-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-vcvt.cc -------------------------------------------------------------------------------- /test/qs8-vlrelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-vlrelu.cc -------------------------------------------------------------------------------- /test/qs8-vprelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-vprelu.cc -------------------------------------------------------------------------------- /test/qs8-vpreluc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-vpreluc.cc -------------------------------------------------------------------------------- /test/qs8-vrpreluc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qs8-vrpreluc.cc -------------------------------------------------------------------------------- /test/qu8-f32-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qu8-f32-vcvt.cc -------------------------------------------------------------------------------- /test/qu8-vadd-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qu8-vadd-minmax.cc -------------------------------------------------------------------------------- /test/qu8-vaddc-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qu8-vaddc-minmax.cc -------------------------------------------------------------------------------- /test/qu8-vcvt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qu8-vcvt.cc -------------------------------------------------------------------------------- /test/qu8-vlrelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qu8-vlrelu.cc -------------------------------------------------------------------------------- /test/qu8-vprelu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qu8-vprelu.cc -------------------------------------------------------------------------------- /test/qu8-vpreluc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qu8-vpreluc.cc -------------------------------------------------------------------------------- /test/qu8-vrpreluc.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/qu8-vrpreluc.cc -------------------------------------------------------------------------------- /test/rdminmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/rdminmax.cc -------------------------------------------------------------------------------- /test/rdsum.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/rdsum.cc -------------------------------------------------------------------------------- /test/rdsum2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/rdsum2.cc -------------------------------------------------------------------------------- /test/rminmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/rminmax.cc -------------------------------------------------------------------------------- /test/rsum.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/rsum.cc -------------------------------------------------------------------------------- /test/rsum2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/rsum2.cc -------------------------------------------------------------------------------- /test/s8-ibilinear.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/s8-ibilinear.cc -------------------------------------------------------------------------------- /test/s8-ibilinear.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/s8-ibilinear.yaml -------------------------------------------------------------------------------- /test/s8-vclamp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/s8-vclamp.cc -------------------------------------------------------------------------------- /test/simd/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/BUILD -------------------------------------------------------------------------------- /test/simd/f16-simd.cc.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/f16-simd.cc.in -------------------------------------------------------------------------------- /test/simd/f32-simd-avx.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/f32-simd-avx.cc -------------------------------------------------------------------------------- /test/simd/f32-simd-avx2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/f32-simd-avx2.cc -------------------------------------------------------------------------------- /test/simd/f32-simd-fma3.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/f32-simd-fma3.cc -------------------------------------------------------------------------------- /test/simd/f32-simd-hvx.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/f32-simd-hvx.cc -------------------------------------------------------------------------------- /test/simd/f32-simd-neon.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/f32-simd-neon.cc -------------------------------------------------------------------------------- /test/simd/f32-simd-sse2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/f32-simd-sse2.cc -------------------------------------------------------------------------------- /test/simd/f32-simd.cc.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/f32-simd.cc.in -------------------------------------------------------------------------------- /test/simd/s16-simd-avx2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s16-simd-avx2.cc -------------------------------------------------------------------------------- /test/simd/s16-simd-neon.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s16-simd-neon.cc -------------------------------------------------------------------------------- /test/simd/s16-simd.cc.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s16-simd.cc.in -------------------------------------------------------------------------------- /test/simd/s32-simd-avx2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s32-simd-avx2.cc -------------------------------------------------------------------------------- /test/simd/s32-simd-hvx.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s32-simd-hvx.cc -------------------------------------------------------------------------------- /test/simd/s32-simd-neon.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s32-simd-neon.cc -------------------------------------------------------------------------------- /test/simd/s32-simd.cc.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s32-simd.cc.in -------------------------------------------------------------------------------- /test/simd/s8-simd-hvx.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s8-simd-hvx.cc -------------------------------------------------------------------------------- /test/simd/s8-simd-neon.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s8-simd-neon.cc -------------------------------------------------------------------------------- /test/simd/s8-simd-sse41.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s8-simd-sse41.cc -------------------------------------------------------------------------------- /test/simd/s8-simd.cc.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/s8-simd.cc.in -------------------------------------------------------------------------------- /test/simd/u8-simd-hvx.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/u8-simd-hvx.cc -------------------------------------------------------------------------------- /test/simd/u8-simd-neon.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/u8-simd-neon.cc -------------------------------------------------------------------------------- /test/simd/u8-simd-sse2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/u8-simd-sse2.cc -------------------------------------------------------------------------------- /test/simd/u8-simd.cc.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/simd/u8-simd.cc.in -------------------------------------------------------------------------------- /test/spmm-minmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/spmm-minmax.cc -------------------------------------------------------------------------------- /test/subgraph/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/BUILD -------------------------------------------------------------------------------- /test/subgraph/binary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/binary.cc -------------------------------------------------------------------------------- /test/subgraph/broadcast.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/broadcast.cc -------------------------------------------------------------------------------- /test/subgraph/copy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/copy.cc -------------------------------------------------------------------------------- /test/subgraph/fusion.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/fusion.cc -------------------------------------------------------------------------------- /test/subgraph/rewrites.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/rewrites.cc -------------------------------------------------------------------------------- /test/subgraph/rope.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/rope.cc -------------------------------------------------------------------------------- /test/subgraph/runtime.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/runtime.cc -------------------------------------------------------------------------------- /test/subgraph/softmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/softmax.cc -------------------------------------------------------------------------------- /test/subgraph/stencil.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/stencil.cc -------------------------------------------------------------------------------- /test/subgraph/stencil.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/stencil.h -------------------------------------------------------------------------------- /test/subgraph/subgraph.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/subgraph.cc -------------------------------------------------------------------------------- /test/subgraph/unary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/unary.cc -------------------------------------------------------------------------------- /test/subgraph/workspace.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/subgraph/workspace.cc -------------------------------------------------------------------------------- /test/u8-ibilinear.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/u8-ibilinear.cc -------------------------------------------------------------------------------- /test/u8-ibilinear.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/u8-ibilinear.yaml -------------------------------------------------------------------------------- /test/u8-lut32norm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/u8-lut32norm.cc -------------------------------------------------------------------------------- /test/u8-vclamp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/u8-vclamp.cc -------------------------------------------------------------------------------- /test/unary-ops.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/unary-ops.cc -------------------------------------------------------------------------------- /test/unary-ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/unary-ops.h -------------------------------------------------------------------------------- /test/weights-cache.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/weights-cache.cc -------------------------------------------------------------------------------- /test/x16-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x16-packw.cc -------------------------------------------------------------------------------- /test/x16-x32-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x16-x32-packw.cc -------------------------------------------------------------------------------- /test/x32-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x32-packw.cc -------------------------------------------------------------------------------- /test/x32-packx.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x32-packx.cc -------------------------------------------------------------------------------- /test/x32-unpool.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x32-unpool.cc -------------------------------------------------------------------------------- /test/x8-lut.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x8-lut.cc -------------------------------------------------------------------------------- /test/x8-lut.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x8-lut.yaml -------------------------------------------------------------------------------- /test/x8-packq.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x8-packq.cc -------------------------------------------------------------------------------- /test/x8-packw.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/x8-packw.cc -------------------------------------------------------------------------------- /test/xN-transpose.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/xN-transpose.cc -------------------------------------------------------------------------------- /test/xx-fill.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/xx-fill.cc -------------------------------------------------------------------------------- /test/xx-pad.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/test/xx-pad.cc -------------------------------------------------------------------------------- /third_party/BUILD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /third_party/FP16.BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/third_party/FP16.BUILD -------------------------------------------------------------------------------- /tools/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/tools/BUILD -------------------------------------------------------------------------------- /tools/generate-lut-test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/tools/generate-lut-test.py -------------------------------------------------------------------------------- /tools/primes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/tools/primes.py -------------------------------------------------------------------------------- /tools/xngen: -------------------------------------------------------------------------------- 1 | xngen.py -------------------------------------------------------------------------------- /tools/xngen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/tools/xngen.py -------------------------------------------------------------------------------- /tools/xnncommon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/tools/xnncommon.py -------------------------------------------------------------------------------- /ynnpack/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/BUILD -------------------------------------------------------------------------------- /ynnpack/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/README.md -------------------------------------------------------------------------------- /ynnpack/base/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/BUILD -------------------------------------------------------------------------------- /ynnpack/base/arch.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/arch.cc -------------------------------------------------------------------------------- /ynnpack/base/arch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/arch.h -------------------------------------------------------------------------------- /ynnpack/base/arithmetic.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/arithmetic.h -------------------------------------------------------------------------------- /ynnpack/base/base.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/base.h -------------------------------------------------------------------------------- /ynnpack/base/bfloat16.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/bfloat16.h -------------------------------------------------------------------------------- /ynnpack/base/bit_cast.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/bit_cast.h -------------------------------------------------------------------------------- /ynnpack/base/fp16.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/fp16.h -------------------------------------------------------------------------------- /ynnpack/base/half.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/half.h -------------------------------------------------------------------------------- /ynnpack/base/log.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/log.h -------------------------------------------------------------------------------- /ynnpack/base/simd/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/simd/BUILD -------------------------------------------------------------------------------- /ynnpack/base/simd/vec.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/simd/vec.h -------------------------------------------------------------------------------- /ynnpack/base/test/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/test/BUILD -------------------------------------------------------------------------------- /ynnpack/base/test/buffer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/test/buffer.h -------------------------------------------------------------------------------- /ynnpack/base/test/random.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/test/random.h -------------------------------------------------------------------------------- /ynnpack/base/test/tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/test/tensor.h -------------------------------------------------------------------------------- /ynnpack/base/test/util.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/test/util.cc -------------------------------------------------------------------------------- /ynnpack/base/test/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/test/util.h -------------------------------------------------------------------------------- /ynnpack/base/to_string.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/to_string.cc -------------------------------------------------------------------------------- /ynnpack/base/to_string.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/to_string.h -------------------------------------------------------------------------------- /ynnpack/base/type.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/type.cc -------------------------------------------------------------------------------- /ynnpack/base/type.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/base/type.h -------------------------------------------------------------------------------- /ynnpack/build_defs.bzl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/build_defs.bzl -------------------------------------------------------------------------------- /ynnpack/include/ynnpack.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/include/ynnpack.h -------------------------------------------------------------------------------- /ynnpack/kernels/dot/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/kernels/dot/BUILD -------------------------------------------------------------------------------- /ynnpack/kernels/dot/dot.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/kernels/dot/dot.cc -------------------------------------------------------------------------------- /ynnpack/kernels/dot/dot.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/kernels/dot/dot.h -------------------------------------------------------------------------------- /ynnpack/kernels/dot/pack.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/kernels/dot/pack.h -------------------------------------------------------------------------------- /ynnpack/kernels/lut/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/kernels/lut/BUILD -------------------------------------------------------------------------------- /ynnpack/kernels/lut/lut.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/kernels/lut/lut.cc -------------------------------------------------------------------------------- /ynnpack/kernels/lut/lut.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/kernels/lut/lut.h -------------------------------------------------------------------------------- /ynnpack/subgraph/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/BUILD -------------------------------------------------------------------------------- /ynnpack/subgraph/copy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/copy.cc -------------------------------------------------------------------------------- /ynnpack/subgraph/dot.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/dot.cc -------------------------------------------------------------------------------- /ynnpack/subgraph/fusion.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/fusion.cc -------------------------------------------------------------------------------- /ynnpack/subgraph/reduce.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/reduce.cc -------------------------------------------------------------------------------- /ynnpack/subgraph/runtime.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/runtime.h -------------------------------------------------------------------------------- /ynnpack/subgraph/slinky.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/slinky.cc -------------------------------------------------------------------------------- /ynnpack/subgraph/slinky.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/slinky.h -------------------------------------------------------------------------------- /ynnpack/subgraph/stack.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/stack.cc -------------------------------------------------------------------------------- /ynnpack/subgraph/tensor.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/tensor.cc -------------------------------------------------------------------------------- /ynnpack/subgraph/tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/tensor.h -------------------------------------------------------------------------------- /ynnpack/subgraph/utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/utils.cc -------------------------------------------------------------------------------- /ynnpack/subgraph/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/subgraph/utils.h -------------------------------------------------------------------------------- /ynnpack/xnnpack/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/xnnpack/BUILD -------------------------------------------------------------------------------- /ynnpack/xnnpack/runtime.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/xnnpack/runtime.cc -------------------------------------------------------------------------------- /ynnpack/xnnpack/tensor.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/xnnpack/tensor.cc -------------------------------------------------------------------------------- /ynnpack/xnnpack/utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/xnnpack/utils.cc -------------------------------------------------------------------------------- /ynnpack/xnnpack/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/xnnpack/utils.h -------------------------------------------------------------------------------- /ynnpack/xnnpack/xnnpack.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/XNNPACK/HEAD/ynnpack/xnnpack/xnnpack.h --------------------------------------------------------------------------------