├── .gitignore ├── README.md └── microbench ├── .gitignore ├── Makefile ├── appendix ├── Makefile ├── mma_baseline.cu ├── mma_permuted.cu └── mma_pipeline.cu ├── bin └── README.md ├── common └── common.mk ├── hw_def ├── common │ ├── common.h │ └── deviceQuery.h └── hw_def.h ├── numericbench ├── bf16numeric │ ├── bf16_numeric │ │ ├── Makefile │ │ ├── bf16_chain_matmul.cu │ │ └── bf16_numeric.cu │ ├── bf16add │ │ ├── Makefile │ │ └── bf16add.cu │ ├── bf16mul │ │ ├── Makefile │ │ └── bf16mul.cu │ ├── m16n8k16 │ │ ├── Makefile │ │ └── m16n8k16_bf16.cu │ └── m16n8k8 │ │ ├── Makefile │ │ └── m16n8k8_bf16.cu ├── cpu_base.h ├── cpu_int_base.h ├── fp16numeric │ ├── fp16_numeric │ │ ├── Makefile │ │ └── fp16_chain_matmul.cu │ ├── fp16add │ │ ├── Makefile │ │ └── fp16add.cu │ └── fp16mul │ │ ├── Makefile │ │ └── fp16mul.cu ├── int8numeric │ ├── int8add │ │ ├── Makefile │ │ └── int8add.cu │ └── s8numeric │ │ └── s8numeric.cu └── tf32numeric │ ├── m16n8k4 │ ├── Makefile │ └── m16n8k4_tf32.cu │ ├── m16n8k8 │ ├── Makefile │ └── m16n8k8_tf32.cu │ ├── tf32_numeric │ ├── Makefile │ ├── tf32_chain_matmul.cu │ └── tf32_numeric.cu │ ├── tf32add │ ├── Makefile │ └── tf32add.cu │ └── tf32mul │ ├── Makefile │ └── tf32mul.cu ├── run_all.sh └── ubench ├── ldmatrix ├── ldmatrix_ILP │ ├── Makefile │ └── ldmatrix_ilp.cu ├── ldmatrix_lat │ ├── Makefile │ └── ldmatrix_lat.cu ├── ldmatrix_x2_lat │ ├── Makefile │ └── ldmatrix_x2_lat.cu ├── ldmatrix_x4_lat │ ├── Makefile │ └── ldmatrix_x4_lat.cu ├── shared_bw │ ├── Makefile │ └── shared_bw.cu ├── shared_bw_64 │ ├── Makefile │ └── shared_bw_64.cu ├── shared_lat │ ├── Makefile │ └── shared_lat.cu ├── shared_x2_lat │ ├── Makefile │ └── shared_x2_lat.cu ├── shared_x4_lat │ ├── Makefile │ └── shared_x4_lat.cu ├── shared_x8 │ ├── Makefile │ └── shared_x8.cu └── shd_config │ ├── Makefile │ └── shd_config.cu ├── mma ├── mma_m16n8k128_int1 │ ├── Makefile │ └── mma_m16n8k128_int1.cu ├── mma_m16n8k16_bf16fp32 │ ├── Makefile │ └── mma_m16n8k16_bf16fp32.cu ├── mma_m16n8k16_fp │ ├── Makefile │ └── mma_m16n8k16_fp32.cu ├── mma_m16n8k16_half │ ├── Makefile │ └── mma_m16n8k16_half.cu ├── mma_m16n8k16_int │ ├── Makefile │ └── mma_m16n8k16_int.cu ├── mma_m16n8k256_int1 │ ├── Makefile │ └── mma_m16n8k256_int1.cu ├── mma_m16n8k32_fp8 │ ├── Makefile │ └── mma_m16n8k32_fp8.cu ├── mma_m16n8k32_int │ ├── Makefile │ └── mma_m16n8k32_int.cu ├── mma_m16n8k32_int4 │ ├── Makefile │ └── mma_m16n8k32_int4.cu ├── mma_m16n8k4_tf32 │ ├── Makefile │ └── mma_m16n8k4_tf32.cu ├── mma_m16n8k64_int4 │ ├── Makefile │ └── mma_m16n8k64_int4.cu ├── mma_m16n8k8_bf16fp32 │ ├── Makefile │ └── mma_m16n8k8_bf16fp32.cu ├── mma_m16n8k8_fp │ ├── Makefile │ └── mma_m16n8k8_fp32.cu ├── mma_m16n8k8_half │ ├── Makefile │ └── mma_m16n8k8_half.cu ├── mma_m16n8k8_tf32 │ ├── Makefile │ └── mma_m16n8k8_tf32.cu ├── mma_m8n8k16_int │ ├── Makefile │ └── mma_m8n8k16_int8.cu └── mma_m8n8k4_fp16fp32 │ ├── Makefile │ └── mma_m8n8k4_fp16fp32.cu ├── mmasp ├── mmasp_m16n8k16_fp │ ├── Makefile │ └── mmasp_m16n8k16_fp32.cu ├── mmasp_m16n8k16_fp16fp16 │ ├── Makefile │ └── mmasp_m16n8k16_fp16fp16.cu ├── mmasp_m16n8k16_tf32 │ ├── Makefile │ └── mmasp_m16n8k16_tf32.cu ├── mmasp_m16n8k32_fp │ ├── Makefile │ └── mmasp_m16n8k32_fp32.cu ├── mmasp_m16n8k32_fp16fp16 │ ├── Makefile │ └── mmasp_m16n8k32_fp16fp16.cu ├── mmasp_m16n8k32_int │ ├── Makefile │ └── mmasp_m16n8k32_int.cu ├── mmasp_m16n8k64_fp8 │ ├── Makefile │ └── mmasp_m16n8k64_fp8.cu ├── mmasp_m16n8k64_int │ ├── Makefile │ └── mmasp_m16n8k64_int.cu └── mmasp_m16n8k8_tf32 │ ├── Makefile │ └── mmasp_m16n8k8_tf32.cu └── wmma_load ├── loadbf16 ├── Makefile └── load_bf16.cu └── loadfp16 ├── Makefile └── load_fp16.cu /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/README.md -------------------------------------------------------------------------------- /microbench/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/.gitignore -------------------------------------------------------------------------------- /microbench/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/Makefile -------------------------------------------------------------------------------- /microbench/appendix/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/appendix/Makefile -------------------------------------------------------------------------------- /microbench/appendix/mma_baseline.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/appendix/mma_baseline.cu -------------------------------------------------------------------------------- /microbench/appendix/mma_permuted.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/appendix/mma_permuted.cu -------------------------------------------------------------------------------- /microbench/appendix/mma_pipeline.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/appendix/mma_pipeline.cu -------------------------------------------------------------------------------- /microbench/bin/README.md: -------------------------------------------------------------------------------- 1 | #Programs -------------------------------------------------------------------------------- /microbench/common/common.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/common/common.mk -------------------------------------------------------------------------------- /microbench/hw_def/common/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/hw_def/common/common.h -------------------------------------------------------------------------------- /microbench/hw_def/common/deviceQuery.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/hw_def/common/deviceQuery.h -------------------------------------------------------------------------------- /microbench/hw_def/hw_def.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/hw_def/hw_def.h -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/bf16_numeric/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/bf16_numeric/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/bf16_numeric/bf16_chain_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/bf16_numeric/bf16_chain_matmul.cu -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/bf16_numeric/bf16_numeric.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/bf16_numeric/bf16_numeric.cu -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/bf16add/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/bf16add/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/bf16add/bf16add.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/bf16add/bf16add.cu -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/bf16mul/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/bf16mul/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/bf16mul/bf16mul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/bf16mul/bf16mul.cu -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/m16n8k16/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/m16n8k16/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/m16n8k16/m16n8k16_bf16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/m16n8k16/m16n8k16_bf16.cu -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/m16n8k8/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/m16n8k8/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/bf16numeric/m16n8k8/m16n8k8_bf16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/bf16numeric/m16n8k8/m16n8k8_bf16.cu -------------------------------------------------------------------------------- /microbench/numericbench/cpu_base.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/cpu_base.h -------------------------------------------------------------------------------- /microbench/numericbench/cpu_int_base.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/cpu_int_base.h -------------------------------------------------------------------------------- /microbench/numericbench/fp16numeric/fp16_numeric/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/fp16numeric/fp16_numeric/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/fp16numeric/fp16_numeric/fp16_chain_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/fp16numeric/fp16_numeric/fp16_chain_matmul.cu -------------------------------------------------------------------------------- /microbench/numericbench/fp16numeric/fp16add/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/fp16numeric/fp16add/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/fp16numeric/fp16add/fp16add.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/fp16numeric/fp16add/fp16add.cu -------------------------------------------------------------------------------- /microbench/numericbench/fp16numeric/fp16mul/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/fp16numeric/fp16mul/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/fp16numeric/fp16mul/fp16mul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/fp16numeric/fp16mul/fp16mul.cu -------------------------------------------------------------------------------- /microbench/numericbench/int8numeric/int8add/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/int8numeric/int8add/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/int8numeric/int8add/int8add.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/int8numeric/int8add/int8add.cu -------------------------------------------------------------------------------- /microbench/numericbench/int8numeric/s8numeric/s8numeric.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/int8numeric/s8numeric/s8numeric.cu -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/m16n8k4/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/m16n8k4/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/m16n8k4/m16n8k4_tf32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/m16n8k4/m16n8k4_tf32.cu -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/m16n8k8/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/m16n8k8/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/m16n8k8/m16n8k8_tf32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/m16n8k8/m16n8k8_tf32.cu -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/tf32_numeric/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/tf32_numeric/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/tf32_numeric/tf32_chain_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/tf32_numeric/tf32_chain_matmul.cu -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/tf32_numeric/tf32_numeric.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/tf32_numeric/tf32_numeric.cu -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/tf32add/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/tf32add/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/tf32add/tf32add.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/tf32add/tf32add.cu -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/tf32mul/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/tf32mul/Makefile -------------------------------------------------------------------------------- /microbench/numericbench/tf32numeric/tf32mul/tf32mul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/numericbench/tf32numeric/tf32mul/tf32mul.cu -------------------------------------------------------------------------------- /microbench/run_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/run_all.sh -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/ldmatrix_ILP/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/ldmatrix_ILP/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/ldmatrix_ILP/ldmatrix_ilp.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/ldmatrix_ILP/ldmatrix_ilp.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/ldmatrix_lat/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/ldmatrix_lat/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/ldmatrix_lat/ldmatrix_lat.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/ldmatrix_lat/ldmatrix_lat.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/ldmatrix_x2_lat/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/ldmatrix_x2_lat/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/ldmatrix_x2_lat/ldmatrix_x2_lat.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/ldmatrix_x2_lat/ldmatrix_x2_lat.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/ldmatrix_x4_lat/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/ldmatrix_x4_lat/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/ldmatrix_x4_lat/ldmatrix_x4_lat.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/ldmatrix_x4_lat/ldmatrix_x4_lat.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_bw/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_bw/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_bw/shared_bw.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_bw/shared_bw.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_bw_64/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_bw_64/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_bw_64/shared_bw_64.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_bw_64/shared_bw_64.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_lat/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_lat/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_lat/shared_lat.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_lat/shared_lat.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_x2_lat/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_x2_lat/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_x2_lat/shared_x2_lat.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_x2_lat/shared_x2_lat.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_x4_lat/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_x4_lat/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_x4_lat/shared_x4_lat.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_x4_lat/shared_x4_lat.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_x8/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_x8/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shared_x8/shared_x8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shared_x8/shared_x8.cu -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shd_config/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shd_config/Makefile -------------------------------------------------------------------------------- /microbench/ubench/ldmatrix/shd_config/shd_config.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/ldmatrix/shd_config/shd_config.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k128_int1/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k128_int1/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k128_int1/mma_m16n8k128_int1.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k128_int1/mma_m16n8k128_int1.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k16_bf16fp32/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k16_bf16fp32/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k16_bf16fp32/mma_m16n8k16_bf16fp32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k16_bf16fp32/mma_m16n8k16_bf16fp32.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k16_fp/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k16_fp/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k16_fp/mma_m16n8k16_fp32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k16_fp/mma_m16n8k16_fp32.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k16_half/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k16_half/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k16_half/mma_m16n8k16_half.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k16_half/mma_m16n8k16_half.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k16_int/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k16_int/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k16_int/mma_m16n8k16_int.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k16_int/mma_m16n8k16_int.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k256_int1/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k256_int1/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k256_int1/mma_m16n8k256_int1.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k256_int1/mma_m16n8k256_int1.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k32_fp8/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k32_fp8/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k32_fp8/mma_m16n8k32_fp8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k32_fp8/mma_m16n8k32_fp8.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k32_int/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k32_int/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k32_int/mma_m16n8k32_int.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k32_int/mma_m16n8k32_int.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k32_int4/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k32_int4/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k32_int4/mma_m16n8k32_int4.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k32_int4/mma_m16n8k32_int4.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k4_tf32/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k4_tf32/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k4_tf32/mma_m16n8k4_tf32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k4_tf32/mma_m16n8k4_tf32.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k64_int4/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k64_int4/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k64_int4/mma_m16n8k64_int4.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k64_int4/mma_m16n8k64_int4.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k8_bf16fp32/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k8_bf16fp32/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k8_bf16fp32/mma_m16n8k8_bf16fp32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k8_bf16fp32/mma_m16n8k8_bf16fp32.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k8_fp/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k8_fp/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k8_fp/mma_m16n8k8_fp32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k8_fp/mma_m16n8k8_fp32.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k8_half/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k8_half/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k8_half/mma_m16n8k8_half.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k8_half/mma_m16n8k8_half.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k8_tf32/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k8_tf32/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m16n8k8_tf32/mma_m16n8k8_tf32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m16n8k8_tf32/mma_m16n8k8_tf32.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m8n8k16_int/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m8n8k16_int/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m8n8k16_int/mma_m8n8k16_int8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m8n8k16_int/mma_m8n8k16_int8.cu -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m8n8k4_fp16fp32/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m8n8k4_fp16fp32/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mma/mma_m8n8k4_fp16fp32/mma_m8n8k4_fp16fp32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mma/mma_m8n8k4_fp16fp32/mma_m8n8k4_fp16fp32.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k16_fp/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k16_fp/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k16_fp/mmasp_m16n8k16_fp32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k16_fp/mmasp_m16n8k16_fp32.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k16_fp16fp16/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k16_fp16fp16/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k16_fp16fp16/mmasp_m16n8k16_fp16fp16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k16_fp16fp16/mmasp_m16n8k16_fp16fp16.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k16_tf32/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k16_tf32/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k16_tf32/mmasp_m16n8k16_tf32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k16_tf32/mmasp_m16n8k16_tf32.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k32_fp/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k32_fp/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k32_fp/mmasp_m16n8k32_fp32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k32_fp/mmasp_m16n8k32_fp32.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k32_fp16fp16/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k32_fp16fp16/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k32_fp16fp16/mmasp_m16n8k32_fp16fp16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k32_fp16fp16/mmasp_m16n8k32_fp16fp16.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k32_int/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k32_int/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k32_int/mmasp_m16n8k32_int.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k32_int/mmasp_m16n8k32_int.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k64_fp8/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k64_fp8/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k64_fp8/mmasp_m16n8k64_fp8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k64_fp8/mmasp_m16n8k64_fp8.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k64_int/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k64_int/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k64_int/mmasp_m16n8k64_int.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k64_int/mmasp_m16n8k64_int.cu -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k8_tf32/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k8_tf32/Makefile -------------------------------------------------------------------------------- /microbench/ubench/mmasp/mmasp_m16n8k8_tf32/mmasp_m16n8k8_tf32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/mmasp/mmasp_m16n8k8_tf32/mmasp_m16n8k8_tf32.cu -------------------------------------------------------------------------------- /microbench/ubench/wmma_load/loadbf16/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/wmma_load/loadbf16/Makefile -------------------------------------------------------------------------------- /microbench/ubench/wmma_load/loadbf16/load_bf16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/wmma_load/loadbf16/load_bf16.cu -------------------------------------------------------------------------------- /microbench/ubench/wmma_load/loadfp16/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/wmma_load/loadfp16/Makefile -------------------------------------------------------------------------------- /microbench/ubench/wmma_load/loadfp16/load_fp16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunlex0717/DissectingTensorCores/HEAD/microbench/ubench/wmma_load/loadfp16/load_fp16.cu --------------------------------------------------------------------------------