├── .gitignore ├── LICENSE ├── README.md ├── examples ├── bench_model_shape.py ├── bench_single_shape.py └── config.yml ├── kernels ├── __init__.py ├── baseline │ └── __init__.py ├── fp6 │ └── deepseed │ │ ├── __init__.py │ │ ├── common │ │ ├── ds_kernel_utils.h │ │ ├── include │ │ │ ├── configs.h │ │ │ ├── kernel_matmul.cuh │ │ │ ├── kernel_reduction.cuh │ │ │ ├── ptx_cp.async.cuh │ │ │ ├── ptx_mma.cuh │ │ │ ├── utils_core.cuh │ │ │ ├── utils_gmem.cuh │ │ │ ├── utils_paralleldequant.cuh │ │ │ └── weight_prepacking.h │ │ ├── linear_kernels.cpp │ │ ├── linear_kernels.h │ │ ├── linear_kernels_cuda.cu │ │ └── linear_kernels_cuda.h │ │ └── pybind_deepseed.cpp ├── fp8 │ ├── __init__.py │ ├── common.cuh │ ├── dispatch_utils.h │ └── fp8_cuda_kernel.cu ├── quant.py ├── w3a16 │ └── gptq │ │ ├── __init__.py │ │ ├── quant_cuda.cpp │ │ └── quant_cuda_kernel.cu ├── w4a16 │ └── marlin │ │ ├── __init__.py │ │ ├── marlin_cuda.cpp │ │ └── marlin_cuda_kernel.cu ├── w4a16_sparse │ └── marlin │ │ ├── __init__.py │ │ ├── _semi_structured_conversions.py │ │ ├── common │ │ ├── base.h │ │ ├── mem.h │ │ └── mma.h │ │ ├── marlin_cuda.cpp │ │ └── marlin_cuda_kernel_nm.cu └── w4a8 │ └── qoq │ ├── __init__.py │ └── csrc │ ├── activation.cpp │ ├── activation_kernels.cu │ ├── dispatch_utils.h │ ├── fused.cpp │ ├── fused_attention │ ├── applyBiasRopeUpdateKVCache.h │ ├── cudaBf16Fallbacks.cuh │ ├── cudaFp8Utils.h │ ├── cudaTypeUtils.cuh │ ├── decoderMaskedMultiheadAttention.cu │ ├── decoderMaskedMultiheadAttention.h │ ├── decoderMaskedMultiheadAttentionTemplate.hpp │ ├── decoderMaskedMultiheadAttentionUtils.h │ ├── fused_attention.cpp │ ├── fused_attention.h │ ├── gptKernels.h │ ├── input_metadata_helper.cu │ ├── input_metadata_helper.h │ ├── kvCacheUtils.h │ ├── memoryUtils.h │ ├── update_kv_cache.cu │ └── update_kv_cache.h │ ├── fused_kernels.cu │ ├── layernorm.cpp │ ├── layernorm_kernels.cu │ ├── qgemm │ ├── w4a8_per_chn │ │ ├── gemm_cuda.cu │ │ ├── gemm_cuda.h │ │ └── pybind.cpp │ ├── w4a8_per_group │ │ ├── gemm_cuda.cu │ │ ├── gemm_cuda.h │ │ └── pybind.cpp │ └── w8a8 │ │ ├── pybind.cpp │ │ ├── w8a8_gemm_cuda.cu │ │ └── w8a8_gemm_cuda.h │ ├── reduction_utils.cuh │ └── utils.cuh ├── setup.py ├── tools └── get_gpu_info │ ├── CMakeLists.txt │ ├── README.md │ └── src │ ├── CMakeLists.txt │ └── main.cu └── utils ├── registry_factory.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/README.md -------------------------------------------------------------------------------- /examples/bench_model_shape.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/examples/bench_model_shape.py -------------------------------------------------------------------------------- /examples/bench_single_shape.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/examples/bench_single_shape.py -------------------------------------------------------------------------------- /examples/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/examples/config.yml -------------------------------------------------------------------------------- /kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/__init__.py -------------------------------------------------------------------------------- /kernels/baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/baseline/__init__.py -------------------------------------------------------------------------------- /kernels/fp6/deepseed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/__init__.py -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/ds_kernel_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/ds_kernel_utils.h -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/configs.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/configs.h -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/kernel_matmul.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/kernel_matmul.cuh -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/kernel_reduction.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/kernel_reduction.cuh -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/ptx_cp.async.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/ptx_cp.async.cuh -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/ptx_mma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/ptx_mma.cuh -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/utils_core.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/utils_core.cuh -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/utils_gmem.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/utils_gmem.cuh -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/utils_paralleldequant.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/utils_paralleldequant.cuh -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/include/weight_prepacking.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/include/weight_prepacking.h -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/linear_kernels.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/linear_kernels.cpp -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/linear_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/linear_kernels.h -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/linear_kernels_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/linear_kernels_cuda.cu -------------------------------------------------------------------------------- /kernels/fp6/deepseed/common/linear_kernels_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/common/linear_kernels_cuda.h -------------------------------------------------------------------------------- /kernels/fp6/deepseed/pybind_deepseed.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp6/deepseed/pybind_deepseed.cpp -------------------------------------------------------------------------------- /kernels/fp8/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp8/__init__.py -------------------------------------------------------------------------------- /kernels/fp8/common.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp8/common.cuh -------------------------------------------------------------------------------- /kernels/fp8/dispatch_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp8/dispatch_utils.h -------------------------------------------------------------------------------- /kernels/fp8/fp8_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/fp8/fp8_cuda_kernel.cu -------------------------------------------------------------------------------- /kernels/quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/quant.py -------------------------------------------------------------------------------- /kernels/w3a16/gptq/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w3a16/gptq/__init__.py -------------------------------------------------------------------------------- /kernels/w3a16/gptq/quant_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w3a16/gptq/quant_cuda.cpp -------------------------------------------------------------------------------- /kernels/w3a16/gptq/quant_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w3a16/gptq/quant_cuda_kernel.cu -------------------------------------------------------------------------------- /kernels/w4a16/marlin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16/marlin/__init__.py -------------------------------------------------------------------------------- /kernels/w4a16/marlin/marlin_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16/marlin/marlin_cuda.cpp -------------------------------------------------------------------------------- /kernels/w4a16/marlin/marlin_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16/marlin/marlin_cuda_kernel.cu -------------------------------------------------------------------------------- /kernels/w4a16_sparse/marlin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16_sparse/marlin/__init__.py -------------------------------------------------------------------------------- /kernels/w4a16_sparse/marlin/_semi_structured_conversions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16_sparse/marlin/_semi_structured_conversions.py -------------------------------------------------------------------------------- /kernels/w4a16_sparse/marlin/common/base.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16_sparse/marlin/common/base.h -------------------------------------------------------------------------------- /kernels/w4a16_sparse/marlin/common/mem.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16_sparse/marlin/common/mem.h -------------------------------------------------------------------------------- /kernels/w4a16_sparse/marlin/common/mma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16_sparse/marlin/common/mma.h -------------------------------------------------------------------------------- /kernels/w4a16_sparse/marlin/marlin_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16_sparse/marlin/marlin_cuda.cpp -------------------------------------------------------------------------------- /kernels/w4a16_sparse/marlin/marlin_cuda_kernel_nm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a16_sparse/marlin/marlin_cuda_kernel_nm.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/__init__.py -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/activation.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/activation.cpp -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/activation_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/activation_kernels.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/dispatch_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/dispatch_utils.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused.cpp -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/applyBiasRopeUpdateKVCache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/applyBiasRopeUpdateKVCache.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/cudaBf16Fallbacks.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/cudaBf16Fallbacks.cuh -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/cudaFp8Utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/cudaFp8Utils.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/cudaTypeUtils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/cudaTypeUtils.cuh -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/decoderMaskedMultiheadAttention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/decoderMaskedMultiheadAttention.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/decoderMaskedMultiheadAttention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/decoderMaskedMultiheadAttention.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/decoderMaskedMultiheadAttentionTemplate.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/decoderMaskedMultiheadAttentionTemplate.hpp -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/decoderMaskedMultiheadAttentionUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/decoderMaskedMultiheadAttentionUtils.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/fused_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/fused_attention.cpp -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/fused_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/fused_attention.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/gptKernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/gptKernels.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/input_metadata_helper.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/input_metadata_helper.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/input_metadata_helper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/input_metadata_helper.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/kvCacheUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/kvCacheUtils.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/memoryUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/memoryUtils.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/update_kv_cache.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/update_kv_cache.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_attention/update_kv_cache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_attention/update_kv_cache.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/fused_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/fused_kernels.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/layernorm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/layernorm.cpp -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/layernorm_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/layernorm_kernels.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w4a8_per_chn/gemm_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w4a8_per_chn/gemm_cuda.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w4a8_per_chn/gemm_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w4a8_per_chn/gemm_cuda.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w4a8_per_chn/pybind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w4a8_per_chn/pybind.cpp -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w4a8_per_group/gemm_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w4a8_per_group/gemm_cuda.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w4a8_per_group/gemm_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w4a8_per_group/gemm_cuda.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w4a8_per_group/pybind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w4a8_per_group/pybind.cpp -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w8a8/pybind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w8a8/pybind.cpp -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w8a8/w8a8_gemm_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w8a8/w8a8_gemm_cuda.cu -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/qgemm/w8a8/w8a8_gemm_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/qgemm/w8a8/w8a8_gemm_cuda.h -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/reduction_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/reduction_utils.cuh -------------------------------------------------------------------------------- /kernels/w4a8/qoq/csrc/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/kernels/w4a8/qoq/csrc/utils.cuh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/setup.py -------------------------------------------------------------------------------- /tools/get_gpu_info/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.20) 2 | project(get_gpu_info) 3 | add_subdirectory(src) 4 | -------------------------------------------------------------------------------- /tools/get_gpu_info/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/tools/get_gpu_info/README.md -------------------------------------------------------------------------------- /tools/get_gpu_info/src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/tools/get_gpu_info/src/CMakeLists.txt -------------------------------------------------------------------------------- /tools/get_gpu_info/src/main.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/tools/get_gpu_info/src/main.cu -------------------------------------------------------------------------------- /utils/registry_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/utils/registry_factory.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/quant_horizon/HEAD/utils/utils.py --------------------------------------------------------------------------------