├── .github └── workflows │ ├── build.yaml │ └── scripts │ └── github_create_release.js ├── .gitignore ├── LICENSE ├── README.md ├── awq_ext ├── exllama │ ├── cu_compat.cuh │ ├── cuda_buffers.cu │ ├── cuda_buffers.cuh │ ├── cuda_func │ │ ├── column_remap.cu │ │ ├── column_remap.cuh │ │ ├── q4_matmul.cu │ │ ├── q4_matmul.cuh │ │ ├── q4_matrix.cu │ │ └── q4_matrix.cuh │ ├── exllama_ext.cpp │ ├── hip_compat.cuh │ ├── matrix.cuh │ ├── tuning.h │ └── util.cuh ├── exllamav2 │ ├── config.h │ ├── cpp │ │ └── util.h │ ├── cuda │ │ ├── compat.cuh │ │ ├── compat_gemm.cuh │ │ ├── matrix_view.cuh │ │ ├── q_gemm.cu │ │ ├── q_gemm.cuh │ │ ├── q_gemm_kernel.cuh │ │ ├── q_gemm_kernel_gptq.cuh │ │ ├── q_matrix.cu │ │ ├── q_matrix.cuh │ │ ├── quant │ │ │ ├── qdq_2.cuh │ │ │ ├── qdq_3.cuh │ │ │ ├── qdq_4.cuh │ │ │ ├── qdq_5.cuh │ │ │ ├── qdq_6.cuh │ │ │ ├── qdq_8.cuh │ │ │ └── qdq_util.cuh │ │ └── util.cuh │ └── ext.cpp ├── layernorm │ ├── layernorm.cu │ ├── layernorm.h │ └── reduction.cuh ├── position_embedding │ ├── pos_encoding.h │ └── pos_encoding_kernels.cu ├── pybind_awq.cpp ├── pybind_awq_v2.cpp ├── quantization │ ├── dequantize.cuh │ ├── gemm_cuda.h │ ├── gemm_cuda_gen.cu │ ├── gemv_cuda.cu │ └── gemv_cuda.h ├── quantization_new │ ├── dequantize.cuh │ ├── gemm │ │ ├── gemm_cuda.cu │ │ ├── gemm_cuda.h │ │ └── semaphore.h │ └── gemv │ │ ├── gemv_cuda.cu │ │ └── gemv_cuda.h └── vllm │ ├── activation.cu │ ├── activation.h │ ├── moe_alig_block.cu │ ├── moe_alig_block.h │ ├── topk_softmax_kernels.cu │ └── topk_softmax_kernels.h ├── scripts └── download_wheels.sh └── setup.py /.github/workflows/build.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/.github/workflows/build.yaml -------------------------------------------------------------------------------- /.github/workflows/scripts/github_create_release.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/.github/workflows/scripts/github_create_release.js -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/README.md -------------------------------------------------------------------------------- /awq_ext/exllama/cu_compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cu_compat.cuh -------------------------------------------------------------------------------- /awq_ext/exllama/cuda_buffers.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cuda_buffers.cu -------------------------------------------------------------------------------- /awq_ext/exllama/cuda_buffers.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cuda_buffers.cuh -------------------------------------------------------------------------------- /awq_ext/exllama/cuda_func/column_remap.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cuda_func/column_remap.cu -------------------------------------------------------------------------------- /awq_ext/exllama/cuda_func/column_remap.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cuda_func/column_remap.cuh -------------------------------------------------------------------------------- /awq_ext/exllama/cuda_func/q4_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cuda_func/q4_matmul.cu -------------------------------------------------------------------------------- /awq_ext/exllama/cuda_func/q4_matmul.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cuda_func/q4_matmul.cuh -------------------------------------------------------------------------------- /awq_ext/exllama/cuda_func/q4_matrix.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cuda_func/q4_matrix.cu -------------------------------------------------------------------------------- /awq_ext/exllama/cuda_func/q4_matrix.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/cuda_func/q4_matrix.cuh -------------------------------------------------------------------------------- /awq_ext/exllama/exllama_ext.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/exllama_ext.cpp -------------------------------------------------------------------------------- /awq_ext/exllama/hip_compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/hip_compat.cuh -------------------------------------------------------------------------------- /awq_ext/exllama/matrix.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/matrix.cuh -------------------------------------------------------------------------------- /awq_ext/exllama/tuning.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/tuning.h -------------------------------------------------------------------------------- /awq_ext/exllama/util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllama/util.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/config.h -------------------------------------------------------------------------------- /awq_ext/exllamav2/cpp/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cpp/util.h -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/compat.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/compat_gemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/compat_gemm.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/matrix_view.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/matrix_view.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/q_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/q_gemm.cu -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/q_gemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/q_gemm.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/q_gemm_kernel.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/q_gemm_kernel.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/q_gemm_kernel_gptq.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/q_gemm_kernel_gptq.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/q_matrix.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/q_matrix.cu -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/q_matrix.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/q_matrix.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/quant/qdq_2.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/quant/qdq_2.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/quant/qdq_3.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/quant/qdq_3.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/quant/qdq_4.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/quant/qdq_4.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/quant/qdq_5.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/quant/qdq_5.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/quant/qdq_6.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/quant/qdq_6.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/quant/qdq_8.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/quant/qdq_8.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/quant/qdq_util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/quant/qdq_util.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/cuda/util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/cuda/util.cuh -------------------------------------------------------------------------------- /awq_ext/exllamav2/ext.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/exllamav2/ext.cpp -------------------------------------------------------------------------------- /awq_ext/layernorm/layernorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/layernorm/layernorm.cu -------------------------------------------------------------------------------- /awq_ext/layernorm/layernorm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/layernorm/layernorm.h -------------------------------------------------------------------------------- /awq_ext/layernorm/reduction.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/layernorm/reduction.cuh -------------------------------------------------------------------------------- /awq_ext/position_embedding/pos_encoding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/position_embedding/pos_encoding.h -------------------------------------------------------------------------------- /awq_ext/position_embedding/pos_encoding_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/position_embedding/pos_encoding_kernels.cu -------------------------------------------------------------------------------- /awq_ext/pybind_awq.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/pybind_awq.cpp -------------------------------------------------------------------------------- /awq_ext/pybind_awq_v2.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/pybind_awq_v2.cpp -------------------------------------------------------------------------------- /awq_ext/quantization/dequantize.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization/dequantize.cuh -------------------------------------------------------------------------------- /awq_ext/quantization/gemm_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization/gemm_cuda.h -------------------------------------------------------------------------------- /awq_ext/quantization/gemm_cuda_gen.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization/gemm_cuda_gen.cu -------------------------------------------------------------------------------- /awq_ext/quantization/gemv_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization/gemv_cuda.cu -------------------------------------------------------------------------------- /awq_ext/quantization/gemv_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization/gemv_cuda.h -------------------------------------------------------------------------------- /awq_ext/quantization_new/dequantize.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization_new/dequantize.cuh -------------------------------------------------------------------------------- /awq_ext/quantization_new/gemm/gemm_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization_new/gemm/gemm_cuda.cu -------------------------------------------------------------------------------- /awq_ext/quantization_new/gemm/gemm_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization_new/gemm/gemm_cuda.h -------------------------------------------------------------------------------- /awq_ext/quantization_new/gemm/semaphore.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization_new/gemm/semaphore.h -------------------------------------------------------------------------------- /awq_ext/quantization_new/gemv/gemv_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization_new/gemv/gemv_cuda.cu -------------------------------------------------------------------------------- /awq_ext/quantization_new/gemv/gemv_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/quantization_new/gemv/gemv_cuda.h -------------------------------------------------------------------------------- /awq_ext/vllm/activation.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/vllm/activation.cu -------------------------------------------------------------------------------- /awq_ext/vllm/activation.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/vllm/activation.h -------------------------------------------------------------------------------- /awq_ext/vllm/moe_alig_block.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/vllm/moe_alig_block.cu -------------------------------------------------------------------------------- /awq_ext/vllm/moe_alig_block.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/vllm/moe_alig_block.h -------------------------------------------------------------------------------- /awq_ext/vllm/topk_softmax_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/vllm/topk_softmax_kernels.cu -------------------------------------------------------------------------------- /awq_ext/vllm/topk_softmax_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/awq_ext/vllm/topk_softmax_kernels.h -------------------------------------------------------------------------------- /scripts/download_wheels.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/scripts/download_wheels.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/casper-hansen/AutoAWQ_kernels/HEAD/setup.py --------------------------------------------------------------------------------