├── .github
│   └── workflows
│       ├── build-wheels-release-rocm.yml
│       ├── build-wheels-release.yml
│       ├── build-wheels-rocm.yml
│       └── build-wheels.yml
├── .gitignore
├── LICENSE
├── README.md
├── gptq_for_llama
│   ├── __init__.py
│   ├── gptq_new
│   │   ├── __init__.py
│   │   ├── datautils.py
│   │   ├── fused_attn.py
│   │   ├── gptq.py
│   │   ├── llama.py
│   │   ├── llama_inference.py
│   │   ├── llama_inference_dmapauto.py
│   │   ├── llama_inference_offload.py
│   │   ├── modelutils.py
│   │   ├── opt.py
│   │   ├── quant.py
│   │   ├── share_tensors_across_processes.py
│   │   └── test_kernel.py
│   └── gptq_old
│       ├── __init__.py
│       ├── datautils.py
│       ├── gptq.py
│       ├── llama.py
│       ├── llama_inference.py
│       ├── llama_inference_offload.py
│       ├── modelutils.py
│       ├── opt.py
│       ├── quant.py
│       └── test_kernel.py
├── quant_cuda
│   ├── quant_cuda.cpp
│   ├── quant_cuda_kernel.cu
│   ├── quant_hip.cpp
│   └── quant_hip_kernel.hip
├── quant_cuda_faster
│   ├── quant_cuda.cpp
│   ├── quant_cuda_kernel.cu
│   ├── quant_hip.cpp
│   └── quant_hip_kernel.hip
├── requirements.txt
└── setup.py

/.github/workflows/build-wheels-release-rocm.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/.github/workflows/build-wheels-release-rocm.yml
--------------------------------------------------------------------------------

/.github/workflows/build-wheels-release.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/.github/workflows/build-wheels-release.yml
--------------------------------------------------------------------------------

/.github/workflows/build-wheels-rocm.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/.github/workflows/build-wheels-rocm.yml
--------------------------------------------------------------------------------

/.github/workflows/build-wheels.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/.github/workflows/build-wheels.yml
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
build/
gptq_for_llama.egg-info/
**__pycache__/
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/LICENSE
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/README.md
--------------------------------------------------------------------------------

/gptq_for_llama/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/__init__.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/datautils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/datautils.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/fused_attn.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/fused_attn.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/gptq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/gptq.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/llama.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/llama_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/llama_inference.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/llama_inference_dmapauto.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/llama_inference_dmapauto.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/llama_inference_offload.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/llama_inference_offload.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/modelutils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/modelutils.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/opt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/opt.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/quant.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/share_tensors_across_processes.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/share_tensors_across_processes.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_new/test_kernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_new/test_kernel.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/datautils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/datautils.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/gptq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/gptq.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/llama.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/llama_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/llama_inference.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/llama_inference_offload.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/llama_inference_offload.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/modelutils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/modelutils.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/opt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/opt.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/quant.py
--------------------------------------------------------------------------------

/gptq_for_llama/gptq_old/test_kernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/gptq_for_llama/gptq_old/test_kernel.py
--------------------------------------------------------------------------------

/quant_cuda/quant_cuda.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/quant_cuda/quant_cuda.cpp
--------------------------------------------------------------------------------

/quant_cuda/quant_cuda_kernel.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/quant_cuda/quant_cuda_kernel.cu
--------------------------------------------------------------------------------

/quant_cuda/quant_hip.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/quant_cuda/quant_hip.cpp
--------------------------------------------------------------------------------

/quant_cuda/quant_hip_kernel.hip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/quant_cuda/quant_hip_kernel.hip
--------------------------------------------------------------------------------

/quant_cuda_faster/quant_cuda.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/quant_cuda_faster/quant_cuda.cpp
--------------------------------------------------------------------------------

/quant_cuda_faster/quant_cuda_kernel.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/quant_cuda_faster/quant_cuda_kernel.cu
--------------------------------------------------------------------------------

/quant_cuda_faster/quant_hip.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/quant_cuda_faster/quant_hip.cpp
--------------------------------------------------------------------------------

/quant_cuda_faster/quant_hip_kernel.hip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/quant_cuda_faster/quant_hip_kernel.hip
--------------------------------------------------------------------------------

/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/requirements.txt
--------------------------------------------------------------------------------

/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jllllll/GPTQ-for-LLaMa-CUDA/HEAD/setup.py
--------------------------------------------------------------------------------