├── LICENSE ├── README.md ├── codellama_2b_inference.py ├── datautils.py ├── evaluate.py ├── llama_2b_evaluate.py ├── llama_2b_inference.py ├── llama_2b_sft_alpaca.py ├── llama_4b_evaluate.py ├── model.py ├── peft_tuners_lora.py ├── requirements.txt └── scripts ├── evaluate ├── llama1_30b_w2a16g8.sh ├── llama1_7b_w2a16g32.sh ├── llama2_70b_w2a16g8.sh ├── llama2_7b_w2a16g32.sh ├── llama2_7b_w2a16g8.sh ├── llama3b_w2a16g16.sh ├── llama3b_w2a16g32.sh ├── llama3b_w2a16g8.sh ├── tiny_llama_w2a16g32.sh └── tiny_llama_w2a16g8.sh ├── inference ├── codellama_34b_w2a16g8.sh ├── llama1_30b_w2a16g8.sh ├── llama1_7b_w2a16g32.sh ├── llama2_70b_w2a16g8.sh ├── llama2_7b_w2a16g32.sh ├── llama2_7b_w2a16g8.sh ├── llama3b_w2a16g16.sh ├── llama3b_w2a16g32.sh ├── llama3b_w2a16g8.sh ├── tiny_llama_w2a16g32.sh └── tiny_llama_w2a16g8.sh └── instruction-chat └── llama2_70b_w2a16g8.sh /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/README.md -------------------------------------------------------------------------------- /codellama_2b_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/codellama_2b_inference.py -------------------------------------------------------------------------------- /datautils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/datautils.py -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/evaluate.py -------------------------------------------------------------------------------- /llama_2b_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/llama_2b_evaluate.py -------------------------------------------------------------------------------- /llama_2b_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/llama_2b_inference.py -------------------------------------------------------------------------------- /llama_2b_sft_alpaca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/llama_2b_sft_alpaca.py -------------------------------------------------------------------------------- /llama_4b_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/llama_4b_evaluate.py -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/model.py -------------------------------------------------------------------------------- /peft_tuners_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/peft_tuners_lora.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/evaluate/llama1_30b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 30b -v 1 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/evaluate/llama1_7b_w2a16g32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 7b -v 1 -g 32 4 | 5 | -------------------------------------------------------------------------------- /scripts/evaluate/llama2_70b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 70b -v 2 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/evaluate/llama2_7b_w2a16g32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 7b -v 2 -g 32 4 | 5 | -------------------------------------------------------------------------------- /scripts/evaluate/llama2_7b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 7b -v 2 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/evaluate/llama3b_w2a16g16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 3b -v 1 -g 16 4 | 5 | -------------------------------------------------------------------------------- /scripts/evaluate/llama3b_w2a16g32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 3b -v 1 -g 32 4 | 5 | -------------------------------------------------------------------------------- /scripts/evaluate/llama3b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 3b -v 1 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/evaluate/tiny_llama_w2a16g32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 1.1b -v 2 -g 32 4 | -------------------------------------------------------------------------------- /scripts/evaluate/tiny_llama_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 1.1b -v 2 -g 8 4 | -------------------------------------------------------------------------------- /scripts/inference/codellama_34b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python codellama_2b_inference.py -s 34B -v 2 -g 8 4 | -------------------------------------------------------------------------------- /scripts/inference/llama1_30b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 30b -v 1 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/llama1_7b_w2a16g32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 7b -v 1 -g 32 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/llama2_70b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 70b -v 2 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/llama2_7b_w2a16g32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 7b -v 2 -g 32 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/llama2_7b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 7b -v 2 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/llama3b_w2a16g16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 3b -v 1 -g 16 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/llama3b_w2a16g32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 3b -v 1 -g 32 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/llama3b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 3b -v 1 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/tiny_llama_w2a16g32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 1.1b -v 2 -g 32 4 | 5 | -------------------------------------------------------------------------------- /scripts/inference/tiny_llama_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 1.1b -v 2 -g 8 4 | 5 | -------------------------------------------------------------------------------- /scripts/instruction-chat/llama2_70b_w2a16g8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 70b-chat -v 2 -g 8 4 | 5 | --------------------------------------------------------------------------------