├── LICENSE
├── README.md
├── codellama_2b_inference.py
├── datautils.py
├── evaluate.py
├── llama_2b_evaluate.py
├── llama_2b_inference.py
├── llama_2b_sft_alpaca.py
├── llama_4b_evaluate.py
├── model.py
├── peft_tuners_lora.py
├── requirements.txt
└── scripts
    ├── evaluate
    │   ├── llama1_30b_w2a16g8.sh
    │   ├── llama1_7b_w2a16g32.sh
    │   ├── llama2_70b_w2a16g8.sh
    │   ├── llama2_7b_w2a16g32.sh
    │   ├── llama2_7b_w2a16g8.sh
    │   ├── llama3b_w2a16g16.sh
    │   ├── llama3b_w2a16g32.sh
    │   ├── llama3b_w2a16g8.sh
    │   ├── tiny_llama_w2a16g32.sh
    │   └── tiny_llama_w2a16g8.sh
    ├── inference
    │   ├── codellama_34b_w2a16g8.sh
    │   ├── llama1_30b_w2a16g8.sh
    │   ├── llama1_7b_w2a16g32.sh
    │   ├── llama2_70b_w2a16g8.sh
    │   ├── llama2_7b_w2a16g32.sh
    │   ├── llama2_7b_w2a16g8.sh
    │   ├── llama3b_w2a16g16.sh
    │   ├── llama3b_w2a16g32.sh
    │   ├── llama3b_w2a16g8.sh
    │   ├── tiny_llama_w2a16g32.sh
    │   └── tiny_llama_w2a16g8.sh
    └── instruction-chat
        └── llama2_70b_w2a16g8.sh


/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/README.md


--------------------------------------------------------------------------------
/codellama_2b_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/codellama_2b_inference.py


--------------------------------------------------------------------------------
/datautils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/datautils.py


--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/evaluate.py


--------------------------------------------------------------------------------
/llama_2b_evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/llama_2b_evaluate.py


--------------------------------------------------------------------------------
/llama_2b_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/llama_2b_inference.py


--------------------------------------------------------------------------------
/llama_2b_sft_alpaca.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/llama_2b_sft_alpaca.py


--------------------------------------------------------------------------------
/llama_4b_evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/llama_4b_evaluate.py


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/model.py


--------------------------------------------------------------------------------
/peft_tuners_lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/peft_tuners_lora.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GreenBitAI/low_bit_llama/HEAD/requirements.txt


--------------------------------------------------------------------------------
/scripts/evaluate/llama1_30b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 30b -v 1 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama1_30b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 30b -v 1 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/llama1_7b_w2a16g32.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 7b -v 1 -g 32" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama1_7b_w2a16g32) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 7b -v 1 -g 32 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/llama2_70b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 70b -v 2 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama2_70b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 70b -v 2 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/llama2_7b_w2a16g32.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 7b -v 2 -g 32" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama2_7b_w2a16g32) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 7b -v 2 -g 32 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/llama2_7b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 7b -v 2 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama2_7b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 7b -v 2 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/llama3b_w2a16g16.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 3b -v 1 -g 16" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama3b_w2a16g16) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 3b -v 1 -g 16 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/llama3b_w2a16g32.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 3b -v 1 -g 32" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama3b_w2a16g32) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 3b -v 1 -g 32 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/llama3b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 3b -v 1 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama3b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 3b -v 1 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/tiny_llama_w2a16g32.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 1.1b -v 2 -g 32" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name tiny_llama_w2a16g32) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 1.1b -v 2 -g 32 "$@"


--------------------------------------------------------------------------------
/scripts/evaluate/tiny_llama_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_evaluate.py with "-s 1.1b -v 2 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name tiny_llama_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_evaluate.py.
9 | 
10 | # llama_2b_evaluate.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_evaluate.py -s 1.1b -v 2 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/inference/codellama_34b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches codellama_2b_inference.py with "-s 34B -v 2 -g 8" while exposing
4 | # only CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, model version and
7 | # quantization group size (cf. the file name codellama_34b_w2a16g8) -- confirm
8 | # against the argument parser in codellama_2b_inference.py.
9 | # NOTE(review): the size is spelled "34B" (upper-case B) here, whereas the
10 | # file name uses "34b" -- confirm this is the spelling the Python script
11 | # expects.
12 | 
13 | # codellama_2b_inference.py sits in the repository root, two directories above
14 | # this script; change there first so the launcher works from any working
15 | # directory.
16 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
17 | 
18 | CUDA_VISIBLE_DEVICES=0 python codellama_2b_inference.py -s 34B -v 2 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/inference/llama1_30b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 30b -v 1 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama1_30b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 30b -v 1 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/inference/llama1_7b_w2a16g32.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 7b -v 1 -g 32" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama1_7b_w2a16g32) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 7b -v 1 -g 32 "$@"


--------------------------------------------------------------------------------
/scripts/inference/llama2_70b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 70b -v 2 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama2_70b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 70b -v 2 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/inference/llama2_7b_w2a16g32.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 7b -v 2 -g 32" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama2_7b_w2a16g32) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 7b -v 2 -g 32 "$@"


--------------------------------------------------------------------------------
/scripts/inference/llama2_7b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 7b -v 2 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama2_7b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 7b -v 2 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/inference/llama3b_w2a16g16.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 3b -v 1 -g 16" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama3b_w2a16g16) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 3b -v 1 -g 16 "$@"


--------------------------------------------------------------------------------
/scripts/inference/llama3b_w2a16g32.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 3b -v 1 -g 32" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama3b_w2a16g32) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 3b -v 1 -g 32 "$@"


--------------------------------------------------------------------------------
/scripts/inference/llama3b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 3b -v 1 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name llama3b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 3b -v 1 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/inference/tiny_llama_w2a16g32.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 1.1b -v 2 -g 32" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name tiny_llama_w2a16g32) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 1.1b -v 2 -g 32 "$@"


--------------------------------------------------------------------------------
/scripts/inference/tiny_llama_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 1.1b -v 2 -g 8" while exposing only
4 | # CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size, LLaMA version and
7 | # quantization group size (cf. the file name tiny_llama_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 1.1b -v 2 -g 8 "$@"


--------------------------------------------------------------------------------
/scripts/instruction-chat/llama2_70b_w2a16g8.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Launches llama_2b_inference.py with "-s 70b-chat -v 2 -g 8" while exposing
4 | # only CUDA device 0 to the process. Any arguments given to this script are
5 | # appended to the Python command line.
6 | # NOTE(review): -s/-v/-g are presumably model size/variant, LLaMA version and
7 | # quantization group size (cf. the file name llama2_70b_w2a16g8) -- confirm
8 | # against the argument parser in llama_2b_inference.py.
9 | 
10 | # llama_2b_inference.py sits in the repository root, two directories above this
11 | # script; change there first so the launcher works from any working directory.
12 | cd "$(dirname "${BASH_SOURCE[0]}")/../.." || exit 1
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python llama_2b_inference.py -s 70b-chat -v 2 -g 8 "$@"


--------------------------------------------------------------------------------