├── LICENSE
├── README.md
├── configs
    ├── llama_100m.json
    ├── llama_130m.json
    ├── llama_13b.json
    ├── llama_1b.json
    ├── llama_20m.json
    ├── llama_250m.json
    ├── llama_350m.json
    ├── llama_35m.json
    ├── llama_3b.json
    ├── llama_40m.json
    ├── llama_60m.json
    ├── llama_71m.json
    ├── llama_7b.json
    └── llama_9m.json
├── figures
    ├── SLTrain_fig1.png
    ├── SLTrain_fig2.png
    ├── sltrain_result_all.png
    └── sltrain_result_memory.png
├── peft_pretraining
    ├── args_utils.py
    ├── dataloader.py
    ├── modeling_llama.py
    └── training_utils.py
├── requirements.txt
├── scripts
    └── llm_pretrain
    │   ├── sltrain130m.sh
    │   ├── sltrain1b.sh
    │   ├── sltrain350m.sh
    │   ├── sltrain60m.sh
    │   └── sltrain7b.sh
├── setup.py
├── sparse-lora
    ├── setup.py
    └── sparse_linear.cpp
├── splora
    ├── __init__.py
    ├── splora_linear.py
    └── splora_model.py
├── torchrun_main.py
└── train_utils.py


/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/README.md


--------------------------------------------------------------------------------
/configs/llama_100m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_100m.json


--------------------------------------------------------------------------------
/configs/llama_130m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_130m.json


--------------------------------------------------------------------------------
/configs/llama_13b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_13b.json


--------------------------------------------------------------------------------
/configs/llama_1b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_1b.json


--------------------------------------------------------------------------------
/configs/llama_20m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_20m.json


--------------------------------------------------------------------------------
/configs/llama_250m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_250m.json


--------------------------------------------------------------------------------
/configs/llama_350m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_350m.json


--------------------------------------------------------------------------------
/configs/llama_35m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_35m.json


--------------------------------------------------------------------------------
/configs/llama_3b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_3b.json


--------------------------------------------------------------------------------
/configs/llama_40m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_40m.json


--------------------------------------------------------------------------------
/configs/llama_60m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_60m.json


--------------------------------------------------------------------------------
/configs/llama_71m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_71m.json


--------------------------------------------------------------------------------
/configs/llama_7b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_7b.json


--------------------------------------------------------------------------------
/configs/llama_9m.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/configs/llama_9m.json


--------------------------------------------------------------------------------
/figures/SLTrain_fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/figures/SLTrain_fig1.png


--------------------------------------------------------------------------------
/figures/SLTrain_fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/figures/SLTrain_fig2.png


--------------------------------------------------------------------------------
/figures/sltrain_result_all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/figures/sltrain_result_all.png


--------------------------------------------------------------------------------
/figures/sltrain_result_memory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/figures/sltrain_result_memory.png


--------------------------------------------------------------------------------
/peft_pretraining/args_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/peft_pretraining/args_utils.py


--------------------------------------------------------------------------------
/peft_pretraining/dataloader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/peft_pretraining/dataloader.py


--------------------------------------------------------------------------------
/peft_pretraining/modeling_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/peft_pretraining/modeling_llama.py


--------------------------------------------------------------------------------
/peft_pretraining/training_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/peft_pretraining/training_utils.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | transformers
3 | bitsandbytes


--------------------------------------------------------------------------------
/scripts/llm_pretrain/sltrain130m.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/scripts/llm_pretrain/sltrain130m.sh


--------------------------------------------------------------------------------
/scripts/llm_pretrain/sltrain1b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/scripts/llm_pretrain/sltrain1b.sh


--------------------------------------------------------------------------------
/scripts/llm_pretrain/sltrain350m.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/scripts/llm_pretrain/sltrain350m.sh


--------------------------------------------------------------------------------
/scripts/llm_pretrain/sltrain60m.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/scripts/llm_pretrain/sltrain60m.sh


--------------------------------------------------------------------------------
/scripts/llm_pretrain/sltrain7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/scripts/llm_pretrain/sltrain7b.sh


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/setup.py


--------------------------------------------------------------------------------
/sparse-lora/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/sparse-lora/setup.py


--------------------------------------------------------------------------------
/sparse-lora/sparse_linear.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/sparse-lora/sparse_linear.cpp


--------------------------------------------------------------------------------
/splora/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/splora/__init__.py


--------------------------------------------------------------------------------
/splora/splora_linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/splora/splora_linear.py


--------------------------------------------------------------------------------
/splora/splora_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/splora/splora_model.py


--------------------------------------------------------------------------------
/torchrun_main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/torchrun_main.py


--------------------------------------------------------------------------------
/train_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andyjm3/SLTrain/HEAD/train_utils.py


--------------------------------------------------------------------------------