├── .gitignore ├── CITATION.cff ├── LICENSE ├── LLAMA2-LICENSE ├── README.md ├── assets ├── demo_screenshot.png └── introducing_tamil_llama.png ├── config ├── lm_studio │ └── model_config.json └── ollama │ └── Modelfile ├── requirements.txt └── scripts ├── eval ├── chatgpt_preds.py └── run_eval.py ├── train ├── finetune │ ├── finetune.py │ ├── make_shards.py │ └── run_finetuning.sh ├── pretrain │ ├── README.md │ ├── flash_attn_patch.py │ ├── run_clm_with_peft.py │ └── run_pt.sh ├── sentencepiece │ ├── generate_text_corpus.py │ ├── merge_tokenizer.py │ ├── test.py │ └── train.py └── utils │ ├── merge_adapter.py │ └── push_to_hub.py └── utils └── count_indic_tokens.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/.gitignore -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/CITATION.cff -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/LICENSE -------------------------------------------------------------------------------- /LLAMA2-LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/LLAMA2-LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/README.md -------------------------------------------------------------------------------- /assets/demo_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/assets/demo_screenshot.png -------------------------------------------------------------------------------- /assets/introducing_tamil_llama.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/assets/introducing_tamil_llama.png -------------------------------------------------------------------------------- /config/lm_studio/model_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/config/lm_studio/model_config.json -------------------------------------------------------------------------------- /config/ollama/Modelfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/config/ollama/Modelfile -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/eval/chatgpt_preds.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/eval/chatgpt_preds.py -------------------------------------------------------------------------------- /scripts/eval/run_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/eval/run_eval.py -------------------------------------------------------------------------------- /scripts/train/finetune/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/finetune/finetune.py -------------------------------------------------------------------------------- /scripts/train/finetune/make_shards.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/finetune/make_shards.py -------------------------------------------------------------------------------- /scripts/train/finetune/run_finetuning.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/finetune/run_finetuning.sh -------------------------------------------------------------------------------- /scripts/train/pretrain/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/pretrain/README.md -------------------------------------------------------------------------------- /scripts/train/pretrain/flash_attn_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/pretrain/flash_attn_patch.py -------------------------------------------------------------------------------- /scripts/train/pretrain/run_clm_with_peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/pretrain/run_clm_with_peft.py -------------------------------------------------------------------------------- /scripts/train/pretrain/run_pt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/pretrain/run_pt.sh -------------------------------------------------------------------------------- /scripts/train/sentencepiece/generate_text_corpus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/sentencepiece/generate_text_corpus.py -------------------------------------------------------------------------------- /scripts/train/sentencepiece/merge_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/sentencepiece/merge_tokenizer.py -------------------------------------------------------------------------------- /scripts/train/sentencepiece/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/sentencepiece/test.py -------------------------------------------------------------------------------- /scripts/train/sentencepiece/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/sentencepiece/train.py -------------------------------------------------------------------------------- /scripts/train/utils/merge_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/utils/merge_adapter.py -------------------------------------------------------------------------------- /scripts/train/utils/push_to_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/train/utils/push_to_hub.py -------------------------------------------------------------------------------- /scripts/utils/count_indic_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhinand5/tamil-llama/HEAD/scripts/utils/count_indic_tokens.py --------------------------------------------------------------------------------