├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── configs ├── eval │ ├── all_fr_tasks.txt │ ├── eval_config.yaml │ └── template.yaml ├── mergekit │ ├── merge_linear.yaml │ └── merge_slerp.yaml └── train │ ├── sft_config.yaml │ └── zero1.json ├── media ├── lfm2-luth.png ├── logo_kurakura.png └── luth-graph.png ├── patch_axolotl └── base.py ├── patch_lighteval └── patch.py ├── requirements-train.txt ├── requirements.txt ├── scripts ├── eval_output_visualize.py └── push_to_hub.py └── src ├── data ├── aime-24-fr │ └── calls_translate.py ├── cleaning.py ├── data_instruct.py ├── data_reasoning.py ├── filtering.py └── scholar-fr │ └── scripts │ ├── asynchronous_calls_clean.py │ ├── asynchronous_calls_extraction.py │ ├── count_tokens.py │ ├── crawler │ ├── bac │ │ ├── clean_pdf_url.py │ │ ├── crawl_pdf.sh │ │ └── download_pdf.py │ ├── cpge │ │ ├── clean_pdf_prepa.py │ │ ├── crawl_prepa.py │ │ └── download_pdf.py │ └── make_pdf_pairs.py │ ├── gather_samples.py │ ├── prompts.py │ └── sanity_check_and_gather_clean.py ├── eval ├── eval.py ├── metrics.py ├── prompts.py └── tasks.py └── train └── sft.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/README.md -------------------------------------------------------------------------------- /configs/eval/all_fr_tasks.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/configs/eval/all_fr_tasks.txt -------------------------------------------------------------------------------- /configs/eval/eval_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/configs/eval/eval_config.yaml -------------------------------------------------------------------------------- /configs/eval/template.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/configs/eval/template.yaml -------------------------------------------------------------------------------- /configs/mergekit/merge_linear.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/configs/mergekit/merge_linear.yaml -------------------------------------------------------------------------------- /configs/mergekit/merge_slerp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/configs/mergekit/merge_slerp.yaml -------------------------------------------------------------------------------- /configs/train/sft_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/configs/train/sft_config.yaml -------------------------------------------------------------------------------- /configs/train/zero1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/configs/train/zero1.json -------------------------------------------------------------------------------- /media/lfm2-luth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/media/lfm2-luth.png -------------------------------------------------------------------------------- /media/logo_kurakura.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/media/logo_kurakura.png -------------------------------------------------------------------------------- /media/luth-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/media/luth-graph.png -------------------------------------------------------------------------------- /patch_axolotl/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/patch_axolotl/base.py -------------------------------------------------------------------------------- /patch_lighteval/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/patch_lighteval/patch.py -------------------------------------------------------------------------------- /requirements-train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/requirements-train.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/eval_output_visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/scripts/eval_output_visualize.py -------------------------------------------------------------------------------- /scripts/push_to_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/scripts/push_to_hub.py -------------------------------------------------------------------------------- /src/data/aime-24-fr/calls_translate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/aime-24-fr/calls_translate.py -------------------------------------------------------------------------------- /src/data/cleaning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/cleaning.py -------------------------------------------------------------------------------- /src/data/data_instruct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/data_instruct.py -------------------------------------------------------------------------------- /src/data/data_reasoning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/data_reasoning.py -------------------------------------------------------------------------------- /src/data/filtering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/filtering.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/asynchronous_calls_clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/asynchronous_calls_clean.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/asynchronous_calls_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/asynchronous_calls_extraction.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/count_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/count_tokens.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/crawler/bac/clean_pdf_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/crawler/bac/clean_pdf_url.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/crawler/bac/crawl_pdf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/crawler/bac/crawl_pdf.sh -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/crawler/bac/download_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/crawler/bac/download_pdf.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/crawler/cpge/clean_pdf_prepa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/crawler/cpge/clean_pdf_prepa.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/crawler/cpge/crawl_prepa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/crawler/cpge/crawl_prepa.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/crawler/cpge/download_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/crawler/cpge/download_pdf.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/crawler/make_pdf_pairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/crawler/make_pdf_pairs.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/gather_samples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/gather_samples.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/prompts.py -------------------------------------------------------------------------------- /src/data/scholar-fr/scripts/sanity_check_and_gather_clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/data/scholar-fr/scripts/sanity_check_and_gather_clean.py -------------------------------------------------------------------------------- /src/eval/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/eval/eval.py -------------------------------------------------------------------------------- /src/eval/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/eval/metrics.py -------------------------------------------------------------------------------- /src/eval/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/eval/prompts.py -------------------------------------------------------------------------------- /src/eval/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/eval/tasks.py -------------------------------------------------------------------------------- /src/train/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurakurai/Luth/HEAD/src/train/sft.py --------------------------------------------------------------------------------