├── LICENSE ├── README.md ├── data_management ├── .gitignore ├── LICENSE.mecab ├── README.md ├── assets │ └── pipeline.png ├── bin │ ├── concat_jsonl_files │ ├── download_mc4_ja │ └── setup ├── posttraining │ ├── __init__.py │ └── download_dataset.py ├── preprocessing │ ├── __init__.py │ ├── dedup.py │ ├── download_dataset │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── c4.py │ │ ├── redpajama.py │ │ ├── redpajama_v2.py │ │ └── wikipedia.py │ └── filtering │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── custom_document_filters.py │ │ ├── custom_token_filters.py │ │ └── custom_tokenization.py ├── requirements.txt ├── scripts │ ├── .keep │ ├── download_redpajama.sh │ └── download_redpajama_v2.sh └── tmp │ └── .keep ├── eval └── README.md ├── infra ├── README.md └── infra_environment.png └── train ├── .gitignore ├── README-train_abci_multi_node_multi_gpu.md ├── README-train_abci_single_node_single_gpu.md ├── README-train_gcp_honban_multi_node_multi_gpu.md ├── README-train_gcp_play_multi_node_multi_gpu.md ├── README-train_gcp_play_single_node_multi_gpu.md ├── README.md ├── assets ├── wandb_add_tag_in_project_page.png ├── wandb_add_tag_in_run_page.png ├── wandb_finetune_multinode.png ├── wandb_finetune_node1_system_metrics.png ├── wandb_finetune_node1_training_metrics.png ├── wandb_finetune_node2_system_metrics.png ├── wandb_finetune_node2_training_metrics.png ├── wandb_pretrain_multinode.png ├── wandb_pretrain_node1_system_metrics.png ├── wandb_pretrain_node1_training_metrics.png ├── wandb_pretrain_node2_system_metrics.png └── wandb_pretrain_node2_training_metrics.png ├── output └── .gitkeep ├── requirements.txt └── scripts ├── common ├── create_ssh_config_file_for_abci_multi_node_multi_gpu.sh ├── create_ssh_config_file_for_gcp_play_multi_node_multi_gpu.sh ├── special_token_list.py └── upload_tokenizer_and_model_to_huggingface_hub.py ├── step1_train_tokenizer ├── dataset │ └── botchan.txt ├── sample_dataset │ └── botchan.txt └── train_sentencepiece_tokenizer.py ├── step2_pretrain_model ├── abci_node-1_gpu-1 │ └── dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B │ │ └── zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh ├── abci_node-2_gpu-16 │ ├── dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B │ │ ├── zero-1_dp-16_pp-1_tp-1_flashattn2-on.sh │ │ ├── zero-1_dp-1_pp-4_tp-4_flashattn2-on.sh │ │ ├── zero-1_dp-4_pp-2_tp-2_flashattn2-on.sh │ │ ├── zero-1_dp-8_pp-1_tp-2_flashattn2-on.sh │ │ └── zero-1_dp-8_pp-2_tp-1_flashattn2-on.sh │ └── dataset-arxiv_tokenizer-sentencepiece_model-gpt_1.3B │ │ └── zero-1_dp-4_pp-2_tp-2_flashattn2-on.sh ├── gcp_honban_node-2_gpu-16 │ └── dataset-book_wikija_tokenizer-sentencepiece_model-gpt_1.3B │ │ └── zero-0_dp-16_pp-1_tp-1_precision-fp32_flashattn2-on.sh ├── gcp_node-1_gpu │ └── dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B │ │ └── zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh ├── gcp_node-2_gpu │ └── dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B │ │ ├── zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh │ │ └── zero-0_dp-1_pp-1_tp-1_flashattn2-on_sbatch.sh ├── gcp_play_node-1_gpu-2 │ ├── dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B │ │ └── zero-0_dp-2_pp-1_tp-1_flashattn2-on.sh │ └── dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B │ │ ├── preprocess_data.sh │ │ ├── preprocess_data_sbatch.sh │ │ ├── train.sh │ │ └── train_sbatch.sh └── gcp_play_node-2_gpu-16 │ └── dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B │ ├── zero-0_dp-16_pp-1_tp-1_flashattn2-on.sh │ └── zero-0_dp-16_pp-1_tp-1_flashattn2-on_sbatch.sh ├── step3_upload_pretrained_model ├── convert_tokenizer_and_pretrained_model_to_huggingface_transformers.sh ├── convert_tokenizer_from_sentencepiece_to_huggingface_transformers.py ├── sentencepiece_model_pb2.py └── upload_tokenizer_and_pretrained_model_to_huggingface_hub.py ├── step4_finetune_model ├── abci_node-1_gpu-1 │ └── dataset-openassistant │ │ └── launcher-none_zero-none.sh ├── abci_node-2_gpu-16 │ └── dataset-openassistant │ │ ├── deepspeed_config │ │ ├── ds_config_zero2.json │ │ └── ds_config_zero3.json │ │ ├── launcher-deepspeed_zero-2.sh │ │ └── launcher-deepspeed_zero-3.sh ├── gcp_honban_node-2_gpu-16 │ └── dataset-openassistant │ │ ├── deepspeed_config │ │ ├── ds_config_zero2.json │ │ └── ds_config_zero3.json │ │ ├── launcher-deepspeed_zero-2.sh │ │ └── launcher-deepspeed_zero-3.sh ├── gcp_play_node-1_gpu-2 │ └── dataset-openassistant │ │ └── launcher-none_zero-none.sh ├── gcp_play_node-1_gpu │ └── dataset-openassistant │ │ └── launcher-none_zero-none.sh ├── gcp_play_node-2_gpu-16 │ └── dataset-openassistant │ │ ├── deepspeed_config │ │ ├── ds_config_zero2.json │ │ └── ds_config_zero3.json │ │ ├── launcher-deepspeed_zero-2.sh │ │ └── launcher-deepspeed_zero-3.sh └── gcp_play_node-2_gpu │ └── dataset-openassistant │ ├── deepspeed_config │ ├── ds_config_zero2.json │ └── ds_config_zero3.json │ ├── launcher-deepspeed_zero-2.sh │ └── launcher-deepspeed_zero-3.sh └── step5_upload_finetuned_model └── upload_tokenizer_and_finetuned_model_to_huggingface_hub.py /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/README.md -------------------------------------------------------------------------------- /data_management/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/.gitignore -------------------------------------------------------------------------------- /data_management/LICENSE.mecab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/LICENSE.mecab -------------------------------------------------------------------------------- /data_management/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/README.md -------------------------------------------------------------------------------- /data_management/assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/assets/pipeline.png -------------------------------------------------------------------------------- /data_management/bin/concat_jsonl_files: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/bin/concat_jsonl_files -------------------------------------------------------------------------------- /data_management/bin/download_mc4_ja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/bin/download_mc4_ja -------------------------------------------------------------------------------- /data_management/bin/setup: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/bin/setup -------------------------------------------------------------------------------- /data_management/posttraining/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_management/posttraining/download_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/posttraining/download_dataset.py -------------------------------------------------------------------------------- /data_management/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_management/preprocessing/dedup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/dedup.py -------------------------------------------------------------------------------- /data_management/preprocessing/download_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_management/preprocessing/download_dataset/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/download_dataset/__main__.py -------------------------------------------------------------------------------- /data_management/preprocessing/download_dataset/c4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/download_dataset/c4.py -------------------------------------------------------------------------------- /data_management/preprocessing/download_dataset/redpajama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/download_dataset/redpajama.py -------------------------------------------------------------------------------- /data_management/preprocessing/download_dataset/redpajama_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/download_dataset/redpajama_v2.py -------------------------------------------------------------------------------- /data_management/preprocessing/download_dataset/wikipedia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/download_dataset/wikipedia.py -------------------------------------------------------------------------------- /data_management/preprocessing/filtering/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_management/preprocessing/filtering/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/filtering/__main__.py -------------------------------------------------------------------------------- /data_management/preprocessing/filtering/custom_document_filters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/filtering/custom_document_filters.py -------------------------------------------------------------------------------- /data_management/preprocessing/filtering/custom_token_filters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/filtering/custom_token_filters.py -------------------------------------------------------------------------------- /data_management/preprocessing/filtering/custom_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/preprocessing/filtering/custom_tokenization.py -------------------------------------------------------------------------------- /data_management/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/requirements.txt -------------------------------------------------------------------------------- /data_management/scripts/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_management/scripts/download_redpajama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/scripts/download_redpajama.sh -------------------------------------------------------------------------------- /data_management/scripts/download_redpajama_v2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/data_management/scripts/download_redpajama_v2.sh -------------------------------------------------------------------------------- /data_management/tmp/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/eval/README.md -------------------------------------------------------------------------------- /infra/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/infra/README.md -------------------------------------------------------------------------------- /infra/infra_environment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/infra/infra_environment.png -------------------------------------------------------------------------------- /train/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/.gitignore -------------------------------------------------------------------------------- /train/README-train_abci_multi_node_multi_gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/README-train_abci_multi_node_multi_gpu.md -------------------------------------------------------------------------------- /train/README-train_abci_single_node_single_gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/README-train_abci_single_node_single_gpu.md -------------------------------------------------------------------------------- /train/README-train_gcp_honban_multi_node_multi_gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/README-train_gcp_honban_multi_node_multi_gpu.md -------------------------------------------------------------------------------- /train/README-train_gcp_play_multi_node_multi_gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/README-train_gcp_play_multi_node_multi_gpu.md -------------------------------------------------------------------------------- /train/README-train_gcp_play_single_node_multi_gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/README-train_gcp_play_single_node_multi_gpu.md -------------------------------------------------------------------------------- /train/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/README.md -------------------------------------------------------------------------------- /train/assets/wandb_add_tag_in_project_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_add_tag_in_project_page.png -------------------------------------------------------------------------------- /train/assets/wandb_add_tag_in_run_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_add_tag_in_run_page.png -------------------------------------------------------------------------------- /train/assets/wandb_finetune_multinode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_finetune_multinode.png -------------------------------------------------------------------------------- /train/assets/wandb_finetune_node1_system_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_finetune_node1_system_metrics.png -------------------------------------------------------------------------------- /train/assets/wandb_finetune_node1_training_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_finetune_node1_training_metrics.png -------------------------------------------------------------------------------- /train/assets/wandb_finetune_node2_system_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_finetune_node2_system_metrics.png -------------------------------------------------------------------------------- /train/assets/wandb_finetune_node2_training_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_finetune_node2_training_metrics.png -------------------------------------------------------------------------------- /train/assets/wandb_pretrain_multinode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_pretrain_multinode.png -------------------------------------------------------------------------------- /train/assets/wandb_pretrain_node1_system_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_pretrain_node1_system_metrics.png -------------------------------------------------------------------------------- /train/assets/wandb_pretrain_node1_training_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_pretrain_node1_training_metrics.png -------------------------------------------------------------------------------- /train/assets/wandb_pretrain_node2_system_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_pretrain_node2_system_metrics.png -------------------------------------------------------------------------------- /train/assets/wandb_pretrain_node2_training_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/assets/wandb_pretrain_node2_training_metrics.png -------------------------------------------------------------------------------- /train/output/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/requirements.txt -------------------------------------------------------------------------------- /train/scripts/common/create_ssh_config_file_for_abci_multi_node_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/common/create_ssh_config_file_for_abci_multi_node_multi_gpu.sh -------------------------------------------------------------------------------- /train/scripts/common/create_ssh_config_file_for_gcp_play_multi_node_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/common/create_ssh_config_file_for_gcp_play_multi_node_multi_gpu.sh -------------------------------------------------------------------------------- /train/scripts/common/special_token_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/common/special_token_list.py -------------------------------------------------------------------------------- /train/scripts/common/upload_tokenizer_and_model_to_huggingface_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/common/upload_tokenizer_and_model_to_huggingface_hub.py -------------------------------------------------------------------------------- /train/scripts/step1_train_tokenizer/dataset/botchan.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step1_train_tokenizer/dataset/botchan.txt -------------------------------------------------------------------------------- /train/scripts/step1_train_tokenizer/sample_dataset/botchan.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step1_train_tokenizer/sample_dataset/botchan.txt -------------------------------------------------------------------------------- /train/scripts/step1_train_tokenizer/train_sentencepiece_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step1_train_tokenizer/train_sentencepiece_tokenizer.py -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/abci_node-1_gpu-1/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/abci_node-1_gpu-1/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-16_pp-1_tp-1_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-16_pp-1_tp-1_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-1_pp-4_tp-4_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-1_pp-4_tp-4_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-4_pp-2_tp-2_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-4_pp-2_tp-2_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-8_pp-1_tp-2_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-8_pp-1_tp-2_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-8_pp-2_tp-1_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-1_dp-8_pp-2_tp-1_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_1.3B/zero-1_dp-4_pp-2_tp-2_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/abci_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_1.3B/zero-1_dp-4_pp-2_tp-2_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_honban_node-2_gpu-16/dataset-book_wikija_tokenizer-sentencepiece_model-gpt_1.3B/zero-0_dp-16_pp-1_tp-1_precision-fp32_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_honban_node-2_gpu-16/dataset-book_wikija_tokenizer-sentencepiece_model-gpt_1.3B/zero-0_dp-16_pp-1_tp-1_precision-fp32_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_node-1_gpu/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_node-1_gpu/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_node-2_gpu/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_node-2_gpu/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-1_pp-1_tp-1_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_node-2_gpu/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-1_pp-1_tp-1_flashattn2-on_sbatch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_node-2_gpu/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-1_pp-1_tp-1_flashattn2-on_sbatch.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-2_pp-1_tp-1_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-2_pp-1_tp-1_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B/preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B/preprocess_data.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B/preprocess_data_sbatch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B/preprocess_data_sbatch.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B/train.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B/train_sbatch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_play_node-1_gpu-2/dataset-mc4ja_wikija-tokenizer-sentencepiece_gpt_neox_xb_gpt_0.125B/train_sbatch.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_play_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-16_pp-1_tp-1_flashattn2-on.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_play_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-16_pp-1_tp-1_flashattn2-on.sh -------------------------------------------------------------------------------- /train/scripts/step2_pretrain_model/gcp_play_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-16_pp-1_tp-1_flashattn2-on_sbatch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step2_pretrain_model/gcp_play_node-2_gpu-16/dataset-arxiv_tokenizer-sentencepiece_model-gpt_0.125B/zero-0_dp-16_pp-1_tp-1_flashattn2-on_sbatch.sh -------------------------------------------------------------------------------- /train/scripts/step3_upload_pretrained_model/convert_tokenizer_and_pretrained_model_to_huggingface_transformers.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step3_upload_pretrained_model/convert_tokenizer_and_pretrained_model_to_huggingface_transformers.sh -------------------------------------------------------------------------------- /train/scripts/step3_upload_pretrained_model/convert_tokenizer_from_sentencepiece_to_huggingface_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step3_upload_pretrained_model/convert_tokenizer_from_sentencepiece_to_huggingface_transformers.py -------------------------------------------------------------------------------- /train/scripts/step3_upload_pretrained_model/sentencepiece_model_pb2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step3_upload_pretrained_model/sentencepiece_model_pb2.py -------------------------------------------------------------------------------- /train/scripts/step3_upload_pretrained_model/upload_tokenizer_and_pretrained_model_to_huggingface_hub.py: -------------------------------------------------------------------------------- 1 | ../common/upload_tokenizer_and_model_to_huggingface_hub.py -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/abci_node-1_gpu-1/dataset-openassistant/launcher-none_zero-none.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/abci_node-1_gpu-1/dataset-openassistant/launcher-none_zero-none.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/abci_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/abci_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero2.json -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/abci_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/abci_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero3.json -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/abci_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/abci_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-2.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/abci_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/abci_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-3.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_honban_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_honban_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero2.json -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_honban_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_honban_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero3.json -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_honban_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_honban_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-2.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_honban_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_honban_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-3.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-1_gpu-2/dataset-openassistant/launcher-none_zero-none.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-1_gpu-2/dataset-openassistant/launcher-none_zero-none.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-1_gpu/dataset-openassistant/launcher-none_zero-none.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-1_gpu/dataset-openassistant/launcher-none_zero-none.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero2.json -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-2_gpu-16/dataset-openassistant/deepspeed_config/ds_config_zero3.json -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-2.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-2_gpu-16/dataset-openassistant/launcher-deepspeed_zero-3.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-2_gpu/dataset-openassistant/deepspeed_config/ds_config_zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-2_gpu/dataset-openassistant/deepspeed_config/ds_config_zero2.json -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-2_gpu/dataset-openassistant/deepspeed_config/ds_config_zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-2_gpu/dataset-openassistant/deepspeed_config/ds_config_zero3.json -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-2_gpu/dataset-openassistant/launcher-deepspeed_zero-2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-2_gpu/dataset-openassistant/launcher-deepspeed_zero-2.sh -------------------------------------------------------------------------------- /train/scripts/step4_finetune_model/gcp_play_node-2_gpu/dataset-openassistant/launcher-deepspeed_zero-3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matsuolab/ucllm_nedo_prod/HEAD/train/scripts/step4_finetune_model/gcp_play_node-2_gpu/dataset-openassistant/launcher-deepspeed_zero-3.sh -------------------------------------------------------------------------------- /train/scripts/step5_upload_finetuned_model/upload_tokenizer_and_finetuned_model_to_huggingface_hub.py: -------------------------------------------------------------------------------- 1 | ../common/upload_tokenizer_and_model_to_huggingface_hub.py --------------------------------------------------------------------------------