├── .demo ├── step1_modeling.json ├── step2_process_group_manager.json ├── step3_dataloader.json ├── step4_tensor_parallel.json ├── step5_data_parallel_naive.json └── step6_data_parallel_bucket.json ├── .gitignore ├── .vscode └── settings.json ├── README.md ├── assets └── llama1B_sanity_check.png ├── requirements.txt ├── setup.py ├── step1_modeling ├── model.py ├── train.py └── utils.py ├── step2_process_group_manager ├── model.py ├── patch_step_2.diff ├── process_group_manager.py ├── train.py └── utils.py ├── step3_dataloader ├── dataloader.py ├── model.py ├── patch_step_3.diff ├── process_group_manager.py ├── train.py └── utils.py ├── step4_tensor_parallel ├── dataloader.py ├── model.py ├── patch_step_4.diff ├── process_group_manager.py ├── tensor_parallel.py ├── train.py └── utils.py ├── step5_data_parallel_naive ├── data_parallel.py ├── dataloader.py ├── model.py ├── patch_step_5.diff ├── process_group_manager.py ├── tensor_parallel.py ├── train.py └── utils.py ├── step6_data_parallel_bucket ├── data_parallel.py ├── dataloader.py ├── model.py ├── patch_step_6.diff ├── process_group_manager.py ├── tensor_parallel.py ├── train.py └── utils.py ├── step7_pipeline_parallel_afab ├── data_parallel.py ├── dataloader.py ├── model.py ├── patch_step_7.diff ├── pipeline_parallel.py ├── process_group_manager.py ├── tensor_parallel.py ├── train.py └── utils.py └── step8_pipeline_parallel_1f1b ├── data_parallel.py ├── dataloader.py ├── model.py ├── patch_step_8.diff ├── pipeline_parallel.py ├── process_group_manager.py ├── tensor_parallel.py ├── train.py └── utils.py /.demo/step1_modeling.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://elio.dev/demo-time.schema.json", 3 | "title": "Step 1: Modeling", 4 | "description": "", 5 | "demos": [ 6 | { 7 | "title": "model.py -> Llama", 8 | "description": "", 9 | "steps": [ 10 | { 11 | "action": "open", 12 | "path": "/step1_modeling/model.py" 13 | }, 14 | { 15 | "action": "highlight", 16 | "path": "/step1_modeling/model.py", 17 | "position": "135:148" 18 | } 19 | ] 20 | }, 21 | { 22 | "title": "model.py -> DecoderLayer", 23 | "description": "", 24 | "steps": [ 25 | { 26 | "action": "highlight", 27 | "path": "/step1_modeling/model.py", 28 | "position": "100:116" 29 | } 30 | ] 31 | }, 32 | { 33 | "title": "train.py -> args", 34 | "description": "", 35 | "steps": [ 36 | { 37 | "action": "highlight", 38 | "path": "/step1_modeling/train.py", 39 | "position": "17:39" 40 | } 41 | ] 42 | }, 43 | { 44 | "title": "train.py -> distributed setup", 45 | "description": "", 46 | "steps": [ 47 | { 48 | "action": "highlight", 49 | "path": "/step1_modeling/train.py", 50 | "position": "41:54" 51 | } 52 | ] 53 | }, 54 | { 55 | "title": "train.py -> model init", 56 | "description": "", 57 | "steps": [ 58 | { 59 | "action": "highlight", 60 | "path": "/step1_modeling/train.py", 61 | "position": "58:66" 62 | } 63 | ] 64 | }, 65 | { 66 | "title": "train.py -> optimizer init", 67 | "description": "", 68 | "steps": [ 69 | { 70 | "action": "highlight", 71 | "path": "/step1_modeling/train.py", 72 | "position": "70" 73 | } 74 | ] 75 | }, 76 | { 77 | "title": "train.py -> simple training loop", 78 | "description": "", 79 | "steps": [ 80 | { 81 | "action": "highlight", 82 | "path": "/step1_modeling/train.py", 83 | "position": "74:97" 84 | } 85 | ] 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /.demo/step2_process_group_manager.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://elio.dev/demo-time.schema.json", 3 | "title": "Step 2: Process Group Manager", 4 | "description": "", 5 | "demos": [ 6 | { 7 | "title": "new changes for step 2", 8 | "description": "", 9 | "steps": [ 10 | { 11 | "action": "open", 12 | "path": "/step2_process_group_manager/patch_step_2.diff" 13 | } 14 | ] 15 | }, 16 | { 17 | "title": "process_group_manager.py -> class", 18 | "description": "", 19 | "steps": [ 20 | { 21 | "action": "open", 22 | "path": "/step2_process_group_manager/process_group_manager.py" 23 | }, 24 | { 25 | "action": "highlight", 26 | "path": "/step2_process_group_manager/process_group_manager.py", 27 | "position": "5:50" 28 | } 29 | ] 30 | }, 31 | { 32 | "title": "process_group_manager.py -> assert", 33 | "description": "", 34 | "steps": [ 35 | { 36 | "action": "highlight", 37 | "path": "/step2_process_group_manager/process_group_manager.py", 38 | "position": "7:11" 39 | } 40 | ] 41 | }, 42 | { 43 | "title": "process_group_manager.py -> grid", 44 | "description": "", 45 | "steps": [ 46 | { 47 | "action": "highlight", 48 | "path": "/step2_process_group_manager/process_group_manager.py", 49 | "position": "13" 50 | } 51 | ] 52 | }, 53 | { 54 | "title": "process_group_manager.py -> find 3D coordinates of current process", 55 | "description": "", 56 | "steps": [ 57 | { 58 | "action": "highlight", 59 | "path": "/step2_process_group_manager/process_group_manager.py", 60 | "position": "15" 61 | } 62 | ] 63 | }, 64 | { 65 | "title": "process_group_manager.py -> process group", 66 | "description": "", 67 | "steps": [ 68 | { 69 | "action": "highlight", 70 | "path": "/step2_process_group_manager/process_group_manager.py", 71 | "position": "18:28" 72 | } 73 | ] 74 | }, 75 | { 76 | "title": "process_group_manager.py -> tensor parallel", 77 | "description": "", 78 | "steps": [ 79 | { 80 | "action": "highlight", 81 | "path": "/step2_process_group_manager/process_group_manager.py", 82 | "position": "30:33" 83 | } 84 | ] 85 | }, 86 | { 87 | "title": "process_group_manager.py -> pipeline parallel", 88 | "description": "", 89 | "steps": [ 90 | { 91 | "action": "highlight", 92 | "path": "/step2_process_group_manager/process_group_manager.py", 93 | "position": "35:42" 94 | } 95 | ] 96 | }, 97 | { 98 | "title": "process_group_manager.py -> data parallel", 99 | "description": "", 100 | "steps": [ 101 | { 102 | "action": "highlight", 103 | "path": "/step2_process_group_manager/process_group_manager.py", 104 | "position": "44:47" 105 | } 106 | ] 107 | }, 108 | { 109 | "title": "train.py -> init process group", 110 | "description": "", 111 | "steps": [ 112 | { 113 | "action": "highlight", 114 | "path": "/step2_process_group_manager/train.py", 115 | "position": "63:64" 116 | } 117 | ] 118 | } 119 | ] 120 | } -------------------------------------------------------------------------------- /.demo/step3_dataloader.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://elio.dev/demo-time.schema.json", 3 | "title": "Step 3: Dataloader", 4 | "description": "", 5 | "demos": [ 6 | { 7 | "title": "new changes for step 3", 8 | "description": "", 9 | "steps": [ 10 | { 11 | "action": "open", 12 | "path": "/step3_dataloader/patch_step_3.diff" 13 | } 14 | ] 15 | }, 16 | { 17 | "title": "train.py -> main train loop", 18 | "description": "", 19 | "steps": [ 20 | { 21 | "action": "open", 22 | "path": "/step3_dataloader/train.py" 23 | }, 24 | { 
25 | "action": "highlight", 26 | "path": "/step3_dataloader/train.py", 27 | "position": "154:183" 28 | } 29 | ] 30 | }, 31 | { 32 | "title": "train.py -> train_step", 33 | "description": "", 34 | "steps": [ 35 | { 36 | "action": "open", 37 | "path": "/step3_dataloader/train.py" 38 | }, 39 | { 40 | "action": "highlight", 41 | "path": "/step3_dataloader/train.py", 42 | "position": "23:44" 43 | } 44 | ] 45 | }, 46 | { 47 | "title": "dataloader.py -> class (part 1)", 48 | "description": "", 49 | "steps": [ 50 | { 51 | "action": "open", 52 | "path": "/step3_dataloader/dataloader.py" 53 | }, 54 | { 55 | "action": "highlight", 56 | "path": "/step3_dataloader/dataloader.py", 57 | "position": "23" 58 | } 59 | ] 60 | }, 61 | { 62 | "title": "dataloader.py -> tokenize_dataset", 63 | "description": "", 64 | "steps": [ 65 | { 66 | "action": "highlight", 67 | "path": "/step3_dataloader/dataloader.py", 68 | "position": "59:80" 69 | } 70 | ] 71 | }, 72 | { 73 | "title": "dataloader.py -> tokenizer_group_text", 74 | "description": "", 75 | "steps": [ 76 | { 77 | "action": "highlight", 78 | "path": "/step3_dataloader/dataloader.py", 79 | "position": "37:57" 80 | } 81 | ] 82 | }, 83 | { 84 | "title": "dataloader.py -> class (part 2)", 85 | "description": "", 86 | "steps": [ 87 | { 88 | "action": "highlight", 89 | "path": "/step3_dataloader/dataloader.py", 90 | "position": "31" 91 | } 92 | ] 93 | }, 94 | { 95 | "title": "dataloader.py -> collate_batch", 96 | "description": "", 97 | "steps": [ 98 | { 99 | "action": "highlight", 100 | "path": "/step3_dataloader/dataloader.py", 101 | "position": "82:97" 102 | } 103 | ] 104 | } 105 | ] 106 | } -------------------------------------------------------------------------------- /.demo/step4_tensor_parallel.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://elio.dev/demo-time.schema.json", 3 | "title": "Step 4: Tensor Parallel", 4 | "description": "", 5 | "demos": [ 6 | { 7 | "title": "new changes for step 4", 8 | "description": "", 9 | "steps": [ 10 | { 11 | "action": "open", 12 | "path": "/step4_tensor_parallel/patch_step_4.diff" 13 | } 14 | ] 15 | }, 16 | { 17 | "title": "tensor_parallel.py -> apply_tensor_parallel", 18 | "description": "", 19 | "steps": [ 20 | { 21 | "action": "open", 22 | "path": "/step4_tensor_parallel/tensor_parallel.py" 23 | }, 24 | { 25 | "action": "highlight", 26 | "path": "/step4_tensor_parallel/tensor_parallel.py", 27 | "position": "68:111" 28 | } 29 | ] 30 | }, 31 | { 32 | "title": "tensor_parallel.py -> ColumnParallel (local partition)", 33 | "description": "", 34 | "steps": [ 35 | { 36 | "action": "highlight", 37 | "path": "/step4_tensor_parallel/tensor_parallel.py", 38 | "position": "128:129" 39 | } 40 | ] 41 | }, 42 | { 43 | "title": "tensor_parallel.py -> ColumnParallel (reset_parameters)", 44 | "description": "", 45 | "steps": [ 46 | { 47 | "action": "highlight", 48 | "path": "/step4_tensor_parallel/tensor_parallel.py", 49 | "position": "139:157" 50 | } 51 | ] 52 | }, 53 | { 54 | "title": "tensor_parallel.py -> RowParallel (weight init)", 55 | "description": "", 56 | "steps": [ 57 | { 58 | "action": "highlight", 59 | "path": "/step4_tensor_parallel/tensor_parallel.py", 60 | "position": "180" 61 | } 62 | ] 63 | }, 64 | { 65 | "title": "tensor_parallel.py -> RowParallel (reset_parameters)", 66 | "description": "", 67 | "steps": [ 68 | { 69 | "action": "highlight", 70 | "path": "/step4_tensor_parallel/tensor_parallel.py", 71 | "position": "191:209" 72 | } 73 | ] 74 | }, 
75 | { 76 | "title": "tensor_parallel.py -> ColumnParallel (forward) part 1", 77 | "description": "", 78 | "steps": [ 79 | { 80 | "action": "highlight", 81 | "path": "/step4_tensor_parallel/tensor_parallel.py", 82 | "position": "159:162" 83 | } 84 | ] 85 | }, 86 | { 87 | "title": "tensor_parallel.py -> ColumnParallel (forward) part 2", 88 | "description": "", 89 | "steps": [ 90 | { 91 | "action": "highlight", 92 | "path": "/step4_tensor_parallel/tensor_parallel.py", 93 | "position": "79" 94 | } 95 | ] 96 | }, 97 | { 98 | "title": "tensor_parallel.py -> RowParallel (forward)", 99 | "description": "", 100 | "steps": [ 101 | { 102 | "action": "highlight", 103 | "path": "/step4_tensor_parallel/tensor_parallel.py", 104 | "position": "211:216" 105 | } 106 | ] 107 | }, 108 | { 109 | "title": "tensor_parallel.py -> RowParallel (backward)", 110 | "description": "", 111 | "steps": [ 112 | { 113 | "action": "highlight", 114 | "path": "/step4_tensor_parallel/tensor_parallel.py", 115 | "position": "26:28" 116 | } 117 | ] 118 | }, 119 | { 120 | "title": "tensor_parallel.py -> ColumnParallel (backward)", 121 | "description": "", 122 | "steps": [ 123 | { 124 | "action": "highlight", 125 | "path": "/step4_tensor_parallel/tensor_parallel.py", 126 | "position": "59:64" 127 | } 128 | ] 129 | }, 130 | { 131 | "title": "tensor_parallel.py -> ColumnParallel Last Layer (forward)", 132 | "description": "", 133 | "steps": [ 134 | { 135 | "action": "highlight", 136 | "path": "/step4_tensor_parallel/tensor_parallel.py", 137 | "position": "163:165" 138 | } 139 | ] 140 | }, 141 | { 142 | "title": "tensor_parallel.py -> ColumnParallel Last Layer (backward)", 143 | "description": "", 144 | "steps": [ 145 | { 146 | "action": "highlight", 147 | "path": "/step4_tensor_parallel/tensor_parallel.py", 148 | "position": "46:51" 149 | } 150 | ] 151 | }, 152 | { 153 | "title": "tensor_parallel.py -> VocabParallel (split embedding)", 154 | "description": "", 155 | "steps": [ 156 | { 157 | "action": "highlight", 158 | "path": "/step4_tensor_parallel/tensor_parallel.py", 159 | "position": "242:245" 160 | } 161 | ] 162 | }, 163 | { 164 | "title": "tensor_parallel.py -> VocabParallel (compute embedding)", 165 | "description": "", 166 | "steps": [ 167 | { 168 | "action": "highlight", 169 | "path": "/step4_tensor_parallel/tensor_parallel.py", 170 | "position": "287:295" 171 | } 172 | ] 173 | }, 174 | { 175 | "title": "tensor_parallel.py -> VocabParallel (zero out)", 176 | "description": "", 177 | "steps": [ 178 | { 179 | "action": "highlight", 180 | "path": "/step4_tensor_parallel/tensor_parallel.py", 181 | "position": "296:297" 182 | } 183 | ] 184 | }, 185 | { 186 | "title": "tensor_parallel.py -> VocabParallel (all_reduce)", 187 | "description": "", 188 | "steps": [ 189 | { 190 | "action": "highlight", 191 | "path": "/step4_tensor_parallel/tensor_parallel.py", 192 | "position": "298" 193 | } 194 | ] 195 | } 196 | ] 197 | } -------------------------------------------------------------------------------- /.demo/step5_data_parallel_naive.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://elio.dev/demo-time.schema.json", 3 | "title": "Step 5: Data Parallel (naive)", 4 | "description": "", 5 | "demos": [ 6 | { 7 | "title": "new changes for step 5", 8 | "description": "", 9 | "steps": [ 10 | { 11 | "action": "open", 12 | "path": "/step5_data_parallel_naive/patch_step_5.diff" 13 | } 14 | ] 15 | }, 16 | { 17 | "title": "train.py -> reminder train loop (part 1)", 18 | 
"description": "", 19 | "steps": [ 20 | { 21 | "action": "highlight", 22 | "path": "/step5_data_parallel_naive/train.py", 23 | "position": "174" 24 | } 25 | ] 26 | }, 27 | { 28 | "title": "train.py -> reminder train loop (part 2)", 29 | "description": "", 30 | "steps": [ 31 | { 32 | "action": "highlight", 33 | "path": "/step5_data_parallel_naive/train.py", 34 | "position": "179" 35 | } 36 | ] 37 | }, 38 | { 39 | "title": "train.py -> reminder train loop (part 3)", 40 | "description": "", 41 | "steps": [ 42 | { 43 | "action": "highlight", 44 | "path": "/step5_data_parallel_naive/train.py", 45 | "position": "31:49" 46 | } 47 | ] 48 | }, 49 | { 50 | "title": "train.py -> reminder train loop (part 4)", 51 | "description": "", 52 | "steps": [ 53 | { 54 | "action": "highlight", 55 | "path": "/step5_data_parallel_naive/train.py", 56 | "position": "181" 57 | } 58 | ] 59 | }, 60 | { 61 | "title": "dataloader.py -> Distributed Sampler", 62 | "description": "", 63 | "steps": [ 64 | { 65 | "action": "highlight", 66 | "path": "/step5_data_parallel_naive/dataloader.py", 67 | "position": "28:34" 68 | } 69 | ] 70 | }, 71 | { 72 | "title": "data_parallel -> last batch", 73 | "description": "", 74 | "steps": [ 75 | { 76 | "action": "highlight", 77 | "path": "/step5_data_parallel_naive/train.py", 78 | "position": "37:39" 79 | } 80 | ] 81 | }, 82 | { 83 | "title": "data_parallel -> register_backward_hook (part 1)", 84 | "description": "", 85 | "steps": [ 86 | { 87 | "action": "highlight", 88 | "path": "/step5_data_parallel_naive/data_parallel.py", 89 | "position": "16" 90 | } 91 | ] 92 | }, 93 | { 94 | "title": "data_parallel -> register_backward_hook (part 2)", 95 | "description": "", 96 | "steps": [ 97 | { 98 | "action": "highlight", 99 | "path": "/step5_data_parallel_naive/data_parallel.py", 100 | "position": "21:33" 101 | } 102 | ] 103 | } 104 | ] 105 | } -------------------------------------------------------------------------------- /.demo/step6_data_parallel_bucket.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://elio.dev/demo-time.schema.json", 3 | "title": "Step 6: Data Parallel (bucket)", 4 | "description": "", 5 | "demos": [ 6 | { 7 | "title": "new changes for step 6", 8 | "description": "", 9 | "steps": [ 10 | { 11 | "action": "open", 12 | "path": "/step6_data_parallel_bucket/patch_step_6.diff" 13 | } 14 | ] 15 | }, 16 | { 17 | "title": "data_parallel.py -> DataParallelBucket (init)", 18 | "description": "", 19 | "steps": [ 20 | { 21 | "action": "highlight", 22 | "path": "/step6_data_parallel_bucket/data_parallel.py", 23 | "position": "173:174" 24 | } 25 | ] 26 | }, 27 | { 28 | "title": "data_parallel.py -> BucketManager (empty bucket)", 29 | "description": "", 30 | "steps": [ 31 | { 32 | "action": "highlight", 33 | "path": "/step6_data_parallel_bucket/data_parallel.py", 34 | "position": "111:115" 35 | } 36 | ] 37 | }, 38 | { 39 | "title": "data_parallel.py -> BucketManager (keep adding in bucket)", 40 | "description": "", 41 | "steps": [ 42 | { 43 | "action": "highlight", 44 | "path": "/step6_data_parallel_bucket/data_parallel.py", 45 | "position": "123:124" 46 | } 47 | ] 48 | }, 49 | { 50 | "title": "data_parallel.py -> BucketManager (full bucket)", 51 | "description": "", 52 | "steps": [ 53 | { 54 | "action": "highlight", 55 | "path": "/step6_data_parallel_bucket/data_parallel.py", 56 | "position": "117:121" 57 | } 58 | ] 59 | }, 60 | { 61 | "title": "data_parallel.py -> BucketManager (some infos on bucket)", 62 | 
"description": "", 63 | "steps": [ 64 | { 65 | "action": "highlight", 66 | "path": "/step6_data_parallel_bucket/data_parallel.py", 67 | "position": "126:131" 68 | } 69 | ] 70 | }, 71 | { 72 | "title": "data_parallel.py -> BucketManager (Create actual bucket)", 73 | "description": "", 74 | "steps": [ 75 | { 76 | "action": "highlight", 77 | "path": "/step6_data_parallel_bucket/data_parallel.py", 78 | "position": "133:136" 79 | } 80 | ] 81 | }, 82 | { 83 | "title": "data_parallel.py -> Bucket (Create actual bucket 2)", 84 | "description": "", 85 | "steps": [ 86 | { 87 | "action": "highlight", 88 | "path": "/step6_data_parallel_bucket/data_parallel.py", 89 | "position": "38:52" 90 | } 91 | ] 92 | }, 93 | { 94 | "title": "data_parallel.py -> BucketManager (maing_grad variable)", 95 | "description": "", 96 | "steps": [ 97 | { 98 | "action": "highlight", 99 | "path": "/step6_data_parallel_bucket/data_parallel.py", 100 | "position": "138:144" 101 | } 102 | ] 103 | }, 104 | { 105 | "title": "data_parallel.py -> DataParallelBucket (register_backward_hook)", 106 | "description": "", 107 | "steps": [ 108 | { 109 | "action": "highlight", 110 | "path": "/step6_data_parallel_bucket/data_parallel.py", 111 | "position": "188:211" 112 | } 113 | ] 114 | }, 115 | { 116 | "title": "data_parallel.py -> DataParallelBucket (_make_param_hook 1)", 117 | "description": "", 118 | "steps": [ 119 | { 120 | "action": "highlight", 121 | "path": "/step6_data_parallel_bucket/data_parallel.py", 122 | "position": "224:22" 123 | } 124 | ] 125 | }, 126 | { 127 | "title": "data_parallel.py -> DataParallelBucket (_make_param_hook 2)", 128 | "description": "", 129 | "steps": [ 130 | { 131 | "action": "highlight", 132 | "path": "/step6_data_parallel_bucket/data_parallel.py", 133 | "position": "229:238" 134 | } 135 | ] 136 | }, 137 | { 138 | "title": "data_parallel.py -> DataParallelBucket (autograd)", 139 | "description": "", 140 | "steps": [ 141 | { 142 | "action": "highlight", 143 | "path": "/step6_data_parallel_bucket/data_parallel.py", 144 | "position": "229:238" 145 | } 146 | ] 147 | }, 148 | { 149 | "title": "data_parallel.py -> BucketManager (mark_param_as_ready)", 150 | "description": "", 151 | "steps": [ 152 | { 153 | "action": "highlight", 154 | "path": "/step6_data_parallel_bucket/data_parallel.py", 155 | "position": "159:162" 156 | } 157 | ] 158 | }, 159 | { 160 | "title": "data_parallel.py -> Bucket (mark_param_as_ready)", 161 | "description": "", 162 | "steps": [ 163 | { 164 | "action": "highlight", 165 | "path": "/step6_data_parallel_bucket/data_parallel.py", 166 | "position": "74:80" 167 | } 168 | ] 169 | }, 170 | { 171 | "title": "data_parallel.py -> Bucket (sync_gradient)", 172 | "description": "", 173 | "steps": [ 174 | { 175 | "action": "highlight", 176 | "path": "/step6_data_parallel_bucket/data_parallel.py", 177 | "position": "54:58" 178 | } 179 | ] 180 | }, 181 | { 182 | "title": "data_parallel.py -> DataParallelBucket (_post_backward 1)", 183 | "description": "", 184 | "steps": [ 185 | { 186 | "action": "highlight", 187 | "path": "/step6_data_parallel_bucket/data_parallel.py", 188 | "position": "234" 189 | } 190 | ] 191 | }, 192 | { 193 | "title": "data_parallel.py -> DataParallelBucket (_post_backward 2)", 194 | "description": "", 195 | "steps": [ 196 | { 197 | "action": "highlight", 198 | "path": "/step6_data_parallel_bucket/data_parallel.py", 199 | "position": "241:253" 200 | } 201 | ] 202 | }, 203 | { 204 | "title": "data_parallel.py -> DataParallelBucket (bucket_manager.wait)", 205 | "description": "", 
206 | "steps": [ 207 | { 208 | "action": "highlight", 209 | "path": "/step6_data_parallel_bucket/data_parallel.py", 210 | "position": "241:253" 211 | } 212 | ] 213 | }, 214 | { 215 | "title": "data_parallel.py -> BucketManager (wait)", 216 | "description": "", 217 | "steps": [ 218 | { 219 | "action": "highlight", 220 | "path": "/step6_data_parallel_bucket/data_parallel.py", 221 | "position": "154:157" 222 | } 223 | ] 224 | }, 225 | { 226 | "title": "data_parallel.py -> Bucket (wait)", 227 | "description": "", 228 | "steps": [ 229 | { 230 | "action": "highlight", 231 | "path": "/step6_data_parallel_bucket/data_parallel.py", 232 | "position": "68:72" 233 | } 234 | ] 235 | } 236 | ] 237 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__/** 2 | picotron_tutorial.egg-info/ 3 | **/launch.json -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "demoTime.highlightBorderColor": "rgb(237, 18, 146)", 3 | "demoTime.showClock": true, 4 | "demoTime.timer": 60, 5 | "demoTime.lineInsertionDelay": 0, 6 | "workbench.colorCustomizations": { 7 | "editor.selectionBackground": "#362a2a" 8 | } 9 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Picotron tutorial 2 | 3 | A step by step tutorial on how to build [Picotron](https://github.com/huggingface/picotron) distributed training framework form scratch 🔥 4 | 5 | ## Videos 6 | 7 | > More to come. Full playlist [here](https://www.youtube.com/playlist?list=PL-_armZiJvAnhcRr6yTJ0__f3Oi-LLi9S) 🎬 8 | 9 | - 🎬 [[Picotron tutorial] Part 1: Model, Process Group Manager, Dataloader](https://youtu.be/u2VSwDDpaBM) 10 | - 🎬 [[Picotron tutorial] Part 2: Tensor Parallel](https://www.youtube.com/watch?v=qUMPaSWi5HI&list=PL-_armZiJvAnhcRr6yTJ0__f3Oi-LLi9S&index=3) 11 | - 🎬 [[Picotron tutorial] Bonus: Debugging Distributed codebase](https://www.youtube.com/watch?v=_8xlRgFY_-g&list=PL-_armZiJvAnhcRr6yTJ0__f3Oi-LLi9S&index=4) 12 | - 🎬 [[Picotron tutorial] Part 3: Data Parallel (Naive & Bucket)](https://www.youtube.com/watch?v=k8EpWveM_t4&list=PL-_armZiJvAnhcRr6yTJ0__f3Oi-LLi9S&index=4) 13 | 14 | ## Setup 15 | 16 | ``` 17 | conda create -n env-picotron-tutorial python=3.10 --y 18 | conda activate env-picotron-tutorial 19 | pip install -e . 20 | ``` 21 | 22 | ## Sanity check 23 | 24 | - Convergence testing on a Llama 1B on 4096000 tokens to see if loss match. 
25 | 26 | ![](assets/llama1B_sanity_check.png) 27 | 28 | 29 | ```bash 30 | # Baseline 31 | cd step3_dataloader/ 32 | torchrun --nproc_per_node 1 train.py --micro_batch_size 4 --gradient_accumulation_steps 8 --seq_len 1024 --max_tokens 4096000 --num_proc 16 --model_name TinyLlama/TinyLlama_v1.1 --num_hidden_layers 22 --num_attention_heads 32 --num_key_value_heads 4 --run_name baseline_1B --use_wandb 33 | 34 | # Tensor Parallel 35 | cd step4_tensor_parallel/ 36 | torchrun --nproc_per_node 4 train.py --tp_size 4 --micro_batch_size 4 --gradient_accumulation_steps 8 --seq_len 1024 --max_tokens 4096000 --num_proc 16 --model_name TinyLlama/TinyLlama_v1.1 --num_hidden_layers 22 --num_attention_heads 32 --num_key_value_heads 4 --run_name tp_1B --use_wandb 37 | 38 | # Data Parallel 39 | cd step6_data_parallel_bucket/ 40 | torchrun --nproc_per_node 4 train.py --dp_size 4 --micro_batch_size 1 --gradient_accumulation_steps 8 --seq_len 1024 --max_tokens 4096000 --num_proc 16 --model_name TinyLlama/TinyLlama_v1.1 --num_hidden_layers 22 --num_attention_heads 32 --num_key_value_heads 4 --run_name dp_bucket_1B --use_wandb 41 | 42 | # Pipeline Parallel 43 | cd step8_pipeline_parallel_1f1b/ 44 | torchrun --nproc_per_node 4 train.py --pp_size 4 --pp_engine 1f1b --micro_batch_size 4 --gradient_accumulation_steps 8 --seq_len 1024 --max_tokens 4096000 --num_proc 16 --model_name TinyLlama/TinyLlama_v1.1 --num_hidden_layers 22 --num_attention_heads 32 --num_key_value_heads 4 --run_name pp_1f1b_1B --use_wandb 45 | 46 | # 3D parallelism (Tensor + Data + Pipeline parallel) 47 | torchrun --nproc_per_node 8 train.py --tp_size 2 --pp_size 2 --pp_engine 1f1b --dp_size 2 --micro_batch_size 2 --gradient_accumulation_steps 8 --seq_len 1024 --max_tokens 4096000 --num_proc 16 --model_name TinyLlama/TinyLlama_v1.1 --num_hidden_layers 22 --num_attention_heads 32 --num_key_value_heads 4 --run_name 3D_parallelism_1B --use_wandb 48 | ``` 49 | -------------------------------------------------------------------------------- /assets/llama1B_sanity_check.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/picotron_tutorial/8c14c925e2069ec2756cdf1510ed2b8bb56a098e/assets/llama1B_sanity_check.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.1.0 2 | numpy==1.26.4 3 | datasets==2.19.1 4 | transformers==4.41.1 5 | flash-attn==2.5.0 6 | lovely_tensors 7 | wandb -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | def read_requirements(): 4 | with open('requirements.txt') as req: 5 | return [line.strip() for line in req if line.strip() and not line.startswith('#')] 6 | 7 | setup( 8 | name="picotron_tutorial", 9 | version='0.1.0', 10 | packages=find_packages(), 11 | install_requires=read_requirements(), 12 | ) -------------------------------------------------------------------------------- /step1_modeling/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from flash_attn.flash_attn_interface import flash_attn_func 5 | from flash_attn.layers.rotary import apply_rotary_emb 6 | from flash_attn.ops.triton.layer_norm import
layer_norm_fn 7 | 8 | def flash_attention(q, k, v, causal = True): 9 | q = q.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 10 | k = k.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 11 | v = v.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 12 | return flash_attn_func(q, k, v, causal=causal) 13 | 14 | def get_cos_sin(seq_length, head_dim, base=500000.0): 15 | assert head_dim%2==0 16 | # Results on CUDA and CPU are different even with the same formula, To match transformers implementation. frequency should be computed on CPU 17 | theta = 1.0 / (base ** (torch.arange(0, head_dim, 2, dtype=torch.int64).float().to('cpu') / head_dim)) 18 | dtype = torch.bfloat16 19 | device = torch.device('cuda') 20 | position = torch.arange(seq_length).to(device).unsqueeze(1).float() # [seq_length, 1] 21 | # To match transformers implementation. m * theta should be computed on GPU 22 | theta = theta.to(device) 23 | return torch.cos(position.float()*theta.float()).to(dtype).repeat(1,2), torch.sin(position.float()*theta.float()).to(dtype).repeat(1,2) # [seq_length, head_dim], [seq_length, head_dim] 24 | 25 | class TritonRMSNorm(nn.Module): 26 | def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None): 27 | super().__init__() 28 | self.eps = eps 29 | self.weight = nn.Parameter(torch.ones(hidden_size)) 30 | self.register_parameter("bias", None) 31 | 32 | def forward( 33 | self, hidden_states, residual=None, dropout_p=0.0, prenorm=False, residual_in_fp32=False, return_dropout_mask=False 34 | ): 35 | return layer_norm_fn( 36 | hidden_states, 37 | self.weight, 38 | None, 39 | residual=residual, 40 | eps=self.eps, 41 | dropout_p=dropout_p, 42 | prenorm=prenorm, 43 | residual_in_fp32=residual_in_fp32, 44 | is_rms_norm=True, 45 | return_dropout_mask=return_dropout_mask, 46 | ) 47 | 48 | class Attention(nn.Module): 49 | def __init__(self, config, layer_idx): 50 | super().__init__() 51 | self.hidden_size = config.hidden_size 52 | self.num_heads = config.num_attention_heads 53 | self.num_key_values = config.num_key_value_heads 54 | self.head_dim = self.hidden_size//self.num_heads 55 | self.num_local_heads = config.num_attention_heads 56 | self.num_local_kv_heads = config.num_key_value_heads 57 | 58 | self.q_proj = nn.Linear(config.hidden_size, self.num_heads*self.head_dim, bias=False) 59 | self.k_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 60 | self.v_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 61 | self.out_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False) 62 | self.layer_idx = layer_idx 63 | 64 | def forward(self, x, cos, sin, attention_mask=None, position_ids=None): 65 | batch_size, seq_length, hidden_dim = x.size() 66 | q = self.q_proj(x) # [batch_size, seq_length, num_heads*head_dim] 67 | k = self.k_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 68 | v = self.v_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 69 | 70 | q = q.view(batch_size, seq_length, self.num_local_heads, self.head_dim) # [batch_size, seq_length, num_heads, head_dim] 71 | k = k.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim) # [batch_size, seq_length, num_key_values, head_dim] 72 | q = apply_rotary_emb(q,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_heads, head_dim] 73 | k = apply_rotary_emb(k,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # 
[batch_size, seq_length, num_key_values, head_dim] 74 | q = q.transpose(1, 2) # [batch_size, num_heads, seq_length, head_dim] 75 | k = k.transpose(1, 2) # [batch_size, num_key_values, seq_length, head_dim] 76 | v = v.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim).transpose(1,2) # [batch_size, num_key_values, seq_length, head_dim] 77 | 78 | k = k.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 79 | v = v.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 80 | 81 | causal = True if q.size(2) == k.size(2) else False # During decoding phase. The lenghth of q is usually 1. 82 | 83 | out = flash_attention(q, k, v, causal = causal) # [batch_size, seq_length, num_heads, head_dim] 84 | 85 | out = out.reshape(batch_size, seq_length, self.num_local_heads * self.head_dim) # [batch_size, seq_length, hidden_dim] 86 | out = self.out_proj(out) # [batch_size, seq_length, hidden_dim] 87 | return out 88 | 89 | class MLP(nn.Module): 90 | def __init__(self, config) -> None: 91 | super().__init__() 92 | self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 93 | self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 94 | self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False) 95 | 96 | def forward(self, x): 97 | #TODO: dont do single line operations as it is harder to debug 98 | return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) 99 | 100 | class DecoderLayer(nn.Module): 101 | # TritonRMSNorm -> Attention -> Residual -> TritonRMSNorm -> MLP -> Residual 102 | def __init__(self, config, layer_idx): 103 | super().__init__() 104 | self.input_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 105 | self.post_attention_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 106 | self.attention = Attention(config, layer_idx = layer_idx) 107 | self.mlp = MLP(config) 108 | self.layer_idx = layer_idx 109 | head_dim = config.hidden_size // config.num_attention_heads 110 | self.cos, self.sin = get_cos_sin(config.max_position_embeddings, head_dim=head_dim , base=config.rope_theta) # [max_position_embeddings, head_dim] 111 | 112 | def forward(self, x, attention_mask = None, position_ids = None): 113 | cos, sin = self.cos, self.sin 114 | x = x + self.attention(self.input_layernorm(x), cos, sin, attention_mask, position_ids) # Attention 115 | x = x + self.mlp(self.post_attention_layernorm(x)) # MLP 116 | return x 117 | 118 | class Llama(nn.Module): 119 | def __init__(self, config) -> None: 120 | super().__init__() 121 | # sanity check 122 | assert config.hidden_size % config.num_attention_heads==0 123 | assert config.num_attention_heads % config.num_key_value_heads==0 124 | 125 | # params 126 | self.vocab_size = config.vocab_size 127 | self.hidden_size = config.hidden_size 128 | self.num_heads = config.num_attention_heads 129 | self.num_key_values = config.num_key_value_heads 130 | self.head_dim = self.hidden_size//self.num_heads 131 | self.max_position_embeddings = config.max_position_embeddings 132 | self.num_layers = config.num_hidden_layers 133 | self.model_config = config 134 | 135 | # modules 136 | self.embedding = nn.Embedding(self.vocab_size, self.hidden_size) 137 | self.decoder_layers = nn.ModuleList([DecoderLayer(config,layer_idx = i) for i in range(self.num_layers)]) 138 | self.final_proj = 
nn.Linear(self.hidden_size, self.vocab_size, bias=False) 139 | self.final_norm = TritonRMSNorm(self.hidden_size, eps=config.rms_norm_eps) 140 | 141 | def forward(self, input_ids, attention_mask=None, position_ids: torch.Tensor = None): 142 | x = self.embedding(input_ids) 143 | for layer in self.decoder_layers: 144 | x = layer(x) # [batch_size, seq_length, hidden_dim] 145 | x = self.final_norm(x) 146 | logits = self.final_proj(x) 147 | 148 | return logits # [batch_size, seq_length, vocab_size] -------------------------------------------------------------------------------- /step1_modeling/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | torchrun --nproc_per_node 1 train.py 3 | """ 4 | import os 5 | import datetime 6 | import torch 7 | import torch.nn.functional as F 8 | import torch.distributed as dist 9 | import argparse 10 | from torch.optim import AdamW 11 | from transformers import AutoConfig 12 | 13 | from model import Llama 14 | from utils import set_all_seed, print 15 | 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser(description="Training script for LLaMA model") 18 | 19 | # Environment arguments 20 | parser.add_argument("--omp_num_threads", type=str, default="1") 21 | parser.add_argument("--tokenizers_parallelism", type=str, default="false") 22 | 23 | # Model arguments 24 | parser.add_argument("--model_name", type=str, default="HuggingFaceTB/SmolLM-360M-Instruct") 25 | parser.add_argument("--num_hidden_layers", type=int, default=32) 26 | parser.add_argument("--num_attention_heads", type=int, default=16) 27 | parser.add_argument("--num_key_value_heads", type=int, default=4) 28 | 29 | # Training arguments 30 | parser.add_argument("--seed", type=int, default=42) 31 | parser.add_argument("--learning_rate", type=float, default=3e-4) 32 | parser.add_argument("--seq_len", type=int, default=32) 33 | parser.add_argument("--micro_batch_size", type=int, default=1) 34 | 35 | # Logging arguments 36 | parser.add_argument("--run_name", type=str, default="default_run") 37 | parser.add_argument("--use_wandb", action="store_true") 38 | 39 | args = parser.parse_args() 40 | 41 | # Set environment variables 42 | os.environ["OMP_NUM_THREADS"] = args.omp_num_threads 43 | os.environ["TOKENIZERS_PARALLELISM"] = args.tokenizers_parallelism 44 | os.environ["DEVICE"] = "cuda" 45 | 46 | local_rank = int(os.environ["LOCAL_RANK"]) 47 | global_rank = int(os.environ["RANK"]) 48 | world_size = int(os.environ["WORLD_SIZE"]) 49 | backend = "nccl" 50 | torch.cuda.set_device(local_rank) 51 | device = torch.device("cuda", local_rank) 52 | dtype = torch.bfloat16 53 | 54 | dist.init_process_group(rank=global_rank, world_size=world_size, backend=backend, init_method=f"env://", timeout=datetime.timedelta(minutes=2)) 55 | 56 | set_all_seed(args.seed) 57 | 58 | model_config = AutoConfig.from_pretrained(args.model_name) 59 | model_config.num_hidden_layers = args.num_hidden_layers 60 | model_config.num_attention_heads = args.num_attention_heads 61 | model_config.num_key_value_heads = args.num_key_value_heads 62 | model_config.max_position_embeddings = args.seq_len 63 | 64 | model = Llama(config=model_config) 65 | model.to(dtype).to(device) 66 | model.train() 67 | 68 | dist.barrier() 69 | 70 | optimizer = AdamW(model.parameters(), lr=args.learning_rate) 71 | 72 | dist.barrier() 73 | 74 | # Create dummy data 75 | input_ids = torch.randint(0, model_config.vocab_size, (args.micro_batch_size, args.seq_len), device=device) 76 | target_ids = torch.randint(0, 
model_config.vocab_size, (args.micro_batch_size, args.seq_len), device=device) 77 | 78 | # Training step 79 | optimizer.zero_grad() 80 | 81 | # Forward pass 82 | outputs = model(input_ids=input_ids) 83 | 84 | # Compute loss 85 | target_ids = target_ids.reshape(-1) 86 | outputs = outputs.view(-1, model_config.vocab_size) 87 | loss = F.cross_entropy(outputs, target_ids) 88 | 89 | # Backward pass 90 | loss.backward() 91 | 92 | # Optimizer step 93 | optimizer.step() 94 | 95 | print(f"Loss: {loss.item():.4f}", is_print_rank=(global_rank == 0)) 96 | 97 | dist.destroy_process_group() -------------------------------------------------------------------------------- /step1_modeling/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import builtins 5 | import fcntl 6 | 7 | def print(*args, is_print_rank=True, **kwargs): 8 | """ solves multi-process interleaved print problem """ 9 | if not is_print_rank: return 10 | with open(__file__, "r") as fh: 11 | fcntl.flock(fh, fcntl.LOCK_EX) 12 | try: 13 | builtins.print(*args, **kwargs) 14 | finally: 15 | fcntl.flock(fh, fcntl.LOCK_UN) 16 | 17 | def set_all_seed(seed): 18 | for module in [random, np.random]: module.seed(seed) 19 | torch.manual_seed(seed) 20 | if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) 21 | 22 | def to_readable_format(num, precision=3): 23 | num_str = str(num) 24 | length = len(num_str) 25 | 26 | def format_with_precision(main, decimal, suffix): 27 | if precision == 0: 28 | return f"{main}{suffix}" 29 | return f"{main}.{decimal[:precision]}{suffix}" 30 | 31 | if length > 12: # Trillions 32 | return format_with_precision(num_str[:-12], num_str[-12:], 'T') 33 | elif length > 9: # Billions 34 | return format_with_precision(num_str[:-9], num_str[-9:], 'B') 35 | elif length > 6: # Millions 36 | return format_with_precision(num_str[:-6], num_str[-6:], 'M') 37 | elif length > 3: # Thousands 38 | return format_with_precision(num_str[:-3], num_str[-3:], 'K') 39 | else: 40 | return num_str -------------------------------------------------------------------------------- /step2_process_group_manager/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from flash_attn.flash_attn_interface import flash_attn_func 5 | from flash_attn.layers.rotary import apply_rotary_emb 6 | from flash_attn.ops.triton.layer_norm import layer_norm_fn 7 | 8 | def flash_attention(q, k, v, causal = True): 9 | q = q.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 10 | k = k.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 11 | v = v.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 12 | return flash_attn_func(q, k, v, causal=causal) 13 | 14 | def get_cos_sin(seq_length, head_dim, base=500000.0): 15 | assert head_dim%2==0 16 | # Results on CUDA and CPU are different even with the same formula, To match transformers implementation. frequency should be computed on CPU 17 | theta = 1.0 / (base ** (torch.arange(0, head_dim, 2, dtype=torch.int64).float().to('cpu') / head_dim)) 18 | dtype = torch.bfloat16 19 | device = torch.device('cuda') 20 | position = torch.arange(seq_length).to(device).unsqueeze(1).float() # [seq_length, 1] 21 | # To match transformers implementation. 
m * theta should be computed on GPU 22 | theta = theta.to(device) 23 | return torch.cos(position.float()*theta.float()).to(dtype).repeat(1,2), torch.sin(position.float()*theta.float()).to(dtype).repeat(1,2) # [seq_length, head_dim], [seq_length, head_dim] 24 | 25 | class TritonRMSNorm(nn.Module): 26 | def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None): 27 | super().__init__() 28 | self.eps = eps 29 | self.weight = nn.Parameter(torch.ones(hidden_size)) 30 | self.register_parameter("bias", None) 31 | 32 | def forward( 33 | self, hidden_states, residual=None, dropout_p=0.0, prenorm=False, residual_in_fp32=False, return_dropout_mask=False 34 | ): 35 | return layer_norm_fn( 36 | hidden_states, 37 | self.weight, 38 | None, 39 | residual=residual, 40 | eps=self.eps, 41 | dropout_p=dropout_p, 42 | prenorm=prenorm, 43 | residual_in_fp32=residual_in_fp32, 44 | is_rms_norm=True, 45 | return_dropout_mask=return_dropout_mask, 46 | ) 47 | 48 | class Attention(nn.Module): 49 | def __init__(self, config, layer_idx): 50 | super().__init__() 51 | self.hidden_size = config.hidden_size 52 | self.num_heads = config.num_attention_heads 53 | self.num_key_values = config.num_key_value_heads 54 | self.head_dim = self.hidden_size//self.num_heads 55 | self.num_local_heads = config.num_attention_heads 56 | self.num_local_kv_heads = config.num_key_value_heads 57 | 58 | self.q_proj = nn.Linear(config.hidden_size, self.num_heads*self.head_dim, bias=False) 59 | self.k_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 60 | self.v_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 61 | self.out_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False) 62 | self.layer_idx = layer_idx 63 | 64 | def forward(self, x, cos, sin, attention_mask=None, position_ids=None): 65 | batch_size, seq_length, hidden_dim = x.size() 66 | q = self.q_proj(x) # [batch_size, seq_length, num_heads*head_dim] 67 | k = self.k_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 68 | v = self.v_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 69 | 70 | q = q.view(batch_size, seq_length, self.num_local_heads, self.head_dim) # [batch_size, seq_length, num_heads, head_dim] 71 | k = k.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim) # [batch_size, seq_length, num_key_values, head_dim] 72 | q = apply_rotary_emb(q,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_heads, head_dim] 73 | k = apply_rotary_emb(k,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_key_values, head_dim] 74 | q = q.transpose(1, 2) # [batch_size, num_heads, seq_length, head_dim] 75 | k = k.transpose(1, 2) # [batch_size, num_key_values, seq_length, head_dim] 76 | v = v.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim).transpose(1,2) # [batch_size, num_key_values, seq_length, head_dim] 77 | 78 | k = k.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 79 | v = v.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 80 | 81 | causal = True if q.size(2) == k.size(2) else False # During decoding phase. The lenghth of q is usually 1. 
82 | 83 | out = flash_attention(q, k, v, causal = causal) # [batch_size, seq_length, num_heads, head_dim] 84 | 85 | out = out.reshape(batch_size, seq_length, self.num_local_heads * self.head_dim) # [batch_size, seq_length, hidden_dim] 86 | out = self.out_proj(out) # [batch_size, seq_length, hidden_dim] 87 | return out 88 | 89 | class MLP(nn.Module): 90 | def __init__(self, config) -> None: 91 | super().__init__() 92 | self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 93 | self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 94 | self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False) 95 | 96 | def forward(self, x): 97 | #TODO: dont do single line operations as it is harder to debug 98 | return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) 99 | 100 | class DecoderLayer(nn.Module): 101 | # TritonRMSNorm -> Attention -> Residual -> TritonRMSNorm -> MLP -> Residual 102 | def __init__(self, config, layer_idx): 103 | super().__init__() 104 | self.input_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 105 | self.post_attention_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 106 | self.attention = Attention(config, layer_idx = layer_idx) 107 | self.mlp = MLP(config) 108 | self.layer_idx = layer_idx 109 | head_dim = config.hidden_size // config.num_attention_heads 110 | self.cos, self.sin = get_cos_sin(config.max_position_embeddings, head_dim=head_dim , base=config.rope_theta) # [max_position_embeddings, head_dim] 111 | 112 | def forward(self, x, attention_mask = None, position_ids = None): 113 | cos, sin = self.cos, self.sin 114 | x = x + self.attention(self.input_layernorm(x), cos, sin, attention_mask, position_ids) # Attention 115 | x = x + self.mlp(self.post_attention_layernorm(x)) # MLP 116 | return x 117 | 118 | class Llama(nn.Module): 119 | def __init__(self, config) -> None: 120 | super().__init__() 121 | # sanity check 122 | assert config.hidden_size % config.num_attention_heads==0 123 | assert config.num_attention_heads % config.num_key_value_heads==0 124 | 125 | # params 126 | self.vocab_size = config.vocab_size 127 | self.hidden_size = config.hidden_size 128 | self.num_heads = config.num_attention_heads 129 | self.num_key_values = config.num_key_value_heads 130 | self.head_dim = self.hidden_size//self.num_heads 131 | self.max_position_embeddings = config.max_position_embeddings 132 | self.num_layers = config.num_hidden_layers 133 | self.model_config = config 134 | 135 | # modules 136 | self.embedding = nn.Embedding(self.vocab_size, self.hidden_size) 137 | self.decoder_layers = nn.ModuleList([DecoderLayer(config,layer_idx = i) for i in range(self.num_layers)]) 138 | self.final_proj = nn.Linear(self.hidden_size, self.vocab_size, bias=False) 139 | self.final_norm = TritonRMSNorm(self.hidden_size, eps=config.rms_norm_eps) 140 | 141 | def forward(self, input_ids, attention_mask=None, position_ids: torch.Tensor = None): 142 | x = self.embedding(input_ids) 143 | for layer in self.decoder_layers: 144 | x = layer(x) # [batch_size, seq_length, hidden_dim] 145 | x = self.final_norm(x) 146 | logits = self.final_proj(x) 147 | 148 | return logits # [batch_size, seq_length, vocab_size] -------------------------------------------------------------------------------- /step2_process_group_manager/patch_step_2.diff: -------------------------------------------------------------------------------- 1 | diff -x '*.diff' --new-file -ur 
step1_modeling/process_group_manager.py step2_process_group_manager/process_group_manager.py 2 | --- step1_modeling/process_group_manager.py 1970-01-01 00:00:00.000000000 +0000 3 | +++ step2_process_group_manager/process_group_manager.py 2024-11-17 15:40:02.000000000 +0000 4 | @@ -0,0 +1,54 @@ 5 | +import os 6 | +import torch 7 | +import torch.distributed as dist 8 | + 9 | +class ProcessGroupManager: 10 | + def __init__(self, dp_size, pp_size, tp_size): 11 | + self.global_rank = dist.get_rank() 12 | + self.world_size = dist.get_world_size() 13 | + self.local_rank = int(os.environ.get("LOCAL_RANK", self.global_rank % self.world_size)) 14 | + 15 | + assert self.world_size == dp_size * pp_size * tp_size, f"World size ({self.world_size}) != DP ({self.dp_size}) * PP ({self.pp_size}) * TP ({self.tp_size})" 16 | + 17 | + self.grid = torch.arange(self.world_size).view(dp_size, pp_size, tp_size) # DP * PP * TP grid 18 | + # Find the position of the current process in the grid 19 | + self.dp_rank, self.pp_rank, self.tp_rank = (self.grid == self.global_rank).nonzero().flatten().tolist() 20 | + 21 | + # Process group creation - Update indexing to match new grid order 22 | + self.tp_group = dist.new_subgroups_by_enumeration([self.grid[d, p, :].tolist() for d in range(dp_size) for p in range(pp_size)])[0] 23 | + self.pp_group = dist.new_subgroups_by_enumeration([self.grid[d, :, t].tolist() for d in range(dp_size) for t in range(tp_size)])[0] 24 | + self.dp_group = dist.new_subgroups_by_enumeration([self.grid[:, p, t].tolist() for p in range(pp_size) for t in range(tp_size)])[0] 25 | + self.pp_dp_group = dist.new_subgroups_by_enumeration([self.grid[:, :, t].flatten().tolist() for t in range(tp_size)])[0] 26 | + 27 | + self.world_group = dist.group.WORLD 28 | + 29 | + # Update group IDs with new grid ordering 30 | + self.tp_group_ids = self.grid[self.dp_rank, self.pp_rank, :].tolist() 31 | + self.pp_group_ids = self.grid[self.dp_rank, :, self.tp_rank].tolist() 32 | + self.dp_group_ids = self.grid[:, self.pp_rank, self.tp_rank].tolist() 33 | + 34 | + # Tensor parallelism 35 | + self.tp_world_size = dist.get_world_size(group=self.tp_group) 36 | + self.tp_first_rank = self.tp_group_ids[0] 37 | + self.tp_last_rank = self.tp_group_ids[-1] 38 | + 39 | + # Pipeline parallelism 40 | + self.pp_world_size = dist.get_world_size(group=self.pp_group) 41 | + self.pp_first_rank = self.pp_group_ids[0] 42 | + self.pp_last_rank = self.pp_group_ids[-1] 43 | + self.pp_is_first_stage = self.pp_rank == 0 44 | + self.pp_is_last_stage = self.pp_rank == self.pp_world_size - 1 45 | + self.pp_next_rank = None if self.pp_rank == self.pp_world_size - 1 else int(self.grid[self.dp_rank, self.pp_rank + 1, self.tp_rank].item()) 46 | + self.pp_prev_rank = None if self.pp_rank == 0 else int(self.grid[self.dp_rank, self.pp_rank - 1, self.tp_rank].item()) 47 | + 48 | + # Data parallelism 49 | + self.dp_world_size = dist.get_world_size(group=self.dp_group) 50 | + self.dp_first_rank = self.dp_group_ids[0] 51 | + self.dp_last_rank = self.dp_group_ids[-1] 52 | + 53 | + def __str__(self): 54 | + return f"DP({self.dp_world_size})-PP({self.pp_world_size})-TP({self.tp_world_size})-Rank({self.global_rank})" 55 | + 56 | +def setup_process_group_manager(dp_size, pp_size, tp_size): 57 | + global process_group_manager 58 | + process_group_manager = ProcessGroupManager(dp_size, pp_size, tp_size) 59 | \ No newline at end of file 60 | diff -x '*.diff' --new-file -ur step1_modeling/train.py step2_process_group_manager/train.py 61 | --- 
step1_modeling/train.py 2024-11-17 15:46:52.000000000 +0000 62 | +++ step2_process_group_manager/train.py 2024-11-17 15:43:28.000000000 +0000 63 | @@ -1,7 +1,8 @@ 64 | """ 65 | -torchrun --nproc_per_node 1 train.py 66 | +torchrun --nproc_per_node 2 train.py --tp_size 2 --run_name process_group_manager --use_wandb 67 | """ 68 | import os 69 | +import wandb 70 | import datetime 71 | import torch 72 | import torch.nn.functional as F 73 | @@ -11,6 +12,8 @@ 74 | from transformers import AutoConfig 75 | 76 | from model import Llama 77 | +import process_group_manager as pgm 78 | +from process_group_manager import setup_process_group_manager 79 | from utils import set_all_seed, print 80 | 81 | if __name__ == "__main__": 82 | @@ -31,6 +34,12 @@ 83 | parser.add_argument("--learning_rate", type=float, default=3e-4) 84 | parser.add_argument("--seq_len", type=int, default=32) 85 | parser.add_argument("--micro_batch_size", type=int, default=1) 86 | + 87 | + # Distributed training arguments 88 | + parser.add_argument("--tp_size", type=int, default=1, help="Tensor Parallel size") 89 | + parser.add_argument("--dp_size", type=int, default=1, help="Data Parallel size") 90 | + parser.add_argument("--pp_size", type=int, default=1, help="Pipeline Parallel size") 91 | + parser.add_argument("--pp_engine", type=str, default="afab", choices=["1f1b", "afab"]) 92 | 93 | # Logging arguments 94 | parser.add_argument("--run_name", type=str, default="default_run") 95 | @@ -52,9 +61,25 @@ 96 | dtype = torch.bfloat16 97 | 98 | dist.init_process_group(rank=global_rank, world_size=world_size, backend=backend, init_method=f"env://", timeout=datetime.timedelta(minutes=2)) 99 | + setup_process_group_manager(dp_size=args.dp_size, pp_size=args.pp_size, tp_size=args.tp_size) 100 | 101 | + is_wandb_rank = pgm.process_group_manager.tp_rank == 0 and pgm.process_group_manager.dp_rank == 0 and pgm.process_group_manager.pp_is_last_stage 102 | set_all_seed(args.seed) 103 | 104 | + if is_wandb_rank and args.use_wandb: 105 | + wandb.init( 106 | + project="picotron_tutorial", 107 | + name=f"{args.run_name}_{pgm.process_group_manager}", 108 | + config={ 109 | + "tensor_parallel_size": pgm.process_group_manager.tp_world_size, 110 | + "pipeline_parallel_size": pgm.process_group_manager.pp_world_size, 111 | + "data_parallel_size": pgm.process_group_manager.dp_world_size, 112 | + "model": args.model_name, 113 | + "learning_rate": args.learning_rate, 114 | + "seed": args.seed, 115 | + }, 116 | + ) 117 | + 118 | model_config = AutoConfig.from_pretrained(args.model_name) 119 | model_config.num_hidden_layers = args.num_hidden_layers 120 | model_config.num_attention_heads = args.num_attention_heads 121 | @@ -92,6 +117,12 @@ 122 | # Optimizer step 123 | optimizer.step() 124 | 125 | - print(f"Loss: {loss.item():.4f}", is_print_rank=(global_rank == 0)) 126 | + print(f"[rank {pgm.process_group_manager.global_rank}], Loss: {loss:.4f}") 127 | + 128 | + if is_wandb_rank and args.use_wandb: 129 | + wandb.log({"loss": loss.item()}) 130 | + 131 | + if is_wandb_rank and args.use_wandb: 132 | + wandb.finish() 133 | 134 | dist.destroy_process_group() 135 | \ No newline at end of file 136 | -------------------------------------------------------------------------------- /step2_process_group_manager/process_group_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class ProcessGroupManager: 6 | def __init__(self, dp_size, pp_size, tp_size): 7 | 
self.global_rank = dist.get_rank() 8 | self.world_size = dist.get_world_size() 9 | self.local_rank = int(os.environ.get("LOCAL_RANK", self.global_rank % self.world_size)) 10 | 11 | assert self.world_size == dp_size * pp_size * tp_size, f"World size ({self.world_size}) != DP ({self.dp_size}) * PP ({self.pp_size}) * TP ({self.tp_size})" 12 | 13 | self.grid = torch.arange(self.world_size).view(dp_size, pp_size, tp_size) # DP * PP * TP grid 14 | # Find the position of the current process in the grid 15 | self.dp_rank, self.pp_rank, self.tp_rank = (self.grid == self.global_rank).nonzero().flatten().tolist() 16 | 17 | # Process group creation - Update indexing to match new grid order 18 | self.tp_group = dist.new_subgroups_by_enumeration([self.grid[d, p, :].tolist() for d in range(dp_size) for p in range(pp_size)])[0] 19 | self.pp_group = dist.new_subgroups_by_enumeration([self.grid[d, :, t].tolist() for d in range(dp_size) for t in range(tp_size)])[0] 20 | self.dp_group = dist.new_subgroups_by_enumeration([self.grid[:, p, t].tolist() for p in range(pp_size) for t in range(tp_size)])[0] 21 | self.pp_dp_group = dist.new_subgroups_by_enumeration([self.grid[:, :, t].flatten().tolist() for t in range(tp_size)])[0] 22 | 23 | self.world_group = dist.group.WORLD 24 | 25 | # Update group IDs with new grid ordering 26 | self.tp_group_ids = self.grid[self.dp_rank, self.pp_rank, :].tolist() 27 | self.pp_group_ids = self.grid[self.dp_rank, :, self.tp_rank].tolist() 28 | self.dp_group_ids = self.grid[:, self.pp_rank, self.tp_rank].tolist() 29 | 30 | # Tensor parallelism 31 | self.tp_world_size = dist.get_world_size(group=self.tp_group) 32 | self.tp_first_rank = self.tp_group_ids[0] 33 | self.tp_last_rank = self.tp_group_ids[-1] 34 | 35 | # Pipeline parallelism 36 | self.pp_world_size = dist.get_world_size(group=self.pp_group) 37 | self.pp_first_rank = self.pp_group_ids[0] 38 | self.pp_last_rank = self.pp_group_ids[-1] 39 | self.pp_is_first_stage = self.pp_rank == 0 40 | self.pp_is_last_stage = self.pp_rank == self.pp_world_size - 1 41 | self.pp_next_rank = None if self.pp_rank == self.pp_world_size - 1 else int(self.grid[self.dp_rank, self.pp_rank + 1, self.tp_rank].item()) 42 | self.pp_prev_rank = None if self.pp_rank == 0 else int(self.grid[self.dp_rank, self.pp_rank - 1, self.tp_rank].item()) 43 | 44 | # Data parallelism 45 | self.dp_world_size = dist.get_world_size(group=self.dp_group) 46 | self.dp_first_rank = self.dp_group_ids[0] 47 | self.dp_last_rank = self.dp_group_ids[-1] 48 | 49 | def __str__(self): 50 | return f"DP({self.dp_world_size})-PP({self.pp_world_size})-TP({self.tp_world_size})-Rank({self.global_rank})" 51 | 52 | def setup_process_group_manager(dp_size, pp_size, tp_size): 53 | global process_group_manager 54 | process_group_manager = ProcessGroupManager(dp_size, pp_size, tp_size) -------------------------------------------------------------------------------- /step2_process_group_manager/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | torchrun --nproc_per_node 2 train.py --tp_size 2 --run_name process_group_manager --use_wandb 3 | """ 4 | import os 5 | import wandb 6 | import datetime 7 | import torch 8 | import torch.nn.functional as F 9 | import torch.distributed as dist 10 | import argparse 11 | from torch.optim import AdamW 12 | from transformers import AutoConfig 13 | 14 | from model import Llama 15 | import process_group_manager as pgm 16 | from process_group_manager import setup_process_group_manager 17 | from utils import 
set_all_seed, print 18 | 19 | if __name__ == "__main__": 20 | parser = argparse.ArgumentParser(description="Training script for LLaMA model") 21 | 22 | # Environment arguments 23 | parser.add_argument("--omp_num_threads", type=str, default="1") 24 | parser.add_argument("--tokenizers_parallelism", type=str, default="false") 25 | 26 | # Model arguments 27 | parser.add_argument("--model_name", type=str, default="HuggingFaceTB/SmolLM-360M-Instruct") 28 | parser.add_argument("--num_hidden_layers", type=int, default=32) 29 | parser.add_argument("--num_attention_heads", type=int, default=16) 30 | parser.add_argument("--num_key_value_heads", type=int, default=4) 31 | 32 | # Training arguments 33 | parser.add_argument("--seed", type=int, default=42) 34 | parser.add_argument("--learning_rate", type=float, default=3e-4) 35 | parser.add_argument("--seq_len", type=int, default=32) 36 | parser.add_argument("--micro_batch_size", type=int, default=1) 37 | 38 | # Distributed training arguments 39 | parser.add_argument("--tp_size", type=int, default=1, help="Tensor Parallel size") 40 | parser.add_argument("--dp_size", type=int, default=1, help="Data Parallel size") 41 | parser.add_argument("--pp_size", type=int, default=1, help="Pipeline Parallel size") 42 | parser.add_argument("--pp_engine", type=str, default="afab", choices=["1f1b", "afab"]) 43 | 44 | # Logging arguments 45 | parser.add_argument("--run_name", type=str, default="default_run") 46 | parser.add_argument("--use_wandb", action="store_true") 47 | 48 | args = parser.parse_args() 49 | 50 | # Set environment variables 51 | os.environ["OMP_NUM_THREADS"] = args.omp_num_threads 52 | os.environ["TOKENIZERS_PARALLELISM"] = args.tokenizers_parallelism 53 | os.environ["DEVICE"] = "cuda" 54 | 55 | local_rank = int(os.environ["LOCAL_RANK"]) 56 | global_rank = int(os.environ["RANK"]) 57 | world_size = int(os.environ["WORLD_SIZE"]) 58 | backend = "nccl" 59 | torch.cuda.set_device(local_rank) 60 | device = torch.device("cuda", local_rank) 61 | dtype = torch.bfloat16 62 | 63 | dist.init_process_group(rank=global_rank, world_size=world_size, backend=backend, init_method=f"env://", timeout=datetime.timedelta(minutes=2)) 64 | setup_process_group_manager(dp_size=args.dp_size, pp_size=args.pp_size, tp_size=args.tp_size) 65 | 66 | is_wandb_rank = pgm.process_group_manager.tp_rank == 0 and pgm.process_group_manager.dp_rank == 0 and pgm.process_group_manager.pp_is_last_stage 67 | set_all_seed(args.seed) 68 | 69 | if is_wandb_rank and args.use_wandb: 70 | wandb.init( 71 | project="picotron_tutorial", 72 | name=f"{args.run_name}_{pgm.process_group_manager}", 73 | config={ 74 | "tensor_parallel_size": pgm.process_group_manager.tp_world_size, 75 | "pipeline_parallel_size": pgm.process_group_manager.pp_world_size, 76 | "data_parallel_size": pgm.process_group_manager.dp_world_size, 77 | "model": args.model_name, 78 | "learning_rate": args.learning_rate, 79 | "seed": args.seed, 80 | }, 81 | ) 82 | 83 | model_config = AutoConfig.from_pretrained(args.model_name) 84 | model_config.num_hidden_layers = args.num_hidden_layers 85 | model_config.num_attention_heads = args.num_attention_heads 86 | model_config.num_key_value_heads = args.num_key_value_heads 87 | model_config.max_position_embeddings = args.seq_len 88 | 89 | model = Llama(config=model_config) 90 | model.to(dtype).to(device) 91 | model.train() 92 | 93 | dist.barrier() 94 | 95 | optimizer = AdamW(model.parameters(), lr=args.learning_rate) 96 | 97 | dist.barrier() 98 | 99 | # Create dummy data 100 | input_ids = 
torch.randint(0, model_config.vocab_size, (args.micro_batch_size, args.seq_len), device=device) 101 | target_ids = torch.randint(0, model_config.vocab_size, (args.micro_batch_size, args.seq_len), device=device) 102 | 103 | # Training step 104 | optimizer.zero_grad() 105 | 106 | # Forward pass 107 | outputs = model(input_ids=input_ids) 108 | 109 | # Compute loss 110 | target_ids = target_ids.reshape(-1) 111 | outputs = outputs.view(-1, model_config.vocab_size) 112 | loss = F.cross_entropy(outputs, target_ids) 113 | 114 | # Backward pass 115 | loss.backward() 116 | 117 | # Optimizer step 118 | optimizer.step() 119 | 120 | print(f"[rank {pgm.process_group_manager.global_rank}], Loss: {loss:.4f}") 121 | 122 | if is_wandb_rank and args.use_wandb: 123 | wandb.log({"loss": loss.item()}) 124 | 125 | if is_wandb_rank and args.use_wandb: 126 | wandb.finish() 127 | 128 | dist.destroy_process_group() -------------------------------------------------------------------------------- /step2_process_group_manager/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import builtins 5 | import fcntl 6 | 7 | def print(*args, is_print_rank=True, **kwargs): 8 | """ solves multi-process interleaved print problem """ 9 | if not is_print_rank: return 10 | with open(__file__, "r") as fh: 11 | fcntl.flock(fh, fcntl.LOCK_EX) 12 | try: 13 | builtins.print(*args, **kwargs) 14 | finally: 15 | fcntl.flock(fh, fcntl.LOCK_UN) 16 | 17 | def set_all_seed(seed): 18 | for module in [random, np.random]: module.seed(seed) 19 | torch.manual_seed(seed) 20 | if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) 21 | 22 | def to_readable_format(num, precision=3): 23 | num_str = str(num) 24 | length = len(num_str) 25 | 26 | def format_with_precision(main, decimal, suffix): 27 | if precision == 0: 28 | return f"{main}{suffix}" 29 | return f"{main}.{decimal[:precision]}{suffix}" 30 | 31 | if length > 12: # Trillions 32 | return format_with_precision(num_str[:-12], num_str[-12:], 'T') 33 | elif length > 9: # Billions 34 | return format_with_precision(num_str[:-9], num_str[-9:], 'B') 35 | elif length > 6: # Millions 36 | return format_with_precision(num_str[:-6], num_str[-6:], 'M') 37 | elif length > 3: # Thousands 38 | return format_with_precision(num_str[:-3], num_str[-3:], 'K') 39 | else: 40 | return num_str -------------------------------------------------------------------------------- /step3_dataloader/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | import numpy as np 4 | from functools import partial 5 | from datasets import Features, Sequence, Value, load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | import process_group_manager as pgm 9 | 10 | class MicroBatchDataLoader(DataLoader): 11 | def __init__(self, seq_len, micro_batch_size, grad_acc_steps, dataset_name, tokenizer_name, max_tokens, num_workers, num_proc, split="train"): 12 | 13 | self.micro_batch_size = micro_batch_size 14 | self.grad_acc_steps = grad_acc_steps 15 | self.seq_len = seq_len 16 | 17 | self.global_batch_size = micro_batch_size * grad_acc_steps * pgm.process_group_manager.dp_world_size 18 | 19 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 20 | self.dataset = load_dataset(dataset_name, split=split) 21 | 22 | # Tokenize and chunk the dataset 23 | self.tokenized_dataset = self.tokenize_dataset(self.dataset, 
"text", self.seq_len, num_proc) 24 | 25 | total_tokens = self.tokenized_dataset.num_rows * (self.seq_len + 1) 26 | assert total_tokens >= max_tokens, f"Not enough tokens. Have {total_tokens} tokens but need {max_tokens} tokens" 27 | 28 | super().__init__( 29 | self.tokenized_dataset, 30 | batch_size=micro_batch_size, 31 | collate_fn=self.collate_batch, 32 | pin_memory=True, 33 | num_workers=num_workers, 34 | shuffle=False, 35 | ) 36 | 37 | def tokenizer_group_text(self, examples, tokenizer, sequence_length): 38 | """Tokenize a list of texts and group them in chunks of sequence_length + 1""" 39 | tokenized_text_batch = tokenizer.batch_encode_plus( 40 | examples, 41 | return_attention_mask=False, 42 | return_token_type_ids=False, 43 | return_tensors='np' 44 | ) 45 | concatenated_tokens = {'input_ids': np.concatenate(tokenized_text_batch['input_ids'])} 46 | total_length = len(concatenated_tokens['input_ids']) 47 | 48 | if total_length >= sequence_length + 1: 49 | total_length = ((total_length - 1) // sequence_length) * sequence_length + 1 50 | 51 | result = { 52 | 'input_ids': [ 53 | concatenated_tokens['input_ids'][i : i + sequence_length + 1] 54 | for i in range(0, total_length - sequence_length, sequence_length) 55 | ] 56 | } 57 | return result 58 | 59 | def tokenize_dataset(self, dataset, text_column_name, sequence_length, num_proc): 60 | """Tokenize the dataset and group texts in chunks of sequence_length + 1""" 61 | tokenizer_func = partial( 62 | self.tokenizer_group_text, 63 | tokenizer=self.tokenizer, 64 | sequence_length=sequence_length 65 | ) 66 | 67 | tokenized_dataset = dataset.map( 68 | tokenizer_func, 69 | input_columns=text_column_name, 70 | remove_columns=dataset.column_names, 71 | features=Features({ 72 | "input_ids": Sequence(feature=Value(dtype="int64"), length=sequence_length + 1) 73 | }), 74 | batched=True, 75 | num_proc=num_proc, 76 | load_from_cache_file=True, # Preprocess dataset only once and cache it 77 | desc=f"Grouping texts in chunks of {sequence_length+1}", 78 | ) 79 | 80 | return tokenized_dataset 81 | 82 | def collate_batch(self, batch): 83 | batch_input_ids = torch.stack([torch.tensor(item['input_ids']) for item in batch]) 84 | batch_size = batch_input_ids.size(0) 85 | input_ids = batch_input_ids[:, :-1].contiguous() 86 | target_ids = batch_input_ids[:, 1:].contiguous() 87 | position_ids = torch.arange(self.seq_len, dtype=torch.long).unsqueeze(0).expand(batch_size, -1).contiguous() 88 | attn_mask = torch.tril(torch.ones((self.seq_len, self.seq_len), dtype=torch.bool)) 89 | attn_mask = attn_mask.unsqueeze(0).expand(batch_size, -1, -1).contiguous() 90 | 91 | return { 92 | "input_ids": input_ids, 93 | "target_ids": target_ids, 94 | "position_ids": position_ids, 95 | "attn_mask": attn_mask, 96 | "hidden_states": None 97 | } 98 | 99 | def __iter__(self): 100 | if self._iterator is None: 101 | self._iterator = super().__iter__() 102 | return self 103 | 104 | def __next__(self): 105 | if self._iterator is None: 106 | self._iterator = super().__iter__() 107 | try: 108 | batch = next(self._iterator) 109 | except StopIteration: 110 | self._iterator = None 111 | raise StopIteration 112 | return batch -------------------------------------------------------------------------------- /step3_dataloader/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from flash_attn.flash_attn_interface import flash_attn_func 5 | from flash_attn.layers.rotary import 
apply_rotary_emb 6 | from flash_attn.ops.triton.layer_norm import layer_norm_fn 7 | 8 | def flash_attention(q, k, v, causal = True): 9 | q = q.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 10 | k = k.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 11 | v = v.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 12 | return flash_attn_func(q, k, v, causal=causal) 13 | 14 | def get_cos_sin(seq_length, head_dim, base=500000.0): 15 | assert head_dim%2==0 16 | # Results on CUDA and CPU are different even with the same formula, To match transformers implementation. frequency should be computed on CPU 17 | theta = 1.0 / (base ** (torch.arange(0, head_dim, 2, dtype=torch.int64).float().to('cpu') / head_dim)) 18 | dtype = torch.bfloat16 19 | device = torch.device('cuda') 20 | position = torch.arange(seq_length).to(device).unsqueeze(1).float() # [seq_length, 1] 21 | # To match transformers implementation. m * theta should be computed on GPU 22 | theta = theta.to(device) 23 | return torch.cos(position.float()*theta.float()).to(dtype).repeat(1,2), torch.sin(position.float()*theta.float()).to(dtype).repeat(1,2) # [seq_length, head_dim], [seq_length, head_dim] 24 | 25 | class TritonRMSNorm(nn.Module): 26 | def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None): 27 | super().__init__() 28 | self.eps = eps 29 | self.weight = nn.Parameter(torch.ones(hidden_size)) 30 | self.register_parameter("bias", None) 31 | 32 | def forward( 33 | self, hidden_states, residual=None, dropout_p=0.0, prenorm=False, residual_in_fp32=False, return_dropout_mask=False 34 | ): 35 | return layer_norm_fn( 36 | hidden_states, 37 | self.weight, 38 | None, 39 | residual=residual, 40 | eps=self.eps, 41 | dropout_p=dropout_p, 42 | prenorm=prenorm, 43 | residual_in_fp32=residual_in_fp32, 44 | is_rms_norm=True, 45 | return_dropout_mask=return_dropout_mask, 46 | ) 47 | 48 | class Attention(nn.Module): 49 | def __init__(self, config, layer_idx): 50 | super().__init__() 51 | self.hidden_size = config.hidden_size 52 | self.num_heads = config.num_attention_heads 53 | self.num_key_values = config.num_key_value_heads 54 | self.head_dim = self.hidden_size//self.num_heads 55 | self.num_local_heads = config.num_attention_heads 56 | self.num_local_kv_heads = config.num_key_value_heads 57 | 58 | self.q_proj = nn.Linear(config.hidden_size, self.num_heads*self.head_dim, bias=False) 59 | self.k_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 60 | self.v_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 61 | self.out_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False) 62 | self.layer_idx = layer_idx 63 | 64 | def forward(self, x, cos, sin, attention_mask=None, position_ids=None): 65 | batch_size, seq_length, hidden_dim = x.size() 66 | q = self.q_proj(x) # [batch_size, seq_length, num_heads*head_dim] 67 | k = self.k_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 68 | v = self.v_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 69 | 70 | q = q.view(batch_size, seq_length, self.num_local_heads, self.head_dim) # [batch_size, seq_length, num_heads, head_dim] 71 | k = k.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim) # [batch_size, seq_length, num_key_values, head_dim] 72 | q = apply_rotary_emb(q,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_heads, head_dim] 73 | k = apply_rotary_emb(k,cos[:, 
:self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_key_values, head_dim] 74 | q = q.transpose(1, 2) # [batch_size, num_heads, seq_length, head_dim] 75 | k = k.transpose(1, 2) # [batch_size, num_key_values, seq_length, head_dim] 76 | v = v.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim).transpose(1,2) # [batch_size, num_key_values, seq_length, head_dim] 77 | 78 | k = k.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 79 | v = v.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 80 | 81 | causal = True if q.size(2) == k.size(2) else False # During decoding phase. The lenghth of q is usually 1. 82 | 83 | out = flash_attention(q, k, v, causal = causal) # [batch_size, seq_length, num_heads, head_dim] 84 | 85 | out = out.reshape(batch_size, seq_length, self.num_local_heads * self.head_dim) # [batch_size, seq_length, hidden_dim] 86 | out = self.out_proj(out) # [batch_size, seq_length, hidden_dim] 87 | return out 88 | 89 | class MLP(nn.Module): 90 | def __init__(self, config) -> None: 91 | super().__init__() 92 | self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 93 | self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 94 | self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False) 95 | 96 | def forward(self, x): 97 | #TODO: dont do single line operations as it is harder to debug 98 | return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) 99 | 100 | class DecoderLayer(nn.Module): 101 | # TritonRMSNorm -> Attention -> Residual -> TritonRMSNorm -> MLP -> Residual 102 | def __init__(self, config, layer_idx): 103 | super().__init__() 104 | self.input_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 105 | self.post_attention_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 106 | self.attention = Attention(config, layer_idx = layer_idx) 107 | self.mlp = MLP(config) 108 | self.layer_idx = layer_idx 109 | head_dim = config.hidden_size // config.num_attention_heads 110 | self.cos, self.sin = get_cos_sin(config.max_position_embeddings, head_dim=head_dim , base=config.rope_theta) # [max_position_embeddings, head_dim] 111 | 112 | def forward(self, x, attention_mask = None, position_ids = None): 113 | cos, sin = self.cos, self.sin 114 | x = x + self.attention(self.input_layernorm(x), cos, sin, attention_mask, position_ids) # Attention 115 | x = x + self.mlp(self.post_attention_layernorm(x)) # MLP 116 | return x 117 | 118 | class Llama(nn.Module): 119 | def __init__(self, config) -> None: 120 | super().__init__() 121 | # sanity check 122 | assert config.hidden_size % config.num_attention_heads==0 123 | assert config.num_attention_heads % config.num_key_value_heads==0 124 | 125 | # params 126 | self.vocab_size = config.vocab_size 127 | self.hidden_size = config.hidden_size 128 | self.num_heads = config.num_attention_heads 129 | self.num_key_values = config.num_key_value_heads 130 | self.head_dim = self.hidden_size//self.num_heads 131 | self.max_position_embeddings = config.max_position_embeddings 132 | self.num_layers = config.num_hidden_layers 133 | self.model_config = config 134 | 135 | # modules 136 | self.embedding = nn.Embedding(self.vocab_size, self.hidden_size) 137 | self.decoder_layers = nn.ModuleList([DecoderLayer(config,layer_idx = i) for i in 
range(self.num_layers)]) 138 | self.final_proj = nn.Linear(self.hidden_size, self.vocab_size, bias=False) 139 | self.final_norm = TritonRMSNorm(self.hidden_size, eps=config.rms_norm_eps) 140 | 141 | def forward(self, input_ids, attention_mask=None, position_ids: torch.Tensor = None): 142 | x = self.embedding(input_ids) 143 | for layer in self.decoder_layers: 144 | x = layer(x) # [batch_size, seq_length, hidden_dim] 145 | x = self.final_norm(x) 146 | logits = self.final_proj(x) 147 | 148 | return logits # [batch_size, seq_length, vocab_size] -------------------------------------------------------------------------------- /step3_dataloader/process_group_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class ProcessGroupManager: 6 | def __init__(self, dp_size, pp_size, tp_size): 7 | self.global_rank = dist.get_rank() 8 | self.world_size = dist.get_world_size() 9 | self.local_rank = int(os.environ.get("LOCAL_RANK", self.global_rank % self.world_size)) 10 | 11 | assert self.world_size == dp_size * pp_size * tp_size, f"World size ({self.world_size}) != DP ({self.dp_size}) * PP ({self.pp_size}) * TP ({self.tp_size})" 12 | 13 | self.grid = torch.arange(self.world_size).view(dp_size, pp_size, tp_size) # DP * PP * TP grid 14 | # Find the position of the current process in the grid 15 | self.dp_rank, self.pp_rank, self.tp_rank = (self.grid == self.global_rank).nonzero().flatten().tolist() 16 | 17 | # Process group creation - Update indexing to match new grid order 18 | self.tp_group = dist.new_subgroups_by_enumeration([self.grid[d, p, :].tolist() for d in range(dp_size) for p in range(pp_size)])[0] 19 | self.pp_group = dist.new_subgroups_by_enumeration([self.grid[d, :, t].tolist() for d in range(dp_size) for t in range(tp_size)])[0] 20 | self.dp_group = dist.new_subgroups_by_enumeration([self.grid[:, p, t].tolist() for p in range(pp_size) for t in range(tp_size)])[0] 21 | self.pp_dp_group = dist.new_subgroups_by_enumeration([self.grid[:, :, t].flatten().tolist() for t in range(tp_size)])[0] 22 | 23 | self.world_group = dist.group.WORLD 24 | 25 | # Update group IDs with new grid ordering 26 | self.tp_group_ids = self.grid[self.dp_rank, self.pp_rank, :].tolist() 27 | self.pp_group_ids = self.grid[self.dp_rank, :, self.tp_rank].tolist() 28 | self.dp_group_ids = self.grid[:, self.pp_rank, self.tp_rank].tolist() 29 | 30 | # Tensor parallelism 31 | self.tp_world_size = dist.get_world_size(group=self.tp_group) 32 | self.tp_first_rank = self.tp_group_ids[0] 33 | self.tp_last_rank = self.tp_group_ids[-1] 34 | 35 | # Pipeline parallelism 36 | self.pp_world_size = dist.get_world_size(group=self.pp_group) 37 | self.pp_first_rank = self.pp_group_ids[0] 38 | self.pp_last_rank = self.pp_group_ids[-1] 39 | self.pp_is_first_stage = self.pp_rank == 0 40 | self.pp_is_last_stage = self.pp_rank == self.pp_world_size - 1 41 | self.pp_next_rank = None if self.pp_rank == self.pp_world_size - 1 else int(self.grid[self.dp_rank, self.pp_rank + 1, self.tp_rank].item()) 42 | self.pp_prev_rank = None if self.pp_rank == 0 else int(self.grid[self.dp_rank, self.pp_rank - 1, self.tp_rank].item()) 43 | 44 | # Data parallelism 45 | self.dp_world_size = dist.get_world_size(group=self.dp_group) 46 | self.dp_first_rank = self.dp_group_ids[0] 47 | self.dp_last_rank = self.dp_group_ids[-1] 48 | 49 | def __str__(self): 50 | return 
f"DP({self.dp_world_size})-PP({self.pp_world_size})-TP({self.tp_world_size})-Rank({self.global_rank})" 51 | 52 | def setup_process_group_manager(dp_size, pp_size, tp_size): 53 | global process_group_manager 54 | process_group_manager = ProcessGroupManager(dp_size, pp_size, tp_size) -------------------------------------------------------------------------------- /step3_dataloader/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | torchrun --nproc_per_node 1 train.py --micro_batch_size 4 --gradient_accumulation_steps 8 --seq_len 128 --max_tokens 40960 --num_proc 16 --run_name dataloader --use_wandb 3 | """ 4 | import os 5 | import time 6 | import wandb 7 | import datetime 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.distributed as dist 11 | import argparse 12 | from torch.optim import AdamW 13 | from transformers import AutoConfig 14 | 15 | import lovely_tensors as lt; lt.monkey_patch() 16 | 17 | from model import Llama 18 | from dataloader import MicroBatchDataLoader 19 | import process_group_manager as pgm 20 | from process_group_manager import setup_process_group_manager 21 | from utils import set_all_seed, print, to_readable_format 22 | 23 | def train_step(model, dataloader, device): 24 | acc_loss = 0.0 25 | 26 | for i in range(dataloader.grad_acc_steps): 27 | # get the next batch 28 | batch = next(dataloader) 29 | input_ids = batch["input_ids"].to(device) 30 | target_ids = batch["target_ids"].to(device) 31 | 32 | outputs = model(input_ids=input_ids) 33 | 34 | # compute the loss 35 | batch_size, seq_len = input_ids.shape 36 | target_ids = target_ids.reshape(-1) 37 | outputs = outputs.view(seq_len*batch_size, -1) 38 | loss = F.cross_entropy(outputs, target_ids, reduction='mean') / dataloader.grad_acc_steps 39 | 40 | loss.backward() 41 | 42 | acc_loss += loss.item() 43 | 44 | return acc_loss 45 | 46 | if __name__ == "__main__": 47 | parser = argparse.ArgumentParser(description="Training script for LLaMA model") 48 | 49 | # Environment arguments 50 | parser.add_argument("--omp_num_threads", type=str, default="1") 51 | parser.add_argument("--tokenizers_parallelism", type=str, default="false") 52 | 53 | # Model arguments 54 | parser.add_argument("--model_name", type=str, default="HuggingFaceTB/SmolLM-360M-Instruct") 55 | parser.add_argument("--num_hidden_layers", type=int, default=32) 56 | parser.add_argument("--num_attention_heads", type=int, default=16) 57 | parser.add_argument("--num_key_value_heads", type=int, default=4) 58 | 59 | # Dataset arguments 60 | parser.add_argument("--dataset_name", type=str, default="roneneldan/TinyStories") 61 | parser.add_argument("--num_workers", type=int, default=1) 62 | parser.add_argument("--num_proc", type=int, default=4) 63 | 64 | # Training arguments 65 | parser.add_argument("--seed", type=int, default=42) 66 | parser.add_argument("--learning_rate", type=float, default=3e-4) 67 | parser.add_argument("--seq_len", type=int, default=32) 68 | parser.add_argument("--micro_batch_size", type=int, default=1) 69 | parser.add_argument("--gradient_accumulation_steps", type=int, default=1) 70 | parser.add_argument("--max_tokens", type=int, default=1e6) 71 | 72 | # Distributed training arguments 73 | parser.add_argument("--tp_size", type=int, default=1, help="Tensor Parallel size") 74 | parser.add_argument("--dp_size", type=int, default=1, help="Data Parallel size") 75 | parser.add_argument("--pp_size", type=int, default=1, help="Pipeline Parallel size") 76 | 
parser.add_argument("--pp_engine", type=str, default="afab", choices=["1f1b", "afab"]) 77 | 78 | # Logging arguments 79 | parser.add_argument("--run_name", type=str, default="default_run") 80 | parser.add_argument("--use_wandb", action="store_true") 81 | 82 | args = parser.parse_args() 83 | 84 | # Set environment variables 85 | os.environ["OMP_NUM_THREADS"] = args.omp_num_threads 86 | os.environ["TOKENIZERS_PARALLELISM"] = args.tokenizers_parallelism 87 | os.environ["DEVICE"] = "cuda" 88 | 89 | local_rank = int(os.environ["LOCAL_RANK"]) 90 | global_rank = int(os.environ["RANK"]) 91 | world_size = int(os.environ["WORLD_SIZE"]) 92 | backend = "nccl" 93 | torch.cuda.set_device(local_rank) 94 | device = torch.device("cuda", local_rank) 95 | dtype = torch.bfloat16 96 | 97 | dist.init_process_group(rank=global_rank, world_size=world_size, backend=backend, init_method=f"env://", timeout=datetime.timedelta(minutes=2)) 98 | setup_process_group_manager(dp_size=args.dp_size, pp_size=args.pp_size, tp_size=args.tp_size) 99 | 100 | is_wandb_rank = pgm.process_group_manager.tp_rank == 0 and pgm.process_group_manager.dp_rank == 0 and pgm.process_group_manager.pp_is_last_stage 101 | set_all_seed(args.seed) 102 | 103 | if is_wandb_rank and args.use_wandb: 104 | wandb.init( 105 | project="picotron_tutorial", 106 | name=f"{args.run_name}_{pgm.process_group_manager}", 107 | config={ 108 | "tensor_parallel_size": pgm.process_group_manager.tp_world_size, 109 | "pipeline_parallel_size": pgm.process_group_manager.pp_world_size, 110 | "data_parallel_size": pgm.process_group_manager.dp_world_size, 111 | "model": args.model_name, 112 | "learning_rate": args.learning_rate, 113 | "seed": args.seed, 114 | }, 115 | ) 116 | 117 | model_config = AutoConfig.from_pretrained(args.model_name) 118 | model_config.num_hidden_layers = args.num_hidden_layers 119 | model_config.num_attention_heads = args.num_attention_heads 120 | model_config.num_key_value_heads = args.num_key_value_heads 121 | model_config.max_position_embeddings = args.seq_len 122 | 123 | model = Llama(config=model_config) 124 | model.to(dtype).to(device) 125 | model.train() 126 | 127 | dist.barrier() 128 | 129 | optimizer = AdamW(model.parameters(), lr=args.learning_rate) 130 | 131 | dist.barrier() 132 | 133 | # Create dataloader 134 | dataloader = MicroBatchDataLoader( 135 | seq_len=args.seq_len, 136 | micro_batch_size=args.micro_batch_size, 137 | grad_acc_steps=args.gradient_accumulation_steps, 138 | dataset_name=args.dataset_name, 139 | tokenizer_name=args.model_name, 140 | max_tokens=args.max_tokens, 141 | num_workers=args.num_workers, 142 | num_proc=args.num_proc, 143 | ) 144 | 145 | tokens_per_step = dataloader.global_batch_size * args.seq_len 146 | if pgm.process_group_manager.global_rank == 0: 147 | print("Tokens per step:", to_readable_format(tokens_per_step), is_print_rank=is_wandb_rank) 148 | 149 | trained_token, step = 0, 0 150 | 151 | dist.barrier() 152 | 153 | # Training loop 154 | while trained_token < args.max_tokens: 155 | 156 | step_start_time = time.time() 157 | optimizer.zero_grad() 158 | 159 | loss = train_step(model, dataloader, device) 160 | 161 | optimizer.step() 162 | 163 | step_duration = time.time() - step_start_time 164 | trained_token += tokens_per_step 165 | step += 1 166 | 167 | print(f"[rank {pgm.process_group_manager.global_rank}] Step: {step}, Loss: {loss:.4f}, " 168 | f"Global batch size (with seq_len): {to_readable_format(tokens_per_step)}, " 169 | f"Tokens/s: {to_readable_format(tokens_per_step / step_duration)}, " 170 | 
f"Tokens/s/GPU: {to_readable_format(tokens_per_step / step_duration / world_size)}, " 171 | f"Tokens: {to_readable_format(trained_token)}{('/' + to_readable_format(args.max_tokens))}, " 172 | f"Memory usage: {torch.cuda.memory_reserved() / 1e9:.2f}GB" 173 | , is_print_rank=is_wandb_rank 174 | ) 175 | 176 | if is_wandb_rank and args.use_wandb: 177 | wandb.log({"loss": loss, "tokens_per_step": tokens_per_step, "tokens_per_second": tokens_per_step / step_duration,\ 178 | "memory_usage": torch.cuda.memory_reserved() / 1e9, "trained_tokens": tokens_per_step}) 179 | 180 | if is_wandb_rank and args.use_wandb: 181 | wandb.finish() 182 | 183 | dist.destroy_process_group() -------------------------------------------------------------------------------- /step3_dataloader/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import builtins 5 | import fcntl 6 | 7 | def print(*args, is_print_rank=True, **kwargs): 8 | """ solves multi-process interleaved print problem """ 9 | if not is_print_rank: return 10 | with open(__file__, "r") as fh: 11 | fcntl.flock(fh, fcntl.LOCK_EX) 12 | try: 13 | builtins.print(*args, **kwargs) 14 | finally: 15 | fcntl.flock(fh, fcntl.LOCK_UN) 16 | 17 | def set_all_seed(seed): 18 | for module in [random, np.random]: module.seed(seed) 19 | torch.manual_seed(seed) 20 | if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) 21 | 22 | def to_readable_format(num, precision=3): 23 | num_str = str(num) 24 | length = len(num_str) 25 | 26 | def format_with_precision(main, decimal, suffix): 27 | if precision == 0: 28 | return f"{main}{suffix}" 29 | return f"{main}.{decimal[:precision]}{suffix}" 30 | 31 | if length > 12: # Trillions 32 | return format_with_precision(num_str[:-12], num_str[-12:], 'T') 33 | elif length > 9: # Billions 34 | return format_with_precision(num_str[:-9], num_str[-9:], 'B') 35 | elif length > 6: # Millions 36 | return format_with_precision(num_str[:-6], num_str[-6:], 'M') 37 | elif length > 3: # Thousands 38 | return format_with_precision(num_str[:-3], num_str[-3:], 'K') 39 | else: 40 | return num_str -------------------------------------------------------------------------------- /step4_tensor_parallel/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | import numpy as np 4 | from functools import partial 5 | from datasets import Features, Sequence, Value, load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | import process_group_manager as pgm 9 | 10 | class MicroBatchDataLoader(DataLoader): 11 | def __init__(self, seq_len, micro_batch_size, grad_acc_steps, dataset_name, tokenizer_name, max_tokens, num_workers, num_proc, split="train"): 12 | 13 | self.micro_batch_size = micro_batch_size 14 | self.grad_acc_steps = grad_acc_steps 15 | self.seq_len = seq_len 16 | 17 | self.global_batch_size = micro_batch_size * grad_acc_steps * pgm.process_group_manager.dp_world_size 18 | 19 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 20 | self.dataset = load_dataset(dataset_name, split=split) 21 | 22 | # Tokenize and chunk the dataset 23 | self.tokenized_dataset = self.tokenize_dataset(self.dataset, "text", self.seq_len, num_proc) 24 | 25 | total_tokens = self.tokenized_dataset.num_rows * (self.seq_len + 1) 26 | assert total_tokens >= max_tokens, f"Not enough tokens. 
Have {total_tokens} tokens but need {max_tokens} tokens" 27 | 28 | super().__init__( 29 | self.tokenized_dataset, 30 | batch_size=micro_batch_size, 31 | collate_fn=self.collate_batch, 32 | pin_memory=True, 33 | num_workers=num_workers, 34 | shuffle=False, 35 | ) 36 | 37 | def tokenizer_group_text(self, examples, tokenizer, sequence_length): 38 | """Tokenize a list of texts and group them in chunks of sequence_length + 1""" 39 | tokenized_text_batch = tokenizer.batch_encode_plus( 40 | examples, 41 | return_attention_mask=False, 42 | return_token_type_ids=False, 43 | return_tensors='np' 44 | ) 45 | concatenated_tokens = {'input_ids': np.concatenate(tokenized_text_batch['input_ids'])} 46 | total_length = len(concatenated_tokens['input_ids']) 47 | 48 | if total_length >= sequence_length + 1: 49 | total_length = ((total_length - 1) // sequence_length) * sequence_length + 1 50 | 51 | result = { 52 | 'input_ids': [ 53 | concatenated_tokens['input_ids'][i : i + sequence_length + 1] 54 | for i in range(0, total_length - sequence_length, sequence_length) 55 | ] 56 | } 57 | return result 58 | 59 | def tokenize_dataset(self, dataset, text_column_name, sequence_length, num_proc): 60 | """Tokenize the dataset and group texts in chunks of sequence_length + 1""" 61 | tokenizer_func = partial( 62 | self.tokenizer_group_text, 63 | tokenizer=self.tokenizer, 64 | sequence_length=sequence_length 65 | ) 66 | 67 | tokenized_dataset = dataset.map( 68 | tokenizer_func, 69 | input_columns=text_column_name, 70 | remove_columns=dataset.column_names, 71 | features=Features({ 72 | "input_ids": Sequence(feature=Value(dtype="int64"), length=sequence_length + 1) 73 | }), 74 | batched=True, 75 | num_proc=num_proc, 76 | load_from_cache_file=True, # Preprocess dataset only once and cache it 77 | desc=f"Grouping texts in chunks of {sequence_length+1}", 78 | ) 79 | 80 | return tokenized_dataset 81 | 82 | def collate_batch(self, batch): 83 | batch_input_ids = torch.stack([torch.tensor(item['input_ids']) for item in batch]) 84 | batch_size = batch_input_ids.size(0) 85 | input_ids = batch_input_ids[:, :-1].contiguous() 86 | target_ids = batch_input_ids[:, 1:].contiguous() 87 | position_ids = torch.arange(self.seq_len, dtype=torch.long).unsqueeze(0).expand(batch_size, -1).contiguous() 88 | attn_mask = torch.tril(torch.ones((self.seq_len, self.seq_len), dtype=torch.bool)) 89 | attn_mask = attn_mask.unsqueeze(0).expand(batch_size, -1, -1).contiguous() 90 | 91 | return { 92 | "input_ids": input_ids, 93 | "target_ids": target_ids, 94 | "position_ids": position_ids, 95 | "attn_mask": attn_mask, 96 | "hidden_states": None 97 | } 98 | 99 | def __iter__(self): 100 | if self._iterator is None: 101 | self._iterator = super().__iter__() 102 | return self 103 | 104 | def __next__(self): 105 | if self._iterator is None: 106 | self._iterator = super().__iter__() 107 | try: 108 | batch = next(self._iterator) 109 | except StopIteration: 110 | self._iterator = None 111 | raise StopIteration 112 | return batch -------------------------------------------------------------------------------- /step4_tensor_parallel/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from flash_attn.flash_attn_interface import flash_attn_func 5 | from flash_attn.layers.rotary import apply_rotary_emb 6 | from flash_attn.ops.triton.layer_norm import layer_norm_fn 7 | import process_group_manager as pgm 8 | 9 | def flash_attention(q, k, v, causal = 
True): 10 | q = q.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 11 | k = k.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 12 | v = v.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 13 | return flash_attn_func(q, k, v, causal=causal) 14 | 15 | def get_cos_sin(seq_length, head_dim, base=500000.0): 16 | assert head_dim%2==0 17 | # Results on CUDA and CPU are different even with the same formula, To match transformers implementation. frequency should be computed on CPU 18 | theta = 1.0 / (base ** (torch.arange(0, head_dim, 2, dtype=torch.int64).float().to('cpu') / head_dim)) 19 | dtype = torch.bfloat16 20 | device = torch.device('cuda') 21 | position = torch.arange(seq_length).to(device).unsqueeze(1).float() # [seq_length, 1] 22 | # To match transformers implementation. m * theta should be computed on GPU 23 | theta = theta.to(device) 24 | return torch.cos(position.float()*theta.float()).to(dtype).repeat(1,2), torch.sin(position.float()*theta.float()).to(dtype).repeat(1,2) # [seq_length, head_dim], [seq_length, head_dim] 25 | 26 | class TritonRMSNorm(nn.Module): 27 | def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None): 28 | super().__init__() 29 | self.eps = eps 30 | self.weight = nn.Parameter(torch.ones(hidden_size)) 31 | self.register_parameter("bias", None) 32 | 33 | def forward( 34 | self, hidden_states, residual=None, dropout_p=0.0, prenorm=False, residual_in_fp32=False, return_dropout_mask=False 35 | ): 36 | return layer_norm_fn( 37 | hidden_states, 38 | self.weight, 39 | None, 40 | residual=residual, 41 | eps=self.eps, 42 | dropout_p=dropout_p, 43 | prenorm=prenorm, 44 | residual_in_fp32=residual_in_fp32, 45 | is_rms_norm=True, 46 | return_dropout_mask=return_dropout_mask, 47 | ) 48 | 49 | class Attention(nn.Module): 50 | def __init__(self, config, layer_idx): 51 | super().__init__() 52 | self.hidden_size = config.hidden_size 53 | self.num_heads = config.num_attention_heads 54 | self.num_key_values = config.num_key_value_heads 55 | self.head_dim = self.hidden_size//self.num_heads 56 | assert config.num_attention_heads % pgm.process_group_manager.tp_world_size == 0, "num_attention_heads should be divisible by tp world size" 57 | assert config.num_key_value_heads % pgm.process_group_manager.tp_world_size == 0, "num_key_value_heads should be divisible by tp world size" 58 | self.num_local_heads = config.num_attention_heads // pgm.process_group_manager.tp_world_size # TP parallelism 59 | self.num_local_kv_heads = config.num_key_value_heads // pgm.process_group_manager.tp_world_size # TP parallelism 60 | 61 | 62 | self.q_proj = nn.Linear(config.hidden_size, self.num_heads*self.head_dim, bias=False) 63 | self.k_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 64 | self.v_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 65 | self.out_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False) 66 | self.layer_idx = layer_idx 67 | 68 | def forward(self, x, cos, sin, attention_mask=None, position_ids=None): 69 | batch_size, seq_length, hidden_dim = x.size() 70 | q = self.q_proj(x) # [batch_size, seq_length, num_heads*head_dim] 71 | k = self.k_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 72 | v = self.v_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 73 | 74 | q = q.view(batch_size, seq_length, self.num_local_heads, self.head_dim) # [batch_size, seq_length, num_heads, head_dim] 75 | k = k.view(batch_size, seq_length, 
self.num_local_kv_heads, self.head_dim) # [batch_size, seq_length, num_key_values, head_dim] 76 | q = apply_rotary_emb(q,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_heads, head_dim] 77 | k = apply_rotary_emb(k,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_key_values, head_dim] 78 | q = q.transpose(1, 2) # [batch_size, num_heads, seq_length, head_dim] 79 | k = k.transpose(1, 2) # [batch_size, num_key_values, seq_length, head_dim] 80 | v = v.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim).transpose(1,2) # [batch_size, num_key_values, seq_length, head_dim] 81 | 82 | k = k.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 83 | v = v.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 84 | 85 | causal = True if q.size(2) == k.size(2) else False # During decoding phase. The lenghth of q is usually 1. 86 | 87 | out = flash_attention(q, k, v, causal = causal) # [batch_size, seq_length, num_heads, head_dim] 88 | 89 | out = out.reshape(batch_size, seq_length, self.num_local_heads * self.head_dim) # [batch_size, seq_length, hidden_dim] 90 | out = self.out_proj(out) # [batch_size, seq_length, hidden_dim] 91 | return out 92 | 93 | class MLP(nn.Module): 94 | def __init__(self, config) -> None: 95 | super().__init__() 96 | self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 97 | self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 98 | self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False) 99 | 100 | def forward(self, x): 101 | #TODO: dont do single line operations as it is harder to debug 102 | return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) 103 | 104 | class DecoderLayer(nn.Module): 105 | # TritonRMSNorm -> Attention -> Residual -> TritonRMSNorm -> MLP -> Residual 106 | def __init__(self, config, layer_idx): 107 | super().__init__() 108 | self.input_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 109 | self.post_attention_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 110 | self.attention = Attention(config, layer_idx = layer_idx) 111 | self.mlp = MLP(config) 112 | self.layer_idx = layer_idx 113 | head_dim = config.hidden_size // config.num_attention_heads 114 | self.cos, self.sin = get_cos_sin(config.max_position_embeddings, head_dim=head_dim , base=config.rope_theta) # [max_position_embeddings, head_dim] 115 | 116 | def forward(self, x, attention_mask = None, position_ids = None): 117 | cos, sin = self.cos, self.sin 118 | x = x + self.attention(self.input_layernorm(x), cos, sin, attention_mask, position_ids) # Attention 119 | x = x + self.mlp(self.post_attention_layernorm(x)) # MLP 120 | return x 121 | 122 | class Llama(nn.Module): 123 | def __init__(self, config) -> None: 124 | super().__init__() 125 | # sanity check 126 | assert config.hidden_size % config.num_attention_heads==0 127 | assert config.num_attention_heads % config.num_key_value_heads==0 128 | 129 | # params 130 | self.vocab_size = config.vocab_size 131 | self.hidden_size = config.hidden_size 132 | self.num_heads = config.num_attention_heads 133 | self.num_key_values = config.num_key_value_heads 134 | self.head_dim = self.hidden_size//self.num_heads 135 | self.max_position_embeddings = 
config.max_position_embeddings 136 | self.num_layers = config.num_hidden_layers 137 | self.model_config = config 138 | 139 | # modules 140 | self.embedding = nn.Embedding(self.vocab_size, self.hidden_size) 141 | self.decoder_layers = nn.ModuleList([DecoderLayer(config,layer_idx = i) for i in range(self.num_layers)]) 142 | self.final_proj = nn.Linear(self.hidden_size, self.vocab_size, bias=False) 143 | self.final_norm = TritonRMSNorm(self.hidden_size, eps=config.rms_norm_eps) 144 | 145 | def forward(self, input_ids, attention_mask=None, position_ids: torch.Tensor = None): 146 | x = self.embedding(input_ids) 147 | for layer in self.decoder_layers: 148 | x = layer(x) # [batch_size, seq_length, hidden_dim] 149 | x = self.final_norm(x) 150 | logits = self.final_proj(x) 151 | 152 | return logits # [batch_size, seq_length, vocab_size] -------------------------------------------------------------------------------- /step4_tensor_parallel/process_group_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class ProcessGroupManager: 6 | def __init__(self, dp_size, pp_size, tp_size): 7 | self.global_rank = dist.get_rank() 8 | self.world_size = dist.get_world_size() 9 | self.local_rank = int(os.environ.get("LOCAL_RANK", self.global_rank % self.world_size)) 10 | 11 | assert self.world_size == dp_size * pp_size * tp_size, f"World size ({self.world_size}) != DP ({self.dp_size}) * PP ({self.pp_size}) * TP ({self.tp_size})" 12 | 13 | self.grid = torch.arange(self.world_size).view(dp_size, pp_size, tp_size) # DP * PP * TP grid 14 | # Find the position of the current process in the grid 15 | self.dp_rank, self.pp_rank, self.tp_rank = (self.grid == self.global_rank).nonzero().flatten().tolist() 16 | 17 | # Process group creation - Update indexing to match new grid order 18 | self.tp_group = dist.new_subgroups_by_enumeration([self.grid[d, p, :].tolist() for d in range(dp_size) for p in range(pp_size)])[0] 19 | self.pp_group = dist.new_subgroups_by_enumeration([self.grid[d, :, t].tolist() for d in range(dp_size) for t in range(tp_size)])[0] 20 | self.dp_group = dist.new_subgroups_by_enumeration([self.grid[:, p, t].tolist() for p in range(pp_size) for t in range(tp_size)])[0] 21 | self.pp_dp_group = dist.new_subgroups_by_enumeration([self.grid[:, :, t].flatten().tolist() for t in range(tp_size)])[0] 22 | 23 | self.world_group = dist.group.WORLD 24 | 25 | # Update group IDs with new grid ordering 26 | self.tp_group_ids = self.grid[self.dp_rank, self.pp_rank, :].tolist() 27 | self.pp_group_ids = self.grid[self.dp_rank, :, self.tp_rank].tolist() 28 | self.dp_group_ids = self.grid[:, self.pp_rank, self.tp_rank].tolist() 29 | 30 | # Tensor parallelism 31 | self.tp_world_size = dist.get_world_size(group=self.tp_group) 32 | self.tp_first_rank = self.tp_group_ids[0] 33 | self.tp_last_rank = self.tp_group_ids[-1] 34 | 35 | # Pipeline parallelism 36 | self.pp_world_size = dist.get_world_size(group=self.pp_group) 37 | self.pp_first_rank = self.pp_group_ids[0] 38 | self.pp_last_rank = self.pp_group_ids[-1] 39 | self.pp_is_first_stage = self.pp_rank == 0 40 | self.pp_is_last_stage = self.pp_rank == self.pp_world_size - 1 41 | self.pp_next_rank = None if self.pp_rank == self.pp_world_size - 1 else int(self.grid[self.dp_rank, self.pp_rank + 1, self.tp_rank].item()) 42 | self.pp_prev_rank = None if self.pp_rank == 0 else int(self.grid[self.dp_rank, self.pp_rank - 1, self.tp_rank].item()) 43 | 44 | # Data 
parallelism 45 | self.dp_world_size = dist.get_world_size(group=self.dp_group) 46 | self.dp_first_rank = self.dp_group_ids[0] 47 | self.dp_last_rank = self.dp_group_ids[-1] 48 | 49 | def __str__(self): 50 | return f"DP({self.dp_world_size})-PP({self.pp_world_size})-TP({self.tp_world_size})-Rank({self.global_rank})" 51 | 52 | def setup_process_group_manager(dp_size, pp_size, tp_size): 53 | global process_group_manager 54 | process_group_manager = ProcessGroupManager(dp_size, pp_size, tp_size) -------------------------------------------------------------------------------- /step4_tensor_parallel/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | torchrun --nproc_per_node 4 train.py --tp_size 4 --micro_batch_size 4 --gradient_accumulation_steps 8 --seq_len 128 --max_tokens 40960 --num_proc 16 --run_name tp_naive --use_wandb 3 | """ 4 | import os 5 | import time 6 | import wandb 7 | import datetime 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.distributed as dist 11 | import argparse 12 | from torch.optim import AdamW 13 | from transformers import AutoConfig 14 | 15 | import lovely_tensors as lt; lt.monkey_patch() 16 | 17 | from model import Llama 18 | from dataloader import MicroBatchDataLoader 19 | import process_group_manager as pgm 20 | from process_group_manager import setup_process_group_manager 21 | from utils import set_all_seed, print, to_readable_format 22 | 23 | from tensor_parallel import apply_tensor_parallel 24 | 25 | def train_step(model, dataloader, device): 26 | acc_loss = 0.0 27 | 28 | for i in range(dataloader.grad_acc_steps): 29 | # get the next batch 30 | batch = next(dataloader) 31 | input_ids = batch["input_ids"].to(device) 32 | target_ids = batch["target_ids"].to(device) 33 | 34 | outputs = model(input_ids=input_ids) 35 | 36 | # compute the loss 37 | batch_size, seq_len = input_ids.shape 38 | target_ids = target_ids.reshape(-1) 39 | outputs = outputs.view(seq_len*batch_size, -1) 40 | loss = F.cross_entropy(outputs, target_ids, reduction='mean') / dataloader.grad_acc_steps 41 | 42 | loss.backward() 43 | 44 | acc_loss += loss.item() 45 | 46 | return acc_loss 47 | 48 | if __name__ == "__main__": 49 | parser = argparse.ArgumentParser(description="Training script for LLaMA model") 50 | 51 | # Environment arguments 52 | parser.add_argument("--omp_num_threads", type=str, default="1") 53 | parser.add_argument("--tokenizers_parallelism", type=str, default="false") 54 | 55 | # Model arguments 56 | parser.add_argument("--model_name", type=str, default="HuggingFaceTB/SmolLM-360M-Instruct") 57 | parser.add_argument("--num_hidden_layers", type=int, default=32) 58 | parser.add_argument("--num_attention_heads", type=int, default=16) 59 | parser.add_argument("--num_key_value_heads", type=int, default=4) 60 | 61 | # Dataset arguments 62 | parser.add_argument("--dataset_name", type=str, default="roneneldan/TinyStories") 63 | parser.add_argument("--num_workers", type=int, default=1) 64 | parser.add_argument("--num_proc", type=int, default=4) 65 | 66 | # Training arguments 67 | parser.add_argument("--seed", type=int, default=42) 68 | parser.add_argument("--learning_rate", type=float, default=3e-4) 69 | parser.add_argument("--seq_len", type=int, default=32) 70 | parser.add_argument("--micro_batch_size", type=int, default=1) 71 | parser.add_argument("--gradient_accumulation_steps", type=int, default=1) 72 | parser.add_argument("--max_tokens", type=int, default=1e6) 73 | 74 | # Distributed training arguments 75 | 
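    # Note: tp_size must divide both num_attention_heads and num_key_value_heads; the
    # Attention module in model.py asserts this before sharding the heads across TP ranks.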
parser.add_argument("--tp_size", type=int, default=1, help="Tensor Parallel size") 76 | parser.add_argument("--dp_size", type=int, default=1, help="Data Parallel size") 77 | parser.add_argument("--pp_size", type=int, default=1, help="Pipeline Parallel size") 78 | parser.add_argument("--pp_engine", type=str, default="afab", choices=["1f1b", "afab"]) 79 | 80 | # Logging arguments 81 | parser.add_argument("--run_name", type=str, default="default_run") 82 | parser.add_argument("--use_wandb", action="store_true") 83 | 84 | args = parser.parse_args() 85 | 86 | # Set environment variables 87 | os.environ["OMP_NUM_THREADS"] = args.omp_num_threads 88 | os.environ["TOKENIZERS_PARALLELISM"] = args.tokenizers_parallelism 89 | os.environ["DEVICE"] = "cuda" 90 | 91 | local_rank = int(os.environ["LOCAL_RANK"]) 92 | global_rank = int(os.environ["RANK"]) 93 | world_size = int(os.environ["WORLD_SIZE"]) 94 | backend = "nccl" 95 | torch.cuda.set_device(local_rank) 96 | device = torch.device("cuda", local_rank) 97 | dtype = torch.bfloat16 98 | 99 | dist.init_process_group(rank=global_rank, world_size=world_size, backend=backend, init_method=f"env://", timeout=datetime.timedelta(minutes=2)) 100 | setup_process_group_manager(dp_size=args.dp_size, pp_size=args.pp_size, tp_size=args.tp_size) 101 | 102 | is_wandb_rank = pgm.process_group_manager.tp_rank == 0 and pgm.process_group_manager.dp_rank == 0 and pgm.process_group_manager.pp_is_last_stage 103 | set_all_seed(args.seed) 104 | 105 | if is_wandb_rank and args.use_wandb: 106 | wandb.init( 107 | project="picotron_tutorial", 108 | name=f"{args.run_name}_{pgm.process_group_manager}", 109 | config={ 110 | "tensor_parallel_size": pgm.process_group_manager.tp_world_size, 111 | "pipeline_parallel_size": pgm.process_group_manager.pp_world_size, 112 | "data_parallel_size": pgm.process_group_manager.dp_world_size, 113 | "model": args.model_name, 114 | "learning_rate": args.learning_rate, 115 | "seed": args.seed, 116 | }, 117 | ) 118 | 119 | model_config = AutoConfig.from_pretrained(args.model_name) 120 | model_config.num_hidden_layers = args.num_hidden_layers 121 | model_config.num_attention_heads = args.num_attention_heads 122 | model_config.num_key_value_heads = args.num_key_value_heads 123 | model_config.max_position_embeddings = args.seq_len 124 | 125 | model = Llama(config=model_config) 126 | 127 | if pgm.process_group_manager.tp_world_size > 1: 128 | model = apply_tensor_parallel(model) 129 | 130 | model.to(dtype).to(device) 131 | model.train() 132 | 133 | dist.barrier() 134 | 135 | optimizer = AdamW(model.parameters(), lr=args.learning_rate) 136 | 137 | dist.barrier() 138 | 139 | # Create dataloader 140 | dataloader = MicroBatchDataLoader( 141 | seq_len=args.seq_len, 142 | micro_batch_size=args.micro_batch_size, 143 | grad_acc_steps=args.gradient_accumulation_steps, 144 | dataset_name=args.dataset_name, 145 | tokenizer_name=args.model_name, 146 | max_tokens=args.max_tokens, 147 | num_workers=args.num_workers, 148 | num_proc=args.num_proc, 149 | ) 150 | 151 | tokens_per_step = dataloader.global_batch_size * args.seq_len 152 | if pgm.process_group_manager.global_rank == 0: 153 | print("Tokens per step:", to_readable_format(tokens_per_step), is_print_rank=is_wandb_rank) 154 | 155 | trained_token, step = 0, 0 156 | 157 | dist.barrier() 158 | 159 | # Training loop 160 | while trained_token < args.max_tokens: 161 | 162 | step_start_time = time.time() 163 | optimizer.zero_grad() 164 | 165 | loss = train_step(model, dataloader, device) 166 | 167 | optimizer.step() 168 | 
169 | step_duration = time.time() - step_start_time 170 | trained_token += tokens_per_step 171 | step += 1 172 | 173 | print(f"[rank {pgm.process_group_manager.global_rank}] Step: {step}, Loss: {loss:.4f}, " 174 | f"Global batch size (with seq_len): {to_readable_format(tokens_per_step)}, " 175 | f"Tokens/s: {to_readable_format(tokens_per_step / step_duration)}, " 176 | f"Tokens/s/GPU: {to_readable_format(tokens_per_step / step_duration / world_size)}, " 177 | f"Tokens: {to_readable_format(trained_token)}{('/' + to_readable_format(args.max_tokens))}, " 178 | f"Memory usage: {torch.cuda.memory_reserved() / 1e9:.2f}GB" 179 | , is_print_rank=is_wandb_rank 180 | ) 181 | 182 | if is_wandb_rank and args.use_wandb: 183 | wandb.log({"loss": loss, "tokens_per_step": tokens_per_step, "tokens_per_second": tokens_per_step / step_duration,\ 184 | "memory_usage": torch.cuda.memory_reserved() / 1e9, "trained_tokens": tokens_per_step}) 185 | 186 | if is_wandb_rank and args.use_wandb: 187 | wandb.finish() 188 | 189 | dist.destroy_process_group() -------------------------------------------------------------------------------- /step4_tensor_parallel/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import builtins 5 | import fcntl 6 | 7 | def print(*args, is_print_rank=True, **kwargs): 8 | """ solves multi-process interleaved print problem """ 9 | if not is_print_rank: return 10 | with open(__file__, "r") as fh: 11 | fcntl.flock(fh, fcntl.LOCK_EX) 12 | try: 13 | builtins.print(*args, **kwargs) 14 | finally: 15 | fcntl.flock(fh, fcntl.LOCK_UN) 16 | 17 | def set_all_seed(seed): 18 | for module in [random, np.random]: module.seed(seed) 19 | torch.manual_seed(seed) 20 | if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) 21 | 22 | def to_readable_format(num, precision=3): 23 | num_str = str(num) 24 | length = len(num_str) 25 | 26 | def format_with_precision(main, decimal, suffix): 27 | if precision == 0: 28 | return f"{main}{suffix}" 29 | return f"{main}.{decimal[:precision]}{suffix}" 30 | 31 | if length > 12: # Trillions 32 | return format_with_precision(num_str[:-12], num_str[-12:], 'T') 33 | elif length > 9: # Billions 34 | return format_with_precision(num_str[:-9], num_str[-9:], 'B') 35 | elif length > 6: # Millions 36 | return format_with_precision(num_str[:-6], num_str[-6:], 'M') 37 | elif length > 3: # Thousands 38 | return format_with_precision(num_str[:-3], num_str[-3:], 'K') 39 | else: 40 | return num_str -------------------------------------------------------------------------------- /step5_data_parallel_naive/data_parallel.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from typing import List 3 | import torch 4 | import torch.distributed as dist 5 | from torch import nn 6 | 7 | import process_group_manager as pgm 8 | 9 | ### begin Data Parallel (naive) 10 | class DataParallelNaive(nn.Module): 11 | def __init__(self, module): 12 | super().__init__() 13 | self.module = module 14 | # whether to synchronize gradients during backward pass. 
Set to False when using gradient accumulation 15 | self.require_backward_grad_sync = True 16 | self.register_backward_hook(self._allreduce_grads) 17 | 18 | def forward(self, *inputs, **kwargs): 19 | return self.module(*inputs, **kwargs) 20 | 21 | def register_backward_hook(self, hook): 22 | """Registers a backward hook for all parameters of the model that require gradients.""" 23 | for p in self.module.parameters(): 24 | if p.requires_grad is True: 25 | p.register_hook(hook) 26 | 27 | def _allreduce_grads(self, grad): 28 | """Performs an all-reduce operation to synchronize gradients across multiple processes.""" 29 | # No synchronization needed during gradient accumulation, except at the final accumulation step. 30 | if self.require_backward_grad_sync: 31 | dist.all_reduce(grad, op=dist.ReduceOp.SUM, group=pgm.process_group_manager.dp_group) 32 | grad /= pgm.process_group_manager.dp_world_size 33 | return grad 34 | ### end Data Parallel (naive) -------------------------------------------------------------------------------- /step5_data_parallel_naive/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader, DistributedSampler 3 | import numpy as np 4 | from functools import partial 5 | from datasets import Features, Sequence, Value, load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | import process_group_manager as pgm 9 | 10 | class MicroBatchDataLoader(DataLoader): 11 | def __init__(self, seq_len, micro_batch_size, grad_acc_steps, dataset_name, tokenizer_name, max_tokens, num_workers, num_proc, seed, split="train"): 12 | 13 | self.micro_batch_size = micro_batch_size 14 | self.grad_acc_steps = grad_acc_steps 15 | self.seq_len = seq_len 16 | 17 | self.global_batch_size = micro_batch_size * grad_acc_steps * pgm.process_group_manager.dp_world_size 18 | 19 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 20 | self.dataset = load_dataset(dataset_name, split=split) 21 | 22 | # Tokenize and chunk the dataset 23 | self.tokenized_dataset = self.tokenize_dataset(self.dataset, "text", self.seq_len, num_proc) 24 | 25 | total_tokens = self.tokenized_dataset.num_rows * (self.seq_len + 1) 26 | assert total_tokens >= max_tokens, f"Not enough tokens. 
Have {total_tokens} tokens but need {max_tokens} tokens" 27 | 28 | self.sampler = DistributedSampler( 29 | self.tokenized_dataset, 30 | num_replicas=pgm.process_group_manager.dp_world_size, 31 | rank=pgm.process_group_manager.dp_rank, 32 | seed=seed, 33 | shuffle=False 34 | ) 35 | 36 | super().__init__( 37 | self.tokenized_dataset, 38 | batch_size=micro_batch_size, 39 | collate_fn=self.collate_batch, 40 | pin_memory=True, 41 | num_workers=num_workers, 42 | sampler=self.sampler, 43 | shuffle=False, 44 | ) 45 | 46 | def tokenizer_group_text(self, examples, tokenizer, sequence_length): 47 | """Tokenize a list of texts and group them in chunks of sequence_length + 1""" 48 | tokenized_text_batch = tokenizer.batch_encode_plus( 49 | examples, 50 | return_attention_mask=False, 51 | return_token_type_ids=False, 52 | return_tensors='np' 53 | ) 54 | concatenated_tokens = {'input_ids': np.concatenate(tokenized_text_batch['input_ids'])} 55 | total_length = len(concatenated_tokens['input_ids']) 56 | 57 | if total_length >= sequence_length + 1: 58 | total_length = ((total_length - 1) // sequence_length) * sequence_length + 1 59 | 60 | result = { 61 | 'input_ids': [ 62 | concatenated_tokens['input_ids'][i : i + sequence_length + 1] 63 | for i in range(0, total_length - sequence_length, sequence_length) 64 | ] 65 | } 66 | return result 67 | 68 | def tokenize_dataset(self, dataset, text_column_name, sequence_length, num_proc): 69 | """Tokenize the dataset and group texts in chunks of sequence_length + 1""" 70 | tokenizer_func = partial( 71 | self.tokenizer_group_text, 72 | tokenizer=self.tokenizer, 73 | sequence_length=sequence_length 74 | ) 75 | 76 | tokenized_dataset = dataset.map( 77 | tokenizer_func, 78 | input_columns=text_column_name, 79 | remove_columns=dataset.column_names, 80 | features=Features({ 81 | "input_ids": Sequence(feature=Value(dtype="int64"), length=sequence_length + 1) 82 | }), 83 | batched=True, 84 | num_proc=num_proc, 85 | load_from_cache_file=True, # Preprocess dataset only once and cache it 86 | desc=f"Grouping texts in chunks of {sequence_length+1}", 87 | ) 88 | 89 | return tokenized_dataset 90 | 91 | def collate_batch(self, batch): 92 | batch_input_ids = torch.stack([torch.tensor(item['input_ids']) for item in batch]) 93 | batch_size = batch_input_ids.size(0) 94 | input_ids = batch_input_ids[:, :-1].contiguous() 95 | target_ids = batch_input_ids[:, 1:].contiguous() 96 | position_ids = torch.arange(self.seq_len, dtype=torch.long).unsqueeze(0).expand(batch_size, -1).contiguous() 97 | attn_mask = torch.tril(torch.ones((self.seq_len, self.seq_len), dtype=torch.bool)) 98 | attn_mask = attn_mask.unsqueeze(0).expand(batch_size, -1, -1).contiguous() 99 | 100 | return { 101 | "input_ids": input_ids, 102 | "target_ids": target_ids, 103 | "position_ids": position_ids, 104 | "attn_mask": attn_mask, 105 | "hidden_states": None 106 | } 107 | 108 | def __iter__(self): 109 | if self._iterator is None: 110 | self._iterator = super().__iter__() 111 | return self 112 | 113 | def __next__(self): 114 | if self._iterator is None: 115 | self._iterator = super().__iter__() 116 | try: 117 | batch = next(self._iterator) 118 | except StopIteration: 119 | self._iterator = None 120 | raise StopIteration 121 | return batch -------------------------------------------------------------------------------- /step5_data_parallel_naive/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from 
flash_attn.flash_attn_interface import flash_attn_func 5 | from flash_attn.layers.rotary import apply_rotary_emb 6 | from flash_attn.ops.triton.layer_norm import layer_norm_fn 7 | import process_group_manager as pgm 8 | 9 | def flash_attention(q, k, v, causal = True): 10 | q = q.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 11 | k = k.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 12 | v = v.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 13 | return flash_attn_func(q, k, v, causal=causal) 14 | 15 | def get_cos_sin(seq_length, head_dim, base=500000.0): 16 | assert head_dim%2==0 17 | # Results on CUDA and CPU are different even with the same formula, To match transformers implementation. frequency should be computed on CPU 18 | theta = 1.0 / (base ** (torch.arange(0, head_dim, 2, dtype=torch.int64).float().to('cpu') / head_dim)) 19 | dtype = torch.bfloat16 20 | device = torch.device('cuda') 21 | position = torch.arange(seq_length).to(device).unsqueeze(1).float() # [seq_length, 1] 22 | # To match transformers implementation. m * theta should be computed on GPU 23 | theta = theta.to(device) 24 | return torch.cos(position.float()*theta.float()).to(dtype).repeat(1,2), torch.sin(position.float()*theta.float()).to(dtype).repeat(1,2) # [seq_length, head_dim], [seq_length, head_dim] 25 | 26 | class TritonRMSNorm(nn.Module): 27 | def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None): 28 | super().__init__() 29 | self.eps = eps 30 | self.weight = nn.Parameter(torch.ones(hidden_size)) 31 | self.register_parameter("bias", None) 32 | 33 | def forward( 34 | self, hidden_states, residual=None, dropout_p=0.0, prenorm=False, residual_in_fp32=False, return_dropout_mask=False 35 | ): 36 | return layer_norm_fn( 37 | hidden_states, 38 | self.weight, 39 | None, 40 | residual=residual, 41 | eps=self.eps, 42 | dropout_p=dropout_p, 43 | prenorm=prenorm, 44 | residual_in_fp32=residual_in_fp32, 45 | is_rms_norm=True, 46 | return_dropout_mask=return_dropout_mask, 47 | ) 48 | 49 | class Attention(nn.Module): 50 | def __init__(self, config, layer_idx): 51 | super().__init__() 52 | self.hidden_size = config.hidden_size 53 | self.num_heads = config.num_attention_heads 54 | self.num_key_values = config.num_key_value_heads 55 | self.head_dim = self.hidden_size//self.num_heads 56 | assert config.num_attention_heads % pgm.process_group_manager.tp_world_size == 0, "num_attention_heads should be divisible by tp world size" 57 | assert config.num_key_value_heads % pgm.process_group_manager.tp_world_size == 0, "num_key_value_heads should be divisible by tp world size" 58 | self.num_local_heads = config.num_attention_heads // pgm.process_group_manager.tp_world_size # TP parallelism 59 | self.num_local_kv_heads = config.num_key_value_heads // pgm.process_group_manager.tp_world_size # TP parallelism 60 | 61 | 62 | self.q_proj = nn.Linear(config.hidden_size, self.num_heads*self.head_dim, bias=False) 63 | self.k_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 64 | self.v_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 65 | self.out_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False) 66 | self.layer_idx = layer_idx 67 | 68 | def forward(self, x, cos, sin, attention_mask=None, position_ids=None): 69 | batch_size, seq_length, hidden_dim = x.size() 70 | q = self.q_proj(x) # [batch_size, seq_length, num_heads*head_dim] 71 | k = self.k_proj(x) # [batch_size, seq_length, 
num_key_values*head_dim] 72 | v = self.v_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 73 | 74 | q = q.view(batch_size, seq_length, self.num_local_heads, self.head_dim) # [batch_size, seq_length, num_heads, head_dim] 75 | k = k.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim) # [batch_size, seq_length, num_key_values, head_dim] 76 | q = apply_rotary_emb(q,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_heads, head_dim] 77 | k = apply_rotary_emb(k,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_key_values, head_dim] 78 | q = q.transpose(1, 2) # [batch_size, num_heads, seq_length, head_dim] 79 | k = k.transpose(1, 2) # [batch_size, num_key_values, seq_length, head_dim] 80 | v = v.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim).transpose(1,2) # [batch_size, num_key_values, seq_length, head_dim] 81 | 82 | k = k.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 83 | v = v.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 84 | 85 | causal = True if q.size(2) == k.size(2) else False # During decoding phase. The lenghth of q is usually 1. 86 | 87 | out = flash_attention(q, k, v, causal = causal) # [batch_size, seq_length, num_heads, head_dim] 88 | 89 | out = out.reshape(batch_size, seq_length, self.num_local_heads * self.head_dim) # [batch_size, seq_length, hidden_dim] 90 | out = self.out_proj(out) # [batch_size, seq_length, hidden_dim] 91 | return out 92 | 93 | class MLP(nn.Module): 94 | def __init__(self, config) -> None: 95 | super().__init__() 96 | self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 97 | self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 98 | self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False) 99 | 100 | def forward(self, x): 101 | #TODO: dont do single line operations as it is harder to debug 102 | return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) 103 | 104 | class DecoderLayer(nn.Module): 105 | # TritonRMSNorm -> Attention -> Residual -> TritonRMSNorm -> MLP -> Residual 106 | def __init__(self, config, layer_idx): 107 | super().__init__() 108 | self.input_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 109 | self.post_attention_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 110 | self.attention = Attention(config, layer_idx = layer_idx) 111 | self.mlp = MLP(config) 112 | self.layer_idx = layer_idx 113 | head_dim = config.hidden_size // config.num_attention_heads 114 | self.cos, self.sin = get_cos_sin(config.max_position_embeddings, head_dim=head_dim , base=config.rope_theta) # [max_position_embeddings, head_dim] 115 | 116 | def forward(self, x, attention_mask = None, position_ids = None): 117 | cos, sin = self.cos, self.sin 118 | x = x + self.attention(self.input_layernorm(x), cos, sin, attention_mask, position_ids) # Attention 119 | x = x + self.mlp(self.post_attention_layernorm(x)) # MLP 120 | return x 121 | 122 | class Llama(nn.Module): 123 | def __init__(self, config) -> None: 124 | super().__init__() 125 | # sanity check 126 | assert config.hidden_size % config.num_attention_heads==0 127 | assert config.num_attention_heads % config.num_key_value_heads==0 128 | 129 | # params 130 | self.vocab_size = 
config.vocab_size 131 | self.hidden_size = config.hidden_size 132 | self.num_heads = config.num_attention_heads 133 | self.num_key_values = config.num_key_value_heads 134 | self.head_dim = self.hidden_size//self.num_heads 135 | self.max_position_embeddings = config.max_position_embeddings 136 | self.num_layers = config.num_hidden_layers 137 | self.model_config = config 138 | 139 | # modules 140 | self.embedding = nn.Embedding(self.vocab_size, self.hidden_size) 141 | self.decoder_layers = nn.ModuleList([DecoderLayer(config,layer_idx = i) for i in range(self.num_layers)]) 142 | self.final_proj = nn.Linear(self.hidden_size, self.vocab_size, bias=False) 143 | self.final_norm = TritonRMSNorm(self.hidden_size, eps=config.rms_norm_eps) 144 | 145 | def forward(self, input_ids, attention_mask=None, position_ids: torch.Tensor = None): 146 | x = self.embedding(input_ids) 147 | for layer in self.decoder_layers: 148 | x = layer(x) # [batch_size, seq_length, hidden_dim] 149 | x = self.final_norm(x) 150 | logits = self.final_proj(x) 151 | 152 | return logits # [batch_size, seq_length, vocab_size] -------------------------------------------------------------------------------- /step5_data_parallel_naive/patch_step_5.diff: -------------------------------------------------------------------------------- 1 | Binary files step4_tensor_parallel/__pycache__/data.cpython-39.pyc and step5_data_parallel_naive/__pycache__/data.cpython-39.pyc differ 2 | Binary files step4_tensor_parallel/__pycache__/data_parallel.cpython-39.pyc and step5_data_parallel_naive/__pycache__/data_parallel.cpython-39.pyc differ 3 | Binary files step4_tensor_parallel/__pycache__/dataloader.cpython-39.pyc and step5_data_parallel_naive/__pycache__/dataloader.cpython-39.pyc differ 4 | Binary files step4_tensor_parallel/__pycache__/model.cpython-39.pyc and step5_data_parallel_naive/__pycache__/model.cpython-39.pyc differ 5 | Binary files step4_tensor_parallel/__pycache__/process_group_manager.cpython-39.pyc and step5_data_parallel_naive/__pycache__/process_group_manager.cpython-39.pyc differ 6 | Binary files step4_tensor_parallel/__pycache__/tensor_parallel.cpython-39.pyc and step5_data_parallel_naive/__pycache__/tensor_parallel.cpython-39.pyc differ 7 | Binary files step4_tensor_parallel/__pycache__/utils.cpython-39.pyc and step5_data_parallel_naive/__pycache__/utils.cpython-39.pyc differ 8 | diff -x '*.diff' --new-file -ur step4_tensor_parallel/data_parallel.py step5_data_parallel_naive/data_parallel.py 9 | --- step4_tensor_parallel/data_parallel.py 1970-01-01 00:00:00.000000000 +0000 10 | +++ step5_data_parallel_naive/data_parallel.py 2024-11-19 14:30:21.000000000 +0000 11 | @@ -0,0 +1,34 @@ 12 | +import contextlib 13 | +from typing import List 14 | +import torch 15 | +import torch.distributed as dist 16 | +from torch import nn 17 | + 18 | +import process_group_manager as pgm 19 | + 20 | +### begin Data Parallel (naive) 21 | +class DataParallelNaive(nn.Module): 22 | + def __init__(self, module): 23 | + super().__init__() 24 | + self.module = module 25 | + # whether to synchronize gradients during backward pass. 
Set to False when using gradient accumulation 26 | + self.require_backward_grad_sync = True 27 | + self.register_backward_hook(self._allreduce_grads) 28 | + 29 | + def forward(self, *inputs, **kwargs): 30 | + return self.module(*inputs, **kwargs) 31 | + 32 | + def register_backward_hook(self, hook): 33 | + """Registers a backward hook for all parameters of the model that require gradients.""" 34 | + for p in self.module.parameters(): 35 | + if p.requires_grad is True: 36 | + p.register_hook(hook) 37 | + 38 | + def _allreduce_grads(self, grad): 39 | + """Performs an all-reduce operation to synchronize gradients across multiple processes.""" 40 | + # No synchronization needed during gradient accumulation, except at the final accumulation step. 41 | + if self.require_backward_grad_sync: 42 | + dist.all_reduce(grad, op=dist.ReduceOp.SUM, group=pgm.process_group_manager.dp_group) 43 | + grad /= pgm.process_group_manager.dp_world_size 44 | + return grad 45 | +### end Data Parallel (naive) 46 | \ No newline at end of file 47 | diff -x '*.diff' --new-file -ur step4_tensor_parallel/dataloader.py step5_data_parallel_naive/dataloader.py 48 | --- step4_tensor_parallel/dataloader.py 2024-11-17 13:14:18.000000000 +0000 49 | +++ step5_data_parallel_naive/dataloader.py 2024-11-17 15:10:41.000000000 +0000 50 | @@ -1,5 +1,5 @@ 51 | import torch 52 | -from torch.utils.data import DataLoader 53 | +from torch.utils.data import DataLoader, DistributedSampler 54 | import numpy as np 55 | from functools import partial 56 | from datasets import Features, Sequence, Value, load_dataset 57 | @@ -8,7 +8,7 @@ 58 | import process_group_manager as pgm 59 | 60 | class MicroBatchDataLoader(DataLoader): 61 | - def __init__(self, seq_len, micro_batch_size, grad_acc_steps, dataset_name, tokenizer_name, max_tokens, num_workers, num_proc, split="train"): 62 | + def __init__(self, seq_len, micro_batch_size, grad_acc_steps, dataset_name, tokenizer_name, max_tokens, num_workers, num_proc, seed, split="train"): 63 | 64 | self.micro_batch_size = micro_batch_size 65 | self.grad_acc_steps = grad_acc_steps 66 | @@ -25,12 +25,21 @@ 67 | total_tokens = self.tokenized_dataset.num_rows * (self.seq_len + 1) 68 | assert total_tokens >= max_tokens, f"Not enough tokens. 
Have {total_tokens} tokens but need {max_tokens} tokens" 69 | 70 | + self.sampler = DistributedSampler( 71 | + self.tokenized_dataset, 72 | + num_replicas=pgm.process_group_manager.dp_world_size, 73 | + rank=pgm.process_group_manager.dp_rank, 74 | + seed=seed, 75 | + shuffle=False 76 | + ) 77 | + 78 | super().__init__( 79 | self.tokenized_dataset, 80 | batch_size=micro_batch_size, 81 | collate_fn=self.collate_batch, 82 | pin_memory=True, 83 | num_workers=num_workers, 84 | + sampler=self.sampler, 85 | shuffle=False, 86 | ) 87 | 88 | diff -x '*.diff' --new-file -ur step4_tensor_parallel/train.py step5_data_parallel_naive/train.py 89 | --- step4_tensor_parallel/train.py 2024-11-17 15:05:11.000000000 +0000 90 | +++ step5_data_parallel_naive/train.py 2024-11-19 14:16:50.000000000 +0000 91 | @@ -1,5 +1,5 @@ 92 | """ 93 | -torchrun --nproc_per_node 4 train.py --tp_size 4 --micro_batch_size 4 --gradient_accumulation_steps 8 --seq_len 128 --max_tokens 40960 --num_proc 16 --run_name tp_naive --use_wandb 94 | +torchrun --nproc_per_node 4 train.py --dp_size 4 --micro_batch_size 1 --gradient_accumulation_steps 8 --seq_len 128 --max_tokens 40960 --num_proc 16 --run_name dp_naive --use_wandb 95 | """ 96 | import os 97 | import time 98 | @@ -21,16 +21,23 @@ 99 | from utils import set_all_seed, print, to_readable_format 100 | 101 | from tensor_parallel import apply_tensor_parallel 102 | +from data_parallel import DataParallelNaive 103 | 104 | def train_step(model, dataloader, device): 105 | acc_loss = 0.0 106 | 107 | + requires_grad_sync = pgm.process_group_manager.dp_world_size > 1 108 | + 109 | for i in range(dataloader.grad_acc_steps): 110 | # get the next batch 111 | batch = next(dataloader) 112 | input_ids = batch["input_ids"].to(device) 113 | target_ids = batch["target_ids"].to(device) 114 | 115 | + # enable gradient synchronization for the last micro-batch only 116 | + if requires_grad_sync: 117 | + model.require_backward_grad_sync = (i == dataloader.grad_acc_steps - 1) 118 | + 119 | outputs = model(input_ids=input_ids) 120 | 121 | # compute the loss 122 | @@ -127,7 +134,13 @@ 123 | if pgm.process_group_manager.tp_world_size > 1: 124 | model = apply_tensor_parallel(model) 125 | 126 | + # Need to move the model to the device before wrapping it with DataParallel. 127 | + # Otherwise, the hook will get attached to the CPU model and not the GPU model. 
128 | model.to(dtype).to(device) 129 | + 130 | + if pgm.process_group_manager.dp_world_size > 1: 131 | + model = DataParallelNaive(model) 132 | + 133 | model.train() 134 | 135 | dist.barrier() 136 | @@ -146,6 +159,7 @@ 137 | max_tokens=args.max_tokens, 138 | num_workers=args.num_workers, 139 | num_proc=args.num_proc, 140 | + seed=args.seed, 141 | ) 142 | 143 | tokens_per_step = dataloader.global_batch_size * args.seq_len 144 | @@ -169,6 +183,10 @@ 145 | step_duration = time.time() - step_start_time 146 | trained_token += tokens_per_step 147 | step += 1 148 | + 149 | + # In DDP implementation, we need to reset the gradient buffers 150 | + if hasattr(model, 'reset'): 151 | + model.reset() 152 | 153 | print(f"[rank {pgm.process_group_manager.global_rank}] Step: {step}, Loss: {loss:.4f}, " 154 | f"Global batch size (with seq_len): {to_readable_format(tokens_per_step)}, " 155 | -------------------------------------------------------------------------------- /step5_data_parallel_naive/process_group_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class ProcessGroupManager: 6 | def __init__(self, dp_size, pp_size, tp_size): 7 | self.global_rank = dist.get_rank() 8 | self.world_size = dist.get_world_size() 9 | self.local_rank = int(os.environ.get("LOCAL_RANK", self.global_rank % self.world_size)) 10 | 11 | assert self.world_size == dp_size * pp_size * tp_size, f"World size ({self.world_size}) != DP ({self.dp_size}) * PP ({self.pp_size}) * TP ({self.tp_size})" 12 | 13 | self.grid = torch.arange(self.world_size).view(dp_size, pp_size, tp_size) # DP * PP * TP grid 14 | # Find the position of the current process in the grid 15 | self.dp_rank, self.pp_rank, self.tp_rank = (self.grid == self.global_rank).nonzero().flatten().tolist() 16 | 17 | # Process group creation - Update indexing to match new grid order 18 | self.tp_group = dist.new_subgroups_by_enumeration([self.grid[d, p, :].tolist() for d in range(dp_size) for p in range(pp_size)])[0] 19 | self.pp_group = dist.new_subgroups_by_enumeration([self.grid[d, :, t].tolist() for d in range(dp_size) for t in range(tp_size)])[0] 20 | self.dp_group = dist.new_subgroups_by_enumeration([self.grid[:, p, t].tolist() for p in range(pp_size) for t in range(tp_size)])[0] 21 | self.pp_dp_group = dist.new_subgroups_by_enumeration([self.grid[:, :, t].flatten().tolist() for t in range(tp_size)])[0] 22 | 23 | self.world_group = dist.group.WORLD 24 | 25 | # Update group IDs with new grid ordering 26 | self.tp_group_ids = self.grid[self.dp_rank, self.pp_rank, :].tolist() 27 | self.pp_group_ids = self.grid[self.dp_rank, :, self.tp_rank].tolist() 28 | self.dp_group_ids = self.grid[:, self.pp_rank, self.tp_rank].tolist() 29 | 30 | # Tensor parallelism 31 | self.tp_world_size = dist.get_world_size(group=self.tp_group) 32 | self.tp_first_rank = self.tp_group_ids[0] 33 | self.tp_last_rank = self.tp_group_ids[-1] 34 | 35 | # Pipeline parallelism 36 | self.pp_world_size = dist.get_world_size(group=self.pp_group) 37 | self.pp_first_rank = self.pp_group_ids[0] 38 | self.pp_last_rank = self.pp_group_ids[-1] 39 | self.pp_is_first_stage = self.pp_rank == 0 40 | self.pp_is_last_stage = self.pp_rank == self.pp_world_size - 1 41 | self.pp_next_rank = None if self.pp_rank == self.pp_world_size - 1 else int(self.grid[self.dp_rank, self.pp_rank + 1, self.tp_rank].item()) 42 | self.pp_prev_rank = None if self.pp_rank == 0 else int(self.grid[self.dp_rank, self.pp_rank - 1, 
self.tp_rank].item()) 43 | 44 | # Data parallelism 45 | self.dp_world_size = dist.get_world_size(group=self.dp_group) 46 | self.dp_first_rank = self.dp_group_ids[0] 47 | self.dp_last_rank = self.dp_group_ids[-1] 48 | 49 | def __str__(self): 50 | return f"DP({self.dp_world_size})-PP({self.pp_world_size})-TP({self.tp_world_size})-Rank({self.global_rank})" 51 | 52 | def setup_process_group_manager(dp_size, pp_size, tp_size): 53 | global process_group_manager 54 | process_group_manager = ProcessGroupManager(dp_size, pp_size, tp_size) -------------------------------------------------------------------------------- /step5_data_parallel_naive/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | torchrun --nproc_per_node 4 train.py --dp_size 4 --micro_batch_size 1 --gradient_accumulation_steps 8 --seq_len 128 --max_tokens 40960 --num_proc 16 --run_name dp_naive --use_wandb 3 | """ 4 | import os 5 | import time 6 | import wandb 7 | import datetime 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.distributed as dist 11 | import argparse 12 | from torch.optim import AdamW 13 | from transformers import AutoConfig 14 | 15 | import lovely_tensors as lt; lt.monkey_patch() 16 | 17 | from model import Llama 18 | from dataloader import MicroBatchDataLoader 19 | import process_group_manager as pgm 20 | from process_group_manager import setup_process_group_manager 21 | from utils import set_all_seed, print, to_readable_format 22 | 23 | from tensor_parallel import apply_tensor_parallel 24 | from data_parallel import DataParallelNaive 25 | 26 | def train_step(model, dataloader, device): 27 | acc_loss = 0.0 28 | 29 | requires_grad_sync = pgm.process_group_manager.dp_world_size > 1 30 | 31 | for i in range(dataloader.grad_acc_steps): 32 | # get the next batch 33 | batch = next(dataloader) 34 | input_ids = batch["input_ids"].to(device) 35 | target_ids = batch["target_ids"].to(device) 36 | 37 | # enable gradient synchronization for the last micro-batch only 38 | if requires_grad_sync: 39 | model.require_backward_grad_sync = (i == dataloader.grad_acc_steps - 1) 40 | 41 | outputs = model(input_ids=input_ids) 42 | 43 | # compute the loss 44 | batch_size, seq_len = input_ids.shape 45 | target_ids = target_ids.reshape(-1) 46 | outputs = outputs.view(seq_len*batch_size, -1) 47 | loss = F.cross_entropy(outputs, target_ids, reduction='mean') / dataloader.grad_acc_steps 48 | 49 | loss.backward() 50 | 51 | acc_loss += loss.item() 52 | 53 | return acc_loss 54 | 55 | if __name__ == "__main__": 56 | parser = argparse.ArgumentParser(description="Training script for LLaMA model") 57 | 58 | # Environment arguments 59 | parser.add_argument("--omp_num_threads", type=str, default="1") 60 | parser.add_argument("--tokenizers_parallelism", type=str, default="false") 61 | 62 | # Model arguments 63 | parser.add_argument("--model_name", type=str, default="HuggingFaceTB/SmolLM-360M-Instruct") 64 | parser.add_argument("--num_hidden_layers", type=int, default=32) 65 | parser.add_argument("--num_attention_heads", type=int, default=16) 66 | parser.add_argument("--num_key_value_heads", type=int, default=4) 67 | 68 | # Dataset arguments 69 | parser.add_argument("--dataset_name", type=str, default="roneneldan/TinyStories") 70 | parser.add_argument("--num_workers", type=int, default=1) 71 | parser.add_argument("--num_proc", type=int, default=4) 72 | 73 | # Training arguments 74 | parser.add_argument("--seed", type=int, default=42) 75 | parser.add_argument("--learning_rate", 
type=float, default=3e-4) 76 | parser.add_argument("--seq_len", type=int, default=32) 77 | parser.add_argument("--micro_batch_size", type=int, default=1) 78 | parser.add_argument("--gradient_accumulation_steps", type=int, default=1) 79 | parser.add_argument("--max_tokens", type=int, default=1e6) 80 | 81 | # Distributed training arguments 82 | parser.add_argument("--tp_size", type=int, default=1, help="Tensor Parallel size") 83 | parser.add_argument("--dp_size", type=int, default=1, help="Data Parallel size") 84 | parser.add_argument("--pp_size", type=int, default=1, help="Pipeline Parallel size") 85 | parser.add_argument("--pp_engine", type=str, default="afab", choices=["1f1b", "afab"]) 86 | 87 | # Logging arguments 88 | parser.add_argument("--run_name", type=str, default="default_run") 89 | parser.add_argument("--use_wandb", action="store_true") 90 | 91 | args = parser.parse_args() 92 | 93 | # Set environment variables 94 | os.environ["OMP_NUM_THREADS"] = args.omp_num_threads 95 | os.environ["TOKENIZERS_PARALLELISM"] = args.tokenizers_parallelism 96 | os.environ["DEVICE"] = "cuda" 97 | 98 | local_rank = int(os.environ["LOCAL_RANK"]) 99 | global_rank = int(os.environ["RANK"]) 100 | world_size = int(os.environ["WORLD_SIZE"]) 101 | backend = "nccl" 102 | torch.cuda.set_device(local_rank) 103 | device = torch.device("cuda", local_rank) 104 | dtype = torch.bfloat16 105 | 106 | dist.init_process_group(rank=global_rank, world_size=world_size, backend=backend, init_method=f"env://", timeout=datetime.timedelta(minutes=2)) 107 | setup_process_group_manager(dp_size=args.dp_size, pp_size=args.pp_size, tp_size=args.tp_size) 108 | 109 | is_wandb_rank = pgm.process_group_manager.tp_rank == 0 and pgm.process_group_manager.dp_rank == 0 and pgm.process_group_manager.pp_is_last_stage 110 | set_all_seed(args.seed) 111 | 112 | if is_wandb_rank and args.use_wandb: 113 | wandb.init( 114 | project="picotron_tutorial", 115 | name=f"{args.run_name}_{pgm.process_group_manager}", 116 | config={ 117 | "tensor_parallel_size": pgm.process_group_manager.tp_world_size, 118 | "pipeline_parallel_size": pgm.process_group_manager.pp_world_size, 119 | "data_parallel_size": pgm.process_group_manager.dp_world_size, 120 | "model": args.model_name, 121 | "learning_rate": args.learning_rate, 122 | "seed": args.seed, 123 | }, 124 | ) 125 | 126 | model_config = AutoConfig.from_pretrained(args.model_name) 127 | model_config.num_hidden_layers = args.num_hidden_layers 128 | model_config.num_attention_heads = args.num_attention_heads 129 | model_config.num_key_value_heads = args.num_key_value_heads 130 | model_config.max_position_embeddings = args.seq_len 131 | 132 | model = Llama(config=model_config) 133 | 134 | if pgm.process_group_manager.tp_world_size > 1: 135 | model = apply_tensor_parallel(model) 136 | 137 | # Need to move the model to the device before wrapping it with DataParallel. 138 | # Otherwise, the hook will get attached to the CPU model and not the GPU model. 
139 | model.to(dtype).to(device) 140 | 141 | if pgm.process_group_manager.dp_world_size > 1: 142 | model = DataParallelNaive(model) 143 | 144 | model.train() 145 | 146 | dist.barrier() 147 | 148 | optimizer = AdamW(model.parameters(), lr=args.learning_rate) 149 | 150 | dist.barrier() 151 | 152 | # Create dataloader 153 | dataloader = MicroBatchDataLoader( 154 | seq_len=args.seq_len, 155 | micro_batch_size=args.micro_batch_size, 156 | grad_acc_steps=args.gradient_accumulation_steps, 157 | dataset_name=args.dataset_name, 158 | tokenizer_name=args.model_name, 159 | max_tokens=args.max_tokens, 160 | num_workers=args.num_workers, 161 | num_proc=args.num_proc, 162 | seed=args.seed, 163 | ) 164 | 165 | tokens_per_step = dataloader.global_batch_size * args.seq_len 166 | if pgm.process_group_manager.global_rank == 0: 167 | print("Tokens per step:", to_readable_format(tokens_per_step), is_print_rank=is_wandb_rank) 168 | 169 | trained_token, step = 0, 0 170 | 171 | dist.barrier() 172 | 173 | # Training loop 174 | while trained_token < args.max_tokens: 175 | 176 | step_start_time = time.time() 177 | optimizer.zero_grad() 178 | 179 | loss = train_step(model, dataloader, device) 180 | 181 | optimizer.step() 182 | 183 | step_duration = time.time() - step_start_time 184 | trained_token += tokens_per_step 185 | step += 1 186 | 187 | # In DDP implementation, we need to reset the gradient buffers 188 | if hasattr(model, 'reset'): 189 | model.reset() 190 | 191 | print(f"[rank {pgm.process_group_manager.global_rank}] Step: {step}, Loss: {loss:.4f}, " 192 | f"Global batch size (with seq_len): {to_readable_format(tokens_per_step)}, " 193 | f"Tokens/s: {to_readable_format(tokens_per_step / step_duration)}, " 194 | f"Tokens/s/GPU: {to_readable_format(tokens_per_step / step_duration / world_size)}, " 195 | f"Tokens: {to_readable_format(trained_token)}{('/' + to_readable_format(args.max_tokens))}, " 196 | f"Memory usage: {torch.cuda.memory_reserved() / 1e9:.2f}GB" 197 | , is_print_rank=is_wandb_rank 198 | ) 199 | 200 | if is_wandb_rank and args.use_wandb: 201 | wandb.log({"loss": loss, "tokens_per_step": tokens_per_step, "tokens_per_second": tokens_per_step / step_duration,\ 202 | "memory_usage": torch.cuda.memory_reserved() / 1e9, "trained_tokens": tokens_per_step}) 203 | 204 | if is_wandb_rank and args.use_wandb: 205 | wandb.finish() 206 | 207 | dist.destroy_process_group() -------------------------------------------------------------------------------- /step5_data_parallel_naive/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import builtins 5 | import fcntl 6 | 7 | def print(*args, is_print_rank=True, **kwargs): 8 | """ solves multi-process interleaved print problem """ 9 | if not is_print_rank: return 10 | with open(__file__, "r") as fh: 11 | fcntl.flock(fh, fcntl.LOCK_EX) 12 | try: 13 | builtins.print(*args, **kwargs) 14 | finally: 15 | fcntl.flock(fh, fcntl.LOCK_UN) 16 | 17 | def set_all_seed(seed): 18 | for module in [random, np.random]: module.seed(seed) 19 | torch.manual_seed(seed) 20 | if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) 21 | 22 | def to_readable_format(num, precision=3): 23 | num_str = str(num) 24 | length = len(num_str) 25 | 26 | def format_with_precision(main, decimal, suffix): 27 | if precision == 0: 28 | return f"{main}{suffix}" 29 | return f"{main}.{decimal[:precision]}{suffix}" 30 | 31 | if length > 12: # Trillions 32 | return format_with_precision(num_str[:-12], 
num_str[-12:], 'T') 33 | elif length > 9: # Billions 34 | return format_with_precision(num_str[:-9], num_str[-9:], 'B') 35 | elif length > 6: # Millions 36 | return format_with_precision(num_str[:-6], num_str[-6:], 'M') 37 | elif length > 3: # Thousands 38 | return format_with_precision(num_str[:-3], num_str[-3:], 'K') 39 | else: 40 | return num_str -------------------------------------------------------------------------------- /step6_data_parallel_bucket/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader, DistributedSampler 3 | import numpy as np 4 | from functools import partial 5 | from datasets import Features, Sequence, Value, load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | import process_group_manager as pgm 9 | 10 | class MicroBatchDataLoader(DataLoader): 11 | def __init__(self, seq_len, micro_batch_size, grad_acc_steps, dataset_name, tokenizer_name, max_tokens, num_workers, num_proc, seed, split="train"): 12 | 13 | self.micro_batch_size = micro_batch_size 14 | self.grad_acc_steps = grad_acc_steps 15 | self.seq_len = seq_len 16 | 17 | self.global_batch_size = micro_batch_size * grad_acc_steps * pgm.process_group_manager.dp_world_size 18 | 19 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 20 | self.dataset = load_dataset(dataset_name, split=split) 21 | 22 | # Tokenize and chunk the dataset 23 | self.tokenized_dataset = self.tokenize_dataset(self.dataset, "text", self.seq_len, num_proc) 24 | 25 | total_tokens = self.tokenized_dataset.num_rows * (self.seq_len + 1) 26 | assert total_tokens >= max_tokens, f"Not enough tokens. Have {total_tokens} tokens but need {max_tokens} tokens" 27 | 28 | self.sampler = DistributedSampler( 29 | self.tokenized_dataset, 30 | num_replicas=pgm.process_group_manager.dp_world_size, 31 | rank=pgm.process_group_manager.dp_rank, 32 | seed=seed, 33 | shuffle=False 34 | ) 35 | 36 | super().__init__( 37 | self.tokenized_dataset, 38 | batch_size=micro_batch_size, 39 | collate_fn=self.collate_batch, 40 | pin_memory=True, 41 | num_workers=num_workers, 42 | sampler=self.sampler, 43 | shuffle=False, 44 | ) 45 | 46 | def tokenizer_group_text(self, examples, tokenizer, sequence_length): 47 | """Tokenize a list of texts and group them in chunks of sequence_length + 1""" 48 | tokenized_text_batch = tokenizer.batch_encode_plus( 49 | examples, 50 | return_attention_mask=False, 51 | return_token_type_ids=False, 52 | return_tensors='np' 53 | ) 54 | concatenated_tokens = {'input_ids': np.concatenate(tokenized_text_batch['input_ids'])} 55 | total_length = len(concatenated_tokens['input_ids']) 56 | 57 | if total_length >= sequence_length + 1: 58 | total_length = ((total_length - 1) // sequence_length) * sequence_length + 1 59 | 60 | result = { 61 | 'input_ids': [ 62 | concatenated_tokens['input_ids'][i : i + sequence_length + 1] 63 | for i in range(0, total_length - sequence_length, sequence_length) 64 | ] 65 | } 66 | return result 67 | 68 | def tokenize_dataset(self, dataset, text_column_name, sequence_length, num_proc): 69 | """Tokenize the dataset and group texts in chunks of sequence_length + 1""" 70 | tokenizer_func = partial( 71 | self.tokenizer_group_text, 72 | tokenizer=self.tokenizer, 73 | sequence_length=sequence_length 74 | ) 75 | 76 | tokenized_dataset = dataset.map( 77 | tokenizer_func, 78 | input_columns=text_column_name, 79 | remove_columns=dataset.column_names, 80 | features=Features({ 81 | "input_ids": 
Sequence(feature=Value(dtype="int64"), length=sequence_length + 1) 82 | }), 83 | batched=True, 84 | num_proc=num_proc, 85 | load_from_cache_file=True, # Preprocess dataset only once and cache it 86 | desc=f"Grouping texts in chunks of {sequence_length+1}", 87 | ) 88 | 89 | return tokenized_dataset 90 | 91 | def collate_batch(self, batch): 92 | batch_input_ids = torch.stack([torch.tensor(item['input_ids']) for item in batch]) 93 | batch_size = batch_input_ids.size(0) 94 | input_ids = batch_input_ids[:, :-1].contiguous() 95 | target_ids = batch_input_ids[:, 1:].contiguous() 96 | position_ids = torch.arange(self.seq_len, dtype=torch.long).unsqueeze(0).expand(batch_size, -1).contiguous() 97 | attn_mask = torch.tril(torch.ones((self.seq_len, self.seq_len), dtype=torch.bool)) 98 | attn_mask = attn_mask.unsqueeze(0).expand(batch_size, -1, -1).contiguous() 99 | 100 | return { 101 | "input_ids": input_ids, 102 | "target_ids": target_ids, 103 | "position_ids": position_ids, 104 | "attn_mask": attn_mask, 105 | "hidden_states": None 106 | } 107 | 108 | def __iter__(self): 109 | if self._iterator is None: 110 | self._iterator = super().__iter__() 111 | return self 112 | 113 | def __next__(self): 114 | if self._iterator is None: 115 | self._iterator = super().__iter__() 116 | try: 117 | batch = next(self._iterator) 118 | except StopIteration: 119 | self._iterator = None 120 | raise StopIteration 121 | return batch -------------------------------------------------------------------------------- /step6_data_parallel_bucket/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from flash_attn.flash_attn_interface import flash_attn_func 5 | from flash_attn.layers.rotary import apply_rotary_emb 6 | from flash_attn.ops.triton.layer_norm import layer_norm_fn 7 | import process_group_manager as pgm 8 | 9 | def flash_attention(q, k, v, causal = True): 10 | q = q.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 11 | k = k.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 12 | v = v.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 13 | return flash_attn_func(q, k, v, causal=causal) 14 | 15 | def get_cos_sin(seq_length, head_dim, base=500000.0): 16 | assert head_dim%2==0 17 | # Results on CUDA and CPU are different even with the same formula, To match transformers implementation. frequency should be computed on CPU 18 | theta = 1.0 / (base ** (torch.arange(0, head_dim, 2, dtype=torch.int64).float().to('cpu') / head_dim)) 19 | dtype = torch.bfloat16 20 | device = torch.device('cuda') 21 | position = torch.arange(seq_length).to(device).unsqueeze(1).float() # [seq_length, 1] 22 | # To match transformers implementation. 
m * theta should be computed on GPU 23 | theta = theta.to(device) 24 | return torch.cos(position.float()*theta.float()).to(dtype).repeat(1,2), torch.sin(position.float()*theta.float()).to(dtype).repeat(1,2) # [seq_length, head_dim], [seq_length, head_dim] 25 | 26 | class TritonRMSNorm(nn.Module): 27 | def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None): 28 | super().__init__() 29 | self.eps = eps 30 | self.weight = nn.Parameter(torch.ones(hidden_size)) 31 | self.register_parameter("bias", None) 32 | 33 | def forward( 34 | self, hidden_states, residual=None, dropout_p=0.0, prenorm=False, residual_in_fp32=False, return_dropout_mask=False 35 | ): 36 | return layer_norm_fn( 37 | hidden_states, 38 | self.weight, 39 | None, 40 | residual=residual, 41 | eps=self.eps, 42 | dropout_p=dropout_p, 43 | prenorm=prenorm, 44 | residual_in_fp32=residual_in_fp32, 45 | is_rms_norm=True, 46 | return_dropout_mask=return_dropout_mask, 47 | ) 48 | 49 | class Attention(nn.Module): 50 | def __init__(self, config, layer_idx): 51 | super().__init__() 52 | self.hidden_size = config.hidden_size 53 | self.num_heads = config.num_attention_heads 54 | self.num_key_values = config.num_key_value_heads 55 | self.head_dim = self.hidden_size//self.num_heads 56 | assert config.num_attention_heads % pgm.process_group_manager.tp_world_size == 0, "num_attention_heads should be divisible by tp world size" 57 | assert config.num_key_value_heads % pgm.process_group_manager.tp_world_size == 0, "num_key_value_heads should be divisible by tp world size" 58 | self.num_local_heads = config.num_attention_heads // pgm.process_group_manager.tp_world_size # TP parallelism 59 | self.num_local_kv_heads = config.num_key_value_heads // pgm.process_group_manager.tp_world_size # TP parallelism 60 | 61 | 62 | self.q_proj = nn.Linear(config.hidden_size, self.num_heads*self.head_dim, bias=False) 63 | self.k_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 64 | self.v_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 65 | self.out_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False) 66 | self.layer_idx = layer_idx 67 | 68 | def forward(self, x, cos, sin, attention_mask=None, position_ids=None): 69 | batch_size, seq_length, hidden_dim = x.size() 70 | q = self.q_proj(x) # [batch_size, seq_length, num_heads*head_dim] 71 | k = self.k_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 72 | v = self.v_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 73 | 74 | q = q.view(batch_size, seq_length, self.num_local_heads, self.head_dim) # [batch_size, seq_length, num_heads, head_dim] 75 | k = k.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim) # [batch_size, seq_length, num_key_values, head_dim] 76 | q = apply_rotary_emb(q,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_heads, head_dim] 77 | k = apply_rotary_emb(k,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_key_values, head_dim] 78 | q = q.transpose(1, 2) # [batch_size, num_heads, seq_length, head_dim] 79 | k = k.transpose(1, 2) # [batch_size, num_key_values, seq_length, head_dim] 80 | v = v.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim).transpose(1,2) # [batch_size, num_key_values, seq_length, head_dim] 81 | 82 | k = k.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, 
head_dim] 83 | v = v.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 84 | 85 | causal = True if q.size(2) == k.size(2) else False # During decoding phase. The lenghth of q is usually 1. 86 | 87 | out = flash_attention(q, k, v, causal = causal) # [batch_size, seq_length, num_heads, head_dim] 88 | 89 | out = out.reshape(batch_size, seq_length, self.num_local_heads * self.head_dim) # [batch_size, seq_length, hidden_dim] 90 | out = self.out_proj(out) # [batch_size, seq_length, hidden_dim] 91 | return out 92 | 93 | class MLP(nn.Module): 94 | def __init__(self, config) -> None: 95 | super().__init__() 96 | self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 97 | self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 98 | self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False) 99 | 100 | def forward(self, x): 101 | #TODO: dont do single line operations as it is harder to debug 102 | return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) 103 | 104 | class DecoderLayer(nn.Module): 105 | # TritonRMSNorm -> Attention -> Residual -> TritonRMSNorm -> MLP -> Residual 106 | def __init__(self, config, layer_idx): 107 | super().__init__() 108 | self.input_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 109 | self.post_attention_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 110 | self.attention = Attention(config, layer_idx = layer_idx) 111 | self.mlp = MLP(config) 112 | self.layer_idx = layer_idx 113 | head_dim = config.hidden_size // config.num_attention_heads 114 | self.cos, self.sin = get_cos_sin(config.max_position_embeddings, head_dim=head_dim , base=config.rope_theta) # [max_position_embeddings, head_dim] 115 | 116 | def forward(self, x, attention_mask = None, position_ids = None): 117 | cos, sin = self.cos, self.sin 118 | x = x + self.attention(self.input_layernorm(x), cos, sin, attention_mask, position_ids) # Attention 119 | x = x + self.mlp(self.post_attention_layernorm(x)) # MLP 120 | return x 121 | 122 | class Llama(nn.Module): 123 | def __init__(self, config) -> None: 124 | super().__init__() 125 | # sanity check 126 | assert config.hidden_size % config.num_attention_heads==0 127 | assert config.num_attention_heads % config.num_key_value_heads==0 128 | 129 | # params 130 | self.vocab_size = config.vocab_size 131 | self.hidden_size = config.hidden_size 132 | self.num_heads = config.num_attention_heads 133 | self.num_key_values = config.num_key_value_heads 134 | self.head_dim = self.hidden_size//self.num_heads 135 | self.max_position_embeddings = config.max_position_embeddings 136 | self.num_layers = config.num_hidden_layers 137 | self.model_config = config 138 | 139 | # modules 140 | self.embedding = nn.Embedding(self.vocab_size, self.hidden_size) 141 | self.decoder_layers = nn.ModuleList([DecoderLayer(config,layer_idx = i) for i in range(self.num_layers)]) 142 | self.final_proj = nn.Linear(self.hidden_size, self.vocab_size, bias=False) 143 | self.final_norm = TritonRMSNorm(self.hidden_size, eps=config.rms_norm_eps) 144 | 145 | def forward(self, input_ids, attention_mask=None, position_ids: torch.Tensor = None): 146 | x = self.embedding(input_ids) 147 | for layer in self.decoder_layers: 148 | x = layer(x) # [batch_size, seq_length, hidden_dim] 149 | x = self.final_norm(x) 150 | logits = self.final_proj(x) 151 | 152 | return logits # [batch_size, seq_length, vocab_size] 
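
Note: step6_data_parallel_bucket/train.py further down wraps the model in DataParallelBucket and calls model.reset() after each optimizer step, but the corresponding data_parallel.py is not reproduced in this listing. The sketch below shows only the bucketed gradient all-reduce idea, assuming the same process_group_manager module and PyTorch >= 2.1 (for register_post_accumulate_grad_hook); the class name and helpers are hypothetical and may differ from the actual file.

### begin Data Parallel (bucket) -- illustrative sketch only, not the repo's implementation
import torch
import torch.distributed as dist
from torch import nn

import process_group_manager as pgm

class DataParallelBucketSketch(nn.Module):
    """Group parameters into fixed-size buckets and all-reduce each bucket's
    gradients in a single call, instead of one all-reduce per parameter."""
    def __init__(self, module, bucket_cap_mb=25):
        super().__init__()
        self.module = module
        # Same flag that train_step() toggles: sync only on the last micro-batch.
        self.require_backward_grad_sync = True
        self.dp_group = pgm.process_group_manager.dp_group
        self.dp_world_size = pgm.process_group_manager.dp_world_size
        self.buckets = self._build_buckets(bucket_cap_mb * 1024 * 1024)
        self._ready = [0] * len(self.buckets)  # grads received per bucket
        for idx, bucket in enumerate(self.buckets):
            for p in bucket:
                # Fires once .grad has been fully accumulated for this backward pass.
                p.register_post_accumulate_grad_hook(self._make_hook(idx))

    def forward(self, *inputs, **kwargs):
        return self.module(*inputs, **kwargs)

    def _build_buckets(self, cap_bytes):
        """Greedily pack parameters into buckets of roughly cap_bytes each."""
        buckets, current, size = [], [], 0
        for p in self.module.parameters():
            if not p.requires_grad:
                continue
            current.append(p)
            size += p.numel() * p.element_size()
            if size >= cap_bytes:
                buckets.append(current)
                current, size = [], 0
        if current:
            buckets.append(current)
        return buckets

    def _make_hook(self, idx):
        def hook(param):
            if not self.require_backward_grad_sync:
                return  # gradient-accumulation micro-batch: no communication
            self._ready[idx] += 1
            if self._ready[idx] == len(self.buckets[idx]):
                self._sync_bucket(idx)
        return hook

    def _sync_bucket(self, idx):
        # One all-reduce for the whole bucket, issued during backward of the
        # last micro-batch, so gradients are already averaged by optimizer.step().
        bucket = self.buckets[idx]
        flat = torch.cat([p.grad.flatten() for p in bucket])
        dist.all_reduce(flat, op=dist.ReduceOp.SUM, group=self.dp_group)
        flat /= self.dp_world_size
        offset = 0
        for p in bucket:
            n = p.grad.numel()
            p.grad.copy_(flat[offset:offset + n].view_as(p.grad))
            offset += n

    def reset(self):
        # Called by train.py after optimizer.step(): clear per-bucket counters.
        self._ready = [0] * len(self.buckets)
### end Data Parallel (bucket) -- illustrative sketch only

Launching each bucket's all-reduce with async_op=True would let communication overlap with the rest of the backward pass; the synchronous call above just keeps the sketch short.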
-------------------------------------------------------------------------------- /step6_data_parallel_bucket/process_group_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class ProcessGroupManager: 6 | def __init__(self, dp_size, pp_size, tp_size): 7 | self.global_rank = dist.get_rank() 8 | self.world_size = dist.get_world_size() 9 | self.local_rank = int(os.environ.get("LOCAL_RANK", self.global_rank % self.world_size)) 10 | 11 | assert self.world_size == dp_size * pp_size * tp_size, f"World size ({self.world_size}) != DP ({self.dp_size}) * PP ({self.pp_size}) * TP ({self.tp_size})" 12 | 13 | self.grid = torch.arange(self.world_size).view(dp_size, pp_size, tp_size) # DP * PP * TP grid 14 | # Find the position of the current process in the grid 15 | self.dp_rank, self.pp_rank, self.tp_rank = (self.grid == self.global_rank).nonzero().flatten().tolist() 16 | 17 | # Process group creation - Update indexing to match new grid order 18 | self.tp_group = dist.new_subgroups_by_enumeration([self.grid[d, p, :].tolist() for d in range(dp_size) for p in range(pp_size)])[0] 19 | self.pp_group = dist.new_subgroups_by_enumeration([self.grid[d, :, t].tolist() for d in range(dp_size) for t in range(tp_size)])[0] 20 | self.dp_group = dist.new_subgroups_by_enumeration([self.grid[:, p, t].tolist() for p in range(pp_size) for t in range(tp_size)])[0] 21 | self.pp_dp_group = dist.new_subgroups_by_enumeration([self.grid[:, :, t].flatten().tolist() for t in range(tp_size)])[0] 22 | 23 | self.world_group = dist.group.WORLD 24 | 25 | # Update group IDs with new grid ordering 26 | self.tp_group_ids = self.grid[self.dp_rank, self.pp_rank, :].tolist() 27 | self.pp_group_ids = self.grid[self.dp_rank, :, self.tp_rank].tolist() 28 | self.dp_group_ids = self.grid[:, self.pp_rank, self.tp_rank].tolist() 29 | 30 | # Tensor parallelism 31 | self.tp_world_size = dist.get_world_size(group=self.tp_group) 32 | self.tp_first_rank = self.tp_group_ids[0] 33 | self.tp_last_rank = self.tp_group_ids[-1] 34 | 35 | # Pipeline parallelism 36 | self.pp_world_size = dist.get_world_size(group=self.pp_group) 37 | self.pp_first_rank = self.pp_group_ids[0] 38 | self.pp_last_rank = self.pp_group_ids[-1] 39 | self.pp_is_first_stage = self.pp_rank == 0 40 | self.pp_is_last_stage = self.pp_rank == self.pp_world_size - 1 41 | self.pp_next_rank = None if self.pp_rank == self.pp_world_size - 1 else int(self.grid[self.dp_rank, self.pp_rank + 1, self.tp_rank].item()) 42 | self.pp_prev_rank = None if self.pp_rank == 0 else int(self.grid[self.dp_rank, self.pp_rank - 1, self.tp_rank].item()) 43 | 44 | # Data parallelism 45 | self.dp_world_size = dist.get_world_size(group=self.dp_group) 46 | self.dp_first_rank = self.dp_group_ids[0] 47 | self.dp_last_rank = self.dp_group_ids[-1] 48 | 49 | def __str__(self): 50 | return f"DP({self.dp_world_size})-PP({self.pp_world_size})-TP({self.tp_world_size})-Rank({self.global_rank})" 51 | 52 | def setup_process_group_manager(dp_size, pp_size, tp_size): 53 | global process_group_manager 54 | process_group_manager = ProcessGroupManager(dp_size, pp_size, tp_size) -------------------------------------------------------------------------------- /step6_data_parallel_bucket/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | torchrun --nproc_per_node 4 train.py --dp_size 4 --micro_batch_size 1 --gradient_accumulation_steps 8 --seq_len 128 --max_tokens 40960 
--num_proc 16 --run_name dp_bucket --use_wandb 3 | """ 4 | import os 5 | import time 6 | import wandb 7 | import datetime 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.distributed as dist 11 | import argparse 12 | from torch.optim import AdamW 13 | from transformers import AutoConfig 14 | 15 | import lovely_tensors as lt; lt.monkey_patch() 16 | 17 | from model import Llama 18 | from dataloader import MicroBatchDataLoader 19 | import process_group_manager as pgm 20 | from process_group_manager import setup_process_group_manager 21 | from utils import set_all_seed, print, to_readable_format 22 | 23 | from tensor_parallel import apply_tensor_parallel 24 | from data_parallel import DataParallelBucket 25 | 26 | def train_step(model, dataloader, device): 27 | acc_loss = 0.0 28 | 29 | requires_grad_sync = pgm.process_group_manager.dp_world_size > 1 30 | 31 | for i in range(dataloader.grad_acc_steps): 32 | # get the next batch 33 | batch = next(dataloader) 34 | input_ids = batch["input_ids"].to(device) 35 | target_ids = batch["target_ids"].to(device) 36 | 37 | # enable gradient synchronization for the last micro-batch only 38 | if requires_grad_sync: 39 | model.require_backward_grad_sync = (i == dataloader.grad_acc_steps - 1) 40 | 41 | outputs = model(input_ids=input_ids) 42 | 43 | # compute the loss 44 | batch_size, seq_len = input_ids.shape 45 | target_ids = target_ids.reshape(-1) 46 | outputs = outputs.view(seq_len*batch_size, -1) 47 | loss = F.cross_entropy(outputs, target_ids, reduction='mean') / dataloader.grad_acc_steps 48 | 49 | loss.backward() 50 | 51 | acc_loss += loss.item() 52 | 53 | return acc_loss 54 | 55 | if __name__ == "__main__": 56 | parser = argparse.ArgumentParser(description="Training script for LLaMA model") 57 | 58 | # Environment arguments 59 | parser.add_argument("--omp_num_threads", type=str, default="1") 60 | parser.add_argument("--tokenizers_parallelism", type=str, default="false") 61 | 62 | # Model arguments 63 | parser.add_argument("--model_name", type=str, default="HuggingFaceTB/SmolLM-360M-Instruct") 64 | parser.add_argument("--num_hidden_layers", type=int, default=32) 65 | parser.add_argument("--num_attention_heads", type=int, default=16) 66 | parser.add_argument("--num_key_value_heads", type=int, default=4) 67 | 68 | # Dataset arguments 69 | parser.add_argument("--dataset_name", type=str, default="roneneldan/TinyStories") 70 | parser.add_argument("--num_workers", type=int, default=1) 71 | parser.add_argument("--num_proc", type=int, default=4) 72 | 73 | # Training arguments 74 | parser.add_argument("--seed", type=int, default=42) 75 | parser.add_argument("--learning_rate", type=float, default=3e-4) 76 | parser.add_argument("--seq_len", type=int, default=32) 77 | parser.add_argument("--micro_batch_size", type=int, default=1) 78 | parser.add_argument("--gradient_accumulation_steps", type=int, default=1) 79 | parser.add_argument("--max_tokens", type=int, default=1e6) 80 | 81 | # Distributed training arguments 82 | parser.add_argument("--tp_size", type=int, default=1, help="Tensor Parallel size") 83 | parser.add_argument("--dp_size", type=int, default=1, help="Data Parallel size") 84 | parser.add_argument("--pp_size", type=int, default=1, help="Pipeline Parallel size") 85 | parser.add_argument("--pp_engine", type=str, default="afab", choices=["1f1b", "afab"]) 86 | 87 | # Logging arguments 88 | parser.add_argument("--run_name", type=str, default="default_run") 89 | parser.add_argument("--use_wandb", action="store_true") 90 | 91 | args = 
parser.parse_args() 92 | 93 | # Set environment variables 94 | os.environ["OMP_NUM_THREADS"] = args.omp_num_threads 95 | os.environ["TOKENIZERS_PARALLELISM"] = args.tokenizers_parallelism 96 | os.environ["DEVICE"] = "cuda" 97 | 98 | local_rank = int(os.environ["LOCAL_RANK"]) 99 | global_rank = int(os.environ["RANK"]) 100 | world_size = int(os.environ["WORLD_SIZE"]) 101 | backend = "nccl" 102 | torch.cuda.set_device(local_rank) 103 | device = torch.device("cuda", local_rank) 104 | dtype = torch.bfloat16 105 | 106 | dist.init_process_group(rank=global_rank, world_size=world_size, backend=backend, init_method=f"env://", timeout=datetime.timedelta(minutes=2)) 107 | setup_process_group_manager(dp_size=args.dp_size, pp_size=args.pp_size, tp_size=args.tp_size) 108 | 109 | is_wandb_rank = pgm.process_group_manager.tp_rank == 0 and pgm.process_group_manager.dp_rank == 0 and pgm.process_group_manager.pp_is_last_stage 110 | set_all_seed(args.seed) 111 | 112 | if is_wandb_rank and args.use_wandb: 113 | wandb.init( 114 | project="picotron_tutorial", 115 | name=f"{args.run_name}_{pgm.process_group_manager}", 116 | config={ 117 | "tensor_parallel_size": pgm.process_group_manager.tp_world_size, 118 | "pipeline_parallel_size": pgm.process_group_manager.pp_world_size, 119 | "data_parallel_size": pgm.process_group_manager.dp_world_size, 120 | "model": args.model_name, 121 | "learning_rate": args.learning_rate, 122 | "seed": args.seed, 123 | }, 124 | ) 125 | 126 | model_config = AutoConfig.from_pretrained(args.model_name) 127 | model_config.num_hidden_layers = args.num_hidden_layers 128 | model_config.num_attention_heads = args.num_attention_heads 129 | model_config.num_key_value_heads = args.num_key_value_heads 130 | model_config.max_position_embeddings = args.seq_len 131 | 132 | model = Llama(config=model_config) 133 | 134 | if pgm.process_group_manager.tp_world_size > 1: 135 | model = apply_tensor_parallel(model) 136 | 137 | # Need to move the model to the device before wrapping it with DataParallel. 138 | # Otherwise, the hook will get attached to the CPU model and not the GPU model. 
139 | model.to(dtype).to(device) 140 | 141 | if pgm.process_group_manager.dp_world_size > 1: 142 | model = DataParallelBucket(model) 143 | 144 | model.train() 145 | 146 | dist.barrier() 147 | 148 | optimizer = AdamW(model.parameters(), lr=args.learning_rate) 149 | 150 | dist.barrier() 151 | 152 | # Create dataloader 153 | dataloader = MicroBatchDataLoader( 154 | seq_len=args.seq_len, 155 | micro_batch_size=args.micro_batch_size, 156 | grad_acc_steps=args.gradient_accumulation_steps, 157 | dataset_name=args.dataset_name, 158 | tokenizer_name=args.model_name, 159 | max_tokens=args.max_tokens, 160 | num_workers=args.num_workers, 161 | num_proc=args.num_proc, 162 | seed=args.seed, 163 | ) 164 | 165 | tokens_per_step = dataloader.global_batch_size * args.seq_len 166 | if pgm.process_group_manager.global_rank == 0: 167 | print("Tokens per step:", to_readable_format(tokens_per_step), is_print_rank=is_wandb_rank) 168 | 169 | trained_token, step = 0, 0 170 | 171 | dist.barrier() 172 | 173 | # Training loop 174 | while trained_token < args.max_tokens: 175 | 176 | step_start_time = time.time() 177 | optimizer.zero_grad() 178 | 179 | loss = train_step(model, dataloader, device) 180 | 181 | optimizer.step() 182 | 183 | step_duration = time.time() - step_start_time 184 | trained_token += tokens_per_step 185 | step += 1 186 | 187 | # In DDP implementation, we need to reset the gradient buffers 188 | if hasattr(model, 'reset'): 189 | model.reset() 190 | 191 | print(f"[rank {pgm.process_group_manager.global_rank}] Step: {step}, Loss: {loss:.4f}, " 192 | f"Global batch size (with seq_len): {to_readable_format(tokens_per_step)}, " 193 | f"Tokens/s: {to_readable_format(tokens_per_step / step_duration)}, " 194 | f"Tokens/s/GPU: {to_readable_format(tokens_per_step / step_duration / world_size)}, " 195 | f"Tokens: {to_readable_format(trained_token)}{('/' + to_readable_format(args.max_tokens))}, " 196 | f"Memory usage: {torch.cuda.memory_reserved() / 1e9:.2f}GB" 197 | , is_print_rank=is_wandb_rank 198 | ) 199 | 200 | if is_wandb_rank and args.use_wandb: 201 | wandb.log({"loss": loss, "tokens_per_step": tokens_per_step, "tokens_per_second": tokens_per_step / step_duration,\ 202 | "memory_usage": torch.cuda.memory_reserved() / 1e9, "trained_tokens": tokens_per_step}) 203 | 204 | if is_wandb_rank and args.use_wandb: 205 | wandb.finish() 206 | 207 | dist.destroy_process_group() -------------------------------------------------------------------------------- /step6_data_parallel_bucket/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import builtins 5 | import fcntl 6 | 7 | def print(*args, is_print_rank=True, **kwargs): 8 | """ solves multi-process interleaved print problem """ 9 | if not is_print_rank: return 10 | with open(__file__, "r") as fh: 11 | fcntl.flock(fh, fcntl.LOCK_EX) 12 | try: 13 | builtins.print(*args, **kwargs) 14 | finally: 15 | fcntl.flock(fh, fcntl.LOCK_UN) 16 | 17 | def set_all_seed(seed): 18 | for module in [random, np.random]: module.seed(seed) 19 | torch.manual_seed(seed) 20 | if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) 21 | 22 | def to_readable_format(num, precision=3): 23 | num_str = str(num) 24 | length = len(num_str) 25 | 26 | def format_with_precision(main, decimal, suffix): 27 | if precision == 0: 28 | return f"{main}{suffix}" 29 | return f"{main}.{decimal[:precision]}{suffix}" 30 | 31 | if length > 12: # Trillions 32 | return format_with_precision(num_str[:-12], 
num_str[-12:], 'T') 33 | elif length > 9: # Billions 34 | return format_with_precision(num_str[:-9], num_str[-9:], 'B') 35 | elif length > 6: # Millions 36 | return format_with_precision(num_str[:-6], num_str[-6:], 'M') 37 | elif length > 3: # Thousands 38 | return format_with_precision(num_str[:-3], num_str[-3:], 'K') 39 | else: 40 | return num_str -------------------------------------------------------------------------------- /step7_pipeline_parallel_afab/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader, DistributedSampler 3 | import numpy as np 4 | from functools import partial 5 | from datasets import Features, Sequence, Value, load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | import process_group_manager as pgm 9 | 10 | class MicroBatchDataLoader(DataLoader): 11 | def __init__(self, seq_len, micro_batch_size, grad_acc_steps, dataset_name, tokenizer_name, max_tokens, num_workers, num_proc, seed, split="train"): 12 | 13 | self.micro_batch_size = micro_batch_size 14 | self.grad_acc_steps = grad_acc_steps 15 | self.seq_len = seq_len 16 | 17 | self.global_batch_size = micro_batch_size * grad_acc_steps * pgm.process_group_manager.dp_world_size 18 | 19 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 20 | self.dataset = load_dataset(dataset_name, split=split) 21 | 22 | # Tokenize and chunk the dataset 23 | self.tokenized_dataset = self.tokenize_dataset(self.dataset, "text", self.seq_len, num_proc) 24 | 25 | total_tokens = self.tokenized_dataset.num_rows * (self.seq_len + 1) 26 | assert total_tokens >= max_tokens, f"Not enough tokens. Have {total_tokens} tokens but need {max_tokens} tokens" 27 | 28 | self.sampler = DistributedSampler( 29 | self.tokenized_dataset, 30 | num_replicas=pgm.process_group_manager.dp_world_size, 31 | rank=pgm.process_group_manager.dp_rank, 32 | seed=seed, 33 | shuffle=False 34 | ) 35 | 36 | super().__init__( 37 | self.tokenized_dataset, 38 | batch_size=micro_batch_size, 39 | collate_fn=self.collate_batch, 40 | pin_memory=True, 41 | num_workers=num_workers, 42 | sampler=self.sampler, 43 | shuffle=False, 44 | ) 45 | 46 | def tokenizer_group_text(self, examples, tokenizer, sequence_length): 47 | """Tokenize a list of texts and group them in chunks of sequence_length + 1""" 48 | tokenized_text_batch = tokenizer.batch_encode_plus( 49 | examples, 50 | return_attention_mask=False, 51 | return_token_type_ids=False, 52 | return_tensors='np' 53 | ) 54 | concatenated_tokens = {'input_ids': np.concatenate(tokenized_text_batch['input_ids'])} 55 | total_length = len(concatenated_tokens['input_ids']) 56 | 57 | if total_length >= sequence_length + 1: 58 | total_length = ((total_length - 1) // sequence_length) * sequence_length + 1 59 | 60 | result = { 61 | 'input_ids': [ 62 | concatenated_tokens['input_ids'][i : i + sequence_length + 1] 63 | for i in range(0, total_length - sequence_length, sequence_length) 64 | ] 65 | } 66 | return result 67 | 68 | def tokenize_dataset(self, dataset, text_column_name, sequence_length, num_proc): 69 | """Tokenize the dataset and group texts in chunks of sequence_length + 1""" 70 | tokenizer_func = partial( 71 | self.tokenizer_group_text, 72 | tokenizer=self.tokenizer, 73 | sequence_length=sequence_length 74 | ) 75 | 76 | tokenized_dataset = dataset.map( 77 | tokenizer_func, 78 | input_columns=text_column_name, 79 | remove_columns=dataset.column_names, 80 | features=Features({ 81 | "input_ids": 
Sequence(feature=Value(dtype="int64"), length=sequence_length + 1) 82 | }), 83 | batched=True, 84 | num_proc=num_proc, 85 | load_from_cache_file=True, # Preprocess dataset only once and cache it 86 | desc=f"Grouping texts in chunks of {sequence_length+1}", 87 | ) 88 | 89 | return tokenized_dataset 90 | 91 | def collate_batch(self, batch): 92 | batch_input_ids = torch.stack([torch.tensor(item['input_ids']) for item in batch]) 93 | batch_size = batch_input_ids.size(0) 94 | input_ids = batch_input_ids[:, :-1].contiguous() 95 | target_ids = batch_input_ids[:, 1:].contiguous() 96 | position_ids = torch.arange(self.seq_len, dtype=torch.long).unsqueeze(0).expand(batch_size, -1).contiguous() 97 | attn_mask = torch.tril(torch.ones((self.seq_len, self.seq_len), dtype=torch.bool)) 98 | attn_mask = attn_mask.unsqueeze(0).expand(batch_size, -1, -1).contiguous() 99 | 100 | return { 101 | "input_ids": input_ids, 102 | "target_ids": target_ids, 103 | "position_ids": position_ids, 104 | "attn_mask": attn_mask, 105 | "hidden_states": None 106 | } 107 | 108 | def __iter__(self): 109 | if self._iterator is None: 110 | self._iterator = super().__iter__() 111 | return self 112 | 113 | def __next__(self): 114 | if self._iterator is None: 115 | self._iterator = super().__iter__() 116 | try: 117 | batch = next(self._iterator) 118 | except StopIteration: 119 | self._iterator = None 120 | raise StopIteration 121 | return batch -------------------------------------------------------------------------------- /step7_pipeline_parallel_afab/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from flash_attn.flash_attn_interface import flash_attn_func 5 | from flash_attn.layers.rotary import apply_rotary_emb 6 | from flash_attn.ops.triton.layer_norm import layer_norm_fn 7 | import process_group_manager as pgm 8 | 9 | def flash_attention(q, k, v, causal = True): 10 | q = q.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 11 | k = k.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 12 | v = v.permute(0, 2, 1, 3) # [batch_size, seq_length, num_head , head_dim] 13 | return flash_attn_func(q, k, v, causal=causal) 14 | 15 | def get_cos_sin(seq_length, head_dim, base=500000.0): 16 | assert head_dim%2==0 17 | # Results on CUDA and CPU are different even with the same formula, To match transformers implementation. frequency should be computed on CPU 18 | theta = 1.0 / (base ** (torch.arange(0, head_dim, 2, dtype=torch.int64).float().to('cpu') / head_dim)) 19 | dtype = torch.bfloat16 20 | device = torch.device('cuda') 21 | position = torch.arange(seq_length).to(device).unsqueeze(1).float() # [seq_length, 1] 22 | # To match transformers implementation. 
m * theta should be computed on GPU 23 | theta = theta.to(device) 24 | return torch.cos(position.float()*theta.float()).to(dtype).repeat(1,2), torch.sin(position.float()*theta.float()).to(dtype).repeat(1,2) # [seq_length, head_dim], [seq_length, head_dim] 25 | 26 | class TritonRMSNorm(nn.Module): 27 | def __init__(self, hidden_size, eps=1e-5, device=None, dtype=None): 28 | super().__init__() 29 | self.eps = eps 30 | self.weight = nn.Parameter(torch.ones(hidden_size)) 31 | self.register_parameter("bias", None) 32 | 33 | def forward( 34 | self, hidden_states, residual=None, dropout_p=0.0, prenorm=False, residual_in_fp32=False, return_dropout_mask=False 35 | ): 36 | return layer_norm_fn( 37 | hidden_states, 38 | self.weight, 39 | None, 40 | residual=residual, 41 | eps=self.eps, 42 | dropout_p=dropout_p, 43 | prenorm=prenorm, 44 | residual_in_fp32=residual_in_fp32, 45 | is_rms_norm=True, 46 | return_dropout_mask=return_dropout_mask, 47 | ) 48 | 49 | class Attention(nn.Module): 50 | def __init__(self, config, layer_idx): 51 | super().__init__() 52 | self.hidden_size = config.hidden_size 53 | self.num_heads = config.num_attention_heads 54 | self.num_key_values = config.num_key_value_heads 55 | self.head_dim = self.hidden_size//self.num_heads 56 | assert config.num_attention_heads % pgm.process_group_manager.tp_world_size == 0, "num_attention_heads should be divisible by tp world size" 57 | assert config.num_key_value_heads % pgm.process_group_manager.tp_world_size == 0, "num_key_value_heads should be divisible by tp world size" 58 | self.num_local_heads = config.num_attention_heads // pgm.process_group_manager.tp_world_size # TP parallelism 59 | self.num_local_kv_heads = config.num_key_value_heads // pgm.process_group_manager.tp_world_size # TP parallelism 60 | 61 | 62 | self.q_proj = nn.Linear(config.hidden_size, self.num_heads*self.head_dim, bias=False) 63 | self.k_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 64 | self.v_proj = nn.Linear(config.hidden_size, self.num_key_values*self.head_dim, bias=False) 65 | self.out_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False) 66 | self.layer_idx = layer_idx 67 | 68 | def forward(self, x, cos, sin, attention_mask=None, position_ids=None): 69 | batch_size, seq_length, hidden_dim = x.size() 70 | q = self.q_proj(x) # [batch_size, seq_length, num_heads*head_dim] 71 | k = self.k_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 72 | v = self.v_proj(x) # [batch_size, seq_length, num_key_values*head_dim] 73 | 74 | q = q.view(batch_size, seq_length, self.num_local_heads, self.head_dim) # [batch_size, seq_length, num_heads, head_dim] 75 | k = k.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim) # [batch_size, seq_length, num_key_values, head_dim] 76 | q = apply_rotary_emb(q,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_heads, head_dim] 77 | k = apply_rotary_emb(k,cos[:, :self.head_dim // 2], sin[:, :self.head_dim // 2],interleaved=False) # [batch_size, seq_length, num_key_values, head_dim] 78 | q = q.transpose(1, 2) # [batch_size, num_heads, seq_length, head_dim] 79 | k = k.transpose(1, 2) # [batch_size, num_key_values, seq_length, head_dim] 80 | v = v.view(batch_size, seq_length, self.num_local_kv_heads, self.head_dim).transpose(1,2) # [batch_size, num_key_values, seq_length, head_dim] 81 | 82 | k = k.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, 
head_dim] 83 | v = v.repeat_interleave(self.num_local_heads // self.num_local_kv_heads, dim=1) # [batch_size, num_heads, seq_length, head_dim] 84 | 85 | causal = True if q.size(2) == k.size(2) else False # During the decoding phase, the length of q is usually 1. 86 | 87 | out = flash_attention(q, k, v, causal = causal) # [batch_size, seq_length, num_heads, head_dim] 88 | 89 | out = out.reshape(batch_size, seq_length, self.num_local_heads * self.head_dim) # [batch_size, seq_length, hidden_dim] 90 | out = self.out_proj(out) # [batch_size, seq_length, hidden_dim] 91 | return out 92 | 93 | class MLP(nn.Module): 94 | def __init__(self, config) -> None: 95 | super().__init__() 96 | self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 97 | self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False) 98 | self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False) 99 | 100 | def forward(self, x): 101 | # TODO: avoid single-line operations; they are harder to debug 102 | return self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) 103 | 104 | class DecoderLayer(nn.Module): 105 | # TritonRMSNorm -> Attention -> Residual -> TritonRMSNorm -> MLP -> Residual 106 | def __init__(self, config, layer_idx): 107 | super().__init__() 108 | self.input_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 109 | self.post_attention_layernorm = TritonRMSNorm(config.hidden_size, eps=config.rms_norm_eps) 110 | self.attention = Attention(config, layer_idx = layer_idx) 111 | self.mlp = MLP(config) 112 | self.layer_idx = layer_idx 113 | head_dim = config.hidden_size // config.num_attention_heads 114 | self.cos, self.sin = get_cos_sin(config.max_position_embeddings, head_dim=head_dim , base=config.rope_theta) # [max_position_embeddings, head_dim] 115 | 116 | def forward(self, x, attention_mask = None, position_ids = None): 117 | cos, sin = self.cos, self.sin 118 | x = x + self.attention(self.input_layernorm(x), cos, sin, attention_mask, position_ids) # Attention 119 | x = x + self.mlp(self.post_attention_layernorm(x)) # MLP 120 | return x 121 | 122 | class Llama(nn.Module): 123 | def __init__(self, config) -> None: 124 | super().__init__() 125 | # sanity check 126 | assert config.hidden_size % config.num_attention_heads==0 127 | assert config.num_attention_heads % config.num_key_value_heads==0 128 | 129 | # params 130 | self.vocab_size = config.vocab_size 131 | self.hidden_size = config.hidden_size 132 | self.num_heads = config.num_attention_heads 133 | self.num_key_values = config.num_key_value_heads 134 | self.head_dim = self.hidden_size//self.num_heads 135 | self.max_position_embeddings = config.max_position_embeddings 136 | self.num_layers = config.num_hidden_layers 137 | self.model_config = config 138 | 139 | # modules 140 | self.embedding = nn.Embedding(self.vocab_size, self.hidden_size) 141 | self.decoder_layers = nn.ModuleList([DecoderLayer(config,layer_idx = i) for i in range(self.num_layers)]) 142 | self.final_proj = nn.Linear(self.hidden_size, self.vocab_size, bias=False) 143 | self.final_norm = TritonRMSNorm(self.hidden_size, eps=config.rms_norm_eps) 144 | 145 | def forward(self, input_ids, attention_mask=None, position_ids: torch.Tensor = None): 146 | x = self.embedding(input_ids) 147 | for layer in self.decoder_layers: 148 | x = layer(x) # [batch_size, seq_length, hidden_dim] 149 | x = self.final_norm(x) 150 | logits = self.final_proj(x) 151 | 152 | return logits # [batch_size, seq_length, vocab_size]
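The grouped-query attention bookkeeping in Attention.forward above (fewer key/value heads than query heads, expanded with repeat_interleave) is easiest to follow with concrete shapes. The snippet below is an illustrative, self-contained sketch in plain PyTorch with made-up sizes; it uses an eager-mode attention product instead of flash_attn and is not part of the repository:

import torch

batch_size, seq_len, num_heads, num_kv_heads, head_dim = 2, 8, 4, 2, 16

q = torch.randn(batch_size, num_heads, seq_len, head_dim)     # query heads
k = torch.randn(batch_size, num_kv_heads, seq_len, head_dim)  # fewer key heads
v = torch.randn(batch_size, num_kv_heads, seq_len, head_dim)  # fewer value heads

# Each key/value head serves num_heads // num_kv_heads query heads, so expand them
# along the head dimension, exactly like the repeat_interleave calls above.
k = k.repeat_interleave(num_heads // num_kv_heads, dim=1)     # [2, 4, 8, 16]
v = v.repeat_interleave(num_heads // num_kv_heads, dim=1)     # [2, 4, 8, 16]

scores = (q @ k.transpose(-2, -1)) / head_dim ** 0.5          # [2, 4, 8, 8]
causal_mask = torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))
scores = scores.masked_fill(~causal_mask, float("-inf"))      # causal masking
out = scores.softmax(dim=-1) @ v                              # [2, 4, 8, 16]
print(out.shape)                                              # torch.Size([2, 4, 8, 16])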
-------------------------------------------------------------------------------- /step7_pipeline_parallel_afab/pipeline_parallel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.distributed as dist 6 | 7 | import process_group_manager as pgm 8 | 9 | ### begin PP communications 10 | STEP, VERBOSE = 0, os.environ.get("VERBOSE", "0") == "1" 11 | def pipeline_communicate(operation, device, dtype, tensor=None, shapes=None): 12 | """ 13 | Handles point-to-point communication between pipeline stages for forward and backward passes. 14 | 15 | Args: 16 | operation (str): Type of communication operation ('recv_forward', 'send_forward', 17 | 'recv_backward', 'send_backward') 18 | device: Target device for tensor operations (e.g., CPU, GPU) 19 | dtype: Data type for tensors 20 | tensor: Input tensor for send operations (default: None) 21 | shapes: Shape specifications for receiving tensors (default: None) 22 | 23 | Returns: 24 | torch.Tensor or None: Received tensor for receive operations, None for send operations 25 | """ 26 | global STEP 27 | global VERBOSE 28 | 29 | if operation == 'recv_forward': 30 | # Skip if this is the first pipeline stage (nothing to receive) 31 | if pgm.process_group_manager.pp_is_first_stage: return None 32 | # Create empty tensor to receive data 33 | tensor = torch.empty(shapes, requires_grad=True, device=device, dtype=dtype) 34 | src = pgm.process_group_manager.pp_prev_rank 35 | 36 | elif operation == 'send_forward': 37 | # Skip if this is the last pipeline stage (nothing to send forward) 38 | if pgm.process_group_manager.pp_is_last_stage: return 39 | dest = pgm.process_group_manager.pp_next_rank 40 | 41 | elif operation == 'recv_backward': 42 | # Skip if this is the last pipeline stage (nothing to receive from backward) 43 | if pgm.process_group_manager.pp_is_last_stage: return None 44 | tensor = torch.empty(shapes, requires_grad=True, device=device, dtype=dtype) 45 | src = pgm.process_group_manager.pp_next_rank 46 | 47 | elif operation == 'send_backward': 48 | # Skip if this is the first pipeline stage (nothing to send backward) 49 | if pgm.process_group_manager.pp_is_first_stage: return 50 | dest = pgm.process_group_manager.pp_prev_rank 51 | 52 | # Determine if this is a send operation and set peer rank 53 | is_send = operation.startswith('send') 54 | peer_rank = dest if is_send else src 55 | 56 | # Create P2P operation (send or receive) 57 | op = dist.P2POp(dist.isend if is_send else dist.irecv, tensor, peer_rank) 58 | 59 | if VERBOSE: 60 | print(f"{operation} | {'sending' if is_send else 'receiving'} {operation.split('_')[1]} " 61 | f"{pgm.process_group_manager.pp_rank} {'→' if is_send else '←'} {peer_rank} | " 62 | f"STEP:{STEP} | RANK:{pgm.process_group_manager.pp_rank}", flush=True) 63 | 64 | # Execute communication operation and wait for completion 65 | [req.wait() for req in dist.batch_isend_irecv([op])] 66 | torch.cuda.synchronize() 67 | 68 | if VERBOSE: STEP += 1 69 | 70 | # Return received tensor for receive operations, None for send operations 71 | return tensor if not is_send else None 72 | ### end PP communications 73 | 74 | ### begin Pipeline Parallel 75 | class PipelineParallel(nn.Module): 76 | def __init__(self, model, config): 77 | super().__init__() 78 | layer_distribution = self.distribute_layers(config.num_hidden_layers) 79 | self.embedding = model.embedding if pgm.process_group_manager.pp_is_first_stage 
else nn.Identity() 80 | self.decoder_layers = nn.ModuleDict({str(i): model.decoder_layers[i] for i in layer_distribution}) 81 | self.final_norm = model.final_norm if pgm.process_group_manager.pp_is_last_stage else nn.Identity() 82 | self.final_proj = model.final_proj if pgm.process_group_manager.pp_is_last_stage else nn.Identity() 83 | 84 | def distribute_layers(self, num_layers): 85 | layers_per_gpu = [num_layers // pgm.process_group_manager.pp_world_size + (1 if i < num_layers % pgm.process_group_manager.pp_world_size else 0) for i in range(pgm.process_group_manager.pp_world_size)] 86 | start_layer = sum(layers_per_gpu[:pgm.process_group_manager.pp_rank]) 87 | return list(range(start_layer, start_layer + layers_per_gpu[pgm.process_group_manager.pp_rank])) 88 | 89 | def forward(self, input_ids, position_ids, hidden_states): 90 | x = hidden_states if hidden_states is not None else input_ids 91 | x = self.embedding(x) 92 | for layer in self.decoder_layers.values(): 93 | x = layer(x, position_ids=position_ids) 94 | x = self.final_norm(x) 95 | return self.final_proj(x) 96 | 97 | def backward(self, input_tensor, output_tensor, output_tensor_grad): 98 | if input_tensor is not None: input_tensor.retain_grad() 99 | if output_tensor_grad is None: 100 | output_tensor_grad = torch.ones_like(output_tensor, memory_format=torch.preserve_format) 101 | # torch.autograd.backward will automatically accumulate gradients in the leaf tensors (cf: https://pytorch.org/docs/stable/generated/torch.autograd.backward.html) 102 | torch.autograd.backward(output_tensor, grad_tensors=output_tensor_grad, retain_graph=False, create_graph=False) 103 | return input_tensor.grad if input_tensor is not None else None 104 | 105 | def train_step_pipeline_afab(model, data_loader, tensor_shapes, device, dtype): 106 | """ 107 | Executes a training step using All-Forward-All-Backward (AFAB) pipeline parallelism. 108 | Runs the forward passes for all micro-batches first, then all the backward passes; this keeps the schedule simple but requires keeping the activations of every micro-batch alive until its backward pass.
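    For example, with 2 stages and 3 micro-batches, and assuming forward and backward each take
    one unit of time, the schedule looks roughly like this (F = forward, B = backward, . = bubble):

        stage 0: F0 F1 F2 .  .  B0 B1 B2
        stage 1: .  F0 F1 F2 B0 B1 B2 .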
109 | """ 110 | logging_loss: torch.float32 = 0.0 111 | input_tensors, output_tensors = [], [] 112 | requires_grad_sync = pgm.process_group_manager.dp_world_size > 1 113 | 114 | # === All Forward Pass Phase === 115 | for _ in range(data_loader.grad_acc_steps): 116 | input_tensor = pipeline_communicate(operation='recv_forward', shapes=tensor_shapes, device=device, dtype=dtype) 117 | batch = next(data_loader) 118 | batch["hidden_states"] = input_tensor.to(device) if input_tensor is not None else input_tensor 119 | output_tensor = model.forward(input_ids=batch["input_ids"].to(device), position_ids=batch["position_ids"].to(device), hidden_states=batch["hidden_states"]) 120 | pipeline_communicate(operation='send_forward', tensor=output_tensor, device=device, dtype=dtype) 121 | 122 | # calculate loss on the last stage 123 | if pgm.process_group_manager.pp_is_last_stage: 124 | output_tensor = F.cross_entropy(output_tensor.transpose(1, 2), batch["target_ids"].to(device), reduction='mean') 125 | logging_loss += output_tensor.item() / data_loader.grad_acc_steps 126 | 127 | input_tensors.append(input_tensor) 128 | output_tensors.append(output_tensor) 129 | 130 | # === All Backward Pass Phase === 131 | for ith_microbatch in range(data_loader.grad_acc_steps): 132 | if requires_grad_sync: 133 | is_last_iteration = (ith_microbatch == data_loader.grad_acc_steps - 1) 134 | model.require_backward_grad_sync = is_last_iteration 135 | output_tensor_grad = pipeline_communicate(operation='recv_backward', shapes=tensor_shapes, device=device, dtype=dtype) 136 | input_tensor, output_tensor = input_tensors.pop(0), output_tensors.pop(0) 137 | input_tensor_grad = model.backward(input_tensor, output_tensor, output_tensor_grad) 138 | pipeline_communicate(operation='send_backward', tensor=input_tensor_grad, device=device, dtype=dtype) 139 | 140 | return logging_loss 141 | 142 | ### end Pipeline Parallel -------------------------------------------------------------------------------- /step7_pipeline_parallel_afab/process_group_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class ProcessGroupManager: 6 | def __init__(self, dp_size, pp_size, tp_size): 7 | self.global_rank = dist.get_rank() 8 | self.world_size = dist.get_world_size() 9 | self.local_rank = int(os.environ.get("LOCAL_RANK", self.global_rank % self.world_size)) 10 | 11 | assert self.world_size == dp_size * pp_size * tp_size, f"World size ({self.world_size}) != DP ({self.dp_size}) * PP ({self.pp_size}) * TP ({self.tp_size})" 12 | 13 | self.grid = torch.arange(self.world_size).view(dp_size, pp_size, tp_size) # DP * PP * TP grid 14 | # Find the position of the current process in the grid 15 | self.dp_rank, self.pp_rank, self.tp_rank = (self.grid == self.global_rank).nonzero().flatten().tolist() 16 | 17 | # Process group creation - Update indexing to match new grid order 18 | self.tp_group = dist.new_subgroups_by_enumeration([self.grid[d, p, :].tolist() for d in range(dp_size) for p in range(pp_size)])[0] 19 | self.pp_group = dist.new_subgroups_by_enumeration([self.grid[d, :, t].tolist() for d in range(dp_size) for t in range(tp_size)])[0] 20 | self.dp_group = dist.new_subgroups_by_enumeration([self.grid[:, p, t].tolist() for p in range(pp_size) for t in range(tp_size)])[0] 21 | self.pp_dp_group = dist.new_subgroups_by_enumeration([self.grid[:, :, t].flatten().tolist() for t in range(tp_size)])[0] 22 | 23 | self.world_group = dist.group.WORLD 24 
| 25 | # Update group IDs with new grid ordering 26 | self.tp_group_ids = self.grid[self.dp_rank, self.pp_rank, :].tolist() 27 | self.pp_group_ids = self.grid[self.dp_rank, :, self.tp_rank].tolist() 28 | self.dp_group_ids = self.grid[:, self.pp_rank, self.tp_rank].tolist() 29 | 30 | # Tensor parallelism 31 | self.tp_world_size = dist.get_world_size(group=self.tp_group) 32 | self.tp_first_rank = self.tp_group_ids[0] 33 | self.tp_last_rank = self.tp_group_ids[-1] 34 | 35 | # Pipeline parallelism 36 | self.pp_world_size = dist.get_world_size(group=self.pp_group) 37 | self.pp_first_rank = self.pp_group_ids[0] 38 | self.pp_last_rank = self.pp_group_ids[-1] 39 | self.pp_is_first_stage = self.pp_rank == 0 40 | self.pp_is_last_stage = self.pp_rank == self.pp_world_size - 1 41 | self.pp_next_rank = None if self.pp_rank == self.pp_world_size - 1 else int(self.grid[self.dp_rank, self.pp_rank + 1, self.tp_rank].item()) 42 | self.pp_prev_rank = None if self.pp_rank == 0 else int(self.grid[self.dp_rank, self.pp_rank - 1, self.tp_rank].item()) 43 | 44 | # Data parallelism 45 | self.dp_world_size = dist.get_world_size(group=self.dp_group) 46 | self.dp_first_rank = self.dp_group_ids[0] 47 | self.dp_last_rank = self.dp_group_ids[-1] 48 | 49 | def __str__(self): 50 | return f"DP({self.dp_world_size})-PP({self.pp_world_size})-TP({self.tp_world_size})-Rank({self.global_rank})" 51 | 52 | def setup_process_group_manager(dp_size, pp_size, tp_size): 53 | global process_group_manager 54 | process_group_manager = ProcessGroupManager(dp_size, pp_size, tp_size) -------------------------------------------------------------------------------- /step7_pipeline_parallel_afab/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import builtins 5 | import fcntl 6 | 7 | def print(*args, is_print_rank=True, **kwargs): 8 | """ solves multi-process interleaved print problem """ 9 | if not is_print_rank: return 10 | with open(__file__, "r") as fh: 11 | fcntl.flock(fh, fcntl.LOCK_EX) 12 | try: 13 | builtins.print(*args, **kwargs) 14 | finally: 15 | fcntl.flock(fh, fcntl.LOCK_UN) 16 | 17 | def set_all_seed(seed): 18 | for module in [random, np.random]: module.seed(seed) 19 | torch.manual_seed(seed) 20 | if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) 21 | 22 | def to_readable_format(num, precision=3): 23 | num_str = str(num) 24 | length = len(num_str) 25 | 26 | def format_with_precision(main, decimal, suffix): 27 | if precision == 0: 28 | return f"{main}{suffix}" 29 | return f"{main}.{decimal[:precision]}{suffix}" 30 | 31 | if length > 12: # Trillions 32 | return format_with_precision(num_str[:-12], num_str[-12:], 'T') 33 | elif length > 9: # Billions 34 | return format_with_precision(num_str[:-9], num_str[-9:], 'B') 35 | elif length > 6: # Millions 36 | return format_with_precision(num_str[:-6], num_str[-6:], 'M') 37 | elif length > 3: # Thousands 38 | return format_with_precision(num_str[:-3], num_str[-3:], 'K') 39 | else: 40 | return num_str -------------------------------------------------------------------------------- /step8_pipeline_parallel_1f1b/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader, DistributedSampler 3 | import numpy as np 4 | from functools import partial 5 | from datasets import Features, Sequence, Value, load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | import 
process_group_manager as pgm 9 | 10 | class MicroBatchDataLoader(DataLoader): 11 | def __init__(self, seq_len, micro_batch_size, grad_acc_steps, dataset_name, tokenizer_name, max_tokens, num_workers, num_proc, seed, split="train"): 12 | 13 | self.micro_batch_size = micro_batch_size 14 | self.grad_acc_steps = grad_acc_steps 15 | self.seq_len = seq_len 16 | 17 | self.global_batch_size = micro_batch_size * grad_acc_steps * pgm.process_group_manager.dp_world_size 18 | 19 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 20 | self.dataset = load_dataset(dataset_name, split=split) 21 | 22 | # Tokenize and chunk the dataset 23 | self.tokenized_dataset = self.tokenize_dataset(self.dataset, "text", self.seq_len, num_proc) 24 | 25 | total_tokens = self.tokenized_dataset.num_rows * (self.seq_len + 1) 26 | assert total_tokens >= max_tokens, f"Not enough tokens. Have {total_tokens} tokens but need {max_tokens} tokens" 27 | 28 | self.sampler = DistributedSampler( 29 | self.tokenized_dataset, 30 | num_replicas=pgm.process_group_manager.dp_world_size, 31 | rank=pgm.process_group_manager.dp_rank, 32 | seed=seed, 33 | shuffle=False 34 | ) 35 | 36 | super().__init__( 37 | self.tokenized_dataset, 38 | batch_size=micro_batch_size, 39 | collate_fn=self.collate_batch, 40 | pin_memory=True, 41 | num_workers=num_workers, 42 | sampler=self.sampler, 43 | shuffle=False, 44 | ) 45 | 46 | def tokenizer_group_text(self, examples, tokenizer, sequence_length): 47 | """Tokenize a list of texts and group them in chunks of sequence_length + 1""" 48 | tokenized_text_batch = tokenizer.batch_encode_plus( 49 | examples, 50 | return_attention_mask=False, 51 | return_token_type_ids=False, 52 | return_tensors='np' 53 | ) 54 | concatenated_tokens = {'input_ids': np.concatenate(tokenized_text_batch['input_ids'])} 55 | total_length = len(concatenated_tokens['input_ids']) 56 | 57 | if total_length >= sequence_length + 1: 58 | total_length = ((total_length - 1) // sequence_length) * sequence_length + 1 59 | 60 | result = { 61 | 'input_ids': [ 62 | concatenated_tokens['input_ids'][i : i + sequence_length + 1] 63 | for i in range(0, total_length - sequence_length, sequence_length) 64 | ] 65 | } 66 | return result 67 | 68 | def tokenize_dataset(self, dataset, text_column_name, sequence_length, num_proc): 69 | """Tokenize the dataset and group texts in chunks of sequence_length + 1""" 70 | tokenizer_func = partial( 71 | self.tokenizer_group_text, 72 | tokenizer=self.tokenizer, 73 | sequence_length=sequence_length 74 | ) 75 | 76 | tokenized_dataset = dataset.map( 77 | tokenizer_func, 78 | input_columns=text_column_name, 79 | remove_columns=dataset.column_names, 80 | features=Features({ 81 | "input_ids": Sequence(feature=Value(dtype="int64"), length=sequence_length + 1) 82 | }), 83 | batched=True, 84 | num_proc=num_proc, 85 | load_from_cache_file=True, # Preprocess dataset only once and cache it 86 | desc=f"Grouping texts in chunks of {sequence_length+1}", 87 | ) 88 | 89 | return tokenized_dataset 90 | 91 | def collate_batch(self, batch): 92 | batch_input_ids = torch.stack([torch.tensor(item['input_ids']) for item in batch]) 93 | batch_size = batch_input_ids.size(0) 94 | input_ids = batch_input_ids[:, :-1].contiguous() 95 | target_ids = batch_input_ids[:, 1:].contiguous() 96 | position_ids = torch.arange(self.seq_len, dtype=torch.long).unsqueeze(0).expand(batch_size, -1).contiguous() 97 | attn_mask = torch.tril(torch.ones((self.seq_len, self.seq_len), dtype=torch.bool)) 98 | attn_mask = attn_mask.unsqueeze(0).expand(batch_size, 
-1, -1).contiguous() 99 | 100 | return { 101 | "input_ids": input_ids, 102 | "target_ids": target_ids, 103 | "position_ids": position_ids, 104 | "attn_mask": attn_mask, 105 | "hidden_states": None 106 | } 107 | 108 | def __iter__(self): 109 | if self._iterator is None: 110 | self._iterator = super().__iter__() 111 | return self 112 | 113 | def __next__(self): 114 | if self._iterator is None: 115 | self._iterator = super().__iter__() 116 | try: 117 | batch = next(self._iterator) 118 | except StopIteration: 119 | self._iterator = None 120 | raise StopIteration 121 | return batch -------------------------------------------------------------------------------- /step8_pipeline_parallel_1f1b/process_group_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class ProcessGroupManager: 6 | def __init__(self, dp_size, pp_size, tp_size): 7 | self.global_rank = dist.get_rank() 8 | self.world_size = dist.get_world_size() 9 | self.local_rank = int(os.environ.get("LOCAL_RANK", self.global_rank % self.world_size)) 10 | 11 | assert self.world_size == dp_size * pp_size * tp_size, f"World size ({self.world_size}) != DP ({dp_size}) * PP ({pp_size}) * TP ({tp_size})" 12 | 13 | self.grid = torch.arange(self.world_size).view(dp_size, pp_size, tp_size) # DP * PP * TP grid 14 | # Find the position of the current process in the grid 15 | self.dp_rank, self.pp_rank, self.tp_rank = (self.grid == self.global_rank).nonzero().flatten().tolist() 16 | 17 | # Process group creation - Update indexing to match new grid order 18 | self.tp_group = dist.new_subgroups_by_enumeration([self.grid[d, p, :].tolist() for d in range(dp_size) for p in range(pp_size)])[0] 19 | self.pp_group = dist.new_subgroups_by_enumeration([self.grid[d, :, t].tolist() for d in range(dp_size) for t in range(tp_size)])[0] 20 | self.dp_group = dist.new_subgroups_by_enumeration([self.grid[:, p, t].tolist() for p in range(pp_size) for t in range(tp_size)])[0] 21 | self.pp_dp_group = dist.new_subgroups_by_enumeration([self.grid[:, :, t].flatten().tolist() for t in range(tp_size)])[0] 22 | 23 | self.world_group = dist.group.WORLD 24 | 25 | # Update group IDs with new grid ordering 26 | self.tp_group_ids = self.grid[self.dp_rank, self.pp_rank, :].tolist() 27 | self.pp_group_ids = self.grid[self.dp_rank, :, self.tp_rank].tolist() 28 | self.dp_group_ids = self.grid[:, self.pp_rank, self.tp_rank].tolist() 29 | 30 | # Tensor parallelism 31 | self.tp_world_size = dist.get_world_size(group=self.tp_group) 32 | self.tp_first_rank = self.tp_group_ids[0] 33 | self.tp_last_rank = self.tp_group_ids[-1] 34 | 35 | # Pipeline parallelism 36 | self.pp_world_size = dist.get_world_size(group=self.pp_group) 37 | self.pp_first_rank = self.pp_group_ids[0] 38 | self.pp_last_rank = self.pp_group_ids[-1] 39 | self.pp_is_first_stage = self.pp_rank == 0 40 | self.pp_is_last_stage = self.pp_rank == self.pp_world_size - 1 41 | self.pp_next_rank = None if self.pp_rank == self.pp_world_size - 1 else int(self.grid[self.dp_rank, self.pp_rank + 1, self.tp_rank].item()) 42 | self.pp_prev_rank = None if self.pp_rank == 0 else int(self.grid[self.dp_rank, self.pp_rank - 1, self.tp_rank].item()) 43 | 44 | # Data parallelism 45 | self.dp_world_size = dist.get_world_size(group=self.dp_group) 46 | self.dp_first_rank = self.dp_group_ids[0] 47 | self.dp_last_rank = self.dp_group_ids[-1] 48 | 49 | def __str__(self): 50 | return
f"DP({self.dp_world_size})-PP({self.pp_world_size})-TP({self.tp_world_size})-Rank({self.global_rank})" 51 | 52 | def setup_process_group_manager(dp_size, pp_size, tp_size): 53 | global process_group_manager 54 | process_group_manager = ProcessGroupManager(dp_size, pp_size, tp_size) -------------------------------------------------------------------------------- /step8_pipeline_parallel_1f1b/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import builtins 5 | import fcntl 6 | 7 | def print(*args, is_print_rank=True, **kwargs): 8 | """ solves multi-process interleaved print problem """ 9 | if not is_print_rank: return 10 | with open(__file__, "r") as fh: 11 | fcntl.flock(fh, fcntl.LOCK_EX) 12 | try: 13 | builtins.print(*args, **kwargs) 14 | finally: 15 | fcntl.flock(fh, fcntl.LOCK_UN) 16 | 17 | def set_all_seed(seed): 18 | for module in [random, np.random]: module.seed(seed) 19 | torch.manual_seed(seed) 20 | if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) 21 | 22 | def to_readable_format(num, precision=3): 23 | num_str = str(num) 24 | length = len(num_str) 25 | 26 | def format_with_precision(main, decimal, suffix): 27 | if precision == 0: 28 | return f"{main}{suffix}" 29 | return f"{main}.{decimal[:precision]}{suffix}" 30 | 31 | if length > 12: # Trillions 32 | return format_with_precision(num_str[:-12], num_str[-12:], 'T') 33 | elif length > 9: # Billions 34 | return format_with_precision(num_str[:-9], num_str[-9:], 'B') 35 | elif length > 6: # Millions 36 | return format_with_precision(num_str[:-6], num_str[-6:], 'M') 37 | elif length > 3: # Thousands 38 | return format_with_precision(num_str[:-3], num_str[-3:], 'K') 39 | else: 40 | return num_str --------------------------------------------------------------------------------