├── .gitignore ├── LICENSE ├── OWNERS ├── README.md ├── SECURITY.md ├── amp_C.py ├── ascendspeed ├── __init__.py ├── arguments.py ├── checkpointing.py ├── core │ ├── __init__.py │ ├── parallel_state.py │ ├── tensor_parallel │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ └── utils.py │ └── utils.py ├── data │ ├── Makefile │ ├── __init__.py │ ├── autoaugment.py │ ├── bert_dataset.py │ ├── biencoder_dataset_utils.py │ ├── blendable_dataset.py │ ├── data_samplers.py │ ├── dataset_utils.py │ ├── decoder_packed_mtf_dataset.py │ ├── distdata.py │ ├── gpt_dataset.py │ ├── helpers.cpp │ ├── helpers.cpython-37m-aarch64-linux-gnu.so │ ├── helpers.cpython-37m-x86_64-linux-gnu.so │ ├── ict_dataset.py │ ├── indexed_dataset.py │ ├── mlm_dataset.py │ ├── mtf_dataset.py │ ├── orqa_wiki_dataset.py │ ├── parse.py │ ├── realm_dataset_utils.py │ ├── realm_index.py │ ├── t5_dataset.py │ ├── test │ │ ├── test_indexed_dataset.py │ │ └── test_preprocess_data.sh │ ├── test_utils.py │ └── vit_dataset.py ├── enums.py ├── fp16_deprecated │ └── loss_scaler.py ├── global_vars.py ├── indexer.py ├── initialize.py ├── learning_rates.py ├── memory.py ├── microbatches.py ├── model │ ├── __init__.py │ ├── distributed.py │ ├── enums.py │ ├── fused_bias_gelu.py │ ├── fused_layer_norm.py │ ├── fused_softmax.py │ ├── glu_activations.py │ ├── gpt_model.py │ ├── language_model.py │ ├── llama2_model.py │ ├── llama_model.py │ ├── module.py │ ├── positional_embeddings.py │ ├── transformer.py │ ├── utils.py │ └── vit_model.py ├── mpu │ ├── __init__.py │ ├── data.py │ ├── layers.py │ ├── mappings.py │ ├── random.py │ └── tests │ │ ├── __init__.py │ │ ├── commons.py │ │ ├── test_cross_entropy.py │ │ ├── test_data.py │ │ ├── test_initialize.py │ │ ├── test_layers.py │ │ └── test_random.py ├── ops │ ├── FlashAttention.cpp │ ├── FlashAttention.py │ ├── README.md │ └── __init__.py ├── optimizer │ ├── __init__.py │ ├── adam.py │ ├── clip_grads.py │ ├── grad_scaler.py │ └── optimizer.py ├── p2p_communication.py ├── package_info.py ├── schedules.py ├── text_generation_utils.py ├── timers.py ├── tokenizer │ ├── __init__.py │ ├── bert_tokenization.py │ ├── gpt2_tokenization.py │ └── tokenizer.py ├── training.py └── utils.py ├── ci ├── access_control_test.py └── build.sh ├── examples ├── README.md ├── bloom │ ├── README.md │ ├── generate_bloom.py │ ├── generate_bloom_7b1.sh │ ├── images │ │ ├── 7b_lm_loss.png │ │ └── relative_error.png │ ├── pretrain_bloom_550m.sh │ └── pretrain_bloom_7b1.sh ├── gpt │ ├── pretrain_gpt_1p.sh │ ├── pretrain_gpt_ptd_8p.sh │ └── pretrain_gpt_td_8p.sh ├── llama │ ├── README.md │ ├── evaluate_llama_zeroshot_7b.sh │ └── pretrain_llama_ptd_16B.sh └── llama2 │ └── pretrain_llama_ptd_7B.sh ├── pretrain_bloom.py ├── pretrain_gpt.py ├── pretrain_llama.py ├── pretrain_llama2.py ├── requirements.txt ├── setup.py ├── tasks ├── ckpt_convert │ ├── examples │ │ └── llama_convert_weights_when_tp_pp_change.sh │ └── llama │ │ ├── ckpt_utils.py │ │ ├── convert_weights_from_huggingface.py │ │ └── convert_weights_when_tp_pp_change.py ├── data_utils.py ├── ensemble_classifier.py ├── eval_harness │ ├── download.py │ ├── evaluate.py │ └── report-to-csv.py ├── eval_utils.py ├── finetune_utils.py ├── glue │ ├── cola.py │ ├── data.py │ ├── finetune.py │ ├── mnli.py │ ├── mrpc.py │ ├── qnli.py │ ├── qqp.py │ ├── rte.py │ ├── sst2.py │ └── stsb.py ├── main.py ├── orqa │ ├── evaluate_orqa.py │ ├── evaluate_utils.py │ └── natural_questions │ │ ├── nq.py │ │ ├── qa_utils.py │ │ └── tokenizers.py ├── race │ ├── data.py │ └── finetune.py ├── vision │ ├── classification.py │ ├── eval_utils.py │ ├── finetune_utils.py │ └── main.py └── zeroshot │ ├── datasets.py │ ├── detokenizer.py │ ├── evaluate.py │ └── evaluate_llama.py └── tests ├── run_megatron.py ├── st ├── test_bloom │ ├── run_bloom_ptd.py │ └── test_bloom_ptd.sh ├── test_gpt │ ├── run_gpt_ptd.py │ └── test_gpt_ptd.sh └── test_llama │ ├── run_llama_ptd.py │ ├── test_llama_opp.sh │ ├── test_llama_ptd.sh │ └── test_llama_vp.sh ├── test_activations.py ├── test_basic.py ├── test_checkpoints.py ├── test_dataloaders.py ├── test_model.py ├── test_preprocessing.py ├── test_tensor_parallel.py └── test_training.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/LICENSE -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/OWNERS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/SECURITY.md -------------------------------------------------------------------------------- /amp_C.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ascendspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/__init__.py -------------------------------------------------------------------------------- /ascendspeed/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/arguments.py -------------------------------------------------------------------------------- /ascendspeed/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/checkpointing.py -------------------------------------------------------------------------------- /ascendspeed/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/core/__init__.py -------------------------------------------------------------------------------- /ascendspeed/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/core/parallel_state.py -------------------------------------------------------------------------------- /ascendspeed/core/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/core/tensor_parallel/__init__.py -------------------------------------------------------------------------------- /ascendspeed/core/tensor_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/core/tensor_parallel/cross_entropy.py -------------------------------------------------------------------------------- /ascendspeed/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /ascendspeed/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/core/utils.py -------------------------------------------------------------------------------- /ascendspeed/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/Makefile -------------------------------------------------------------------------------- /ascendspeed/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ascendspeed/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/autoaugment.py -------------------------------------------------------------------------------- /ascendspeed/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/bert_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /ascendspeed/data/blendable_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/blendable_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/data_samplers.py -------------------------------------------------------------------------------- /ascendspeed/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/dataset_utils.py -------------------------------------------------------------------------------- /ascendspeed/data/decoder_packed_mtf_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/decoder_packed_mtf_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/distdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/distdata.py -------------------------------------------------------------------------------- /ascendspeed/data/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/gpt_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/helpers.cpp -------------------------------------------------------------------------------- /ascendspeed/data/helpers.cpython-37m-aarch64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/helpers.cpython-37m-aarch64-linux-gnu.so -------------------------------------------------------------------------------- /ascendspeed/data/helpers.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/helpers.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /ascendspeed/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/ict_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/indexed_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/mlm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/mlm_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/mtf_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/mtf_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/parse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/parse.py -------------------------------------------------------------------------------- /ascendspeed/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /ascendspeed/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/realm_index.py -------------------------------------------------------------------------------- /ascendspeed/data/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/t5_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /ascendspeed/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /ascendspeed/data/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/test_utils.py -------------------------------------------------------------------------------- /ascendspeed/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/data/vit_dataset.py -------------------------------------------------------------------------------- /ascendspeed/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/enums.py -------------------------------------------------------------------------------- /ascendspeed/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /ascendspeed/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/global_vars.py -------------------------------------------------------------------------------- /ascendspeed/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/indexer.py -------------------------------------------------------------------------------- /ascendspeed/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/initialize.py -------------------------------------------------------------------------------- /ascendspeed/learning_rates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/learning_rates.py -------------------------------------------------------------------------------- /ascendspeed/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/memory.py -------------------------------------------------------------------------------- /ascendspeed/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/microbatches.py -------------------------------------------------------------------------------- /ascendspeed/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/__init__.py -------------------------------------------------------------------------------- /ascendspeed/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/distributed.py -------------------------------------------------------------------------------- /ascendspeed/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/enums.py -------------------------------------------------------------------------------- /ascendspeed/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /ascendspeed/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/fused_layer_norm.py -------------------------------------------------------------------------------- /ascendspeed/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/fused_softmax.py -------------------------------------------------------------------------------- /ascendspeed/model/glu_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/glu_activations.py -------------------------------------------------------------------------------- /ascendspeed/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/gpt_model.py -------------------------------------------------------------------------------- /ascendspeed/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/language_model.py -------------------------------------------------------------------------------- /ascendspeed/model/llama2_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/llama2_model.py -------------------------------------------------------------------------------- /ascendspeed/model/llama_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/llama_model.py -------------------------------------------------------------------------------- /ascendspeed/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/module.py -------------------------------------------------------------------------------- /ascendspeed/model/positional_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/positional_embeddings.py -------------------------------------------------------------------------------- /ascendspeed/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/transformer.py -------------------------------------------------------------------------------- /ascendspeed/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/utils.py -------------------------------------------------------------------------------- /ascendspeed/model/vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/model/vit_model.py -------------------------------------------------------------------------------- /ascendspeed/mpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/__init__.py -------------------------------------------------------------------------------- /ascendspeed/mpu/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/data.py -------------------------------------------------------------------------------- /ascendspeed/mpu/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/layers.py -------------------------------------------------------------------------------- /ascendspeed/mpu/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/mappings.py -------------------------------------------------------------------------------- /ascendspeed/mpu/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/random.py -------------------------------------------------------------------------------- /ascendspeed/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ascendspeed/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/tests/commons.py -------------------------------------------------------------------------------- /ascendspeed/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /ascendspeed/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/tests/test_data.py -------------------------------------------------------------------------------- /ascendspeed/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /ascendspeed/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /ascendspeed/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/mpu/tests/test_random.py -------------------------------------------------------------------------------- /ascendspeed/ops/FlashAttention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/ops/FlashAttention.cpp -------------------------------------------------------------------------------- /ascendspeed/ops/FlashAttention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/ops/FlashAttention.py -------------------------------------------------------------------------------- /ascendspeed/ops/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/ops/README.md -------------------------------------------------------------------------------- /ascendspeed/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .FlashAttention import * 2 | -------------------------------------------------------------------------------- /ascendspeed/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/optimizer/__init__.py -------------------------------------------------------------------------------- /ascendspeed/optimizer/adam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/optimizer/adam.py -------------------------------------------------------------------------------- /ascendspeed/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/optimizer/clip_grads.py -------------------------------------------------------------------------------- /ascendspeed/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /ascendspeed/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/optimizer/optimizer.py -------------------------------------------------------------------------------- /ascendspeed/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/p2p_communication.py -------------------------------------------------------------------------------- /ascendspeed/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/package_info.py -------------------------------------------------------------------------------- /ascendspeed/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/schedules.py -------------------------------------------------------------------------------- /ascendspeed/text_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/text_generation_utils.py -------------------------------------------------------------------------------- /ascendspeed/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/timers.py -------------------------------------------------------------------------------- /ascendspeed/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/tokenizer/__init__.py -------------------------------------------------------------------------------- /ascendspeed/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /ascendspeed/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /ascendspeed/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /ascendspeed/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/training.py -------------------------------------------------------------------------------- /ascendspeed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ascendspeed/utils.py -------------------------------------------------------------------------------- /ci/access_control_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ci/access_control_test.py -------------------------------------------------------------------------------- /ci/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/ci/build.sh -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ## Recipes and Scripts 2 | 3 | -------------------------------------------------------------------------------- /examples/bloom/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/bloom/README.md -------------------------------------------------------------------------------- /examples/bloom/generate_bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/bloom/generate_bloom.py -------------------------------------------------------------------------------- /examples/bloom/generate_bloom_7b1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/bloom/generate_bloom_7b1.sh -------------------------------------------------------------------------------- /examples/bloom/images/7b_lm_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/bloom/images/7b_lm_loss.png -------------------------------------------------------------------------------- /examples/bloom/images/relative_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/bloom/images/relative_error.png -------------------------------------------------------------------------------- /examples/bloom/pretrain_bloom_550m.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/bloom/pretrain_bloom_550m.sh -------------------------------------------------------------------------------- /examples/bloom/pretrain_bloom_7b1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/bloom/pretrain_bloom_7b1.sh -------------------------------------------------------------------------------- /examples/gpt/pretrain_gpt_1p.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/gpt/pretrain_gpt_1p.sh -------------------------------------------------------------------------------- /examples/gpt/pretrain_gpt_ptd_8p.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/gpt/pretrain_gpt_ptd_8p.sh -------------------------------------------------------------------------------- /examples/gpt/pretrain_gpt_td_8p.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/gpt/pretrain_gpt_td_8p.sh -------------------------------------------------------------------------------- /examples/llama/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/llama/README.md -------------------------------------------------------------------------------- /examples/llama/evaluate_llama_zeroshot_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/llama/evaluate_llama_zeroshot_7b.sh -------------------------------------------------------------------------------- /examples/llama/pretrain_llama_ptd_16B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/llama/pretrain_llama_ptd_16B.sh -------------------------------------------------------------------------------- /examples/llama2/pretrain_llama_ptd_7B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/examples/llama2/pretrain_llama_ptd_7B.sh -------------------------------------------------------------------------------- /pretrain_bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/pretrain_bloom.py -------------------------------------------------------------------------------- /pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/pretrain_gpt.py -------------------------------------------------------------------------------- /pretrain_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/pretrain_llama.py -------------------------------------------------------------------------------- /pretrain_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/pretrain_llama2.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/setup.py -------------------------------------------------------------------------------- /tasks/ckpt_convert/examples/llama_convert_weights_when_tp_pp_change.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/ckpt_convert/examples/llama_convert_weights_when_tp_pp_change.sh -------------------------------------------------------------------------------- /tasks/ckpt_convert/llama/ckpt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/ckpt_convert/llama/ckpt_utils.py -------------------------------------------------------------------------------- /tasks/ckpt_convert/llama/convert_weights_from_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/ckpt_convert/llama/convert_weights_from_huggingface.py -------------------------------------------------------------------------------- /tasks/ckpt_convert/llama/convert_weights_when_tp_pp_change.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/ckpt_convert/llama/convert_weights_when_tp_pp_change.py -------------------------------------------------------------------------------- /tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/data_utils.py -------------------------------------------------------------------------------- /tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /tasks/eval_harness/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/eval_harness/download.py -------------------------------------------------------------------------------- /tasks/eval_harness/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/eval_harness/evaluate.py -------------------------------------------------------------------------------- /tasks/eval_harness/report-to-csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/eval_harness/report-to-csv.py -------------------------------------------------------------------------------- /tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/eval_utils.py -------------------------------------------------------------------------------- /tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/finetune_utils.py -------------------------------------------------------------------------------- /tasks/glue/cola.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/cola.py -------------------------------------------------------------------------------- /tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/data.py -------------------------------------------------------------------------------- /tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/finetune.py -------------------------------------------------------------------------------- /tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/mnli.py -------------------------------------------------------------------------------- /tasks/glue/mrpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/mrpc.py -------------------------------------------------------------------------------- /tasks/glue/qnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/qnli.py -------------------------------------------------------------------------------- /tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/qqp.py -------------------------------------------------------------------------------- /tasks/glue/rte.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/rte.py -------------------------------------------------------------------------------- /tasks/glue/sst2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/sst2.py -------------------------------------------------------------------------------- /tasks/glue/stsb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/glue/stsb.py -------------------------------------------------------------------------------- /tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/main.py -------------------------------------------------------------------------------- /tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /tasks/orqa/natural_questions/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/orqa/natural_questions/nq.py -------------------------------------------------------------------------------- /tasks/orqa/natural_questions/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/orqa/natural_questions/qa_utils.py -------------------------------------------------------------------------------- /tasks/orqa/natural_questions/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/orqa/natural_questions/tokenizers.py -------------------------------------------------------------------------------- /tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/race/data.py -------------------------------------------------------------------------------- /tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/race/finetune.py -------------------------------------------------------------------------------- /tasks/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/vision/classification.py -------------------------------------------------------------------------------- /tasks/vision/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/vision/eval_utils.py -------------------------------------------------------------------------------- /tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/vision/main.py -------------------------------------------------------------------------------- /tasks/zeroshot/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/zeroshot/datasets.py -------------------------------------------------------------------------------- /tasks/zeroshot/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/zeroshot/detokenizer.py -------------------------------------------------------------------------------- /tasks/zeroshot/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/zeroshot/evaluate.py -------------------------------------------------------------------------------- /tasks/zeroshot/evaluate_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tasks/zeroshot/evaluate_llama.py -------------------------------------------------------------------------------- /tests/run_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/run_megatron.py -------------------------------------------------------------------------------- /tests/st/test_bloom/run_bloom_ptd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/st/test_bloom/run_bloom_ptd.py -------------------------------------------------------------------------------- /tests/st/test_bloom/test_bloom_ptd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/st/test_bloom/test_bloom_ptd.sh -------------------------------------------------------------------------------- /tests/st/test_gpt/run_gpt_ptd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/st/test_gpt/run_gpt_ptd.py -------------------------------------------------------------------------------- /tests/st/test_gpt/test_gpt_ptd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/st/test_gpt/test_gpt_ptd.sh -------------------------------------------------------------------------------- /tests/st/test_llama/run_llama_ptd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/st/test_llama/run_llama_ptd.py -------------------------------------------------------------------------------- /tests/st/test_llama/test_llama_opp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/st/test_llama/test_llama_opp.sh -------------------------------------------------------------------------------- /tests/st/test_llama/test_llama_ptd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/st/test_llama/test_llama_ptd.sh -------------------------------------------------------------------------------- /tests/st/test_llama/test_llama_vp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/st/test_llama/test_llama_vp.sh -------------------------------------------------------------------------------- /tests/test_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/test_activations.py -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/test_basic.py -------------------------------------------------------------------------------- /tests/test_checkpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/test_checkpoints.py -------------------------------------------------------------------------------- /tests/test_dataloaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/test_dataloaders.py -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/test_model.py -------------------------------------------------------------------------------- /tests/test_preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/test_preprocessing.py -------------------------------------------------------------------------------- /tests/test_tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/test_tensor_parallel.py -------------------------------------------------------------------------------- /tests/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ascend/AscendSpeed/HEAD/tests/test_training.py --------------------------------------------------------------------------------