├── .gitignore ├── DATA_LICENSE.txt ├── LICENSE ├── LICENSE.txt ├── MODEL_LICENSE.txt ├── OpenBA ├── __init__.py ├── configuration_openba.py ├── modeling_openba.py └── tokenization_openba.py ├── README.md ├── README_ZH.md ├── assets ├── bachelor.png ├── data.png ├── downstream.png └── training_process.png ├── convert_megatron_to_hf_ckp.py ├── convert_megatron_to_hf_ckp.sh ├── evaluation ├── CMMLU │ ├── data │ │ └── 5shot │ │ │ └── agronomy.json │ ├── logs │ │ ├── OpenBT5-0shot │ │ └── OpenBT5-5shot │ ├── main.py │ ├── make_data.py │ ├── readme.md │ ├── scripts │ │ ├── eval_fewshot.sh │ │ └── eval_zeroshot.sh │ └── template.py └── MMLU │ ├── data │ └── 5shot │ │ └── abstract_algebra_test.json │ ├── logs │ ├── OpenBT5-0shot │ └── OpenBT5-5shot │ ├── main.py │ ├── make_data.py │ ├── readme.md │ ├── scripts │ ├── eval_fewshot.sh │ └── eval_zeroshot.sh │ └── template.py ├── gradio_chat_demo.py ├── gradio_code_demo.py └── training ├── .coveragerc ├── .gitignore ├── megatron ├── __init__.py ├── arguments.py ├── checkpointing.py ├── core │ ├── __init__.py │ ├── enums.py │ ├── parallel_state.py │ ├── pipeline_parallel │ │ ├── __init__.py │ │ ├── p2p_communication.py │ │ └── schedules.py │ ├── tensor_parallel │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ └── utils.py │ └── utils.py ├── data │ ├── Makefile │ ├── __init__.py │ ├── autoaugment.py │ ├── bert_dataset.py │ ├── biencoder_dataset_utils.py │ ├── blendable_dataset.py │ ├── data_samplers.py │ ├── dataset_utils.py │ ├── gpt_dataset.py │ ├── helpers.cpp │ ├── ict_dataset.py │ ├── image_folder.py │ ├── indexed_dataset.py │ ├── orqa_wiki_dataset.py │ ├── realm_dataset_utils.py │ ├── realm_index.py │ ├── t5_dataset.py │ ├── test │ │ ├── test_indexed_dataset.py │ │ └── test_preprocess_data.sh │ └── vit_dataset.py ├── dist_signal_handler.py ├── fp16_deprecated │ └── loss_scaler.py ├── fused_kernels │ ├── __init__.py │ ├── compat.h │ ├── fused_weight_gradient_dense.cpp │ ├── fused_weight_gradient_dense.cu │ ├── layer_norm_cuda.cpp │ ├── layer_norm_cuda_kernel.cu │ ├── scaled_masked_softmax.cpp │ ├── scaled_masked_softmax.h │ ├── scaled_masked_softmax_cuda.cu │ ├── scaled_softmax.cpp │ ├── scaled_softmax_cuda.cu │ ├── scaled_upper_triang_masked_softmax.cpp │ ├── scaled_upper_triang_masked_softmax.h │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ ├── tests │ │ ├── __init__.py │ │ └── test_fused_kernels.py │ └── type_shim.h ├── global_vars.py ├── indexer.py ├── initialize.py ├── memory.py ├── microbatches.py ├── model │ ├── __init__.py │ ├── bert_model.py │ ├── biencoder_model.py │ ├── classification.py │ ├── distributed.py │ ├── enums.py │ ├── fused_bias_gelu.py │ ├── fused_layer_norm.py │ ├── fused_softmax.py │ ├── gpt_model.py │ ├── language_model.py │ ├── module.py │ ├── multiple_choice.py │ ├── realm_model.py │ ├── retro_transformer.py │ ├── rotary_embedding_torch.py │ ├── t5_model.py │ ├── transformer.py │ ├── utils.py │ └── vision │ │ ├── classification.py │ │ ├── dino.py │ │ ├── esvit_swin_backbone.py │ │ ├── inpainting.py │ │ ├── knn_monitor.py │ │ ├── mit_backbone.py │ │ ├── swin_backbone.py │ │ ├── utils.py │ │ └── vit_backbone.py ├── mpu │ └── tests │ │ ├── __init__.py │ │ ├── commons.py │ │ ├── test_cross_entropy.py │ │ ├── test_data.py │ │ ├── test_initialize.py │ │ ├── test_layers.py │ │ └── test_random.py ├── optimizer │ ├── __init__.py │ ├── clip_grads.py │ ├── distrib_optimizer.py │ ├── grad_scaler.py │ └── optimizer.py ├── optimizer_param_scheduler.py ├── static │ └── index.html ├── text_generation │ ├── __init__.py │ ├── api.py │ ├── beam_utils.py │ ├── communication.py │ ├── forward_step.py │ ├── generation.py │ ├── sampling.py │ └── tokenization.py ├── text_generation_server.py ├── timers.py ├── tokenizer │ ├── __init__.py │ ├── bert_tokenization.py │ ├── gpt2_tokenization.py │ └── tokenizer.py ├── training.py └── utils.py ├── pretrain_t5.py ├── scripts ├── data_process_flan.sh ├── data_process_span_corr.sh ├── run_flan.sh ├── run_pretrain.sh └── run_stretch.sh └── tools ├── checkpoint_split_megatron.py ├── linter.py ├── merge_datasets.py ├── preprocess_data_chat.py ├── preprocess_data_finetune.py └── preprocess_data_pretrain.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/.gitignore -------------------------------------------------------------------------------- /DATA_LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/DATA_LICENSE.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/LICENSE -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /MODEL_LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/MODEL_LICENSE.txt -------------------------------------------------------------------------------- /OpenBA/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/OpenBA/__init__.py -------------------------------------------------------------------------------- /OpenBA/configuration_openba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/OpenBA/configuration_openba.py -------------------------------------------------------------------------------- /OpenBA/modeling_openba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/OpenBA/modeling_openba.py -------------------------------------------------------------------------------- /OpenBA/tokenization_openba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/OpenBA/tokenization_openba.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/README.md -------------------------------------------------------------------------------- /README_ZH.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/README_ZH.md -------------------------------------------------------------------------------- /assets/bachelor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/assets/bachelor.png -------------------------------------------------------------------------------- /assets/data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/assets/data.png -------------------------------------------------------------------------------- /assets/downstream.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/assets/downstream.png -------------------------------------------------------------------------------- /assets/training_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/assets/training_process.png -------------------------------------------------------------------------------- /convert_megatron_to_hf_ckp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/convert_megatron_to_hf_ckp.py -------------------------------------------------------------------------------- /convert_megatron_to_hf_ckp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/convert_megatron_to_hf_ckp.sh -------------------------------------------------------------------------------- /evaluation/CMMLU/data/5shot/agronomy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/data/5shot/agronomy.json -------------------------------------------------------------------------------- /evaluation/CMMLU/logs/OpenBT5-0shot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/logs/OpenBT5-0shot -------------------------------------------------------------------------------- /evaluation/CMMLU/logs/OpenBT5-5shot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/logs/OpenBT5-5shot -------------------------------------------------------------------------------- /evaluation/CMMLU/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/main.py -------------------------------------------------------------------------------- /evaluation/CMMLU/make_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/make_data.py -------------------------------------------------------------------------------- /evaluation/CMMLU/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/readme.md -------------------------------------------------------------------------------- /evaluation/CMMLU/scripts/eval_fewshot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/scripts/eval_fewshot.sh -------------------------------------------------------------------------------- /evaluation/CMMLU/scripts/eval_zeroshot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/scripts/eval_zeroshot.sh -------------------------------------------------------------------------------- /evaluation/CMMLU/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/CMMLU/template.py -------------------------------------------------------------------------------- /evaluation/MMLU/data/5shot/abstract_algebra_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/data/5shot/abstract_algebra_test.json -------------------------------------------------------------------------------- /evaluation/MMLU/logs/OpenBT5-0shot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/logs/OpenBT5-0shot -------------------------------------------------------------------------------- /evaluation/MMLU/logs/OpenBT5-5shot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/logs/OpenBT5-5shot -------------------------------------------------------------------------------- /evaluation/MMLU/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/main.py -------------------------------------------------------------------------------- /evaluation/MMLU/make_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/make_data.py -------------------------------------------------------------------------------- /evaluation/MMLU/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/readme.md -------------------------------------------------------------------------------- /evaluation/MMLU/scripts/eval_fewshot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/scripts/eval_fewshot.sh -------------------------------------------------------------------------------- /evaluation/MMLU/scripts/eval_zeroshot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/scripts/eval_zeroshot.sh -------------------------------------------------------------------------------- /evaluation/MMLU/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/evaluation/MMLU/template.py -------------------------------------------------------------------------------- /gradio_chat_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/gradio_chat_demo.py -------------------------------------------------------------------------------- /gradio_code_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/gradio_code_demo.py -------------------------------------------------------------------------------- /training/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/.coveragerc -------------------------------------------------------------------------------- /training/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/.gitignore -------------------------------------------------------------------------------- /training/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/__init__.py -------------------------------------------------------------------------------- /training/megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/arguments.py -------------------------------------------------------------------------------- /training/megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/checkpointing.py -------------------------------------------------------------------------------- /training/megatron/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/__init__.py -------------------------------------------------------------------------------- /training/megatron/core/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/enums.py -------------------------------------------------------------------------------- /training/megatron/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/parallel_state.py -------------------------------------------------------------------------------- /training/megatron/core/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/pipeline_parallel/__init__.py -------------------------------------------------------------------------------- /training/megatron/core/pipeline_parallel/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/pipeline_parallel/p2p_communication.py -------------------------------------------------------------------------------- /training/megatron/core/pipeline_parallel/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/pipeline_parallel/schedules.py -------------------------------------------------------------------------------- /training/megatron/core/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/tensor_parallel/__init__.py -------------------------------------------------------------------------------- /training/megatron/core/tensor_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/tensor_parallel/cross_entropy.py -------------------------------------------------------------------------------- /training/megatron/core/tensor_parallel/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/tensor_parallel/data.py -------------------------------------------------------------------------------- /training/megatron/core/tensor_parallel/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/tensor_parallel/layers.py -------------------------------------------------------------------------------- /training/megatron/core/tensor_parallel/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/tensor_parallel/mappings.py -------------------------------------------------------------------------------- /training/megatron/core/tensor_parallel/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/tensor_parallel/random.py -------------------------------------------------------------------------------- /training/megatron/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /training/megatron/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/core/utils.py -------------------------------------------------------------------------------- /training/megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/Makefile -------------------------------------------------------------------------------- /training/megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /training/megatron/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/autoaugment.py -------------------------------------------------------------------------------- /training/megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /training/megatron/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /training/megatron/data/blendable_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/blendable_dataset.py -------------------------------------------------------------------------------- /training/megatron/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/data_samplers.py -------------------------------------------------------------------------------- /training/megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /training/megatron/data/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/gpt_dataset.py -------------------------------------------------------------------------------- /training/megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /training/megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /training/megatron/data/image_folder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/image_folder.py -------------------------------------------------------------------------------- /training/megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /training/megatron/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /training/megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /training/megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/realm_index.py -------------------------------------------------------------------------------- /training/megatron/data/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/t5_dataset.py -------------------------------------------------------------------------------- /training/megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /training/megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /training/megatron/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/data/vit_dataset.py -------------------------------------------------------------------------------- /training/megatron/dist_signal_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/dist_signal_handler.py -------------------------------------------------------------------------------- /training/megatron/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /training/megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /training/megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/compat.h -------------------------------------------------------------------------------- /training/megatron/fused_kernels/fused_weight_gradient_dense.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/fused_weight_gradient_dense.cpp -------------------------------------------------------------------------------- /training/megatron/fused_kernels/fused_weight_gradient_dense.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/fused_weight_gradient_dense.cu -------------------------------------------------------------------------------- /training/megatron/fused_kernels/layer_norm_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/layer_norm_cuda.cpp -------------------------------------------------------------------------------- /training/megatron/fused_kernels/layer_norm_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/layer_norm_cuda_kernel.cu -------------------------------------------------------------------------------- /training/megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /training/megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /training/megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /training/megatron/fused_kernels/scaled_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/scaled_softmax.cpp -------------------------------------------------------------------------------- /training/megatron/fused_kernels/scaled_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/scaled_softmax_cuda.cu -------------------------------------------------------------------------------- /training/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /training/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /training/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /training/megatron/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/megatron/fused_kernels/tests/test_fused_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/tests/test_fused_kernels.py -------------------------------------------------------------------------------- /training/megatron/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /training/megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/global_vars.py -------------------------------------------------------------------------------- /training/megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/indexer.py -------------------------------------------------------------------------------- /training/megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/initialize.py -------------------------------------------------------------------------------- /training/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/memory.py -------------------------------------------------------------------------------- /training/megatron/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/microbatches.py -------------------------------------------------------------------------------- /training/megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/__init__.py -------------------------------------------------------------------------------- /training/megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/bert_model.py -------------------------------------------------------------------------------- /training/megatron/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/biencoder_model.py -------------------------------------------------------------------------------- /training/megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/classification.py -------------------------------------------------------------------------------- /training/megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/distributed.py -------------------------------------------------------------------------------- /training/megatron/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/enums.py -------------------------------------------------------------------------------- /training/megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /training/megatron/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/fused_layer_norm.py -------------------------------------------------------------------------------- /training/megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /training/megatron/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/gpt_model.py -------------------------------------------------------------------------------- /training/megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/language_model.py -------------------------------------------------------------------------------- /training/megatron/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/module.py -------------------------------------------------------------------------------- /training/megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /training/megatron/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/realm_model.py -------------------------------------------------------------------------------- /training/megatron/model/retro_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/retro_transformer.py -------------------------------------------------------------------------------- /training/megatron/model/rotary_embedding_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/rotary_embedding_torch.py -------------------------------------------------------------------------------- /training/megatron/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/t5_model.py -------------------------------------------------------------------------------- /training/megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/transformer.py -------------------------------------------------------------------------------- /training/megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/utils.py -------------------------------------------------------------------------------- /training/megatron/model/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/classification.py -------------------------------------------------------------------------------- /training/megatron/model/vision/dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/dino.py -------------------------------------------------------------------------------- /training/megatron/model/vision/esvit_swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/esvit_swin_backbone.py -------------------------------------------------------------------------------- /training/megatron/model/vision/inpainting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/inpainting.py -------------------------------------------------------------------------------- /training/megatron/model/vision/knn_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/knn_monitor.py -------------------------------------------------------------------------------- /training/megatron/model/vision/mit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/mit_backbone.py -------------------------------------------------------------------------------- /training/megatron/model/vision/swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/swin_backbone.py -------------------------------------------------------------------------------- /training/megatron/model/vision/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/utils.py -------------------------------------------------------------------------------- /training/megatron/model/vision/vit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/model/vision/vit_backbone.py -------------------------------------------------------------------------------- /training/megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /training/megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /training/megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /training/megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /training/megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /training/megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /training/megatron/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/optimizer/__init__.py -------------------------------------------------------------------------------- /training/megatron/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/optimizer/clip_grads.py -------------------------------------------------------------------------------- /training/megatron/optimizer/distrib_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/optimizer/distrib_optimizer.py -------------------------------------------------------------------------------- /training/megatron/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /training/megatron/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/optimizer/optimizer.py -------------------------------------------------------------------------------- /training/megatron/optimizer_param_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/optimizer_param_scheduler.py -------------------------------------------------------------------------------- /training/megatron/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/static/index.html -------------------------------------------------------------------------------- /training/megatron/text_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation/__init__.py -------------------------------------------------------------------------------- /training/megatron/text_generation/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation/api.py -------------------------------------------------------------------------------- /training/megatron/text_generation/beam_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation/beam_utils.py -------------------------------------------------------------------------------- /training/megatron/text_generation/communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation/communication.py -------------------------------------------------------------------------------- /training/megatron/text_generation/forward_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation/forward_step.py -------------------------------------------------------------------------------- /training/megatron/text_generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation/generation.py -------------------------------------------------------------------------------- /training/megatron/text_generation/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation/sampling.py -------------------------------------------------------------------------------- /training/megatron/text_generation/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation/tokenization.py -------------------------------------------------------------------------------- /training/megatron/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/text_generation_server.py -------------------------------------------------------------------------------- /training/megatron/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/timers.py -------------------------------------------------------------------------------- /training/megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /training/megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /training/megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /training/megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /training/megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/training.py -------------------------------------------------------------------------------- /training/megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/megatron/utils.py -------------------------------------------------------------------------------- /training/pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/pretrain_t5.py -------------------------------------------------------------------------------- /training/scripts/data_process_flan.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/scripts/data_process_flan.sh -------------------------------------------------------------------------------- /training/scripts/data_process_span_corr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/scripts/data_process_span_corr.sh -------------------------------------------------------------------------------- /training/scripts/run_flan.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/scripts/run_flan.sh -------------------------------------------------------------------------------- /training/scripts/run_pretrain.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/scripts/run_pretrain.sh -------------------------------------------------------------------------------- /training/scripts/run_stretch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/scripts/run_stretch.sh -------------------------------------------------------------------------------- /training/tools/checkpoint_split_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/tools/checkpoint_split_megatron.py -------------------------------------------------------------------------------- /training/tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/tools/linter.py -------------------------------------------------------------------------------- /training/tools/merge_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/tools/merge_datasets.py -------------------------------------------------------------------------------- /training/tools/preprocess_data_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/tools/preprocess_data_chat.py -------------------------------------------------------------------------------- /training/tools/preprocess_data_finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/tools/preprocess_data_finetune.py -------------------------------------------------------------------------------- /training/tools/preprocess_data_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenNLG/OpenBA/HEAD/training/tools/preprocess_data_pretrain.py --------------------------------------------------------------------------------