├── .github └── workflows │ └── build.yaml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bigscience ├── docs │ └── tpu.md ├── eval-spreadsheet │ ├── interactive_results_parser.py │ ├── parse_eai_results.py │ ├── parse_eai_tasks.py │ ├── parse_promptsource.py │ └── utils.py ├── gins │ ├── c_dec_c4_full_lm.gin │ ├── c_dec_c4_lm_adapt.gin │ ├── c_dec_c4_span_corruption.gin │ ├── c_dec_t0_adapt.gin │ ├── c_dec_xxl.gin │ ├── dec_xxl_base.gin │ ├── enc_dec_c4_lm_adapt.gin │ ├── enc_dec_c4_prefix_lm.gin │ ├── enc_dec_c4_span_corruption.gin │ ├── enc_dec_t0_adapt.gin │ ├── enc_dec_xxl.gin │ ├── eval_harness.gin │ ├── eval_t0.gin │ ├── inference_tool.gin │ ├── lm_adapter_base.gin │ ├── nc_dec_c4_lm_adapt.gin │ ├── nc_dec_c4_prefix_lm.gin │ ├── nc_dec_c4_span_corruption.gin │ ├── nc_dec_t0_adapt.gin │ ├── nc_dec_xxl.gin │ ├── pretrainer_base.gin │ ├── t0_adapter_base.gin │ └── task.py └── scripts │ ├── eval_harness.sh │ ├── eval_t0.sh │ ├── infer.sh │ ├── inference_tool.py │ ├── iterate_ranking_task.py │ ├── launch_command_in_tmux.sh │ ├── lm_adapt.sh │ ├── plot_eai_eval.py │ ├── plot_t5x_results_vs_t0.py │ ├── pretrain.sh │ ├── run_on_all_vms.sh │ ├── setup_vm.sh │ ├── start_tpu_instance.sh │ ├── t0_adapt.sh │ └── test_seqio_dataset.py ├── gin-primer.md ├── pytest.ini ├── setup.py └── t5x ├── __init__.py ├── adafactor.py ├── adafactor_test.py ├── checkpoint_importer.py ├── checkpoint_importer_test.py ├── checkpoints.py ├── checkpoints_test.py ├── configs ├── __init__.py └── runs │ ├── __init__.py │ ├── eval.gin │ ├── finetune.gin │ ├── infer.gin │ ├── infer_from_tfexample_file.gin │ ├── pretrain.gin │ └── pretrain_deterministic.gin ├── decoding.py ├── decoding_test.py ├── eval.py ├── eval_harness.py ├── examples ├── __init__.py ├── decoder_only │ ├── layers.py │ ├── layers_test.py │ ├── models │ │ └── base.gin │ ├── network.py │ └── network_test.py ├── scalable_t5 │ ├── README.md │ ├── __init__.py │ ├── layers.py │ ├── layers_test.py │ ├── local_tiny.gin │ ├── network.py │ └── t5_1_1 │ │ ├── __init__.py │ │ ├── base.gin │ │ ├── examples │ │ ├── __init__.py │ │ └── wmt19_ende_from_scratch.gin │ │ ├── large.gin │ │ ├── small.gin │ │ ├── xl.gin │ │ └── xxl.gin └── t5 │ ├── README.md │ ├── __init__.py │ ├── layers.py │ ├── layers_test.py │ ├── local_tiny.gin │ ├── network.py │ ├── network_test.py │ └── t5_1_1 │ ├── __init__.py │ ├── base.gin │ ├── examples │ ├── __init__.py │ ├── c4_pretrain.gin │ ├── t5_1_1_base_wmt_eval.gin │ ├── t5_1_1_base_wmt_finetune.gin │ ├── t5_1_1_base_wmt_from_scratch.gin │ ├── t5_1_1_base_wmt_infer.gin │ ├── test_train_t5_tiny.gin │ └── wmt19_ende_from_scratch.gin │ ├── large.gin │ ├── small.gin │ ├── tiny.gin │ ├── xl.gin │ └── xxl.gin ├── gin_utils.py ├── gin_utils_test.py ├── infer.py ├── losses.py ├── losses_test.py ├── metrics.py ├── metrics_test.py ├── models.py ├── models_test.py ├── multihost_utils.py ├── partitioning.py ├── partitioning_test.py ├── state_utils.py ├── state_utils_test.py ├── test_utils.py ├── testdata ├── mtf_tiny_t5 │ ├── checkpoint │ ├── graph.pbtxt │ ├── model-info.txt │ ├── model.ckpt-0.data-00000-of-00002 │ ├── model.ckpt-0.data-00001-of-00002 │ ├── model.ckpt-0.index │ ├── model.ckpt-0.meta │ └── operative_config.gin └── test_t5_tiny.checkpoint_0 ├── train.py ├── train_state.py ├── train_state_test.py ├── trainer.py ├── trainer_test.py ├── utils.py ├── utils_test.py └── version.py /.github/workflows/build.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/.github/workflows/build.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bigscience/results 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | External contributions are not accepted, sorry! 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/README.md -------------------------------------------------------------------------------- /bigscience/docs/tpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/docs/tpu.md -------------------------------------------------------------------------------- /bigscience/eval-spreadsheet/interactive_results_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/eval-spreadsheet/interactive_results_parser.py -------------------------------------------------------------------------------- /bigscience/eval-spreadsheet/parse_eai_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/eval-spreadsheet/parse_eai_results.py -------------------------------------------------------------------------------- /bigscience/eval-spreadsheet/parse_eai_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/eval-spreadsheet/parse_eai_tasks.py -------------------------------------------------------------------------------- /bigscience/eval-spreadsheet/parse_promptsource.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/eval-spreadsheet/parse_promptsource.py -------------------------------------------------------------------------------- /bigscience/eval-spreadsheet/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/eval-spreadsheet/utils.py -------------------------------------------------------------------------------- /bigscience/gins/c_dec_c4_full_lm.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/c_dec_c4_full_lm.gin -------------------------------------------------------------------------------- /bigscience/gins/c_dec_c4_lm_adapt.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/c_dec_c4_lm_adapt.gin -------------------------------------------------------------------------------- /bigscience/gins/c_dec_c4_span_corruption.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/c_dec_c4_span_corruption.gin -------------------------------------------------------------------------------- /bigscience/gins/c_dec_t0_adapt.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/c_dec_t0_adapt.gin -------------------------------------------------------------------------------- /bigscience/gins/c_dec_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/c_dec_xxl.gin -------------------------------------------------------------------------------- /bigscience/gins/dec_xxl_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/dec_xxl_base.gin -------------------------------------------------------------------------------- /bigscience/gins/enc_dec_c4_lm_adapt.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/enc_dec_c4_lm_adapt.gin -------------------------------------------------------------------------------- /bigscience/gins/enc_dec_c4_prefix_lm.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/enc_dec_c4_prefix_lm.gin -------------------------------------------------------------------------------- /bigscience/gins/enc_dec_c4_span_corruption.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/enc_dec_c4_span_corruption.gin -------------------------------------------------------------------------------- /bigscience/gins/enc_dec_t0_adapt.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/enc_dec_t0_adapt.gin -------------------------------------------------------------------------------- /bigscience/gins/enc_dec_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/enc_dec_xxl.gin -------------------------------------------------------------------------------- /bigscience/gins/eval_harness.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/eval_harness.gin -------------------------------------------------------------------------------- /bigscience/gins/eval_t0.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/eval_t0.gin -------------------------------------------------------------------------------- /bigscience/gins/inference_tool.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/inference_tool.gin -------------------------------------------------------------------------------- /bigscience/gins/lm_adapter_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/lm_adapter_base.gin -------------------------------------------------------------------------------- /bigscience/gins/nc_dec_c4_lm_adapt.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/nc_dec_c4_lm_adapt.gin -------------------------------------------------------------------------------- /bigscience/gins/nc_dec_c4_prefix_lm.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/nc_dec_c4_prefix_lm.gin -------------------------------------------------------------------------------- /bigscience/gins/nc_dec_c4_span_corruption.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/nc_dec_c4_span_corruption.gin -------------------------------------------------------------------------------- /bigscience/gins/nc_dec_t0_adapt.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/nc_dec_t0_adapt.gin -------------------------------------------------------------------------------- /bigscience/gins/nc_dec_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/nc_dec_xxl.gin -------------------------------------------------------------------------------- /bigscience/gins/pretrainer_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/pretrainer_base.gin -------------------------------------------------------------------------------- /bigscience/gins/t0_adapter_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/t0_adapter_base.gin -------------------------------------------------------------------------------- /bigscience/gins/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/gins/task.py -------------------------------------------------------------------------------- /bigscience/scripts/eval_harness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/eval_harness.sh -------------------------------------------------------------------------------- /bigscience/scripts/eval_t0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/eval_t0.sh -------------------------------------------------------------------------------- /bigscience/scripts/infer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/infer.sh -------------------------------------------------------------------------------- /bigscience/scripts/inference_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/inference_tool.py -------------------------------------------------------------------------------- /bigscience/scripts/iterate_ranking_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/iterate_ranking_task.py -------------------------------------------------------------------------------- /bigscience/scripts/launch_command_in_tmux.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/launch_command_in_tmux.sh -------------------------------------------------------------------------------- /bigscience/scripts/lm_adapt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/lm_adapt.sh -------------------------------------------------------------------------------- /bigscience/scripts/plot_eai_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/plot_eai_eval.py -------------------------------------------------------------------------------- /bigscience/scripts/plot_t5x_results_vs_t0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/plot_t5x_results_vs_t0.py -------------------------------------------------------------------------------- /bigscience/scripts/pretrain.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/pretrain.sh -------------------------------------------------------------------------------- /bigscience/scripts/run_on_all_vms.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/run_on_all_vms.sh -------------------------------------------------------------------------------- /bigscience/scripts/setup_vm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/setup_vm.sh -------------------------------------------------------------------------------- /bigscience/scripts/start_tpu_instance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/start_tpu_instance.sh -------------------------------------------------------------------------------- /bigscience/scripts/t0_adapt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/t0_adapt.sh -------------------------------------------------------------------------------- /bigscience/scripts/test_seqio_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/bigscience/scripts/test_seqio_dataset.py -------------------------------------------------------------------------------- /gin-primer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/gin-primer.md -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/pytest.ini -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/setup.py -------------------------------------------------------------------------------- /t5x/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/__init__.py -------------------------------------------------------------------------------- /t5x/adafactor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/adafactor.py -------------------------------------------------------------------------------- /t5x/adafactor_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/adafactor_test.py -------------------------------------------------------------------------------- /t5x/checkpoint_importer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/checkpoint_importer.py -------------------------------------------------------------------------------- /t5x/checkpoint_importer_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/checkpoint_importer_test.py -------------------------------------------------------------------------------- /t5x/checkpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/checkpoints.py -------------------------------------------------------------------------------- /t5x/checkpoints_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/checkpoints_test.py -------------------------------------------------------------------------------- /t5x/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/configs/__init__.py -------------------------------------------------------------------------------- /t5x/configs/runs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/configs/runs/__init__.py -------------------------------------------------------------------------------- /t5x/configs/runs/eval.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/configs/runs/eval.gin -------------------------------------------------------------------------------- /t5x/configs/runs/finetune.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/configs/runs/finetune.gin -------------------------------------------------------------------------------- /t5x/configs/runs/infer.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/configs/runs/infer.gin -------------------------------------------------------------------------------- /t5x/configs/runs/infer_from_tfexample_file.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/configs/runs/infer_from_tfexample_file.gin -------------------------------------------------------------------------------- /t5x/configs/runs/pretrain.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/configs/runs/pretrain.gin -------------------------------------------------------------------------------- /t5x/configs/runs/pretrain_deterministic.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/configs/runs/pretrain_deterministic.gin -------------------------------------------------------------------------------- /t5x/decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/decoding.py -------------------------------------------------------------------------------- /t5x/decoding_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/decoding_test.py -------------------------------------------------------------------------------- /t5x/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/eval.py -------------------------------------------------------------------------------- /t5x/eval_harness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/eval_harness.py -------------------------------------------------------------------------------- /t5x/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/__init__.py -------------------------------------------------------------------------------- /t5x/examples/decoder_only/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/decoder_only/layers.py -------------------------------------------------------------------------------- /t5x/examples/decoder_only/layers_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/decoder_only/layers_test.py -------------------------------------------------------------------------------- /t5x/examples/decoder_only/models/base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/decoder_only/models/base.gin -------------------------------------------------------------------------------- /t5x/examples/decoder_only/network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/decoder_only/network.py -------------------------------------------------------------------------------- /t5x/examples/decoder_only/network_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/decoder_only/network_test.py -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/README.md -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/__init__.py -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/layers.py -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/layers_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/layers_test.py -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/local_tiny.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/local_tiny.gin -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/network.py -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/t5_1_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/t5_1_1/__init__.py -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/t5_1_1/base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/t5_1_1/base.gin -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/t5_1_1/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/t5_1_1/examples/__init__.py -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/t5_1_1/examples/wmt19_ende_from_scratch.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/t5_1_1/examples/wmt19_ende_from_scratch.gin -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/t5_1_1/large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/t5_1_1/large.gin -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/t5_1_1/small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/t5_1_1/small.gin -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/t5_1_1/xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/t5_1_1/xl.gin -------------------------------------------------------------------------------- /t5x/examples/scalable_t5/t5_1_1/xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/scalable_t5/t5_1_1/xxl.gin -------------------------------------------------------------------------------- /t5x/examples/t5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/README.md -------------------------------------------------------------------------------- /t5x/examples/t5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/__init__.py -------------------------------------------------------------------------------- /t5x/examples/t5/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/layers.py -------------------------------------------------------------------------------- /t5x/examples/t5/layers_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/layers_test.py -------------------------------------------------------------------------------- /t5x/examples/t5/local_tiny.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/local_tiny.gin -------------------------------------------------------------------------------- /t5x/examples/t5/network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/network.py -------------------------------------------------------------------------------- /t5x/examples/t5/network_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/network_test.py -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/__init__.py -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/base.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/examples/__init__.py -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/examples/c4_pretrain.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/examples/c4_pretrain.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/examples/t5_1_1_base_wmt_eval.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/examples/t5_1_1_base_wmt_eval.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/examples/t5_1_1_base_wmt_finetune.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/examples/t5_1_1_base_wmt_finetune.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/examples/t5_1_1_base_wmt_from_scratch.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/examples/t5_1_1_base_wmt_from_scratch.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/examples/t5_1_1_base_wmt_infer.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/examples/t5_1_1_base_wmt_infer.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/examples/test_train_t5_tiny.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/examples/test_train_t5_tiny.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/examples/wmt19_ende_from_scratch.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/examples/wmt19_ende_from_scratch.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/large.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/small.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/tiny.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/tiny.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/xl.gin -------------------------------------------------------------------------------- /t5x/examples/t5/t5_1_1/xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/examples/t5/t5_1_1/xxl.gin -------------------------------------------------------------------------------- /t5x/gin_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/gin_utils.py -------------------------------------------------------------------------------- /t5x/gin_utils_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/gin_utils_test.py -------------------------------------------------------------------------------- /t5x/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/infer.py -------------------------------------------------------------------------------- /t5x/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/losses.py -------------------------------------------------------------------------------- /t5x/losses_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/losses_test.py -------------------------------------------------------------------------------- /t5x/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/metrics.py -------------------------------------------------------------------------------- /t5x/metrics_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/metrics_test.py -------------------------------------------------------------------------------- /t5x/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/models.py -------------------------------------------------------------------------------- /t5x/models_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/models_test.py -------------------------------------------------------------------------------- /t5x/multihost_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/multihost_utils.py -------------------------------------------------------------------------------- /t5x/partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/partitioning.py -------------------------------------------------------------------------------- /t5x/partitioning_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/partitioning_test.py -------------------------------------------------------------------------------- /t5x/state_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/state_utils.py -------------------------------------------------------------------------------- /t5x/state_utils_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/state_utils_test.py -------------------------------------------------------------------------------- /t5x/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/test_utils.py -------------------------------------------------------------------------------- /t5x/testdata/mtf_tiny_t5/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/mtf_tiny_t5/checkpoint -------------------------------------------------------------------------------- /t5x/testdata/mtf_tiny_t5/graph.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/mtf_tiny_t5/graph.pbtxt -------------------------------------------------------------------------------- /t5x/testdata/mtf_tiny_t5/model-info.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/mtf_tiny_t5/model-info.txt -------------------------------------------------------------------------------- /t5x/testdata/mtf_tiny_t5/model.ckpt-0.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/mtf_tiny_t5/model.ckpt-0.data-00000-of-00002 -------------------------------------------------------------------------------- /t5x/testdata/mtf_tiny_t5/model.ckpt-0.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/mtf_tiny_t5/model.ckpt-0.data-00001-of-00002 -------------------------------------------------------------------------------- /t5x/testdata/mtf_tiny_t5/model.ckpt-0.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/mtf_tiny_t5/model.ckpt-0.index -------------------------------------------------------------------------------- /t5x/testdata/mtf_tiny_t5/model.ckpt-0.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/mtf_tiny_t5/model.ckpt-0.meta -------------------------------------------------------------------------------- /t5x/testdata/mtf_tiny_t5/operative_config.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/mtf_tiny_t5/operative_config.gin -------------------------------------------------------------------------------- /t5x/testdata/test_t5_tiny.checkpoint_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/testdata/test_t5_tiny.checkpoint_0 -------------------------------------------------------------------------------- /t5x/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/train.py -------------------------------------------------------------------------------- /t5x/train_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/train_state.py -------------------------------------------------------------------------------- /t5x/train_state_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/train_state_test.py -------------------------------------------------------------------------------- /t5x/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/trainer.py -------------------------------------------------------------------------------- /t5x/trainer_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/trainer_test.py -------------------------------------------------------------------------------- /t5x/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/utils.py -------------------------------------------------------------------------------- /t5x/utils_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/utils_test.py -------------------------------------------------------------------------------- /t5x/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/architecture-objective/HEAD/t5x/version.py --------------------------------------------------------------------------------