├── .github └── workflows │ └── close-pull-request.yml ├── CONTRIBUTING ├── LICENSE ├── README.md ├── flaxformer ├── __init__.py ├── activation_partitioning.py ├── activation_partitioning_test.py ├── architectures │ ├── __init__.py │ ├── bert │ │ ├── __init__.py │ │ ├── bert.py │ │ ├── bert_checkpoint_converter.py │ │ ├── bert_test.py │ │ ├── configs.py │ │ ├── heads.py │ │ ├── heads_test.py │ │ └── testdata │ │ │ └── model_param_shapes.json │ ├── calm_t5 │ │ ├── __init__.py │ │ ├── calm_architecture.py │ │ └── components.py │ ├── common │ │ ├── __init__.py │ │ ├── param_remapping.py │ │ └── param_remapping_test.py │ ├── dual_encoder │ │ ├── components.py │ │ ├── components_test.py │ │ ├── dual_encoder_architecture.py │ │ ├── dual_encoder_architecture_test.py │ │ ├── l2_norm.py │ │ ├── l2_norm_test.py │ │ ├── poolings.py │ │ ├── poolings_test.py │ │ ├── similarity_functions.py │ │ ├── similarity_functions_test.py │ │ ├── single_tower_logit_functions.py │ │ ├── single_tower_logit_functions_test.py │ │ └── testdata │ │ │ ├── dual_encoder_shapes_batch_dot_product.json │ │ │ ├── dual_encoder_shapes_batch_dot_product_attention_pooling.json │ │ │ ├── dual_encoder_shapes_batch_dot_product_attention_pooling_prompt_3.json │ │ │ ├── dual_encoder_shapes_batch_dot_product_prompt_3.json │ │ │ ├── dual_encoder_shapes_pointwise_ffnn.json │ │ │ └── dual_encoder_shapes_single_tower_pointwise_ffnn.json │ ├── fido │ │ ├── example_configs │ │ │ ├── t5_lsa_base.gin │ │ │ └── t5_lsa_scan_base.gin │ │ ├── fido_architecture.py │ │ ├── fido_architecture_test.py │ │ └── testdata │ │ │ ├── decoder_shapes_blocklsa1.json │ │ │ ├── decoder_shapes_blocklsa2.json │ │ │ ├── decoder_shapes_blocklsa2_noscan.json │ │ │ ├── decoder_shapes_lsa1.json │ │ │ └── decoder_shapes_lsa2.json │ ├── h_transformer │ │ ├── h_attention.py │ │ ├── h_attention_test.py │ │ ├── h_transformer_1d_architecture.py │ │ ├── h_transformer_1d_architecture_test.py │ │ ├── h_transformer_1d_architecture_test_utils.py │ │ ├── h_transformer_utils.py │ │ ├── hierarchical_relative_position_bias.py │ │ ├── hierarchical_relative_position_bias_test.py │ │ ├── partitioning.py │ │ ├── testdata │ │ │ ├── decoder_only_shapes_per_layer.json │ │ │ ├── decoder_shapes_per_layer.json │ │ │ ├── encoder_decoder_shapes_per_layer.json │ │ │ └── encoder_shapes_per_layer.json │ │ ├── token_hierarchy.py │ │ └── token_hierarchy_test.py │ ├── longt5 │ │ ├── __init__.py │ │ ├── long_attention.py │ │ ├── long_attention_test.py │ │ ├── longt5_architecture.py │ │ ├── longt5_architecture_test.py │ │ ├── relative_position_biases_general.py │ │ ├── relative_position_biases_general_test.py │ │ ├── tensor_utils.py │ │ └── tensor_utils_test.py │ ├── moe │ │ ├── __init__.py │ │ ├── moe_architecture.py │ │ ├── moe_architecture_test.py │ │ ├── moe_enums.py │ │ ├── moe_layers.py │ │ ├── moe_layers_test.py │ │ ├── moe_parallel_fused_decoder.py │ │ ├── moe_parallel_fused_decoder_test.py │ │ ├── routing.py │ │ ├── routing_test.py │ │ ├── scatter_utils.py │ │ └── scatter_utils_test.py │ ├── perceiver_ar │ │ ├── README.md │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── attention_test.py │ │ ├── decoder_layer.py │ │ ├── dense_attention.py │ │ ├── parallel_fused_decoder.py │ │ ├── perceiver_ar_architecture.py │ │ ├── perceiver_ar_architecture_test.py │ │ ├── perceiver_ar_architecture_test_utils.py │ │ ├── rotary_embedding.py │ │ ├── rotary_embedding_test.py │ │ ├── slicing.py │ │ ├── t5_models.py │ │ ├── t5_models_test.py │ │ └── testdata │ │ │ ├── decoder_shapes_fused_parallel.json │ │ │ ├── decoder_shapes_per_layer.json │ │ │ └── parallel_decoder_shapes_per_layer.json │ └── t5 │ │ ├── __init__.py │ │ ├── parallel_fused_decoder.py │ │ ├── parallel_fused_decoder_test.py │ │ ├── t5_1_0.py │ │ ├── t5_1_1.py │ │ ├── t5_architecture.py │ │ ├── t5_architecture_test.py │ │ ├── t5_architecture_test_utils.py │ │ ├── t5_common_layers.py │ │ ├── t5_common_layers_test.py │ │ └── testdata │ │ ├── decoder_params_axes_fused_parallel_quantized.json │ │ ├── decoder_shapes_fixed_abs_posemb.json │ │ ├── decoder_shapes_fused_parallel.json │ │ ├── decoder_shapes_fused_parallel_quantized.json │ │ ├── decoder_shapes_learned_abs_posemb.json │ │ ├── decoder_shapes_per_layer_relpos_bias.json │ │ ├── decoder_shapes_shared_relpos_bias.json │ │ ├── encoder_decoder_shared_embedding_shapes.json │ │ ├── encoder_scanned_per_layer_relpos_bias.json │ │ ├── encoder_shapes_per_layer_relpos_bias.json │ │ ├── encoder_shapes_shared_relpos_bias.json │ │ └── parallel_transformer_encoder_shapes.json ├── components │ ├── __init__.py │ ├── attention │ │ ├── __init__.py │ │ ├── attention_benchmarks.py │ │ ├── dense_attention.py │ │ ├── dense_attention_test.py │ │ ├── memory_efficient_attention.py │ │ └── memory_efficient_attention_test.py │ ├── convolution.py │ ├── dense.py │ ├── dense_test.py │ ├── embedding.py │ ├── embedding_test.py │ ├── initializers.py │ ├── initializers_test.py │ ├── layer_norm.py │ ├── layer_norm_test.py │ ├── relative_position_biases.py │ ├── relative_position_biases_export_test.py │ ├── relative_position_biases_test.py │ ├── rich_attention_position_scores.py │ ├── testdata │ │ ├── convolution_itest_encoder_shapes.json │ │ └── relpos_bias_renamed_head_axis.json │ └── transforms.py ├── param_conversion_util.py ├── sharding.py ├── sharding_test.py ├── t5x │ ├── __init__.py │ ├── checkpointing.gin │ ├── checkpointing_util.py │ └── configs │ │ ├── __init__.py │ │ ├── calm │ │ ├── __init__.py │ │ ├── architectures │ │ │ ├── calm_metacls_t5_1_1_flaxformer.gin │ │ │ └── calm_t5_1_1_flaxformer.gin │ │ ├── gin_configs_test.py │ │ └── models │ │ │ ├── calm_metacls_t5_1_1_base.gin │ │ │ ├── calm_metacls_t5_1_1_small.gin │ │ │ ├── calm_t5_1_1_base.gin │ │ │ └── calm_t5_1_1_small.gin │ │ ├── h_transformer │ │ ├── architectures │ │ │ ├── h_transformer_1d_decoder_only.gin │ │ │ └── h_transformer_1d_encoder_decoder.gin │ │ ├── gin_configs_decoder_only_test.py │ │ ├── gin_configs_encoder_decoder_test.py │ │ └── models │ │ │ ├── h_transformer_1d_decoder_only_base.gin │ │ │ ├── h_transformer_1d_decoder_only_large.gin │ │ │ ├── h_transformer_1d_decoder_only_small.gin │ │ │ ├── h_transformer_1d_encoder_decoder_base.gin │ │ │ ├── h_transformer_1d_encoder_decoder_large.gin │ │ │ └── h_transformer_1d_encoder_decoder_small.gin │ │ ├── longt5 │ │ ├── __init__.py │ │ ├── architectures │ │ │ ├── longt5_1_1_flaxformer.gin │ │ │ └── longt5_1_1_transient_global_flaxformer.gin │ │ ├── gin_configs_test.py │ │ └── models │ │ │ ├── longt5_1_1_base.gin │ │ │ ├── longt5_1_1_large.gin │ │ │ ├── longt5_1_1_small.gin │ │ │ ├── longt5_1_1_transient_global_base.gin │ │ │ ├── longt5_1_1_transient_global_large.gin │ │ │ ├── longt5_1_1_transient_global_small.gin │ │ │ ├── longt5_1_1_transient_global_xl.gin │ │ │ ├── longt5_1_1_transient_global_xxl.gin │ │ │ ├── longt5_1_1_xl.gin │ │ │ ├── longt5_1_1_xxl.gin │ │ │ ├── mlongt5_transient_global_base.gin │ │ │ ├── mlongt5_transient_global_large.gin │ │ │ └── mlongt5_transient_global_xl.gin │ │ ├── moe │ │ ├── __init__.py │ │ ├── architectures │ │ │ ├── __init__.py │ │ │ └── moe.gin │ │ ├── gin_configs_test.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── experts_choose_base.gin │ │ │ ├── experts_choose_large.gin │ │ │ ├── experts_choose_small.gin │ │ │ ├── experts_choose_tiny.gin │ │ │ ├── experts_choose_xl.gin │ │ │ ├── experts_choose_xxl.gin │ │ │ ├── st_moe_32b.gin │ │ │ ├── st_moe_base.gin │ │ │ ├── st_moe_xl.gin │ │ │ ├── switch_base.gin │ │ │ ├── switch_c.gin │ │ │ ├── switch_large.gin │ │ │ ├── switch_xxl.gin │ │ │ ├── tokens_choose_base.gin │ │ │ ├── tokens_choose_large.gin │ │ │ ├── tokens_choose_small.gin │ │ │ ├── tokens_choose_tiny.gin │ │ │ ├── tokens_choose_xl.gin │ │ │ └── tokens_choose_xxl.gin │ │ └── runs │ │ │ └── scan.gin │ │ ├── perceiver_ar │ │ ├── architectures │ │ │ └── perceiver_ar.gin │ │ ├── examples │ │ │ └── c4_lm.gin │ │ └── models │ │ │ ├── perceiver_ar_base.gin │ │ │ └── perceiver_ar_small.gin │ │ └── t5 │ │ ├── __init__.py │ │ ├── architectures │ │ ├── flash_attention.gin │ │ ├── t5_1_1_flaxformer.gin │ │ └── t5_flaxformer.gin │ │ ├── gin_configs_test.py │ │ └── models │ │ ├── byt5_base.gin │ │ ├── byt5_large.gin │ │ ├── byt5_small.gin │ │ ├── byt5_xl.gin │ │ ├── byt5_xxl.gin │ │ ├── mt5_base.gin │ │ ├── mt5_large.gin │ │ ├── mt5_small.gin │ │ ├── mt5_xl.gin │ │ ├── mt5_xxl.gin │ │ ├── t5_11B.gin │ │ ├── t5_1_1_base.gin │ │ ├── t5_1_1_large.gin │ │ ├── t5_1_1_small.gin │ │ ├── t5_1_1_xl.gin │ │ ├── t5_1_1_xxl.gin │ │ ├── t5_3B.gin │ │ ├── t5_base.gin │ │ ├── t5_large.gin │ │ └── t5_small.gin ├── testing_utils.py ├── testing_utils_test.py ├── transformer_common.py ├── transformer_common_test.py └── types.py └── setup.py /.github/workflows/close-pull-request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/.github/workflows/close-pull-request.yml -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | External contributions are not accepted, sorry! 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/README.md -------------------------------------------------------------------------------- /flaxformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/activation_partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/activation_partitioning.py -------------------------------------------------------------------------------- /flaxformer/activation_partitioning_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/activation_partitioning_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/architectures/bert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/architectures/bert/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/bert/bert.py -------------------------------------------------------------------------------- /flaxformer/architectures/bert/bert_checkpoint_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/bert/bert_checkpoint_converter.py -------------------------------------------------------------------------------- /flaxformer/architectures/bert/bert_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/bert/bert_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/bert/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/bert/configs.py -------------------------------------------------------------------------------- /flaxformer/architectures/bert/heads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/bert/heads.py -------------------------------------------------------------------------------- /flaxformer/architectures/bert/heads_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/bert/heads_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/bert/testdata/model_param_shapes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/bert/testdata/model_param_shapes.json -------------------------------------------------------------------------------- /flaxformer/architectures/calm_t5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/architectures/calm_t5/calm_architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/calm_t5/calm_architecture.py -------------------------------------------------------------------------------- /flaxformer/architectures/calm_t5/components.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/calm_t5/components.py -------------------------------------------------------------------------------- /flaxformer/architectures/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/architectures/common/param_remapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/common/param_remapping.py -------------------------------------------------------------------------------- /flaxformer/architectures/common/param_remapping_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/common/param_remapping_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/components.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/components.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/components_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/components_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/dual_encoder_architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/dual_encoder_architecture.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/dual_encoder_architecture_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/dual_encoder_architecture_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/l2_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/l2_norm.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/l2_norm_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/l2_norm_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/poolings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/poolings.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/poolings_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/poolings_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/similarity_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/similarity_functions.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/similarity_functions_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/similarity_functions_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/single_tower_logit_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/single_tower_logit_functions.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/single_tower_logit_functions_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/single_tower_logit_functions_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_batch_dot_product.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_batch_dot_product.json -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_batch_dot_product_attention_pooling.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_batch_dot_product_attention_pooling.json -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_batch_dot_product_attention_pooling_prompt_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_batch_dot_product_attention_pooling_prompt_3.json -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_batch_dot_product_prompt_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_batch_dot_product_prompt_3.json -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_pointwise_ffnn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_pointwise_ffnn.json -------------------------------------------------------------------------------- /flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_single_tower_pointwise_ffnn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/dual_encoder/testdata/dual_encoder_shapes_single_tower_pointwise_ffnn.json -------------------------------------------------------------------------------- /flaxformer/architectures/fido/example_configs/t5_lsa_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/example_configs/t5_lsa_base.gin -------------------------------------------------------------------------------- /flaxformer/architectures/fido/example_configs/t5_lsa_scan_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/example_configs/t5_lsa_scan_base.gin -------------------------------------------------------------------------------- /flaxformer/architectures/fido/fido_architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/fido_architecture.py -------------------------------------------------------------------------------- /flaxformer/architectures/fido/fido_architecture_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/fido_architecture_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/fido/testdata/decoder_shapes_blocklsa1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/testdata/decoder_shapes_blocklsa1.json -------------------------------------------------------------------------------- /flaxformer/architectures/fido/testdata/decoder_shapes_blocklsa2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/testdata/decoder_shapes_blocklsa2.json -------------------------------------------------------------------------------- /flaxformer/architectures/fido/testdata/decoder_shapes_blocklsa2_noscan.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/testdata/decoder_shapes_blocklsa2_noscan.json -------------------------------------------------------------------------------- /flaxformer/architectures/fido/testdata/decoder_shapes_lsa1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/testdata/decoder_shapes_lsa1.json -------------------------------------------------------------------------------- /flaxformer/architectures/fido/testdata/decoder_shapes_lsa2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/fido/testdata/decoder_shapes_lsa2.json -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/h_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/h_attention.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/h_attention_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/h_attention_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/h_transformer_1d_architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/h_transformer_1d_architecture.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/h_transformer_1d_architecture_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/h_transformer_1d_architecture_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/h_transformer_1d_architecture_test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/h_transformer_1d_architecture_test_utils.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/h_transformer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/h_transformer_utils.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/hierarchical_relative_position_bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/hierarchical_relative_position_bias.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/hierarchical_relative_position_bias_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/hierarchical_relative_position_bias_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/partitioning.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/testdata/decoder_only_shapes_per_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/testdata/decoder_only_shapes_per_layer.json -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/testdata/decoder_shapes_per_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/testdata/decoder_shapes_per_layer.json -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/testdata/encoder_decoder_shapes_per_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/testdata/encoder_decoder_shapes_per_layer.json -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/testdata/encoder_shapes_per_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/testdata/encoder_shapes_per_layer.json -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/token_hierarchy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/token_hierarchy.py -------------------------------------------------------------------------------- /flaxformer/architectures/h_transformer/token_hierarchy_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/h_transformer/token_hierarchy_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/__init__.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/long_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/long_attention.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/long_attention_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/long_attention_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/longt5_architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/longt5_architecture.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/longt5_architecture_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/longt5_architecture_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/relative_position_biases_general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/relative_position_biases_general.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/relative_position_biases_general_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/relative_position_biases_general_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/tensor_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/tensor_utils.py -------------------------------------------------------------------------------- /flaxformer/architectures/longt5/tensor_utils_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/longt5/tensor_utils_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/architectures/moe/moe_architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/moe_architecture.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/moe_architecture_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/moe_architecture_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/moe_enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/moe_enums.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/moe_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/moe_layers.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/moe_layers_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/moe_layers_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/moe_parallel_fused_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/moe_parallel_fused_decoder.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/moe_parallel_fused_decoder_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/moe_parallel_fused_decoder_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/routing.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/routing_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/routing_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/scatter_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/scatter_utils.py -------------------------------------------------------------------------------- /flaxformer/architectures/moe/scatter_utils_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/moe/scatter_utils_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/README.md -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/attention.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/attention_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/attention_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/decoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/decoder_layer.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/dense_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/dense_attention.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/parallel_fused_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/parallel_fused_decoder.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/perceiver_ar_architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/perceiver_ar_architecture.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/perceiver_ar_architecture_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/perceiver_ar_architecture_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/perceiver_ar_architecture_test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/perceiver_ar_architecture_test_utils.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/rotary_embedding.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/rotary_embedding_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/rotary_embedding_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/slicing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/slicing.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/t5_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/t5_models.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/t5_models_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/t5_models_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/testdata/decoder_shapes_fused_parallel.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/testdata/decoder_shapes_fused_parallel.json -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/testdata/decoder_shapes_per_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/testdata/decoder_shapes_per_layer.json -------------------------------------------------------------------------------- /flaxformer/architectures/perceiver_ar/testdata/parallel_decoder_shapes_per_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/perceiver_ar/testdata/parallel_decoder_shapes_per_layer.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/architectures/t5/parallel_fused_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/parallel_fused_decoder.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/parallel_fused_decoder_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/parallel_fused_decoder_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/t5_1_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/t5_1_0.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/t5_1_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/t5_1_1.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/t5_architecture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/t5_architecture.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/t5_architecture_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/t5_architecture_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/t5_architecture_test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/t5_architecture_test_utils.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/t5_common_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/t5_common_layers.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/t5_common_layers_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/t5_common_layers_test.py -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/decoder_params_axes_fused_parallel_quantized.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/decoder_params_axes_fused_parallel_quantized.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/decoder_shapes_fixed_abs_posemb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/decoder_shapes_fixed_abs_posemb.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/decoder_shapes_fused_parallel.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/decoder_shapes_fused_parallel.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/decoder_shapes_fused_parallel_quantized.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/decoder_shapes_fused_parallel_quantized.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/decoder_shapes_learned_abs_posemb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/decoder_shapes_learned_abs_posemb.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/decoder_shapes_per_layer_relpos_bias.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/decoder_shapes_per_layer_relpos_bias.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/decoder_shapes_shared_relpos_bias.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/decoder_shapes_shared_relpos_bias.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/encoder_decoder_shared_embedding_shapes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/encoder_decoder_shared_embedding_shapes.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/encoder_scanned_per_layer_relpos_bias.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/encoder_scanned_per_layer_relpos_bias.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/encoder_shapes_per_layer_relpos_bias.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/encoder_shapes_per_layer_relpos_bias.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/encoder_shapes_shared_relpos_bias.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/encoder_shapes_shared_relpos_bias.json -------------------------------------------------------------------------------- /flaxformer/architectures/t5/testdata/parallel_transformer_encoder_shapes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/architectures/t5/testdata/parallel_transformer_encoder_shapes.json -------------------------------------------------------------------------------- /flaxformer/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/components/attention/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/components/attention/attention_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/attention/attention_benchmarks.py -------------------------------------------------------------------------------- /flaxformer/components/attention/dense_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/attention/dense_attention.py -------------------------------------------------------------------------------- /flaxformer/components/attention/dense_attention_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/attention/dense_attention_test.py -------------------------------------------------------------------------------- /flaxformer/components/attention/memory_efficient_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/attention/memory_efficient_attention.py -------------------------------------------------------------------------------- /flaxformer/components/attention/memory_efficient_attention_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/attention/memory_efficient_attention_test.py -------------------------------------------------------------------------------- /flaxformer/components/convolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/convolution.py -------------------------------------------------------------------------------- /flaxformer/components/dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/dense.py -------------------------------------------------------------------------------- /flaxformer/components/dense_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/dense_test.py -------------------------------------------------------------------------------- /flaxformer/components/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/embedding.py -------------------------------------------------------------------------------- /flaxformer/components/embedding_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/embedding_test.py -------------------------------------------------------------------------------- /flaxformer/components/initializers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/initializers.py -------------------------------------------------------------------------------- /flaxformer/components/initializers_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/initializers_test.py -------------------------------------------------------------------------------- /flaxformer/components/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/layer_norm.py -------------------------------------------------------------------------------- /flaxformer/components/layer_norm_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/layer_norm_test.py -------------------------------------------------------------------------------- /flaxformer/components/relative_position_biases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/relative_position_biases.py -------------------------------------------------------------------------------- /flaxformer/components/relative_position_biases_export_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/relative_position_biases_export_test.py -------------------------------------------------------------------------------- /flaxformer/components/relative_position_biases_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/relative_position_biases_test.py -------------------------------------------------------------------------------- /flaxformer/components/rich_attention_position_scores.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/rich_attention_position_scores.py -------------------------------------------------------------------------------- /flaxformer/components/testdata/convolution_itest_encoder_shapes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/testdata/convolution_itest_encoder_shapes.json -------------------------------------------------------------------------------- /flaxformer/components/testdata/relpos_bias_renamed_head_axis.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/testdata/relpos_bias_renamed_head_axis.json -------------------------------------------------------------------------------- /flaxformer/components/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/components/transforms.py -------------------------------------------------------------------------------- /flaxformer/param_conversion_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/param_conversion_util.py -------------------------------------------------------------------------------- /flaxformer/sharding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/sharding.py -------------------------------------------------------------------------------- /flaxformer/sharding_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/sharding_test.py -------------------------------------------------------------------------------- /flaxformer/t5x/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/t5x/checkpointing.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/checkpointing.gin -------------------------------------------------------------------------------- /flaxformer/t5x/checkpointing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/checkpointing_util.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/t5x/configs/calm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/t5x/configs/calm/architectures/calm_metacls_t5_1_1_flaxformer.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/calm/architectures/calm_metacls_t5_1_1_flaxformer.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/calm/architectures/calm_t5_1_1_flaxformer.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/calm/architectures/calm_t5_1_1_flaxformer.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/calm/gin_configs_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/calm/gin_configs_test.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/calm/models/calm_metacls_t5_1_1_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/calm/models/calm_metacls_t5_1_1_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/calm/models/calm_metacls_t5_1_1_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/calm/models/calm_metacls_t5_1_1_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/calm/models/calm_t5_1_1_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/calm/models/calm_t5_1_1_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/calm/models/calm_t5_1_1_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/calm/models/calm_t5_1_1_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/architectures/h_transformer_1d_decoder_only.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/architectures/h_transformer_1d_decoder_only.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/architectures/h_transformer_1d_encoder_decoder.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/architectures/h_transformer_1d_encoder_decoder.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/gin_configs_decoder_only_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/gin_configs_decoder_only_test.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/gin_configs_encoder_decoder_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/gin_configs_encoder_decoder_test.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_decoder_only_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_decoder_only_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_decoder_only_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_decoder_only_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_decoder_only_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_decoder_only_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_encoder_decoder_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_encoder_decoder_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_encoder_decoder_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_encoder_decoder_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_encoder_decoder_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/h_transformer/models/h_transformer_1d_encoder_decoder_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/architectures/longt5_1_1_flaxformer.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/architectures/longt5_1_1_flaxformer.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/architectures/longt5_1_1_transient_global_flaxformer.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/architectures/longt5_1_1_transient_global_flaxformer.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/gin_configs_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/gin_configs_test.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_transient_global_xxl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/longt5_1_1_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/longt5_1_1_xxl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/mlongt5_transient_global_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/mlongt5_transient_global_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/mlongt5_transient_global_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/mlongt5_transient_global_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/longt5/models/mlongt5_transient_global_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/longt5/models/mlongt5_transient_global_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/architectures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/architectures/__init__.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/architectures/moe.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/architectures/moe.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/gin_configs_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/gin_configs_test.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/__init__.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/experts_choose_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/experts_choose_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/experts_choose_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/experts_choose_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/experts_choose_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/experts_choose_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/experts_choose_tiny.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/experts_choose_tiny.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/experts_choose_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/experts_choose_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/experts_choose_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/experts_choose_xxl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/st_moe_32b.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/st_moe_32b.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/st_moe_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/st_moe_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/st_moe_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/st_moe_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/switch_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/switch_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/switch_c.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/switch_c.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/switch_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/switch_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/switch_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/switch_xxl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/tokens_choose_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/tokens_choose_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/tokens_choose_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/tokens_choose_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/tokens_choose_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/tokens_choose_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/tokens_choose_tiny.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/tokens_choose_tiny.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/tokens_choose_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/tokens_choose_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/models/tokens_choose_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/models/tokens_choose_xxl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/moe/runs/scan.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/moe/runs/scan.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/perceiver_ar/architectures/perceiver_ar.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/perceiver_ar/architectures/perceiver_ar.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/perceiver_ar/examples/c4_lm.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/perceiver_ar/examples/c4_lm.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/perceiver_ar/models/perceiver_ar_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/perceiver_ar/models/perceiver_ar_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/perceiver_ar/models/perceiver_ar_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/perceiver_ar/models/perceiver_ar_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/architectures/flash_attention.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/architectures/flash_attention.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/architectures/t5_1_1_flaxformer.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/architectures/t5_1_1_flaxformer.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/architectures/t5_flaxformer.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/architectures/t5_flaxformer.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/gin_configs_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/gin_configs_test.py -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/byt5_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/byt5_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/byt5_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/byt5_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/byt5_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/byt5_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/byt5_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/byt5_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/byt5_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/byt5_xxl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/mt5_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/mt5_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/mt5_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/mt5_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/mt5_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/mt5_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/mt5_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/mt5_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/mt5_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/mt5_xxl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_11B.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_11B.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_1_1_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_1_1_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_1_1_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_1_1_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_1_1_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_1_1_small.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_1_1_xl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_1_1_xl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_1_1_xxl.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_1_1_xxl.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_3B.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_3B.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_base.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_base.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_large.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_large.gin -------------------------------------------------------------------------------- /flaxformer/t5x/configs/t5/models/t5_small.gin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/t5x/configs/t5/models/t5_small.gin -------------------------------------------------------------------------------- /flaxformer/testing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/testing_utils.py -------------------------------------------------------------------------------- /flaxformer/testing_utils_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/testing_utils_test.py -------------------------------------------------------------------------------- /flaxformer/transformer_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/transformer_common.py -------------------------------------------------------------------------------- /flaxformer/transformer_common_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/transformer_common_test.py -------------------------------------------------------------------------------- /flaxformer/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/flaxformer/types.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/flaxformer/HEAD/setup.py --------------------------------------------------------------------------------