├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── LICENSE ├── LICENSES ├── Apache-2.0.txt ├── BSD-3-Clause.txt ├── HPND.txt └── MIT.txt ├── README.md ├── examples ├── __init__.py ├── inference_example │ ├── __init__.py │ └── embedding.py ├── mlp_example │ ├── README.md │ ├── __init__.py │ ├── config.py │ ├── config.yml │ ├── context.py │ ├── data.py │ ├── model.py │ ├── run.py │ └── train.py ├── transformer_example │ ├── README.md │ ├── __init__.py │ ├── config.yml │ └── run.py └── tutorials │ ├── inference.md │ └── multi_node_training.md ├── poetry.lock ├── pyproject.toml ├── src └── scaling │ ├── __init__.py │ ├── core │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ └── base.py │ ├── context │ │ ├── __init__.py │ │ └── context.py │ ├── data │ │ ├── __init__.py │ │ ├── base_dataset.py │ │ ├── base_layer_io.py │ │ ├── blended_dataset.py │ │ ├── blended_dataset_config.py │ │ ├── broadcast_data.py │ │ ├── dataloader.py │ │ ├── file_dataset.py │ │ ├── file_handles.py │ │ ├── memory_map.py │ │ ├── pb_memory_map.py │ │ └── proto │ │ │ ├── __init__.py │ │ │ ├── text_image_dataset.proto │ │ │ └── text_image_dataset_pb2.py │ ├── determined │ │ ├── __init__.py │ │ └── core.py │ ├── fp8 │ │ ├── __init__.py │ │ ├── fp8_config.py │ │ └── fp8_linear.py │ ├── logging │ │ ├── __init__.py │ │ ├── color_formatter.py │ │ ├── logger_config.py │ │ ├── logging.py │ │ └── tensor_statistics_recorder.py │ ├── nn │ │ ├── __init__.py │ │ ├── activation_function.py │ │ ├── attention │ │ │ ├── __init__.py │ │ │ └── attention.py │ │ ├── linear │ │ │ ├── __init__.py │ │ │ ├── column_parallel_linear.py │ │ │ ├── row_parallel_linear.py │ │ │ ├── utils.py │ │ │ └── vocab_parallel_embedding.py │ │ ├── lora.py │ │ ├── lora_config.py │ │ ├── masked_softmax │ │ │ ├── __init__.py │ │ │ ├── masked_softmax.py │ │ │ └── masked_softmax_config.py │ │ ├── mlp.py │ │ ├── norm │ │ │ ├── __init__.py │ │ │ ├── get_norm.py │ │ │ ├── layernorm.py │ │ │ ├── layernorm_config.py │ │ │ └── rms_norm.py │ │ ├── parallel_module │ │ │ ├── __init__.py │ │ │ ├── activation_checkpointing.py │ │ │ ├── base_layer.py │ │ │ ├── buffers.py │ │ │ ├── communicator.py │ │ │ ├── inference_module.py │ │ │ ├── layer_spec.py │ │ │ ├── parallel_module.py │ │ │ ├── partitioned_module.py │ │ │ ├── pipeline_partitioning.py │ │ │ └── tied_layer_index.py │ │ ├── parameter_meta.py │ │ ├── pipeline_schedule │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── inference.py │ │ │ ├── instructions.py │ │ │ └── train.py │ │ ├── residual.py │ │ ├── rotary.py │ │ ├── rotary_config.py │ │ ├── scale.py │ │ └── umup.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── allreduce.py │ │ ├── base.py │ │ ├── learning_rate_scheduler │ │ │ ├── __init__.py │ │ │ ├── learning_rate_scheduler.py │ │ │ └── learning_rate_scheduler_config.py │ │ ├── loss_scaler.py │ │ ├── loss_scaler_config.py │ │ ├── optimizer.py │ │ ├── optimizer_config.py │ │ ├── parameter_group.py │ │ └── parameter_group_config.py │ ├── profiler │ │ ├── __init__.py │ │ ├── profiler.py │ │ ├── profiler_config.py │ │ └── timer.py │ ├── py.typed │ ├── runner │ │ ├── __init__.py │ │ ├── launch.py │ │ ├── launch_config.py │ │ ├── runner.py │ │ └── runner_config.py │ ├── topology │ │ ├── __init__.py │ │ ├── rng_tracker.py │ │ ├── topology.py │ │ └── topology_config.py │ ├── trainer │ │ ├── __init__.py │ │ ├── trainer.py │ │ ├── trainer_config.py │ │ └── warnings.txt │ └── utils │ │ ├── __init__.py │ │ ├── determined_utils.py │ │ ├── param_merge.py │ │ └── port.py │ └── transformer │ ├── README.md │ ├── __init__.py │ ├── context │ ├── __init__.py │ ├── config.py │ └── context.py │ ├── data │ ├── __init__.py │ ├── dataset_item.py │ ├── embedding_dataset.py │ ├── finetuning_chat_dataset.py │ ├── finetuning_text_dataset.py │ ├── inference_settings.py │ ├── legacy_blended_dataset.py │ ├── legacy_dataset │ │ ├── __init__.py │ │ └── indexed_dataset.py │ ├── text_dataset.py │ ├── text_dataset_batch.py │ ├── text_dataset_item.py │ ├── text_image_dataset.py │ └── utils.py │ ├── dataset_loader.py │ ├── inference │ ├── __init__.py │ ├── inference_model.py │ └── sample.py │ ├── model │ ├── __init__.py │ ├── image_encoder │ │ ├── __init__.py │ │ ├── clip.py │ │ └── image_encoder.py │ ├── layers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── embedding.py │ │ ├── embedding_head.py │ │ ├── layer.py │ │ ├── layernorm.py │ │ ├── lm_head.py │ │ └── lm_head_tied.py │ ├── losses │ │ ├── __init__.py │ │ ├── contrastive.py │ │ └── cross_entropy.py │ └── model.py │ ├── py.typed │ ├── tokenizer │ ├── __init__.py │ ├── alpha-001-128k.json │ ├── llama2-tokenizer.json │ └── tokenizer.py │ ├── train.py │ ├── train_determined.py │ └── utils │ ├── __init__.py │ └── get_tflops.py └── tests ├── __init__.py ├── conftest.py ├── core ├── __init__.py ├── conftest.py ├── files │ └── data.jsonl ├── minimal │ ├── __init__.py │ ├── context │ │ ├── __init__.py │ │ ├── config.py │ │ └── context.py │ ├── data │ │ ├── __init__.py │ │ └── dataset.py │ ├── minimal_config.yml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── run.py │ └── train.py ├── test_config │ ├── __init__.py │ ├── test_config.py │ └── test_logger.py ├── test_data │ ├── __init__.py │ ├── test_blended_dataset_weights.py │ ├── test_broadcast_data.py │ ├── test_dataloader.py │ ├── test_file_dataset.py │ ├── test_file_handles.py │ └── test_memory_map.py ├── test_fp8 │ └── test_fp8_linear.py ├── test_nn │ ├── __init__.py │ ├── profile.json │ ├── test_attention_helpers.py │ ├── test_flash_attention.py │ ├── test_inference_module.py │ ├── test_layernorm.py │ ├── test_local_attention.py │ ├── test_lora.py │ ├── test_masked_softmax.py │ ├── test_parallel_embedding.py │ ├── test_parallel_linear.py │ ├── test_parallel_module.py │ ├── test_pipe_communication.py │ ├── test_pipeline_partitioning.py │ ├── test_pipeline_schedule.py │ ├── test_rotary.py │ └── test_umup.py ├── test_optimizer │ ├── __init__.py │ ├── test_adamw.py │ └── test_learning_rate_scheduler.py ├── test_runner │ ├── __init__.py │ ├── runner_script.py │ └── test_runner.py ├── test_topology │ ├── __init__.py │ └── test_topology.py ├── test_training │ ├── __init__.py │ ├── test_activation_checkpointing.py │ ├── test_parameters_count.py │ └── test_training.py ├── unit │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ └── test_pb_memory_map.py │ ├── logging │ │ ├── __init__.py │ │ ├── test_logging_config.py │ │ └── test_tensor_statistics_recorder.py │ └── nn │ │ ├── __init__.py │ │ ├── linear │ │ ├── __init__.py │ │ └── test_utils.py │ │ └── parallel_module │ │ ├── __init__.py │ │ └── test_parallel_module.py └── utils.py ├── examples ├── __init__.py └── test_unit.py └── transformer ├── __init__.py ├── conftest.py ├── files ├── alpha-001-128k-EXTENDED.json ├── alpha-001-128k-clean-keep-ids.json ├── alpha-001-128k-clean.json ├── alpha-001-128k.json ├── backward_compatibility_checkpoint │ ├── config.yml │ ├── ground_truth.pt │ └── state_dict.pt ├── checkpoint │ └── config.yml ├── checkpoint_legacy │ ├── config.yml │ ├── model_state_layer_0_LuminousEmbeddingInput.pt │ ├── model_state_layer_1_LuminousLayer.pt │ ├── model_state_layer_2_LayerNormWrapper.pt │ ├── model_state_layer_3_LuminousLMHead.pt │ └── vocab.json ├── checkpoint_llama2 │ └── config.yml ├── checkpoint_with_adapter │ ├── config.yml │ └── config_adapter_separate.yml ├── data │ ├── small1.jsonl │ ├── small2.jsonl │ └── small3.jsonl ├── dataset │ ├── data.bin │ ├── data.idx │ ├── data.meta.json │ ├── embedding_dataset_instructed.jsonl │ ├── embedding_dataset_non_instructed.jsonl │ ├── finetuning.json │ ├── finetuning.jsonl │ ├── finetuning_chat.jsonl │ ├── finetuning_memory_map │ │ ├── dataset.bin │ │ ├── dataset.idx │ │ └── dataset.meta.json │ ├── images │ │ ├── happy.jpeg │ │ └── sad.jpeg │ ├── legacy │ │ ├── enron_text_document_100.bin │ │ └── enron_text_document_100.idx │ ├── text_image_data.bin │ ├── text_image_data.idx │ ├── text_image_data.jsonl │ └── text_image_data.meta.json ├── llama-3.1-8B-tokenizer.json ├── llama2-tokenizer.json ├── old_umup_losses_4090_torch_2_3_1.json └── unigram_02pct_cc_v1.0_hf_converted_cleaned.json ├── test_backwards_compatibility.py ├── test_blended_dataset.py ├── test_cross_entropy.py ├── test_data.py ├── test_data_utils.py ├── test_embedding_dataset.py ├── test_embedding_training.py ├── test_finetuning.py ├── test_finetuning_parameter.py ├── test_inference.py ├── test_inference_embedding.py ├── test_load_checkpoint_non_strict.py ├── test_load_legacy_checkpoint.py ├── test_losses.py ├── test_tokenizer ├── __init__.py └── test_tokenizer.py ├── test_training.py ├── test_training_finetuning_chat.py ├── test_training_flash_attention.py ├── test_training_legacy.py ├── test_training_local_attention.py ├── test_training_sequence_parallel.py ├── test_umup_regression.py ├── test_utils.py ├── unit ├── __init__.py └── data │ ├── __init__.py │ └── test_text_image_dataset.py └── utils_determined.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSE -------------------------------------------------------------------------------- /LICENSES/Apache-2.0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSES/Apache-2.0.txt -------------------------------------------------------------------------------- /LICENSES/BSD-3-Clause.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSES/BSD-3-Clause.txt -------------------------------------------------------------------------------- /LICENSES/HPND.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSES/HPND.txt -------------------------------------------------------------------------------- /LICENSES/MIT.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSES/MIT.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/README.md -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/inference_example/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/inference_example/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/inference_example/embedding.py -------------------------------------------------------------------------------- /examples/mlp_example/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/README.md -------------------------------------------------------------------------------- /examples/mlp_example/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mlp_example/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/config.py -------------------------------------------------------------------------------- /examples/mlp_example/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/config.yml -------------------------------------------------------------------------------- /examples/mlp_example/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/context.py -------------------------------------------------------------------------------- /examples/mlp_example/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/data.py -------------------------------------------------------------------------------- /examples/mlp_example/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/model.py -------------------------------------------------------------------------------- /examples/mlp_example/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/run.py -------------------------------------------------------------------------------- /examples/mlp_example/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/train.py -------------------------------------------------------------------------------- /examples/transformer_example/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/transformer_example/README.md -------------------------------------------------------------------------------- /examples/transformer_example/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/transformer_example/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/transformer_example/config.yml -------------------------------------------------------------------------------- /examples/transformer_example/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/transformer_example/run.py -------------------------------------------------------------------------------- /examples/tutorials/inference.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/tutorials/inference.md -------------------------------------------------------------------------------- /examples/tutorials/multi_node_training.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/tutorials/multi_node_training.md -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/pyproject.toml -------------------------------------------------------------------------------- /src/scaling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/scaling/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseConfig 2 | -------------------------------------------------------------------------------- /src/scaling/core/config/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/config/base.py -------------------------------------------------------------------------------- /src/scaling/core/context/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/context/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/context/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/context/context.py -------------------------------------------------------------------------------- /src/scaling/core/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/data/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/base_dataset.py -------------------------------------------------------------------------------- /src/scaling/core/data/base_layer_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/base_layer_io.py -------------------------------------------------------------------------------- /src/scaling/core/data/blended_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/blended_dataset.py -------------------------------------------------------------------------------- /src/scaling/core/data/blended_dataset_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/blended_dataset_config.py -------------------------------------------------------------------------------- /src/scaling/core/data/broadcast_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/broadcast_data.py -------------------------------------------------------------------------------- /src/scaling/core/data/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/dataloader.py -------------------------------------------------------------------------------- /src/scaling/core/data/file_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/file_dataset.py -------------------------------------------------------------------------------- /src/scaling/core/data/file_handles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/file_handles.py -------------------------------------------------------------------------------- /src/scaling/core/data/memory_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/memory_map.py -------------------------------------------------------------------------------- /src/scaling/core/data/pb_memory_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/pb_memory_map.py -------------------------------------------------------------------------------- /src/scaling/core/data/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/scaling/core/data/proto/text_image_dataset.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/proto/text_image_dataset.proto -------------------------------------------------------------------------------- /src/scaling/core/data/proto/text_image_dataset_pb2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/proto/text_image_dataset_pb2.py -------------------------------------------------------------------------------- /src/scaling/core/determined/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import init 2 | -------------------------------------------------------------------------------- /src/scaling/core/determined/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/determined/core.py -------------------------------------------------------------------------------- /src/scaling/core/fp8/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/fp8/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/fp8/fp8_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/fp8/fp8_config.py -------------------------------------------------------------------------------- /src/scaling/core/fp8/fp8_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/fp8/fp8_linear.py -------------------------------------------------------------------------------- /src/scaling/core/logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/logging/color_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/color_formatter.py -------------------------------------------------------------------------------- /src/scaling/core/logging/logger_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/logger_config.py -------------------------------------------------------------------------------- /src/scaling/core/logging/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/logging.py -------------------------------------------------------------------------------- /src/scaling/core/logging/tensor_statistics_recorder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/tensor_statistics_recorder.py -------------------------------------------------------------------------------- /src/scaling/core/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/nn/activation_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/activation_function.py -------------------------------------------------------------------------------- /src/scaling/core/nn/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/attention/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/nn/attention/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/attention/attention.py -------------------------------------------------------------------------------- /src/scaling/core/nn/linear/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/nn/linear/column_parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/column_parallel_linear.py -------------------------------------------------------------------------------- /src/scaling/core/nn/linear/row_parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/row_parallel_linear.py -------------------------------------------------------------------------------- /src/scaling/core/nn/linear/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/utils.py -------------------------------------------------------------------------------- /src/scaling/core/nn/linear/vocab_parallel_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/vocab_parallel_embedding.py -------------------------------------------------------------------------------- /src/scaling/core/nn/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/lora.py -------------------------------------------------------------------------------- /src/scaling/core/nn/lora_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/lora_config.py -------------------------------------------------------------------------------- /src/scaling/core/nn/masked_softmax/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/masked_softmax/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/nn/masked_softmax/masked_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/masked_softmax/masked_softmax.py -------------------------------------------------------------------------------- /src/scaling/core/nn/masked_softmax/masked_softmax_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/masked_softmax/masked_softmax_config.py -------------------------------------------------------------------------------- /src/scaling/core/nn/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/mlp.py -------------------------------------------------------------------------------- /src/scaling/core/nn/norm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/nn/norm/get_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/get_norm.py -------------------------------------------------------------------------------- /src/scaling/core/nn/norm/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/layernorm.py -------------------------------------------------------------------------------- /src/scaling/core/nn/norm/layernorm_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/layernorm_config.py -------------------------------------------------------------------------------- /src/scaling/core/nn/norm/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/rms_norm.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/activation_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/activation_checkpointing.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/base_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/base_layer.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/buffers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/buffers.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/communicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/communicator.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/inference_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/inference_module.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/layer_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/layer_spec.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/parallel_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/parallel_module.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/partitioned_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/partitioned_module.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/pipeline_partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/pipeline_partitioning.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parallel_module/tied_layer_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/tied_layer_index.py -------------------------------------------------------------------------------- /src/scaling/core/nn/parameter_meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parameter_meta.py -------------------------------------------------------------------------------- /src/scaling/core/nn/pipeline_schedule/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/nn/pipeline_schedule/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/base.py -------------------------------------------------------------------------------- /src/scaling/core/nn/pipeline_schedule/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/inference.py -------------------------------------------------------------------------------- /src/scaling/core/nn/pipeline_schedule/instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/instructions.py -------------------------------------------------------------------------------- /src/scaling/core/nn/pipeline_schedule/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/train.py -------------------------------------------------------------------------------- /src/scaling/core/nn/residual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/residual.py -------------------------------------------------------------------------------- /src/scaling/core/nn/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/rotary.py -------------------------------------------------------------------------------- /src/scaling/core/nn/rotary_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/rotary_config.py -------------------------------------------------------------------------------- /src/scaling/core/nn/scale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/scale.py -------------------------------------------------------------------------------- /src/scaling/core/nn/umup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/umup.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/allreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/allreduce.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/base.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/learning_rate_scheduler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/learning_rate_scheduler/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/learning_rate_scheduler/learning_rate_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/learning_rate_scheduler/learning_rate_scheduler.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/learning_rate_scheduler/learning_rate_scheduler_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/learning_rate_scheduler/learning_rate_scheduler_config.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/loss_scaler.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/loss_scaler_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/loss_scaler_config.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/optimizer.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/optimizer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/optimizer_config.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/parameter_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/parameter_group.py -------------------------------------------------------------------------------- /src/scaling/core/optimizer/parameter_group_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/parameter_group_config.py -------------------------------------------------------------------------------- /src/scaling/core/profiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/profiler/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/profiler/profiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/profiler/profiler.py -------------------------------------------------------------------------------- /src/scaling/core/profiler/profiler_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/profiler/profiler_config.py -------------------------------------------------------------------------------- /src/scaling/core/profiler/timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/profiler/timer.py -------------------------------------------------------------------------------- /src/scaling/core/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/scaling/core/runner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/runner/launch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/launch.py -------------------------------------------------------------------------------- /src/scaling/core/runner/launch_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/launch_config.py -------------------------------------------------------------------------------- /src/scaling/core/runner/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/runner.py -------------------------------------------------------------------------------- /src/scaling/core/runner/runner_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/runner_config.py -------------------------------------------------------------------------------- /src/scaling/core/topology/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/topology/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/topology/rng_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/topology/rng_tracker.py -------------------------------------------------------------------------------- /src/scaling/core/topology/topology.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/topology/topology.py -------------------------------------------------------------------------------- /src/scaling/core/topology/topology_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/topology/topology_config.py -------------------------------------------------------------------------------- /src/scaling/core/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/trainer/__init__.py -------------------------------------------------------------------------------- /src/scaling/core/trainer/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/trainer/trainer.py -------------------------------------------------------------------------------- /src/scaling/core/trainer/trainer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/trainer/trainer_config.py -------------------------------------------------------------------------------- /src/scaling/core/trainer/warnings.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/trainer/warnings.txt -------------------------------------------------------------------------------- /src/scaling/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/scaling/core/utils/determined_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/utils/determined_utils.py -------------------------------------------------------------------------------- /src/scaling/core/utils/param_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/utils/param_merge.py -------------------------------------------------------------------------------- /src/scaling/core/utils/port.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/utils/port.py -------------------------------------------------------------------------------- /src/scaling/transformer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/README.md -------------------------------------------------------------------------------- /src/scaling/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/context/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/context/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/context/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/context/config.py -------------------------------------------------------------------------------- /src/scaling/transformer/context/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/context/context.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/dataset_item.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/dataset_item.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/embedding_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/embedding_dataset.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/finetuning_chat_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/finetuning_chat_dataset.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/finetuning_text_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/finetuning_text_dataset.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/inference_settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/inference_settings.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/legacy_blended_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/legacy_blended_dataset.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/legacy_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/legacy_dataset/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/legacy_dataset/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/legacy_dataset/indexed_dataset.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/text_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/text_dataset.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/text_dataset_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/text_dataset_batch.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/text_dataset_item.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/text_dataset_item.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/text_image_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/text_image_dataset.py -------------------------------------------------------------------------------- /src/scaling/transformer/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/utils.py -------------------------------------------------------------------------------- /src/scaling/transformer/dataset_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/dataset_loader.py -------------------------------------------------------------------------------- /src/scaling/transformer/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/inference/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/inference/inference_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/inference/inference_model.py -------------------------------------------------------------------------------- /src/scaling/transformer/inference/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/inference/sample.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/image_encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/image_encoder/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/image_encoder/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/image_encoder/clip.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/image_encoder/image_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/image_encoder/image_encoder.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/layers/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/base.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/layers/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/embedding.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/layers/embedding_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/embedding_head.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/layers/layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/layer.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/layernorm.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/layers/lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/lm_head.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/layers/lm_head_tied.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/lm_head_tied.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/losses/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/losses/contrastive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/losses/contrastive.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/losses/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/losses/cross_entropy.py -------------------------------------------------------------------------------- /src/scaling/transformer/model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/model.py -------------------------------------------------------------------------------- /src/scaling/transformer/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/scaling/transformer/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/tokenizer/__init__.py -------------------------------------------------------------------------------- /src/scaling/transformer/tokenizer/alpha-001-128k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/tokenizer/alpha-001-128k.json -------------------------------------------------------------------------------- /src/scaling/transformer/tokenizer/llama2-tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/tokenizer/llama2-tokenizer.json -------------------------------------------------------------------------------- /src/scaling/transformer/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /src/scaling/transformer/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/train.py -------------------------------------------------------------------------------- /src/scaling/transformer/train_determined.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/train_determined.py -------------------------------------------------------------------------------- /src/scaling/transformer/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/scaling/transformer/utils/get_tflops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/utils/get_tflops.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/conftest.py -------------------------------------------------------------------------------- /tests/core/files/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/files/data.jsonl -------------------------------------------------------------------------------- /tests/core/minimal/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/__init__.py -------------------------------------------------------------------------------- /tests/core/minimal/context/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/context/__init__.py -------------------------------------------------------------------------------- /tests/core/minimal/context/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/context/config.py -------------------------------------------------------------------------------- /tests/core/minimal/context/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/context/context.py -------------------------------------------------------------------------------- /tests/core/minimal/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/data/__init__.py -------------------------------------------------------------------------------- /tests/core/minimal/data/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/data/dataset.py -------------------------------------------------------------------------------- /tests/core/minimal/minimal_config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/minimal_config.yml -------------------------------------------------------------------------------- /tests/core/minimal/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/model/__init__.py -------------------------------------------------------------------------------- /tests/core/minimal/model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/model/model.py -------------------------------------------------------------------------------- /tests/core/minimal/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/run.py -------------------------------------------------------------------------------- /tests/core/minimal/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/train.py -------------------------------------------------------------------------------- /tests/core/test_config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/test_config/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_config/test_config.py -------------------------------------------------------------------------------- /tests/core/test_config/test_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_config/test_logger.py -------------------------------------------------------------------------------- /tests/core/test_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/test_data/test_blended_dataset_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_blended_dataset_weights.py -------------------------------------------------------------------------------- /tests/core/test_data/test_broadcast_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_broadcast_data.py -------------------------------------------------------------------------------- /tests/core/test_data/test_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_dataloader.py -------------------------------------------------------------------------------- /tests/core/test_data/test_file_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_file_dataset.py -------------------------------------------------------------------------------- /tests/core/test_data/test_file_handles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_file_handles.py -------------------------------------------------------------------------------- /tests/core/test_data/test_memory_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_memory_map.py -------------------------------------------------------------------------------- /tests/core/test_fp8/test_fp8_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_fp8/test_fp8_linear.py -------------------------------------------------------------------------------- /tests/core/test_nn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/test_nn/profile.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/profile.json -------------------------------------------------------------------------------- /tests/core/test_nn/test_attention_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_attention_helpers.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_flash_attention.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_inference_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_inference_module.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_layernorm.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_local_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_local_attention.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_lora.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_masked_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_masked_softmax.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_parallel_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_parallel_embedding.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_parallel_linear.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_parallel_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_parallel_module.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_pipe_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_pipe_communication.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_pipeline_partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_pipeline_partitioning.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_pipeline_schedule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_pipeline_schedule.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_rotary.py -------------------------------------------------------------------------------- /tests/core/test_nn/test_umup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_umup.py -------------------------------------------------------------------------------- /tests/core/test_optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/test_optimizer/test_adamw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_optimizer/test_adamw.py -------------------------------------------------------------------------------- /tests/core/test_optimizer/test_learning_rate_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_optimizer/test_learning_rate_scheduler.py -------------------------------------------------------------------------------- /tests/core/test_runner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/test_runner/runner_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_runner/runner_script.py -------------------------------------------------------------------------------- /tests/core/test_runner/test_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_runner/test_runner.py -------------------------------------------------------------------------------- /tests/core/test_topology/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/test_topology/test_topology.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_topology/test_topology.py -------------------------------------------------------------------------------- /tests/core/test_training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/test_training/test_activation_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_training/test_activation_checkpointing.py -------------------------------------------------------------------------------- /tests/core/test_training/test_parameters_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_training/test_parameters_count.py -------------------------------------------------------------------------------- /tests/core/test_training/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_training/test_training.py -------------------------------------------------------------------------------- /tests/core/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/unit/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/unit/data/test_pb_memory_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/data/test_pb_memory_map.py -------------------------------------------------------------------------------- /tests/core/unit/logging/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/unit/logging/test_logging_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/logging/test_logging_config.py -------------------------------------------------------------------------------- /tests/core/unit/logging/test_tensor_statistics_recorder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/logging/test_tensor_statistics_recorder.py -------------------------------------------------------------------------------- /tests/core/unit/nn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/unit/nn/linear/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/unit/nn/linear/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/nn/linear/test_utils.py -------------------------------------------------------------------------------- /tests/core/unit/nn/parallel_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/unit/nn/parallel_module/test_parallel_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/nn/parallel_module/test_parallel_module.py -------------------------------------------------------------------------------- /tests/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/utils.py -------------------------------------------------------------------------------- /tests/examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/examples/test_unit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/examples/test_unit.py -------------------------------------------------------------------------------- /tests/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/transformer/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/conftest.py -------------------------------------------------------------------------------- /tests/transformer/files/alpha-001-128k-EXTENDED.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/alpha-001-128k-EXTENDED.json -------------------------------------------------------------------------------- /tests/transformer/files/alpha-001-128k-clean-keep-ids.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/alpha-001-128k-clean-keep-ids.json -------------------------------------------------------------------------------- /tests/transformer/files/alpha-001-128k-clean.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/alpha-001-128k-clean.json -------------------------------------------------------------------------------- /tests/transformer/files/alpha-001-128k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/alpha-001-128k.json -------------------------------------------------------------------------------- /tests/transformer/files/backward_compatibility_checkpoint/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/backward_compatibility_checkpoint/config.yml -------------------------------------------------------------------------------- /tests/transformer/files/backward_compatibility_checkpoint/ground_truth.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/backward_compatibility_checkpoint/ground_truth.pt -------------------------------------------------------------------------------- /tests/transformer/files/backward_compatibility_checkpoint/state_dict.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/backward_compatibility_checkpoint/state_dict.pt -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint/config.yml -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_legacy/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/config.yml -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_legacy/model_state_layer_0_LuminousEmbeddingInput.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/model_state_layer_0_LuminousEmbeddingInput.pt -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_legacy/model_state_layer_1_LuminousLayer.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/model_state_layer_1_LuminousLayer.pt -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_legacy/model_state_layer_2_LayerNormWrapper.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/model_state_layer_2_LayerNormWrapper.pt -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_legacy/model_state_layer_3_LuminousLMHead.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/model_state_layer_3_LuminousLMHead.pt -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_legacy/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/vocab.json -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_llama2/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_llama2/config.yml -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_with_adapter/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_with_adapter/config.yml -------------------------------------------------------------------------------- /tests/transformer/files/checkpoint_with_adapter/config_adapter_separate.yml: -------------------------------------------------------------------------------- 1 | name: "separate" 2 | -------------------------------------------------------------------------------- /tests/transformer/files/data/small1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/data/small1.jsonl -------------------------------------------------------------------------------- /tests/transformer/files/data/small2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/data/small2.jsonl -------------------------------------------------------------------------------- /tests/transformer/files/data/small3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/data/small3.jsonl -------------------------------------------------------------------------------- /tests/transformer/files/dataset/data.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/data.bin -------------------------------------------------------------------------------- /tests/transformer/files/dataset/data.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/data.idx -------------------------------------------------------------------------------- /tests/transformer/files/dataset/data.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/data.meta.json -------------------------------------------------------------------------------- /tests/transformer/files/dataset/embedding_dataset_instructed.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/embedding_dataset_instructed.jsonl -------------------------------------------------------------------------------- /tests/transformer/files/dataset/embedding_dataset_non_instructed.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/embedding_dataset_non_instructed.jsonl -------------------------------------------------------------------------------- /tests/transformer/files/dataset/finetuning.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning.json -------------------------------------------------------------------------------- /tests/transformer/files/dataset/finetuning.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning.jsonl -------------------------------------------------------------------------------- /tests/transformer/files/dataset/finetuning_chat.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning_chat.jsonl -------------------------------------------------------------------------------- /tests/transformer/files/dataset/finetuning_memory_map/dataset.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning_memory_map/dataset.bin -------------------------------------------------------------------------------- /tests/transformer/files/dataset/finetuning_memory_map/dataset.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning_memory_map/dataset.idx -------------------------------------------------------------------------------- /tests/transformer/files/dataset/finetuning_memory_map/dataset.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning_memory_map/dataset.meta.json -------------------------------------------------------------------------------- /tests/transformer/files/dataset/images/happy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/images/happy.jpeg -------------------------------------------------------------------------------- /tests/transformer/files/dataset/images/sad.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/images/sad.jpeg -------------------------------------------------------------------------------- /tests/transformer/files/dataset/legacy/enron_text_document_100.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/legacy/enron_text_document_100.bin -------------------------------------------------------------------------------- /tests/transformer/files/dataset/legacy/enron_text_document_100.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/legacy/enron_text_document_100.idx -------------------------------------------------------------------------------- /tests/transformer/files/dataset/text_image_data.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/text_image_data.bin -------------------------------------------------------------------------------- /tests/transformer/files/dataset/text_image_data.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/text_image_data.idx -------------------------------------------------------------------------------- /tests/transformer/files/dataset/text_image_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/text_image_data.jsonl -------------------------------------------------------------------------------- /tests/transformer/files/dataset/text_image_data.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/text_image_data.meta.json -------------------------------------------------------------------------------- /tests/transformer/files/llama-3.1-8B-tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/llama-3.1-8B-tokenizer.json -------------------------------------------------------------------------------- /tests/transformer/files/llama2-tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/llama2-tokenizer.json -------------------------------------------------------------------------------- /tests/transformer/files/old_umup_losses_4090_torch_2_3_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/old_umup_losses_4090_torch_2_3_1.json -------------------------------------------------------------------------------- /tests/transformer/files/unigram_02pct_cc_v1.0_hf_converted_cleaned.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/unigram_02pct_cc_v1.0_hf_converted_cleaned.json -------------------------------------------------------------------------------- /tests/transformer/test_backwards_compatibility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_backwards_compatibility.py -------------------------------------------------------------------------------- /tests/transformer/test_blended_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_blended_dataset.py -------------------------------------------------------------------------------- /tests/transformer/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_cross_entropy.py -------------------------------------------------------------------------------- /tests/transformer/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_data.py -------------------------------------------------------------------------------- /tests/transformer/test_data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_data_utils.py -------------------------------------------------------------------------------- /tests/transformer/test_embedding_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_embedding_dataset.py -------------------------------------------------------------------------------- /tests/transformer/test_embedding_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_embedding_training.py -------------------------------------------------------------------------------- /tests/transformer/test_finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_finetuning.py -------------------------------------------------------------------------------- /tests/transformer/test_finetuning_parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_finetuning_parameter.py -------------------------------------------------------------------------------- /tests/transformer/test_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_inference.py -------------------------------------------------------------------------------- /tests/transformer/test_inference_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_inference_embedding.py -------------------------------------------------------------------------------- /tests/transformer/test_load_checkpoint_non_strict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_load_checkpoint_non_strict.py -------------------------------------------------------------------------------- /tests/transformer/test_load_legacy_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_load_legacy_checkpoint.py -------------------------------------------------------------------------------- /tests/transformer/test_losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_losses.py -------------------------------------------------------------------------------- /tests/transformer/test_tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/transformer/test_tokenizer/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_tokenizer/test_tokenizer.py -------------------------------------------------------------------------------- /tests/transformer/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training.py -------------------------------------------------------------------------------- /tests/transformer/test_training_finetuning_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_finetuning_chat.py -------------------------------------------------------------------------------- /tests/transformer/test_training_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_flash_attention.py -------------------------------------------------------------------------------- /tests/transformer/test_training_legacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_legacy.py -------------------------------------------------------------------------------- /tests/transformer/test_training_local_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_local_attention.py -------------------------------------------------------------------------------- /tests/transformer/test_training_sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_sequence_parallel.py -------------------------------------------------------------------------------- /tests/transformer/test_umup_regression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_umup_regression.py -------------------------------------------------------------------------------- /tests/transformer/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_utils.py -------------------------------------------------------------------------------- /tests/transformer/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/transformer/unit/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/transformer/unit/data/test_text_image_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/unit/data/test_text_image_dataset.py -------------------------------------------------------------------------------- /tests/transformer/utils_determined.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/utils_determined.py --------------------------------------------------------------------------------