├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── LICENSE
├── LICENSES
    ├── Apache-2.0.txt
    ├── BSD-3-Clause.txt
    ├── HPND.txt
    └── MIT.txt
├── README.md
├── examples
    ├── __init__.py
    ├── inference_example
    │   ├── __init__.py
    │   └── embedding.py
    ├── mlp_example
    │   ├── README.md
    │   ├── __init__.py
    │   ├── config.py
    │   ├── config.yml
    │   ├── context.py
    │   ├── data.py
    │   ├── model.py
    │   ├── run.py
    │   └── train.py
    ├── transformer_example
    │   ├── README.md
    │   ├── __init__.py
    │   ├── config.yml
    │   └── run.py
    └── tutorials
    │   ├── inference.md
    │   └── multi_node_training.md
├── poetry.lock
├── pyproject.toml
├── src
    └── scaling
    │   ├── __init__.py
    │   ├── core
    │       ├── __init__.py
    │       ├── config
    │       │   ├── __init__.py
    │       │   └── base.py
    │       ├── context
    │       │   ├── __init__.py
    │       │   └── context.py
    │       ├── data
    │       │   ├── __init__.py
    │       │   ├── base_dataset.py
    │       │   ├── base_layer_io.py
    │       │   ├── blended_dataset.py
    │       │   ├── blended_dataset_config.py
    │       │   ├── broadcast_data.py
    │       │   ├── dataloader.py
    │       │   ├── file_dataset.py
    │       │   ├── file_handles.py
    │       │   ├── memory_map.py
    │       │   ├── pb_memory_map.py
    │       │   └── proto
    │       │   │   ├── __init__.py
    │       │   │   ├── text_image_dataset.proto
    │       │   │   └── text_image_dataset_pb2.py
    │       ├── determined
    │       │   ├── __init__.py
    │       │   └── core.py
    │       ├── fp8
    │       │   ├── __init__.py
    │       │   ├── fp8_config.py
    │       │   └── fp8_linear.py
    │       ├── logging
    │       │   ├── __init__.py
    │       │   ├── color_formatter.py
    │       │   ├── logger_config.py
    │       │   ├── logging.py
    │       │   └── tensor_statistics_recorder.py
    │       ├── nn
    │       │   ├── __init__.py
    │       │   ├── activation_function.py
    │       │   ├── attention
    │       │   │   ├── __init__.py
    │       │   │   └── attention.py
    │       │   ├── linear
    │       │   │   ├── __init__.py
    │       │   │   ├── column_parallel_linear.py
    │       │   │   ├── row_parallel_linear.py
    │       │   │   ├── utils.py
    │       │   │   └── vocab_parallel_embedding.py
    │       │   ├── lora.py
    │       │   ├── lora_config.py
    │       │   ├── masked_softmax
    │       │   │   ├── __init__.py
    │       │   │   ├── masked_softmax.py
    │       │   │   └── masked_softmax_config.py
    │       │   ├── mlp.py
    │       │   ├── norm
    │       │   │   ├── __init__.py
    │       │   │   ├── get_norm.py
    │       │   │   ├── layernorm.py
    │       │   │   ├── layernorm_config.py
    │       │   │   └── rms_norm.py
    │       │   ├── parallel_module
    │       │   │   ├── __init__.py
    │       │   │   ├── activation_checkpointing.py
    │       │   │   ├── base_layer.py
    │       │   │   ├── buffers.py
    │       │   │   ├── communicator.py
    │       │   │   ├── inference_module.py
    │       │   │   ├── layer_spec.py
    │       │   │   ├── parallel_module.py
    │       │   │   ├── partitioned_module.py
    │       │   │   ├── pipeline_partitioning.py
    │       │   │   └── tied_layer_index.py
    │       │   ├── parameter_meta.py
    │       │   ├── pipeline_schedule
    │       │   │   ├── __init__.py
    │       │   │   ├── base.py
    │       │   │   ├── inference.py
    │       │   │   ├── instructions.py
    │       │   │   └── train.py
    │       │   ├── residual.py
    │       │   ├── rotary.py
    │       │   ├── rotary_config.py
    │       │   ├── scale.py
    │       │   └── umup.py
    │       ├── optimizer
    │       │   ├── __init__.py
    │       │   ├── allreduce.py
    │       │   ├── base.py
    │       │   ├── learning_rate_scheduler
    │       │   │   ├── __init__.py
    │       │   │   ├── learning_rate_scheduler.py
    │       │   │   └── learning_rate_scheduler_config.py
    │       │   ├── loss_scaler.py
    │       │   ├── loss_scaler_config.py
    │       │   ├── optimizer.py
    │       │   ├── optimizer_config.py
    │       │   ├── parameter_group.py
    │       │   └── parameter_group_config.py
    │       ├── profiler
    │       │   ├── __init__.py
    │       │   ├── profiler.py
    │       │   ├── profiler_config.py
    │       │   └── timer.py
    │       ├── py.typed
    │       ├── runner
    │       │   ├── __init__.py
    │       │   ├── launch.py
    │       │   ├── launch_config.py
    │       │   ├── runner.py
    │       │   └── runner_config.py
    │       ├── topology
    │       │   ├── __init__.py
    │       │   ├── rng_tracker.py
    │       │   ├── topology.py
    │       │   └── topology_config.py
    │       ├── trainer
    │       │   ├── __init__.py
    │       │   ├── trainer.py
    │       │   ├── trainer_config.py
    │       │   └── warnings.txt
    │       └── utils
    │       │   ├── __init__.py
    │       │   ├── determined_utils.py
    │       │   ├── param_merge.py
    │       │   └── port.py
    │   └── transformer
    │       ├── README.md
    │       ├── __init__.py
    │       ├── context
    │           ├── __init__.py
    │           ├── config.py
    │           └── context.py
    │       ├── data
    │           ├── __init__.py
    │           ├── dataset_item.py
    │           ├── embedding_dataset.py
    │           ├── finetuning_chat_dataset.py
    │           ├── finetuning_text_dataset.py
    │           ├── inference_settings.py
    │           ├── legacy_blended_dataset.py
    │           ├── legacy_dataset
    │           │   ├── __init__.py
    │           │   └── indexed_dataset.py
    │           ├── text_dataset.py
    │           ├── text_dataset_batch.py
    │           ├── text_dataset_item.py
    │           ├── text_image_dataset.py
    │           └── utils.py
    │       ├── dataset_loader.py
    │       ├── inference
    │           ├── __init__.py
    │           ├── inference_model.py
    │           └── sample.py
    │       ├── model
    │           ├── __init__.py
    │           ├── image_encoder
    │           │   ├── __init__.py
    │           │   ├── clip.py
    │           │   └── image_encoder.py
    │           ├── layers
    │           │   ├── __init__.py
    │           │   ├── base.py
    │           │   ├── embedding.py
    │           │   ├── embedding_head.py
    │           │   ├── layer.py
    │           │   ├── layernorm.py
    │           │   ├── lm_head.py
    │           │   └── lm_head_tied.py
    │           ├── losses
    │           │   ├── __init__.py
    │           │   ├── contrastive.py
    │           │   └── cross_entropy.py
    │           └── model.py
    │       ├── py.typed
    │       ├── tokenizer
    │           ├── __init__.py
    │           ├── alpha-001-128k.json
    │           ├── llama2-tokenizer.json
    │           └── tokenizer.py
    │       ├── train.py
    │       ├── train_determined.py
    │       └── utils
    │           ├── __init__.py
    │           └── get_tflops.py
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── core
        ├── __init__.py
        ├── conftest.py
        ├── files
        │   └── data.jsonl
        ├── minimal
        │   ├── __init__.py
        │   ├── context
        │   │   ├── __init__.py
        │   │   ├── config.py
        │   │   └── context.py
        │   ├── data
        │   │   ├── __init__.py
        │   │   └── dataset.py
        │   ├── minimal_config.yml
        │   ├── model
        │   │   ├── __init__.py
        │   │   └── model.py
        │   ├── run.py
        │   └── train.py
        ├── test_config
        │   ├── __init__.py
        │   ├── test_config.py
        │   └── test_logger.py
        ├── test_data
        │   ├── __init__.py
        │   ├── test_blended_dataset_weights.py
        │   ├── test_broadcast_data.py
        │   ├── test_dataloader.py
        │   ├── test_file_dataset.py
        │   ├── test_file_handles.py
        │   └── test_memory_map.py
        ├── test_fp8
        │   └── test_fp8_linear.py
        ├── test_nn
        │   ├── __init__.py
        │   ├── profile.json
        │   ├── test_attention_helpers.py
        │   ├── test_flash_attention.py
        │   ├── test_inference_module.py
        │   ├── test_layernorm.py
        │   ├── test_local_attention.py
        │   ├── test_lora.py
        │   ├── test_masked_softmax.py
        │   ├── test_parallel_embedding.py
        │   ├── test_parallel_linear.py
        │   ├── test_parallel_module.py
        │   ├── test_pipe_communication.py
        │   ├── test_pipeline_partitioning.py
        │   ├── test_pipeline_schedule.py
        │   ├── test_rotary.py
        │   └── test_umup.py
        ├── test_optimizer
        │   ├── __init__.py
        │   ├── test_adamw.py
        │   └── test_learning_rate_scheduler.py
        ├── test_runner
        │   ├── __init__.py
        │   ├── runner_script.py
        │   └── test_runner.py
        ├── test_topology
        │   ├── __init__.py
        │   └── test_topology.py
        ├── test_training
        │   ├── __init__.py
        │   ├── test_activation_checkpointing.py
        │   ├── test_parameters_count.py
        │   └── test_training.py
        ├── unit
        │   ├── __init__.py
        │   ├── data
        │   │   ├── __init__.py
        │   │   └── test_pb_memory_map.py
        │   ├── logging
        │   │   ├── __init__.py
        │   │   ├── test_logging_config.py
        │   │   └── test_tensor_statistics_recorder.py
        │   └── nn
        │   │   ├── __init__.py
        │   │   ├── linear
        │   │       ├── __init__.py
        │   │       └── test_utils.py
        │   │   └── parallel_module
        │   │       ├── __init__.py
        │   │       └── test_parallel_module.py
        └── utils.py
    ├── examples
        ├── __init__.py
        └── test_unit.py
    └── transformer
        ├── __init__.py
        ├── conftest.py
        ├── files
            ├── alpha-001-128k-EXTENDED.json
            ├── alpha-001-128k-clean-keep-ids.json
            ├── alpha-001-128k-clean.json
            ├── alpha-001-128k.json
            ├── backward_compatibility_checkpoint
            │   ├── config.yml
            │   ├── ground_truth.pt
            │   └── state_dict.pt
            ├── checkpoint
            │   └── config.yml
            ├── checkpoint_legacy
            │   ├── config.yml
            │   ├── model_state_layer_0_LuminousEmbeddingInput.pt
            │   ├── model_state_layer_1_LuminousLayer.pt
            │   ├── model_state_layer_2_LayerNormWrapper.pt
            │   ├── model_state_layer_3_LuminousLMHead.pt
            │   └── vocab.json
            ├── checkpoint_llama2
            │   └── config.yml
            ├── checkpoint_with_adapter
            │   ├── config.yml
            │   └── config_adapter_separate.yml
            ├── data
            │   ├── small1.jsonl
            │   ├── small2.jsonl
            │   └── small3.jsonl
            ├── dataset
            │   ├── data.bin
            │   ├── data.idx
            │   ├── data.meta.json
            │   ├── embedding_dataset_instructed.jsonl
            │   ├── embedding_dataset_non_instructed.jsonl
            │   ├── finetuning.json
            │   ├── finetuning.jsonl
            │   ├── finetuning_chat.jsonl
            │   ├── finetuning_memory_map
            │   │   ├── dataset.bin
            │   │   ├── dataset.idx
            │   │   └── dataset.meta.json
            │   ├── images
            │   │   ├── happy.jpeg
            │   │   └── sad.jpeg
            │   ├── legacy
            │   │   ├── enron_text_document_100.bin
            │   │   └── enron_text_document_100.idx
            │   ├── text_image_data.bin
            │   ├── text_image_data.idx
            │   ├── text_image_data.jsonl
            │   └── text_image_data.meta.json
            ├── llama-3.1-8B-tokenizer.json
            ├── llama2-tokenizer.json
            ├── old_umup_losses_4090_torch_2_3_1.json
            └── unigram_02pct_cc_v1.0_hf_converted_cleaned.json
        ├── test_backwards_compatibility.py
        ├── test_blended_dataset.py
        ├── test_cross_entropy.py
        ├── test_data.py
        ├── test_data_utils.py
        ├── test_embedding_dataset.py
        ├── test_embedding_training.py
        ├── test_finetuning.py
        ├── test_finetuning_parameter.py
        ├── test_inference.py
        ├── test_inference_embedding.py
        ├── test_load_checkpoint_non_strict.py
        ├── test_load_legacy_checkpoint.py
        ├── test_losses.py
        ├── test_tokenizer
            ├── __init__.py
            └── test_tokenizer.py
        ├── test_training.py
        ├── test_training_finetuning_chat.py
        ├── test_training_flash_attention.py
        ├── test_training_legacy.py
        ├── test_training_local_attention.py
        ├── test_training_sequence_parallel.py
        ├── test_umup_regression.py
        ├── test_utils.py
        ├── unit
            ├── __init__.py
            └── data
            │   ├── __init__.py
            │   └── test_text_image_dataset.py
        └── utils_determined.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/.gitignore


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/.pre-commit-config.yaml


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/CHANGELOG.md


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSE


--------------------------------------------------------------------------------
/LICENSES/Apache-2.0.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSES/Apache-2.0.txt


--------------------------------------------------------------------------------
/LICENSES/BSD-3-Clause.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSES/BSD-3-Clause.txt


--------------------------------------------------------------------------------
/LICENSES/HPND.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSES/HPND.txt


--------------------------------------------------------------------------------
/LICENSES/MIT.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/LICENSES/MIT.txt


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/README.md


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/inference_example/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/inference_example/embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/inference_example/embedding.py


--------------------------------------------------------------------------------
/examples/mlp_example/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/README.md


--------------------------------------------------------------------------------
/examples/mlp_example/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/mlp_example/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/config.py


--------------------------------------------------------------------------------
/examples/mlp_example/config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/config.yml


--------------------------------------------------------------------------------
/examples/mlp_example/context.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/context.py


--------------------------------------------------------------------------------
/examples/mlp_example/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/data.py


--------------------------------------------------------------------------------
/examples/mlp_example/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/model.py


--------------------------------------------------------------------------------
/examples/mlp_example/run.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/run.py


--------------------------------------------------------------------------------
/examples/mlp_example/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/mlp_example/train.py


--------------------------------------------------------------------------------
/examples/transformer_example/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/transformer_example/README.md


--------------------------------------------------------------------------------
/examples/transformer_example/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/transformer_example/config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/transformer_example/config.yml


--------------------------------------------------------------------------------
/examples/transformer_example/run.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/transformer_example/run.py


--------------------------------------------------------------------------------
/examples/tutorials/inference.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/tutorials/inference.md


--------------------------------------------------------------------------------
/examples/tutorials/multi_node_training.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/examples/tutorials/multi_node_training.md


--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/poetry.lock


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/pyproject.toml


--------------------------------------------------------------------------------
/src/scaling/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/scaling/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseConfig
2 | 


--------------------------------------------------------------------------------
/src/scaling/core/config/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/config/base.py


--------------------------------------------------------------------------------
/src/scaling/core/context/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/context/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/context/context.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/context/context.py


--------------------------------------------------------------------------------
/src/scaling/core/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/data/base_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/base_dataset.py


--------------------------------------------------------------------------------
/src/scaling/core/data/base_layer_io.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/base_layer_io.py


--------------------------------------------------------------------------------
/src/scaling/core/data/blended_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/blended_dataset.py


--------------------------------------------------------------------------------
/src/scaling/core/data/blended_dataset_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/blended_dataset_config.py


--------------------------------------------------------------------------------
/src/scaling/core/data/broadcast_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/broadcast_data.py


--------------------------------------------------------------------------------
/src/scaling/core/data/dataloader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/dataloader.py


--------------------------------------------------------------------------------
/src/scaling/core/data/file_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/file_dataset.py


--------------------------------------------------------------------------------
/src/scaling/core/data/file_handles.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/file_handles.py


--------------------------------------------------------------------------------
/src/scaling/core/data/memory_map.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/memory_map.py


--------------------------------------------------------------------------------
/src/scaling/core/data/pb_memory_map.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/pb_memory_map.py


--------------------------------------------------------------------------------
/src/scaling/core/data/proto/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/scaling/core/data/proto/text_image_dataset.proto:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/proto/text_image_dataset.proto


--------------------------------------------------------------------------------
/src/scaling/core/data/proto/text_image_dataset_pb2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/data/proto/text_image_dataset_pb2.py


--------------------------------------------------------------------------------
/src/scaling/core/determined/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import init
2 | 


--------------------------------------------------------------------------------
/src/scaling/core/determined/core.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/determined/core.py


--------------------------------------------------------------------------------
/src/scaling/core/fp8/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/fp8/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/fp8/fp8_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/fp8/fp8_config.py


--------------------------------------------------------------------------------
/src/scaling/core/fp8/fp8_linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/fp8/fp8_linear.py


--------------------------------------------------------------------------------
/src/scaling/core/logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/logging/color_formatter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/color_formatter.py


--------------------------------------------------------------------------------
/src/scaling/core/logging/logger_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/logger_config.py


--------------------------------------------------------------------------------
/src/scaling/core/logging/logging.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/logging.py


--------------------------------------------------------------------------------
/src/scaling/core/logging/tensor_statistics_recorder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/logging/tensor_statistics_recorder.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/activation_function.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/activation_function.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/attention/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/attention/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/attention/attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/attention/attention.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/linear/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/linear/column_parallel_linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/column_parallel_linear.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/linear/row_parallel_linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/row_parallel_linear.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/linear/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/utils.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/linear/vocab_parallel_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/linear/vocab_parallel_embedding.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/lora.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/lora_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/lora_config.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/masked_softmax/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/masked_softmax/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/masked_softmax/masked_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/masked_softmax/masked_softmax.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/masked_softmax/masked_softmax_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/masked_softmax/masked_softmax_config.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/mlp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/mlp.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/norm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/norm/get_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/get_norm.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/norm/layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/layernorm.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/norm/layernorm_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/layernorm_config.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/norm/rms_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/norm/rms_norm.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/activation_checkpointing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/activation_checkpointing.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/base_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/base_layer.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/buffers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/buffers.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/communicator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/communicator.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/inference_module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/inference_module.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/layer_spec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/layer_spec.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/parallel_module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/parallel_module.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/partitioned_module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/partitioned_module.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/pipeline_partitioning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/pipeline_partitioning.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parallel_module/tied_layer_index.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parallel_module/tied_layer_index.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/parameter_meta.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/parameter_meta.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/pipeline_schedule/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/pipeline_schedule/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/base.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/pipeline_schedule/inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/inference.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/pipeline_schedule/instructions.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/instructions.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/pipeline_schedule/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/pipeline_schedule/train.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/residual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/residual.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/rotary.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/rotary.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/rotary_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/rotary_config.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/scale.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/scale.py


--------------------------------------------------------------------------------
/src/scaling/core/nn/umup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/nn/umup.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/allreduce.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/allreduce.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/base.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/learning_rate_scheduler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/learning_rate_scheduler/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/learning_rate_scheduler/learning_rate_scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/learning_rate_scheduler/learning_rate_scheduler.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/learning_rate_scheduler/learning_rate_scheduler_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/learning_rate_scheduler/learning_rate_scheduler_config.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/loss_scaler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/loss_scaler.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/loss_scaler_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/loss_scaler_config.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/optimizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/optimizer.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/optimizer_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/optimizer_config.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/parameter_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/parameter_group.py


--------------------------------------------------------------------------------
/src/scaling/core/optimizer/parameter_group_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/optimizer/parameter_group_config.py


--------------------------------------------------------------------------------
/src/scaling/core/profiler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/profiler/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/profiler/profiler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/profiler/profiler.py


--------------------------------------------------------------------------------
/src/scaling/core/profiler/profiler_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/profiler/profiler_config.py


--------------------------------------------------------------------------------
/src/scaling/core/profiler/timer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/profiler/timer.py


--------------------------------------------------------------------------------
/src/scaling/core/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/scaling/core/runner/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/runner/launch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/launch.py


--------------------------------------------------------------------------------
/src/scaling/core/runner/launch_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/launch_config.py


--------------------------------------------------------------------------------
/src/scaling/core/runner/runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/runner.py


--------------------------------------------------------------------------------
/src/scaling/core/runner/runner_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/runner/runner_config.py


--------------------------------------------------------------------------------
/src/scaling/core/topology/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/topology/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/topology/rng_tracker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/topology/rng_tracker.py


--------------------------------------------------------------------------------
/src/scaling/core/topology/topology.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/topology/topology.py


--------------------------------------------------------------------------------
/src/scaling/core/topology/topology_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/topology/topology_config.py


--------------------------------------------------------------------------------
/src/scaling/core/trainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/trainer/__init__.py


--------------------------------------------------------------------------------
/src/scaling/core/trainer/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/trainer/trainer.py


--------------------------------------------------------------------------------
/src/scaling/core/trainer/trainer_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/trainer/trainer_config.py


--------------------------------------------------------------------------------
/src/scaling/core/trainer/warnings.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/trainer/warnings.txt


--------------------------------------------------------------------------------
/src/scaling/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/scaling/core/utils/determined_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/utils/determined_utils.py


--------------------------------------------------------------------------------
/src/scaling/core/utils/param_merge.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/utils/param_merge.py


--------------------------------------------------------------------------------
/src/scaling/core/utils/port.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/core/utils/port.py


--------------------------------------------------------------------------------
/src/scaling/transformer/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/README.md


--------------------------------------------------------------------------------
/src/scaling/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/context/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/context/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/context/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/context/config.py


--------------------------------------------------------------------------------
/src/scaling/transformer/context/context.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/context/context.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/dataset_item.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/dataset_item.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/embedding_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/embedding_dataset.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/finetuning_chat_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/finetuning_chat_dataset.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/finetuning_text_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/finetuning_text_dataset.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/inference_settings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/inference_settings.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/legacy_blended_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/legacy_blended_dataset.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/legacy_dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/legacy_dataset/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/legacy_dataset/indexed_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/legacy_dataset/indexed_dataset.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/text_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/text_dataset.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/text_dataset_batch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/text_dataset_batch.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/text_dataset_item.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/text_dataset_item.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/text_image_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/text_image_dataset.py


--------------------------------------------------------------------------------
/src/scaling/transformer/data/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/data/utils.py


--------------------------------------------------------------------------------
/src/scaling/transformer/dataset_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/dataset_loader.py


--------------------------------------------------------------------------------
/src/scaling/transformer/inference/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/inference/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/inference/inference_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/inference/inference_model.py


--------------------------------------------------------------------------------
/src/scaling/transformer/inference/sample.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/inference/sample.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/image_encoder/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/image_encoder/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/image_encoder/clip.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/image_encoder/clip.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/image_encoder/image_encoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/image_encoder/image_encoder.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/layers/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/base.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/layers/embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/embedding.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/layers/embedding_head.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/embedding_head.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/layers/layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/layer.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/layers/layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/layernorm.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/layers/lm_head.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/lm_head.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/layers/lm_head_tied.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/layers/lm_head_tied.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/losses/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/losses/contrastive.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/losses/contrastive.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/losses/cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/losses/cross_entropy.py


--------------------------------------------------------------------------------
/src/scaling/transformer/model/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/model/model.py


--------------------------------------------------------------------------------
/src/scaling/transformer/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/scaling/transformer/tokenizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/tokenizer/__init__.py


--------------------------------------------------------------------------------
/src/scaling/transformer/tokenizer/alpha-001-128k.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/tokenizer/alpha-001-128k.json


--------------------------------------------------------------------------------
/src/scaling/transformer/tokenizer/llama2-tokenizer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/tokenizer/llama2-tokenizer.json


--------------------------------------------------------------------------------
/src/scaling/transformer/tokenizer/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/tokenizer/tokenizer.py


--------------------------------------------------------------------------------
/src/scaling/transformer/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/train.py


--------------------------------------------------------------------------------
/src/scaling/transformer/train_determined.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/train_determined.py


--------------------------------------------------------------------------------
/src/scaling/transformer/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/scaling/transformer/utils/get_tflops.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/src/scaling/transformer/utils/get_tflops.py


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/conftest.py


--------------------------------------------------------------------------------
/tests/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/conftest.py


--------------------------------------------------------------------------------
/tests/core/files/data.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/files/data.jsonl


--------------------------------------------------------------------------------
/tests/core/minimal/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/__init__.py


--------------------------------------------------------------------------------
/tests/core/minimal/context/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/context/__init__.py


--------------------------------------------------------------------------------
/tests/core/minimal/context/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/context/config.py


--------------------------------------------------------------------------------
/tests/core/minimal/context/context.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/context/context.py


--------------------------------------------------------------------------------
/tests/core/minimal/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/data/__init__.py


--------------------------------------------------------------------------------
/tests/core/minimal/data/dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/data/dataset.py


--------------------------------------------------------------------------------
/tests/core/minimal/minimal_config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/minimal_config.yml


--------------------------------------------------------------------------------
/tests/core/minimal/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/model/__init__.py


--------------------------------------------------------------------------------
/tests/core/minimal/model/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/model/model.py


--------------------------------------------------------------------------------
/tests/core/minimal/run.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/run.py


--------------------------------------------------------------------------------
/tests/core/minimal/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/minimal/train.py


--------------------------------------------------------------------------------
/tests/core/test_config/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/test_config/test_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_config/test_config.py


--------------------------------------------------------------------------------
/tests/core/test_config/test_logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_config/test_logger.py


--------------------------------------------------------------------------------
/tests/core/test_data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/test_data/test_blended_dataset_weights.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_blended_dataset_weights.py


--------------------------------------------------------------------------------
/tests/core/test_data/test_broadcast_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_broadcast_data.py


--------------------------------------------------------------------------------
/tests/core/test_data/test_dataloader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_dataloader.py


--------------------------------------------------------------------------------
/tests/core/test_data/test_file_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_file_dataset.py


--------------------------------------------------------------------------------
/tests/core/test_data/test_file_handles.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_file_handles.py


--------------------------------------------------------------------------------
/tests/core/test_data/test_memory_map.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_data/test_memory_map.py


--------------------------------------------------------------------------------
/tests/core/test_fp8/test_fp8_linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_fp8/test_fp8_linear.py


--------------------------------------------------------------------------------
/tests/core/test_nn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/test_nn/profile.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/profile.json


--------------------------------------------------------------------------------
/tests/core/test_nn/test_attention_helpers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_attention_helpers.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_flash_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_flash_attention.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_inference_module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_inference_module.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_layernorm.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_local_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_local_attention.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_lora.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_masked_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_masked_softmax.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_parallel_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_parallel_embedding.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_parallel_linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_parallel_linear.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_parallel_module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_parallel_module.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_pipe_communication.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_pipe_communication.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_pipeline_partitioning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_pipeline_partitioning.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_pipeline_schedule.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_pipeline_schedule.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_rotary.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_rotary.py


--------------------------------------------------------------------------------
/tests/core/test_nn/test_umup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_nn/test_umup.py


--------------------------------------------------------------------------------
/tests/core/test_optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/test_optimizer/test_adamw.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_optimizer/test_adamw.py


--------------------------------------------------------------------------------
/tests/core/test_optimizer/test_learning_rate_scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_optimizer/test_learning_rate_scheduler.py


--------------------------------------------------------------------------------
/tests/core/test_runner/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/test_runner/runner_script.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_runner/runner_script.py


--------------------------------------------------------------------------------
/tests/core/test_runner/test_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_runner/test_runner.py


--------------------------------------------------------------------------------
/tests/core/test_topology/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/test_topology/test_topology.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_topology/test_topology.py


--------------------------------------------------------------------------------
/tests/core/test_training/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/test_training/test_activation_checkpointing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_training/test_activation_checkpointing.py


--------------------------------------------------------------------------------
/tests/core/test_training/test_parameters_count.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_training/test_parameters_count.py


--------------------------------------------------------------------------------
/tests/core/test_training/test_training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/test_training/test_training.py


--------------------------------------------------------------------------------
/tests/core/unit/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/unit/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/unit/data/test_pb_memory_map.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/data/test_pb_memory_map.py


--------------------------------------------------------------------------------
/tests/core/unit/logging/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/unit/logging/test_logging_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/logging/test_logging_config.py


--------------------------------------------------------------------------------
/tests/core/unit/logging/test_tensor_statistics_recorder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/logging/test_tensor_statistics_recorder.py


--------------------------------------------------------------------------------
/tests/core/unit/nn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/unit/nn/linear/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/unit/nn/linear/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/nn/linear/test_utils.py


--------------------------------------------------------------------------------
/tests/core/unit/nn/parallel_module/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/unit/nn/parallel_module/test_parallel_module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/unit/nn/parallel_module/test_parallel_module.py


--------------------------------------------------------------------------------
/tests/core/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/core/utils.py


--------------------------------------------------------------------------------
/tests/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/examples/test_unit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/examples/test_unit.py


--------------------------------------------------------------------------------
/tests/transformer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/transformer/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/conftest.py


--------------------------------------------------------------------------------
/tests/transformer/files/alpha-001-128k-EXTENDED.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/alpha-001-128k-EXTENDED.json


--------------------------------------------------------------------------------
/tests/transformer/files/alpha-001-128k-clean-keep-ids.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/alpha-001-128k-clean-keep-ids.json


--------------------------------------------------------------------------------
/tests/transformer/files/alpha-001-128k-clean.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/alpha-001-128k-clean.json


--------------------------------------------------------------------------------
/tests/transformer/files/alpha-001-128k.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/alpha-001-128k.json


--------------------------------------------------------------------------------
/tests/transformer/files/backward_compatibility_checkpoint/config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/backward_compatibility_checkpoint/config.yml


--------------------------------------------------------------------------------
/tests/transformer/files/backward_compatibility_checkpoint/ground_truth.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/backward_compatibility_checkpoint/ground_truth.pt


--------------------------------------------------------------------------------
/tests/transformer/files/backward_compatibility_checkpoint/state_dict.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/backward_compatibility_checkpoint/state_dict.pt


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint/config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint/config.yml


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_legacy/config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/config.yml


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_legacy/model_state_layer_0_LuminousEmbeddingInput.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/model_state_layer_0_LuminousEmbeddingInput.pt


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_legacy/model_state_layer_1_LuminousLayer.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/model_state_layer_1_LuminousLayer.pt


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_legacy/model_state_layer_2_LayerNormWrapper.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/model_state_layer_2_LayerNormWrapper.pt


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_legacy/model_state_layer_3_LuminousLMHead.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/model_state_layer_3_LuminousLMHead.pt


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_legacy/vocab.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_legacy/vocab.json


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_llama2/config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_llama2/config.yml


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_with_adapter/config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/checkpoint_with_adapter/config.yml


--------------------------------------------------------------------------------
/tests/transformer/files/checkpoint_with_adapter/config_adapter_separate.yml:
--------------------------------------------------------------------------------
1 | name: "separate"
2 | 


--------------------------------------------------------------------------------
/tests/transformer/files/data/small1.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/data/small1.jsonl


--------------------------------------------------------------------------------
/tests/transformer/files/data/small2.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/data/small2.jsonl


--------------------------------------------------------------------------------
/tests/transformer/files/data/small3.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/data/small3.jsonl


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/data.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/data.bin


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/data.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/data.idx


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/data.meta.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/data.meta.json


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/embedding_dataset_instructed.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/embedding_dataset_instructed.jsonl


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/embedding_dataset_non_instructed.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/embedding_dataset_non_instructed.jsonl


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/finetuning.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning.json


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/finetuning.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning.jsonl


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/finetuning_chat.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning_chat.jsonl


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/finetuning_memory_map/dataset.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning_memory_map/dataset.bin


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/finetuning_memory_map/dataset.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning_memory_map/dataset.idx


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/finetuning_memory_map/dataset.meta.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/finetuning_memory_map/dataset.meta.json


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/images/happy.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/images/happy.jpeg


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/images/sad.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/images/sad.jpeg


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/legacy/enron_text_document_100.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/legacy/enron_text_document_100.bin


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/legacy/enron_text_document_100.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/legacy/enron_text_document_100.idx


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/text_image_data.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/text_image_data.bin


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/text_image_data.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/text_image_data.idx


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/text_image_data.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/text_image_data.jsonl


--------------------------------------------------------------------------------
/tests/transformer/files/dataset/text_image_data.meta.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/dataset/text_image_data.meta.json


--------------------------------------------------------------------------------
/tests/transformer/files/llama-3.1-8B-tokenizer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/llama-3.1-8B-tokenizer.json


--------------------------------------------------------------------------------
/tests/transformer/files/llama2-tokenizer.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/llama2-tokenizer.json


--------------------------------------------------------------------------------
/tests/transformer/files/old_umup_losses_4090_torch_2_3_1.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/old_umup_losses_4090_torch_2_3_1.json


--------------------------------------------------------------------------------
/tests/transformer/files/unigram_02pct_cc_v1.0_hf_converted_cleaned.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/files/unigram_02pct_cc_v1.0_hf_converted_cleaned.json


--------------------------------------------------------------------------------
/tests/transformer/test_backwards_compatibility.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_backwards_compatibility.py


--------------------------------------------------------------------------------
/tests/transformer/test_blended_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_blended_dataset.py


--------------------------------------------------------------------------------
/tests/transformer/test_cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_cross_entropy.py


--------------------------------------------------------------------------------
/tests/transformer/test_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_data.py


--------------------------------------------------------------------------------
/tests/transformer/test_data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_data_utils.py


--------------------------------------------------------------------------------
/tests/transformer/test_embedding_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_embedding_dataset.py


--------------------------------------------------------------------------------
/tests/transformer/test_embedding_training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_embedding_training.py


--------------------------------------------------------------------------------
/tests/transformer/test_finetuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_finetuning.py


--------------------------------------------------------------------------------
/tests/transformer/test_finetuning_parameter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_finetuning_parameter.py


--------------------------------------------------------------------------------
/tests/transformer/test_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_inference.py


--------------------------------------------------------------------------------
/tests/transformer/test_inference_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_inference_embedding.py


--------------------------------------------------------------------------------
/tests/transformer/test_load_checkpoint_non_strict.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_load_checkpoint_non_strict.py


--------------------------------------------------------------------------------
/tests/transformer/test_load_legacy_checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_load_legacy_checkpoint.py


--------------------------------------------------------------------------------
/tests/transformer/test_losses.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_losses.py


--------------------------------------------------------------------------------
/tests/transformer/test_tokenizer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/transformer/test_tokenizer/test_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_tokenizer/test_tokenizer.py


--------------------------------------------------------------------------------
/tests/transformer/test_training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training.py


--------------------------------------------------------------------------------
/tests/transformer/test_training_finetuning_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_finetuning_chat.py


--------------------------------------------------------------------------------
/tests/transformer/test_training_flash_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_flash_attention.py


--------------------------------------------------------------------------------
/tests/transformer/test_training_legacy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_legacy.py


--------------------------------------------------------------------------------
/tests/transformer/test_training_local_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_local_attention.py


--------------------------------------------------------------------------------
/tests/transformer/test_training_sequence_parallel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_training_sequence_parallel.py


--------------------------------------------------------------------------------
/tests/transformer/test_umup_regression.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_umup_regression.py


--------------------------------------------------------------------------------
/tests/transformer/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/test_utils.py


--------------------------------------------------------------------------------
/tests/transformer/unit/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/transformer/unit/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/transformer/unit/data/test_text_image_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/unit/data/test_text_image_dataset.py


--------------------------------------------------------------------------------
/tests/transformer/utils_determined.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aleph-Alpha-Research/scaling/HEAD/tests/transformer/utils_determined.py


--------------------------------------------------------------------------------