├── .gitattributes ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── __init__.py ├── allamo ├── __init__.py ├── checkpoint │ ├── __init__.py │ └── checkpoint_manager.py ├── configuration.py ├── dataset │ ├── __init__.py │ ├── data_loader.py │ └── dataset.py ├── logging.py ├── metrics │ ├── __init__.py │ ├── metrics_logger.py │ └── neptune_hardware_monitoring.py ├── model │ ├── __init__.py │ ├── activations.py │ ├── attentions.py │ ├── layernorms.py │ ├── lra.py │ ├── model.py │ └── rotary_embeddings.py ├── optimizer │ ├── __init__.py │ └── optimizer_utils.py ├── parallelisms │ ├── __init__.py │ ├── fsdp2_utils.py │ └── fsdp_utils.py ├── torch_utils.py ├── train_utils.py ├── trainer │ ├── __init__.py │ ├── base.py │ ├── dpo_fsdp_trainer.py │ ├── fsdp_trainer.py │ └── simple_trainer.py └── training_context.py ├── assets ├── allamo_gradio.jpg └── allamo_logo.jpg ├── dpo_fsdp_train.py ├── fsdp_train.py ├── inference ├── __init__.py ├── sample.py ├── sample_api.py └── sample_ui.py ├── scripts ├── __init__.py ├── adjust_tokenizer.py ├── convert_config_checkpoint_pt_to_json.py ├── depth_up_scaling.py ├── duplicate_edge_layers.py ├── estimate_model_memory_usage.py ├── export_to_hf.py ├── generate_dpo_reference_logprobs.py ├── import_hf_model_weights.py ├── lra │ ├── __init__.py │ ├── add_lra_to_model.py │ ├── draw_lra_plots.py │ └── modeling_lra.py ├── merge │ ├── __init__.py │ ├── merge_hf_model.py │ └── merge_pt_model.py ├── prepare_datasets.py ├── prepare_dpo_dataset.py ├── prepare_sft_dataset.py ├── prune_model_layers.py ├── swap_tokenizer_embeddings.py └── update_state_dict_prefixes.py ├── setup.py ├── train.py └── train_configs └── train_1B.json /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/.gitignore -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/CITATION.cff -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /allamo/configuration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/configuration.py -------------------------------------------------------------------------------- /allamo/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/dataset/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/dataset/data_loader.py -------------------------------------------------------------------------------- /allamo/dataset/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/dataset/dataset.py -------------------------------------------------------------------------------- /allamo/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/logging.py -------------------------------------------------------------------------------- /allamo/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/metrics/metrics_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/metrics/metrics_logger.py -------------------------------------------------------------------------------- /allamo/metrics/neptune_hardware_monitoring.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/metrics/neptune_hardware_monitoring.py -------------------------------------------------------------------------------- /allamo/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/model/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/model/activations.py -------------------------------------------------------------------------------- /allamo/model/attentions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/model/attentions.py -------------------------------------------------------------------------------- /allamo/model/layernorms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/model/layernorms.py -------------------------------------------------------------------------------- /allamo/model/lra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/model/lra.py -------------------------------------------------------------------------------- /allamo/model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/model/model.py -------------------------------------------------------------------------------- /allamo/model/rotary_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/model/rotary_embeddings.py -------------------------------------------------------------------------------- /allamo/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/optimizer/optimizer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/optimizer/optimizer_utils.py -------------------------------------------------------------------------------- /allamo/parallelisms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/parallelisms/fsdp2_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/parallelisms/fsdp2_utils.py -------------------------------------------------------------------------------- /allamo/parallelisms/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/parallelisms/fsdp_utils.py -------------------------------------------------------------------------------- /allamo/torch_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/torch_utils.py -------------------------------------------------------------------------------- /allamo/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/train_utils.py -------------------------------------------------------------------------------- /allamo/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allamo/trainer/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/trainer/base.py -------------------------------------------------------------------------------- /allamo/trainer/dpo_fsdp_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/trainer/dpo_fsdp_trainer.py -------------------------------------------------------------------------------- /allamo/trainer/fsdp_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/trainer/fsdp_trainer.py -------------------------------------------------------------------------------- /allamo/trainer/simple_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/trainer/simple_trainer.py -------------------------------------------------------------------------------- /allamo/training_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/allamo/training_context.py -------------------------------------------------------------------------------- /assets/allamo_gradio.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/assets/allamo_gradio.jpg -------------------------------------------------------------------------------- /assets/allamo_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/assets/allamo_logo.jpg -------------------------------------------------------------------------------- /dpo_fsdp_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/dpo_fsdp_train.py -------------------------------------------------------------------------------- /fsdp_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/fsdp_train.py -------------------------------------------------------------------------------- /inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /inference/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/inference/sample.py -------------------------------------------------------------------------------- /inference/sample_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/inference/sample_api.py -------------------------------------------------------------------------------- /inference/sample_ui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/inference/sample_ui.py -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/adjust_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/adjust_tokenizer.py -------------------------------------------------------------------------------- /scripts/convert_config_checkpoint_pt_to_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/convert_config_checkpoint_pt_to_json.py -------------------------------------------------------------------------------- /scripts/depth_up_scaling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/depth_up_scaling.py -------------------------------------------------------------------------------- /scripts/duplicate_edge_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/duplicate_edge_layers.py -------------------------------------------------------------------------------- /scripts/estimate_model_memory_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/estimate_model_memory_usage.py -------------------------------------------------------------------------------- /scripts/export_to_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/export_to_hf.py -------------------------------------------------------------------------------- /scripts/generate_dpo_reference_logprobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/generate_dpo_reference_logprobs.py -------------------------------------------------------------------------------- /scripts/import_hf_model_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/import_hf_model_weights.py -------------------------------------------------------------------------------- /scripts/lra/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/lra/add_lra_to_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/lra/add_lra_to_model.py -------------------------------------------------------------------------------- /scripts/lra/draw_lra_plots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/lra/draw_lra_plots.py -------------------------------------------------------------------------------- /scripts/lra/modeling_lra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/lra/modeling_lra.py -------------------------------------------------------------------------------- /scripts/merge/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/merge/merge_hf_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/merge/merge_hf_model.py -------------------------------------------------------------------------------- /scripts/merge/merge_pt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/merge/merge_pt_model.py -------------------------------------------------------------------------------- /scripts/prepare_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/prepare_datasets.py -------------------------------------------------------------------------------- /scripts/prepare_dpo_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/prepare_dpo_dataset.py -------------------------------------------------------------------------------- /scripts/prepare_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/prepare_sft_dataset.py -------------------------------------------------------------------------------- /scripts/prune_model_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/prune_model_layers.py -------------------------------------------------------------------------------- /scripts/swap_tokenizer_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/swap_tokenizer_embeddings.py -------------------------------------------------------------------------------- /scripts/update_state_dict_prefixes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/scripts/update_state_dict_prefixes.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/setup.py -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/train.py -------------------------------------------------------------------------------- /train_configs/train_1B.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisociepa/allamo/HEAD/train_configs/train_1B.json --------------------------------------------------------------------------------