├── Dockerfile ├── LICENSE ├── README.md ├── cache.py ├── lit_gpt ├── __init__.py ├── config.py ├── fused_cross_entropy.py ├── fused_rotary_embedding.py ├── gated_delta_net.py ├── gated_delta_rule_ops │ ├── __init__.py │ ├── chunk.py │ ├── fla_version │ │ ├── __init__.py │ │ ├── chunk_fla.py │ │ └── wy_fast_fla.py │ └── wy_fast.py ├── model.py ├── packed_dataset.py ├── rmsnorm.py ├── rotary.py ├── speed_monitor.py ├── tokenizer.py └── utils.py ├── pretrain.py └── scripts ├── run_interactive.sh └── tsz512x4k_15B_gated_deltanet_h1_0.4B.sh /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/README.md -------------------------------------------------------------------------------- /cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/cache.py -------------------------------------------------------------------------------- /lit_gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/__init__.py -------------------------------------------------------------------------------- /lit_gpt/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/config.py -------------------------------------------------------------------------------- /lit_gpt/fused_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/fused_cross_entropy.py -------------------------------------------------------------------------------- /lit_gpt/fused_rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/fused_rotary_embedding.py -------------------------------------------------------------------------------- /lit_gpt/gated_delta_net.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/gated_delta_net.py -------------------------------------------------------------------------------- /lit_gpt/gated_delta_rule_ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/gated_delta_rule_ops/__init__.py -------------------------------------------------------------------------------- /lit_gpt/gated_delta_rule_ops/chunk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/gated_delta_rule_ops/chunk.py -------------------------------------------------------------------------------- /lit_gpt/gated_delta_rule_ops/fla_version/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/gated_delta_rule_ops/fla_version/__init__.py -------------------------------------------------------------------------------- /lit_gpt/gated_delta_rule_ops/fla_version/chunk_fla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/gated_delta_rule_ops/fla_version/chunk_fla.py -------------------------------------------------------------------------------- /lit_gpt/gated_delta_rule_ops/fla_version/wy_fast_fla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/gated_delta_rule_ops/fla_version/wy_fast_fla.py -------------------------------------------------------------------------------- /lit_gpt/gated_delta_rule_ops/wy_fast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/gated_delta_rule_ops/wy_fast.py -------------------------------------------------------------------------------- /lit_gpt/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/model.py -------------------------------------------------------------------------------- /lit_gpt/packed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/packed_dataset.py -------------------------------------------------------------------------------- /lit_gpt/rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/rmsnorm.py -------------------------------------------------------------------------------- /lit_gpt/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/rotary.py -------------------------------------------------------------------------------- /lit_gpt/speed_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/speed_monitor.py -------------------------------------------------------------------------------- /lit_gpt/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/tokenizer.py -------------------------------------------------------------------------------- /lit_gpt/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/lit_gpt/utils.py -------------------------------------------------------------------------------- /pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/pretrain.py -------------------------------------------------------------------------------- /scripts/run_interactive.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/scripts/run_interactive.sh -------------------------------------------------------------------------------- /scripts/tsz512x4k_15B_gated_deltanet_h1_0.4B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/GatedDeltaNet/HEAD/scripts/tsz512x4k_15B_gated_deltanet_h1_0.4B.sh --------------------------------------------------------------------------------