├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── assets ├── gpt2_124M_loss.png ├── nanogpt.jpg └── xlstm_loss.png ├── bench.py ├── config ├── eval_gpt2.py ├── eval_gpt2_large.py ├── eval_gpt2_medium.py ├── eval_gpt2_xl.py ├── finetune_shakespeare.py ├── train_gpt2.py ├── train_shakespeare_char.py └── train_shakespeare_char_xlstm.py ├── configurator.py ├── data ├── openwebtext │ ├── prepare.py │ └── readme.md ├── shakespeare │ ├── prepare.py │ └── readme.md └── shakespeare_char │ ├── prepare.py │ └── readme.md ├── model.py ├── run_sweep.py ├── sample.py ├── scaling_laws.ipynb ├── train.py └── transformer_sizing.ipynb /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/README.md -------------------------------------------------------------------------------- /assets/gpt2_124M_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/assets/gpt2_124M_loss.png -------------------------------------------------------------------------------- /assets/nanogpt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/assets/nanogpt.jpg -------------------------------------------------------------------------------- /assets/xlstm_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/assets/xlstm_loss.png -------------------------------------------------------------------------------- /bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/bench.py -------------------------------------------------------------------------------- /config/eval_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/config/eval_gpt2.py -------------------------------------------------------------------------------- /config/eval_gpt2_large.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/config/eval_gpt2_large.py -------------------------------------------------------------------------------- /config/eval_gpt2_medium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/config/eval_gpt2_medium.py -------------------------------------------------------------------------------- /config/eval_gpt2_xl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/config/eval_gpt2_xl.py -------------------------------------------------------------------------------- /config/finetune_shakespeare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/config/finetune_shakespeare.py -------------------------------------------------------------------------------- /config/train_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/config/train_gpt2.py -------------------------------------------------------------------------------- /config/train_shakespeare_char.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/config/train_shakespeare_char.py -------------------------------------------------------------------------------- /config/train_shakespeare_char_xlstm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/config/train_shakespeare_char_xlstm.py -------------------------------------------------------------------------------- /configurator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/configurator.py -------------------------------------------------------------------------------- /data/openwebtext/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/data/openwebtext/prepare.py -------------------------------------------------------------------------------- /data/openwebtext/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/data/openwebtext/readme.md -------------------------------------------------------------------------------- /data/shakespeare/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/data/shakespeare/prepare.py -------------------------------------------------------------------------------- /data/shakespeare/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/data/shakespeare/readme.md -------------------------------------------------------------------------------- /data/shakespeare_char/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/data/shakespeare_char/prepare.py -------------------------------------------------------------------------------- /data/shakespeare_char/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/data/shakespeare_char/readme.md -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/model.py -------------------------------------------------------------------------------- /run_sweep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/run_sweep.py -------------------------------------------------------------------------------- /sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/sample.py -------------------------------------------------------------------------------- /scaling_laws.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/scaling_laws.ipynb -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/train.py -------------------------------------------------------------------------------- /transformer_sizing.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jadechip/nanoXLSTM/HEAD/transformer_sizing.ipynb --------------------------------------------------------------------------------