├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── assets ├── gpt2_124M_loss.png └── nanogpt.jpg ├── bench.py ├── config ├── eval_gpt2.py ├── eval_gpt2_large.py ├── eval_gpt2_medium.py ├── eval_gpt2_xl.py ├── finetune_shakespeare.py ├── train_gpt2.py └── train_shakespeare_char.py ├── config_transnormer ├── eval_gpt2.py ├── eval_gpt2_large.py ├── eval_gpt2_medium.py ├── eval_gpt2_xl.py ├── finetune_shakespeare.py ├── train_shakespeare_char.py └── train_transnormer_small.py ├── configurator.py ├── data ├── openwebtext │ ├── prepare.py │ └── readme.md ├── shakespeare │ ├── prepare.py │ └── readme.md └── shakespeare_char │ ├── prepare.py │ └── readme.md ├── model.py ├── sample.py ├── sample_transnormer.py ├── scaling_laws.ipynb ├── script.sh ├── train.py ├── train_transnormer.py ├── transformer_sizing.ipynb └── transnormer ├── __init__.py ├── lightning_attention.py ├── model.py ├── srms.py ├── srms_triton.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/README.md -------------------------------------------------------------------------------- /assets/gpt2_124M_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/assets/gpt2_124M_loss.png -------------------------------------------------------------------------------- /assets/nanogpt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/assets/nanogpt.jpg -------------------------------------------------------------------------------- /bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/bench.py -------------------------------------------------------------------------------- /config/eval_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config/eval_gpt2.py -------------------------------------------------------------------------------- /config/eval_gpt2_large.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config/eval_gpt2_large.py -------------------------------------------------------------------------------- /config/eval_gpt2_medium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config/eval_gpt2_medium.py -------------------------------------------------------------------------------- /config/eval_gpt2_xl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config/eval_gpt2_xl.py -------------------------------------------------------------------------------- /config/finetune_shakespeare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config/finetune_shakespeare.py -------------------------------------------------------------------------------- /config/train_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config/train_gpt2.py -------------------------------------------------------------------------------- /config/train_shakespeare_char.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config/train_shakespeare_char.py -------------------------------------------------------------------------------- /config_transnormer/eval_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config_transnormer/eval_gpt2.py -------------------------------------------------------------------------------- /config_transnormer/eval_gpt2_large.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config_transnormer/eval_gpt2_large.py -------------------------------------------------------------------------------- /config_transnormer/eval_gpt2_medium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config_transnormer/eval_gpt2_medium.py -------------------------------------------------------------------------------- /config_transnormer/eval_gpt2_xl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config_transnormer/eval_gpt2_xl.py -------------------------------------------------------------------------------- /config_transnormer/finetune_shakespeare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config_transnormer/finetune_shakespeare.py -------------------------------------------------------------------------------- /config_transnormer/train_shakespeare_char.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config_transnormer/train_shakespeare_char.py -------------------------------------------------------------------------------- /config_transnormer/train_transnormer_small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/config_transnormer/train_transnormer_small.py -------------------------------------------------------------------------------- /configurator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/configurator.py -------------------------------------------------------------------------------- /data/openwebtext/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/data/openwebtext/prepare.py -------------------------------------------------------------------------------- /data/openwebtext/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/data/openwebtext/readme.md -------------------------------------------------------------------------------- /data/shakespeare/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/data/shakespeare/prepare.py -------------------------------------------------------------------------------- /data/shakespeare/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/data/shakespeare/readme.md -------------------------------------------------------------------------------- /data/shakespeare_char/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/data/shakespeare_char/prepare.py -------------------------------------------------------------------------------- /data/shakespeare_char/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/data/shakespeare_char/readme.md -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/model.py -------------------------------------------------------------------------------- /sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/sample.py -------------------------------------------------------------------------------- /sample_transnormer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/sample_transnormer.py -------------------------------------------------------------------------------- /scaling_laws.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/scaling_laws.ipynb -------------------------------------------------------------------------------- /script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/script.sh -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/train.py -------------------------------------------------------------------------------- /train_transnormer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/train_transnormer.py -------------------------------------------------------------------------------- /transformer_sizing.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/transformer_sizing.ipynb -------------------------------------------------------------------------------- /transnormer/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import * -------------------------------------------------------------------------------- /transnormer/lightning_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/transnormer/lightning_attention.py -------------------------------------------------------------------------------- /transnormer/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/transnormer/model.py -------------------------------------------------------------------------------- /transnormer/srms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/transnormer/srms.py -------------------------------------------------------------------------------- /transnormer/srms_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doraemonzzz/nanoTransNormer/HEAD/transnormer/srms_triton.py -------------------------------------------------------------------------------- /transnormer/utils.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------