├── .gitignore ├── FractalFormer_DownstreamResidual.ipynb ├── FractalFormer_InbuiltTokenizer.ipynb ├── FractalFormer_ModelMerging.ipynb ├── FractalFormer_UpstreamResidual.ipynb ├── FractalFormer_base.ipynb ├── FractalFormer_base.py ├── README.md ├── configs.py ├── images ├── Evec.jpeg ├── MLP.jpeg ├── MQA.jpeg └── Wo.jpeg ├── input.txt ├── models ├── FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr0.0001-decay0.01-batch12-train_iter10000--2024-03-06|09-12-42.pth ├── FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr0.0003-decay0.01-batch12-train_iter5000--2024-03-06|07-14-57.pth └── FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr3e-05-decay0.01-batch12-train_iter15000--2024-03-06|15-27-50.pth ├── tokenizer.py ├── tokenizers └── tokenizer.model └── weird_embeddings.ipynb /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/.gitignore -------------------------------------------------------------------------------- /FractalFormer_DownstreamResidual.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/FractalFormer_DownstreamResidual.ipynb -------------------------------------------------------------------------------- /FractalFormer_InbuiltTokenizer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/FractalFormer_InbuiltTokenizer.ipynb -------------------------------------------------------------------------------- /FractalFormer_ModelMerging.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/FractalFormer_ModelMerging.ipynb -------------------------------------------------------------------------------- /FractalFormer_UpstreamResidual.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/FractalFormer_UpstreamResidual.ipynb -------------------------------------------------------------------------------- /FractalFormer_base.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/FractalFormer_base.ipynb -------------------------------------------------------------------------------- /FractalFormer_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/FractalFormer_base.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/README.md -------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/configs.py -------------------------------------------------------------------------------- /images/Evec.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/images/Evec.jpeg -------------------------------------------------------------------------------- /images/MLP.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/images/MLP.jpeg -------------------------------------------------------------------------------- /images/MQA.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/images/MQA.jpeg -------------------------------------------------------------------------------- /images/Wo.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/images/Wo.jpeg -------------------------------------------------------------------------------- /input.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/input.txt -------------------------------------------------------------------------------- /models/FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr0.0001-decay0.01-batch12-train_iter10000--2024-03-06|09-12-42.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/models/FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr0.0001-decay0.01-batch12-train_iter10000--2024-03-06|09-12-42.pth -------------------------------------------------------------------------------- /models/FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr0.0003-decay0.01-batch12-train_iter5000--2024-03-06|07-14-57.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/models/FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr0.0003-decay0.01-batch12-train_iter5000--2024-03-06|07-14-57.pth -------------------------------------------------------------------------------- /models/FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr3e-05-decay0.01-batch12-train_iter15000--2024-03-06|15-27-50.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/models/FractalFormer_base-v128-max_t256-layers4-heads4-kv_heads1-hidden128-intermediate512-head_dim32-theta100.0-levels3-split2-lr3e-05-decay0.01-batch12-train_iter15000--2024-03-06|15-27-50.pth -------------------------------------------------------------------------------- /tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/tokenizer.py -------------------------------------------------------------------------------- /tokenizers/tokenizer.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/tokenizers/tokenizer.model -------------------------------------------------------------------------------- /weird_embeddings.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evintunador/FractalFormer/HEAD/weird_embeddings.ipynb --------------------------------------------------------------------------------