├── .gitignore ├── LICENSE ├── README.md ├── assets ├── architecture.png ├── evals.png ├── goldfinch_mascot_256.jpg ├── kvcache_comparison.png └── wandb1B5_composite.png ├── cmix ├── cmix_llama.py ├── cmix_mamba.py ├── cmix_rwkv_base.py ├── cmix_x052.py └── cmix_x060.py ├── configs.py ├── configs ├── L12D768ctx1024minipile.yaml ├── L24D2048ctx2048minipile.yaml ├── L24D2048ctx4096minipile.yaml ├── L6D768minipile.yaml ├── finch.yaml ├── finchc2.yaml ├── finchc2_ablation_no_one_minus_w.yaml ├── finchc2_ablation_no_v2.yaml ├── goldfinch.yaml ├── goldfinch_ablation_gold_invratio_2.yaml ├── goldfinch_ablation_gold_invratio_6.yaml ├── goldfinch_ablation_gptalpha.yaml ├── goldfinch_ablation_rope10k.yaml ├── goldfinch_upgrade.yaml ├── gptalpha.yaml └── llama.yaml ├── cuda ├── rwkv5_cuda.py ├── rwkv6_cuda.py ├── wkv5_cuda.cu ├── wkv5_op.cpp ├── wkv6_cuda.cu └── wkv6_op.cpp ├── dragon_test.py ├── get_minipile.py ├── run_lm_eval.py ├── src ├── CoreDependencies.py ├── __init__.py ├── binidx.py ├── dataflow │ ├── 20B_tokenizer.json │ ├── binidx.py │ ├── rwkv_vocab_v20230424.txt │ └── trie_tokenizer.py ├── dataset.py ├── lit.py ├── logger.py ├── metrics │ ├── __init__.py │ └── interface.py ├── model.py ├── norm.py ├── pipeline.py ├── rotary.py ├── state.py ├── trainer.py └── utils.py ├── tmix ├── kv_cache.py ├── tmix_gold.py ├── tmix_gptalpha.py ├── tmix_llama.py ├── tmix_mamba.py ├── tmix_rwkv_base.py ├── tmix_x052.py ├── tmix_x060.py ├── tmix_x060b.py └── tmix_x060c2.py └── train.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/README.md -------------------------------------------------------------------------------- /assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/assets/architecture.png -------------------------------------------------------------------------------- /assets/evals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/assets/evals.png -------------------------------------------------------------------------------- /assets/goldfinch_mascot_256.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/assets/goldfinch_mascot_256.jpg -------------------------------------------------------------------------------- /assets/kvcache_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/assets/kvcache_comparison.png -------------------------------------------------------------------------------- /assets/wandb1B5_composite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/assets/wandb1B5_composite.png -------------------------------------------------------------------------------- /cmix/cmix_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cmix/cmix_llama.py -------------------------------------------------------------------------------- /cmix/cmix_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cmix/cmix_mamba.py -------------------------------------------------------------------------------- /cmix/cmix_rwkv_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cmix/cmix_rwkv_base.py -------------------------------------------------------------------------------- /cmix/cmix_x052.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cmix/cmix_x052.py -------------------------------------------------------------------------------- /cmix/cmix_x060.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cmix/cmix_x060.py -------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs.py -------------------------------------------------------------------------------- /configs/L12D768ctx1024minipile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/L12D768ctx1024minipile.yaml -------------------------------------------------------------------------------- /configs/L24D2048ctx2048minipile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/L24D2048ctx2048minipile.yaml -------------------------------------------------------------------------------- /configs/L24D2048ctx4096minipile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/L24D2048ctx4096minipile.yaml -------------------------------------------------------------------------------- /configs/L6D768minipile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/L6D768minipile.yaml -------------------------------------------------------------------------------- /configs/finch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/finch.yaml -------------------------------------------------------------------------------- /configs/finchc2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/finchc2.yaml -------------------------------------------------------------------------------- /configs/finchc2_ablation_no_one_minus_w.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/finchc2_ablation_no_one_minus_w.yaml -------------------------------------------------------------------------------- /configs/finchc2_ablation_no_v2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/finchc2_ablation_no_v2.yaml -------------------------------------------------------------------------------- /configs/goldfinch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/goldfinch.yaml -------------------------------------------------------------------------------- /configs/goldfinch_ablation_gold_invratio_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/goldfinch_ablation_gold_invratio_2.yaml -------------------------------------------------------------------------------- /configs/goldfinch_ablation_gold_invratio_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/goldfinch_ablation_gold_invratio_6.yaml -------------------------------------------------------------------------------- /configs/goldfinch_ablation_gptalpha.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/goldfinch_ablation_gptalpha.yaml -------------------------------------------------------------------------------- /configs/goldfinch_ablation_rope10k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/goldfinch_ablation_rope10k.yaml -------------------------------------------------------------------------------- /configs/goldfinch_upgrade.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/goldfinch_upgrade.yaml -------------------------------------------------------------------------------- /configs/gptalpha.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/gptalpha.yaml -------------------------------------------------------------------------------- /configs/llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/configs/llama.yaml -------------------------------------------------------------------------------- /cuda/rwkv5_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cuda/rwkv5_cuda.py -------------------------------------------------------------------------------- /cuda/rwkv6_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cuda/rwkv6_cuda.py -------------------------------------------------------------------------------- /cuda/wkv5_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cuda/wkv5_cuda.cu -------------------------------------------------------------------------------- /cuda/wkv5_op.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cuda/wkv5_op.cpp -------------------------------------------------------------------------------- /cuda/wkv6_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cuda/wkv6_cuda.cu -------------------------------------------------------------------------------- /cuda/wkv6_op.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/cuda/wkv6_op.cpp -------------------------------------------------------------------------------- /dragon_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/dragon_test.py -------------------------------------------------------------------------------- /get_minipile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/get_minipile.py -------------------------------------------------------------------------------- /run_lm_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/run_lm_eval.py -------------------------------------------------------------------------------- /src/CoreDependencies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/CoreDependencies.py -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/binidx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/binidx.py -------------------------------------------------------------------------------- /src/dataflow/20B_tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/dataflow/20B_tokenizer.json -------------------------------------------------------------------------------- /src/dataflow/binidx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/dataflow/binidx.py -------------------------------------------------------------------------------- /src/dataflow/rwkv_vocab_v20230424.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/dataflow/rwkv_vocab_v20230424.txt -------------------------------------------------------------------------------- /src/dataflow/trie_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/dataflow/trie_tokenizer.py -------------------------------------------------------------------------------- /src/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/dataset.py -------------------------------------------------------------------------------- /src/lit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/lit.py -------------------------------------------------------------------------------- /src/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/logger.py -------------------------------------------------------------------------------- /src/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/metrics/__init__.py -------------------------------------------------------------------------------- /src/metrics/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/metrics/interface.py -------------------------------------------------------------------------------- /src/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/model.py -------------------------------------------------------------------------------- /src/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/norm.py -------------------------------------------------------------------------------- /src/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/pipeline.py -------------------------------------------------------------------------------- /src/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/rotary.py -------------------------------------------------------------------------------- /src/state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/state.py -------------------------------------------------------------------------------- /src/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/trainer.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/src/utils.py -------------------------------------------------------------------------------- /tmix/kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/kv_cache.py -------------------------------------------------------------------------------- /tmix/tmix_gold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_gold.py -------------------------------------------------------------------------------- /tmix/tmix_gptalpha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_gptalpha.py -------------------------------------------------------------------------------- /tmix/tmix_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_llama.py -------------------------------------------------------------------------------- /tmix/tmix_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_mamba.py -------------------------------------------------------------------------------- /tmix/tmix_rwkv_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_rwkv_base.py -------------------------------------------------------------------------------- /tmix/tmix_x052.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_x052.py -------------------------------------------------------------------------------- /tmix/tmix_x060.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_x060.py -------------------------------------------------------------------------------- /tmix/tmix_x060b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_x060b.py -------------------------------------------------------------------------------- /tmix/tmix_x060c2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/tmix/tmix_x060c2.py -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SmerkyG/GoldFinch-paper/HEAD/train.py --------------------------------------------------------------------------------