├── .flake8 ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── configs ├── config.yaml ├── data │ ├── fineweb_edu │ │ └── llama.yaml │ └── sft │ │ └── llama.yaml ├── experiment │ ├── fineweb_edu │ │ ├── attention-xl.yaml │ │ ├── attention_localattention_interleave-xl.yaml │ │ ├── rat-xl.yaml │ │ ├── rat_localattention_interleave-xl.yaml │ │ └── rnn-xl.yaml │ └── sft │ │ ├── attention-xl.yaml │ │ ├── attention_localattention_interleave-xl.yaml │ │ ├── rat-xl.yaml │ │ ├── rat_localattention_interleave-xl.yaml │ │ └── rnn-xl.yaml ├── model │ ├── backbone │ │ ├── layer │ │ │ ├── attention.yaml │ │ │ ├── ffn.yaml │ │ │ ├── identity.yaml │ │ │ ├── local_attention.yaml │ │ │ ├── rat.yaml │ │ │ └── rnn.yaml │ │ ├── sequence.yaml │ │ └── sequence_interleave.yaml │ ├── embedding │ │ ├── empty.yaml │ │ ├── interrope.yaml │ │ ├── lm.yaml │ │ ├── rope.yaml │ │ └── rope_interrope.yaml │ └── head │ │ ├── cls.yaml │ │ └── lm.yaml ├── optim │ ├── lr_scheduler │ │ └── cosine.yaml │ └── optimizer │ │ ├── adam.yaml │ │ ├── adamw.yaml │ │ └── sgd.yaml └── task │ └── lm.yaml ├── docs ├── .editorconfig ├── .github │ ├── release-drafter.yml │ └── workflows │ │ └── release-notes.yml ├── .gitignore ├── 404.html ├── Gemfile ├── Gemfile.lock ├── LICENSE.md ├── README.md ├── _config.yml ├── _includes │ ├── head.html │ └── mathjax_support.html ├── _layouts │ ├── default.html │ ├── page.html │ └── post.html ├── _posts │ └── 2025-06-30-RAT.md ├── _sass │ ├── _base.scss │ ├── _code.scss │ ├── _layout.scss │ ├── _masthead.scss │ ├── _message.scss │ ├── _pagination.scss │ ├── _posts.scss │ ├── _syntax.scss │ ├── _toc.scss │ ├── _type.scss │ └── _variables.scss ├── assets │ ├── eff.png │ ├── favicon.ico │ ├── main_result.png │ ├── pe.png │ ├── rat.png │ ├── rat_all.png │ ├── rat_eq.png │ └── rat_logo.png ├── atom.xml ├── index.html ├── poole-for-jekyll.gemspec └── styles.scss ├── eval ├── lm_harness │ └── sequence_model.py ├── longbench │ ├── config.py │ ├── eval.py │ └── pred.py ├── ruler │ └── eval_sequence.py └── sft_eval │ └── eval_sequence.py ├── rat_all.png ├── rat_logo.png ├── src ├── benchmark_acc │ ├── generation.py │ └── lm.py ├── benchmark_eff │ ├── attention_eff.py │ ├── config.py │ ├── model_eff.py │ ├── rat_eff.py │ └── rnn_eff.py ├── data │ ├── __init__.py │ ├── lm_dataloader.py │ └── sft_dataloader.py ├── model │ ├── __init__.py │ ├── backbone │ │ ├── basic_layer │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── local_attention.py │ │ │ └── mlp.py │ │ ├── cache.py │ │ ├── rat │ │ │ ├── __init__.py │ │ │ ├── rat.py │ │ │ └── rat_slow.py │ │ ├── rnn │ │ │ ├── __init__.py │ │ │ └── rnn.py │ │ ├── sequence.py │ │ ├── sequence_interleave.py │ │ └── util.py │ ├── base.py │ ├── embedding │ │ ├── embedding.py │ │ └── pe.py │ ├── head │ │ └── head.py │ ├── nn │ │ ├── activation.py │ │ ├── init.py │ │ └── norm.py │ └── op │ │ ├── __init__.py │ │ ├── ascan.py │ │ ├── merge_lastt.py │ │ └── pscan.py ├── optim │ ├── __init__.py │ ├── optimizer.py │ └── scheduler.py ├── sequence_models.code-workspace ├── task │ ├── __init__.py │ ├── metric.py │ └── task.py ├── trainer │ ├── fsdp_trainer.py │ ├── lm_fsdp_trainer.py │ ├── lm_trainer.py │ └── trainer.py └── utils │ ├── config.py │ ├── convert_load_ckpt.py │ ├── gen.py │ └── registry.py └── tokenize ├── fineweb_edu.py ├── sft.py └── sft_util.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 140 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/README.md -------------------------------------------------------------------------------- /configs/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/config.yaml -------------------------------------------------------------------------------- /configs/data/fineweb_edu/llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/data/fineweb_edu/llama.yaml -------------------------------------------------------------------------------- /configs/data/sft/llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/data/sft/llama.yaml -------------------------------------------------------------------------------- /configs/experiment/fineweb_edu/attention-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/fineweb_edu/attention-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/fineweb_edu/attention_localattention_interleave-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/fineweb_edu/attention_localattention_interleave-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/fineweb_edu/rat-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/fineweb_edu/rat-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/fineweb_edu/rat_localattention_interleave-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/fineweb_edu/rat_localattention_interleave-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/fineweb_edu/rnn-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/fineweb_edu/rnn-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/sft/attention-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/sft/attention-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/sft/attention_localattention_interleave-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/sft/attention_localattention_interleave-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/sft/rat-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/sft/rat-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/sft/rat_localattention_interleave-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/sft/rat_localattention_interleave-xl.yaml -------------------------------------------------------------------------------- /configs/experiment/sft/rnn-xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/experiment/sft/rnn-xl.yaml -------------------------------------------------------------------------------- /configs/model/backbone/layer/attention.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/backbone/layer/attention.yaml -------------------------------------------------------------------------------- /configs/model/backbone/layer/ffn.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/backbone/layer/ffn.yaml -------------------------------------------------------------------------------- /configs/model/backbone/layer/identity.yaml: -------------------------------------------------------------------------------- 1 | _name_: identity 2 | -------------------------------------------------------------------------------- /configs/model/backbone/layer/local_attention.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/backbone/layer/local_attention.yaml -------------------------------------------------------------------------------- /configs/model/backbone/layer/rat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/backbone/layer/rat.yaml -------------------------------------------------------------------------------- /configs/model/backbone/layer/rnn.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/backbone/layer/rnn.yaml -------------------------------------------------------------------------------- /configs/model/backbone/sequence.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/backbone/sequence.yaml -------------------------------------------------------------------------------- /configs/model/backbone/sequence_interleave.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/backbone/sequence_interleave.yaml -------------------------------------------------------------------------------- /configs/model/embedding/empty.yaml: -------------------------------------------------------------------------------- 1 | _name_: empty -------------------------------------------------------------------------------- /configs/model/embedding/interrope.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/embedding/interrope.yaml -------------------------------------------------------------------------------- /configs/model/embedding/lm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/embedding/lm.yaml -------------------------------------------------------------------------------- /configs/model/embedding/rope.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/embedding/rope.yaml -------------------------------------------------------------------------------- /configs/model/embedding/rope_interrope.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/embedding/rope_interrope.yaml -------------------------------------------------------------------------------- /configs/model/head/cls.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/head/cls.yaml -------------------------------------------------------------------------------- /configs/model/head/lm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/model/head/lm.yaml -------------------------------------------------------------------------------- /configs/optim/lr_scheduler/cosine.yaml: -------------------------------------------------------------------------------- 1 | _name_: cosine 2 | warmup_iter: 0.1 3 | T_max: 1000 4 | eta_min: 1.0e-7 -------------------------------------------------------------------------------- /configs/optim/optimizer/adam.yaml: -------------------------------------------------------------------------------- 1 | _name_: adam 2 | lr: 1.0e-4 3 | betas: [0.9, 0.999] 4 | -------------------------------------------------------------------------------- /configs/optim/optimizer/adamw.yaml: -------------------------------------------------------------------------------- 1 | _name_: adamw 2 | lr: 1.0e-4 3 | weight_decay: 0.1 4 | betas: [0.9, 0.999] -------------------------------------------------------------------------------- /configs/optim/optimizer/sgd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/optim/optimizer/sgd.yaml -------------------------------------------------------------------------------- /configs/task/lm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/configs/task/lm.yaml -------------------------------------------------------------------------------- /docs/.editorconfig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/.editorconfig -------------------------------------------------------------------------------- /docs/.github/release-drafter.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/.github/release-drafter.yml -------------------------------------------------------------------------------- /docs/.github/workflows/release-notes.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/.github/workflows/release-notes.yml -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/.gitignore -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/404.html -------------------------------------------------------------------------------- /docs/Gemfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/Gemfile -------------------------------------------------------------------------------- /docs/Gemfile.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/Gemfile.lock -------------------------------------------------------------------------------- /docs/LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/LICENSE.md -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_config.yml -------------------------------------------------------------------------------- /docs/_includes/head.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_includes/head.html -------------------------------------------------------------------------------- /docs/_includes/mathjax_support.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_includes/mathjax_support.html -------------------------------------------------------------------------------- /docs/_layouts/default.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_layouts/default.html -------------------------------------------------------------------------------- /docs/_layouts/page.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_layouts/page.html -------------------------------------------------------------------------------- /docs/_layouts/post.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_layouts/post.html -------------------------------------------------------------------------------- /docs/_posts/2025-06-30-RAT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_posts/2025-06-30-RAT.md -------------------------------------------------------------------------------- /docs/_sass/_base.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_base.scss -------------------------------------------------------------------------------- /docs/_sass/_code.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_code.scss -------------------------------------------------------------------------------- /docs/_sass/_layout.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_layout.scss -------------------------------------------------------------------------------- /docs/_sass/_masthead.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_masthead.scss -------------------------------------------------------------------------------- /docs/_sass/_message.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_message.scss -------------------------------------------------------------------------------- /docs/_sass/_pagination.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_pagination.scss -------------------------------------------------------------------------------- /docs/_sass/_posts.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_posts.scss -------------------------------------------------------------------------------- /docs/_sass/_syntax.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_syntax.scss -------------------------------------------------------------------------------- /docs/_sass/_toc.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_toc.scss -------------------------------------------------------------------------------- /docs/_sass/_type.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_type.scss -------------------------------------------------------------------------------- /docs/_sass/_variables.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/_sass/_variables.scss -------------------------------------------------------------------------------- /docs/assets/eff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/assets/eff.png -------------------------------------------------------------------------------- /docs/assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/assets/favicon.ico -------------------------------------------------------------------------------- /docs/assets/main_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/assets/main_result.png -------------------------------------------------------------------------------- /docs/assets/pe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/assets/pe.png -------------------------------------------------------------------------------- /docs/assets/rat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/assets/rat.png -------------------------------------------------------------------------------- /docs/assets/rat_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/assets/rat_all.png -------------------------------------------------------------------------------- /docs/assets/rat_eq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/assets/rat_eq.png -------------------------------------------------------------------------------- /docs/assets/rat_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/assets/rat_logo.png -------------------------------------------------------------------------------- /docs/atom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/atom.xml -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/index.html -------------------------------------------------------------------------------- /docs/poole-for-jekyll.gemspec: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/poole-for-jekyll.gemspec -------------------------------------------------------------------------------- /docs/styles.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/docs/styles.scss -------------------------------------------------------------------------------- /eval/lm_harness/sequence_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/eval/lm_harness/sequence_model.py -------------------------------------------------------------------------------- /eval/longbench/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/eval/longbench/config.py -------------------------------------------------------------------------------- /eval/longbench/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/eval/longbench/eval.py -------------------------------------------------------------------------------- /eval/longbench/pred.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/eval/longbench/pred.py -------------------------------------------------------------------------------- /eval/ruler/eval_sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/eval/ruler/eval_sequence.py -------------------------------------------------------------------------------- /eval/sft_eval/eval_sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/eval/sft_eval/eval_sequence.py -------------------------------------------------------------------------------- /rat_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/rat_all.png -------------------------------------------------------------------------------- /rat_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/rat_logo.png -------------------------------------------------------------------------------- /src/benchmark_acc/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/benchmark_acc/generation.py -------------------------------------------------------------------------------- /src/benchmark_acc/lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/benchmark_acc/lm.py -------------------------------------------------------------------------------- /src/benchmark_eff/attention_eff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/benchmark_eff/attention_eff.py -------------------------------------------------------------------------------- /src/benchmark_eff/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/benchmark_eff/config.py -------------------------------------------------------------------------------- /src/benchmark_eff/model_eff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/benchmark_eff/model_eff.py -------------------------------------------------------------------------------- /src/benchmark_eff/rat_eff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/benchmark_eff/rat_eff.py -------------------------------------------------------------------------------- /src/benchmark_eff/rnn_eff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/benchmark_eff/rnn_eff.py -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/data/__init__.py -------------------------------------------------------------------------------- /src/data/lm_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/data/lm_dataloader.py -------------------------------------------------------------------------------- /src/data/sft_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/data/sft_dataloader.py -------------------------------------------------------------------------------- /src/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/__init__.py -------------------------------------------------------------------------------- /src/model/backbone/basic_layer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/basic_layer/__init__.py -------------------------------------------------------------------------------- /src/model/backbone/basic_layer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/basic_layer/attention.py -------------------------------------------------------------------------------- /src/model/backbone/basic_layer/local_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/basic_layer/local_attention.py -------------------------------------------------------------------------------- /src/model/backbone/basic_layer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/basic_layer/mlp.py -------------------------------------------------------------------------------- /src/model/backbone/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/cache.py -------------------------------------------------------------------------------- /src/model/backbone/rat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/rat/__init__.py -------------------------------------------------------------------------------- /src/model/backbone/rat/rat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/rat/rat.py -------------------------------------------------------------------------------- /src/model/backbone/rat/rat_slow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/rat/rat_slow.py -------------------------------------------------------------------------------- /src/model/backbone/rnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .rnn import RNN 2 | -------------------------------------------------------------------------------- /src/model/backbone/rnn/rnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/rnn/rnn.py -------------------------------------------------------------------------------- /src/model/backbone/sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/sequence.py -------------------------------------------------------------------------------- /src/model/backbone/sequence_interleave.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/sequence_interleave.py -------------------------------------------------------------------------------- /src/model/backbone/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/backbone/util.py -------------------------------------------------------------------------------- /src/model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/base.py -------------------------------------------------------------------------------- /src/model/embedding/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/embedding/embedding.py -------------------------------------------------------------------------------- /src/model/embedding/pe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/embedding/pe.py -------------------------------------------------------------------------------- /src/model/head/head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/head/head.py -------------------------------------------------------------------------------- /src/model/nn/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/nn/activation.py -------------------------------------------------------------------------------- /src/model/nn/init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/nn/init.py -------------------------------------------------------------------------------- /src/model/nn/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/nn/norm.py -------------------------------------------------------------------------------- /src/model/op/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/op/__init__.py -------------------------------------------------------------------------------- /src/model/op/ascan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/op/ascan.py -------------------------------------------------------------------------------- /src/model/op/merge_lastt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/op/merge_lastt.py -------------------------------------------------------------------------------- /src/model/op/pscan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/model/op/pscan.py -------------------------------------------------------------------------------- /src/optim/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/optim/__init__.py -------------------------------------------------------------------------------- /src/optim/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/optim/optimizer.py -------------------------------------------------------------------------------- /src/optim/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/optim/scheduler.py -------------------------------------------------------------------------------- /src/sequence_models.code-workspace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/sequence_models.code-workspace -------------------------------------------------------------------------------- /src/task/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/task/__init__.py -------------------------------------------------------------------------------- /src/task/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/task/metric.py -------------------------------------------------------------------------------- /src/task/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/task/task.py -------------------------------------------------------------------------------- /src/trainer/fsdp_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/trainer/fsdp_trainer.py -------------------------------------------------------------------------------- /src/trainer/lm_fsdp_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/trainer/lm_fsdp_trainer.py -------------------------------------------------------------------------------- /src/trainer/lm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/trainer/lm_trainer.py -------------------------------------------------------------------------------- /src/trainer/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/trainer/trainer.py -------------------------------------------------------------------------------- /src/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/utils/config.py -------------------------------------------------------------------------------- /src/utils/convert_load_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/utils/convert_load_ckpt.py -------------------------------------------------------------------------------- /src/utils/gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/utils/gen.py -------------------------------------------------------------------------------- /src/utils/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/src/utils/registry.py -------------------------------------------------------------------------------- /tokenize/fineweb_edu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/tokenize/fineweb_edu.py -------------------------------------------------------------------------------- /tokenize/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/tokenize/sft.py -------------------------------------------------------------------------------- /tokenize/sft_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLAIRE-Labo/RAT/HEAD/tokenize/sft_util.py --------------------------------------------------------------------------------