├── .gitignore ├── LICENSE ├── README.md ├── assets ├── Avatar-Resa.png ├── cost.png ├── overall_comparison.png ├── pipeline.png └── resa_eval.png ├── recipes ├── DeepSeek-R1-Distill-Qwen-1.5B │ ├── grpo │ │ ├── sae_tuning.yaml │ │ └── train_sae.yaml │ └── sft │ │ ├── train_model_deepscaler.yaml │ │ └── train_model_still.yaml └── accelerate_ds_cfgs │ └── ds_zero2.yaml ├── resa ├── config.py ├── post_train_hf │ ├── callback.py │ ├── hub.py │ ├── preprocess.py │ └── sft.py ├── sae │ ├── merge_sae_tuned_models.py │ ├── preprocess.py │ └── sparsify │ │ ├── .pre-commit-config.yaml │ │ ├── LICENSE │ │ ├── README.md │ │ ├── pyproject.toml │ │ ├── sparsify │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── config.py │ │ ├── data.py │ │ ├── muon.py │ │ ├── sign_sgd.py │ │ ├── sparse_coder.py │ │ ├── trainer.py │ │ ├── utils.py │ │ └── xformers.py │ │ └── tests │ │ ├── __init__.py │ │ └── test_decode.py └── utils │ ├── chat_template.py │ ├── constant.py │ └── prompt.py └── scripts ├── eval ├── eval_sae_tuning.sh └── run_eval_custom_tasks.py ├── set ├── environment.yml ├── environment_eval.yml ├── prepare.sh ├── run_download_base_model_sae.py ├── run_download_octothinker_ckpts.py ├── run_download_tina_ckpts.py ├── set_env.sh ├── set_env_eval.sh └── set_vars.sh └── train ├── finetune.py ├── finetune_sae.sh ├── post_train_model_sft.sh ├── pretrain_sae.sh ├── sae_tuning.py ├── sae_tuning_model.sh ├── train_from_scratch.py └── train_sae_from_scratch.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/README.md -------------------------------------------------------------------------------- /assets/Avatar-Resa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/assets/Avatar-Resa.png -------------------------------------------------------------------------------- /assets/cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/assets/cost.png -------------------------------------------------------------------------------- /assets/overall_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/assets/overall_comparison.png -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/assets/pipeline.png -------------------------------------------------------------------------------- /assets/resa_eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/assets/resa_eval.png -------------------------------------------------------------------------------- /recipes/DeepSeek-R1-Distill-Qwen-1.5B/grpo/sae_tuning.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/recipes/DeepSeek-R1-Distill-Qwen-1.5B/grpo/sae_tuning.yaml -------------------------------------------------------------------------------- /recipes/DeepSeek-R1-Distill-Qwen-1.5B/grpo/train_sae.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/recipes/DeepSeek-R1-Distill-Qwen-1.5B/grpo/train_sae.yaml -------------------------------------------------------------------------------- /recipes/DeepSeek-R1-Distill-Qwen-1.5B/sft/train_model_deepscaler.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/recipes/DeepSeek-R1-Distill-Qwen-1.5B/sft/train_model_deepscaler.yaml -------------------------------------------------------------------------------- /recipes/DeepSeek-R1-Distill-Qwen-1.5B/sft/train_model_still.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/recipes/DeepSeek-R1-Distill-Qwen-1.5B/sft/train_model_still.yaml -------------------------------------------------------------------------------- /recipes/accelerate_ds_cfgs/ds_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/recipes/accelerate_ds_cfgs/ds_zero2.yaml -------------------------------------------------------------------------------- /resa/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/config.py -------------------------------------------------------------------------------- /resa/post_train_hf/callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/post_train_hf/callback.py -------------------------------------------------------------------------------- /resa/post_train_hf/hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/post_train_hf/hub.py -------------------------------------------------------------------------------- /resa/post_train_hf/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/post_train_hf/preprocess.py -------------------------------------------------------------------------------- /resa/post_train_hf/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/post_train_hf/sft.py -------------------------------------------------------------------------------- /resa/sae/merge_sae_tuned_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/merge_sae_tuned_models.py -------------------------------------------------------------------------------- /resa/sae/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/preprocess.py -------------------------------------------------------------------------------- /resa/sae/sparsify/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/.pre-commit-config.yaml -------------------------------------------------------------------------------- /resa/sae/sparsify/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/LICENSE -------------------------------------------------------------------------------- /resa/sae/sparsify/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/README.md -------------------------------------------------------------------------------- /resa/sae/sparsify/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/pyproject.toml -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/__init__.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/__main__.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/config.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/data.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/muon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/muon.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/sign_sgd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/sign_sgd.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/sparse_coder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/sparse_coder.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/trainer.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/utils.py -------------------------------------------------------------------------------- /resa/sae/sparsify/sparsify/xformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/sparsify/xformers.py -------------------------------------------------------------------------------- /resa/sae/sparsify/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /resa/sae/sparsify/tests/test_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/sae/sparsify/tests/test_decode.py -------------------------------------------------------------------------------- /resa/utils/chat_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/utils/chat_template.py -------------------------------------------------------------------------------- /resa/utils/constant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/utils/constant.py -------------------------------------------------------------------------------- /resa/utils/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/resa/utils/prompt.py -------------------------------------------------------------------------------- /scripts/eval/eval_sae_tuning.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/eval/eval_sae_tuning.sh -------------------------------------------------------------------------------- /scripts/eval/run_eval_custom_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/eval/run_eval_custom_tasks.py -------------------------------------------------------------------------------- /scripts/set/environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/environment.yml -------------------------------------------------------------------------------- /scripts/set/environment_eval.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/environment_eval.yml -------------------------------------------------------------------------------- /scripts/set/prepare.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/prepare.sh -------------------------------------------------------------------------------- /scripts/set/run_download_base_model_sae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/run_download_base_model_sae.py -------------------------------------------------------------------------------- /scripts/set/run_download_octothinker_ckpts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/run_download_octothinker_ckpts.py -------------------------------------------------------------------------------- /scripts/set/run_download_tina_ckpts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/run_download_tina_ckpts.py -------------------------------------------------------------------------------- /scripts/set/set_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/set_env.sh -------------------------------------------------------------------------------- /scripts/set/set_env_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/set_env_eval.sh -------------------------------------------------------------------------------- /scripts/set/set_vars.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/set/set_vars.sh -------------------------------------------------------------------------------- /scripts/train/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/train/finetune.py -------------------------------------------------------------------------------- /scripts/train/finetune_sae.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/train/finetune_sae.sh -------------------------------------------------------------------------------- /scripts/train/post_train_model_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/train/post_train_model_sft.sh -------------------------------------------------------------------------------- /scripts/train/pretrain_sae.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/train/pretrain_sae.sh -------------------------------------------------------------------------------- /scripts/train/sae_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/train/sae_tuning.py -------------------------------------------------------------------------------- /scripts/train/sae_tuning_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/train/sae_tuning_model.sh -------------------------------------------------------------------------------- /scripts/train/train_from_scratch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/train/train_from_scratch.py -------------------------------------------------------------------------------- /scripts/train/train_sae_from_scratch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangshang-wang/Resa/HEAD/scripts/train/train_sae_from_scratch.sh --------------------------------------------------------------------------------