├── .github └── todo.yaml ├── .gitignore ├── LICENSE ├── README.md ├── assets ├── 89F5EE60-13D9-416B-B395-8774B4350509.webp └── qrcode_1731259533808.jpg ├── benchmark └── test.py ├── config ├── config.yaml ├── loss │ ├── cdpo.yaml │ ├── csft.yaml │ ├── dpo-sigmoid.yaml │ ├── dpo.yaml │ ├── fdpo-kl.yaml │ ├── fpo.yaml │ ├── kto-logsigmoid.yaml │ ├── kto-simple.yaml │ ├── kto-surprisal.yaml │ ├── kto-zero.yaml │ ├── kto.yaml │ ├── orpo.yaml │ ├── ppo.yaml │ ├── sft.yaml │ ├── simpo.yaml │ ├── slic.yaml │ ├── tdpo1.yaml │ └── tdpo2.yaml └── model │ ├── base_model.yaml │ ├── gemma-2-2b.yaml │ ├── gemma-2-9b.yaml │ ├── llama13b.yaml │ ├── llama30b.yaml │ ├── llama65b.yaml │ ├── llama7b.yaml │ ├── mistral7b.yaml │ ├── mistral7b_instruct.yaml │ ├── mistral7b_sft_beta.yaml │ ├── pythia1-4b.yaml │ ├── pythia12-0b.yaml │ ├── pythia2-8b.yaml │ ├── pythia6-9b.yaml │ ├── qwen-2-1.5b.yaml │ └── zephyr-sft-beta.yaml ├── data └── dataloader.py ├── debug.py ├── environment.yaml ├── feature_alignment ├── __init__.py ├── compare.py ├── eval.py ├── feature_map.py ├── model │ ├── dpo.py │ ├── fpo.py │ ├── model.py │ ├── sft.py │ ├── simpo.py │ └── tdpo.py ├── models.py ├── push.py ├── sae │ └── jump_relu_sae.py ├── trainers.py ├── transformers_model │ └── modeling_gemma2.py ├── utils │ ├── __init__.py │ ├── callbacks.py │ └── util.py └── visualize.py ├── requirements.txt ├── run.sh ├── sample.py └── train.py /.github/todo.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/README.md -------------------------------------------------------------------------------- /assets/89F5EE60-13D9-416B-B395-8774B4350509.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/assets/89F5EE60-13D9-416B-B395-8774B4350509.webp -------------------------------------------------------------------------------- /assets/qrcode_1731259533808.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/assets/qrcode_1731259533808.jpg -------------------------------------------------------------------------------- /benchmark/test.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/config.yaml -------------------------------------------------------------------------------- /config/loss/cdpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/cdpo.yaml -------------------------------------------------------------------------------- /config/loss/csft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/csft.yaml -------------------------------------------------------------------------------- /config/loss/dpo-sigmoid.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/dpo-sigmoid.yaml -------------------------------------------------------------------------------- /config/loss/dpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/dpo.yaml -------------------------------------------------------------------------------- /config/loss/fdpo-kl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/fdpo-kl.yaml -------------------------------------------------------------------------------- /config/loss/fpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/fpo.yaml -------------------------------------------------------------------------------- /config/loss/kto-logsigmoid.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/kto-logsigmoid.yaml -------------------------------------------------------------------------------- /config/loss/kto-simple.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/kto-simple.yaml -------------------------------------------------------------------------------- /config/loss/kto-surprisal.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/kto-surprisal.yaml -------------------------------------------------------------------------------- /config/loss/kto-zero.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/kto-zero.yaml -------------------------------------------------------------------------------- /config/loss/kto.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/kto.yaml -------------------------------------------------------------------------------- /config/loss/orpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/orpo.yaml -------------------------------------------------------------------------------- /config/loss/ppo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/ppo.yaml -------------------------------------------------------------------------------- /config/loss/sft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/sft.yaml -------------------------------------------------------------------------------- /config/loss/simpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/simpo.yaml -------------------------------------------------------------------------------- /config/loss/slic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/slic.yaml -------------------------------------------------------------------------------- /config/loss/tdpo1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/tdpo1.yaml -------------------------------------------------------------------------------- /config/loss/tdpo2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/loss/tdpo2.yaml -------------------------------------------------------------------------------- /config/model/base_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/base_model.yaml -------------------------------------------------------------------------------- /config/model/gemma-2-2b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/gemma-2-2b.yaml -------------------------------------------------------------------------------- /config/model/gemma-2-9b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/gemma-2-9b.yaml -------------------------------------------------------------------------------- /config/model/llama13b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/llama13b.yaml -------------------------------------------------------------------------------- /config/model/llama30b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/llama30b.yaml -------------------------------------------------------------------------------- /config/model/llama65b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/llama65b.yaml -------------------------------------------------------------------------------- /config/model/llama7b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/llama7b.yaml -------------------------------------------------------------------------------- /config/model/mistral7b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/mistral7b.yaml -------------------------------------------------------------------------------- /config/model/mistral7b_instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/mistral7b_instruct.yaml -------------------------------------------------------------------------------- /config/model/mistral7b_sft_beta.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/mistral7b_sft_beta.yaml -------------------------------------------------------------------------------- /config/model/pythia1-4b.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_model 3 | 4 | name_or_path: EleutherAI/pythia-1.4b 5 | block_name: GPTNeoXLayer -------------------------------------------------------------------------------- /config/model/pythia12-0b.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_model 3 | 4 | name_or_path: EleutherAI/pythia-12b 5 | block_name: GPTNeoXLayer -------------------------------------------------------------------------------- /config/model/pythia2-8b.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_model 3 | 4 | name_or_path: EleutherAI/pythia-2.8b 5 | block_name: GPTNeoXLayer -------------------------------------------------------------------------------- /config/model/pythia6-9b.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_model 3 | 4 | name_or_path: EleutherAI/pythia-6.9b 5 | block_name: GPTNeoXLayer -------------------------------------------------------------------------------- /config/model/qwen-2-1.5b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/qwen-2-1.5b.yaml -------------------------------------------------------------------------------- /config/model/zephyr-sft-beta.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/config/model/zephyr-sft-beta.yaml -------------------------------------------------------------------------------- /data/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/data/dataloader.py -------------------------------------------------------------------------------- /debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/debug.py -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/environment.yaml -------------------------------------------------------------------------------- /feature_alignment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /feature_alignment/compare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/compare.py -------------------------------------------------------------------------------- /feature_alignment/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/eval.py -------------------------------------------------------------------------------- /feature_alignment/feature_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/feature_map.py -------------------------------------------------------------------------------- /feature_alignment/model/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/model/dpo.py -------------------------------------------------------------------------------- /feature_alignment/model/fpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/model/fpo.py -------------------------------------------------------------------------------- /feature_alignment/model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/model/model.py -------------------------------------------------------------------------------- /feature_alignment/model/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/model/sft.py -------------------------------------------------------------------------------- /feature_alignment/model/simpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/model/simpo.py -------------------------------------------------------------------------------- /feature_alignment/model/tdpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/model/tdpo.py -------------------------------------------------------------------------------- /feature_alignment/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/models.py -------------------------------------------------------------------------------- /feature_alignment/push.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/push.py -------------------------------------------------------------------------------- /feature_alignment/sae/jump_relu_sae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/sae/jump_relu_sae.py -------------------------------------------------------------------------------- /feature_alignment/trainers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/trainers.py -------------------------------------------------------------------------------- /feature_alignment/transformers_model/modeling_gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/transformers_model/modeling_gemma2.py -------------------------------------------------------------------------------- /feature_alignment/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /feature_alignment/utils/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/utils/callbacks.py -------------------------------------------------------------------------------- /feature_alignment/utils/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/utils/util.py -------------------------------------------------------------------------------- /feature_alignment/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/feature_alignment/visualize.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/requirements.txt -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/run.sh -------------------------------------------------------------------------------- /sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/sample.py -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikaStars39/FeatureAlignment/HEAD/train.py --------------------------------------------------------------------------------