├── .gitignore ├── LICENSE ├── README.md ├── assets └── main_figure.jpeg ├── requirements.txt ├── setup.py ├── src └── sae_ts │ ├── __init__.py │ ├── baselines │ ├── __init__.py │ ├── activation_steering.py │ ├── analysis.py │ └── plots.py │ ├── ft_effects │ ├── __init__.py │ ├── multi_gpu.py │ ├── train.py │ └── utils.py │ └── steering │ ├── __init__.py │ ├── evals_utils.py │ ├── patch.py │ ├── sae.py │ └── utils.py └── steer_cfgs ├── gemma2-9b ├── anger │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ └── optimised_steer.json ├── christian_evangelist │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ └── optimised_steer.json ├── conspiracy │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ └── optimised_steer.json ├── french │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ ├── optimised_steer.json │ └── prompts copy.json ├── london │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ └── optimised_steer.json ├── love │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ └── optimised_steer.json ├── praise │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ └── optimised_steer.json ├── want_to_die │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ └── optimised_steer.json └── wedding │ ├── act_steer.json │ ├── criteria.json │ ├── feature_steer.json │ └── optimised_steer.json └── gemma2 ├── GGB_65k ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── anger ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── christian_evangelist ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── citations ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── conspiracy ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── french ├── act_steer.json ├── criteria.json ├── feature_steer.json ├── optimised_steer.json └── prompts copy.json ├── london ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── london_65k ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── love ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── praise ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json ├── want_to_die ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json └── wedding ├── act_steer.json ├── criteria.json ├── feature_steer.json └── optimised_steer.json /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/README.md -------------------------------------------------------------------------------- /assets/main_figure.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/assets/main_figure.jpeg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/setup.py -------------------------------------------------------------------------------- /src/sae_ts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/sae_ts/baselines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/sae_ts/baselines/activation_steering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/baselines/activation_steering.py -------------------------------------------------------------------------------- /src/sae_ts/baselines/analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/baselines/analysis.py -------------------------------------------------------------------------------- /src/sae_ts/baselines/plots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/baselines/plots.py -------------------------------------------------------------------------------- /src/sae_ts/ft_effects/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/sae_ts/ft_effects/multi_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/ft_effects/multi_gpu.py -------------------------------------------------------------------------------- /src/sae_ts/ft_effects/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/ft_effects/train.py -------------------------------------------------------------------------------- /src/sae_ts/ft_effects/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/ft_effects/utils.py -------------------------------------------------------------------------------- /src/sae_ts/steering/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/sae_ts/steering/evals_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/steering/evals_utils.py -------------------------------------------------------------------------------- /src/sae_ts/steering/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/steering/patch.py -------------------------------------------------------------------------------- /src/sae_ts/steering/sae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/steering/sae.py -------------------------------------------------------------------------------- /src/sae_ts/steering/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/src/sae_ts/steering/utils.py -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/anger/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/anger/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/anger/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/anger/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/anger/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/anger/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/anger/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/anger/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/christian_evangelist/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/christian_evangelist/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/christian_evangelist/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/christian_evangelist/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/christian_evangelist/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/christian_evangelist/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/christian_evangelist/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/christian_evangelist/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/conspiracy/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/conspiracy/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/conspiracy/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/conspiracy/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/conspiracy/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/conspiracy/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/conspiracy/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/conspiracy/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/french/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/french/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/french/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/french/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/french/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/french/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/french/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/french/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/french/prompts copy.json: -------------------------------------------------------------------------------- 1 | [ 2 | "I think" 3 | ] -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/london/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/london/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/london/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/london/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/london/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/london/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/london/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/london/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/love/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/love/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/love/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/love/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/love/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/love/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/love/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/love/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/praise/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/praise/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/praise/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/praise/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/praise/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/praise/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/praise/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/praise/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/want_to_die/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/want_to_die/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/want_to_die/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/want_to_die/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/want_to_die/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/want_to_die/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/want_to_die/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/want_to_die/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/wedding/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/wedding/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/wedding/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/wedding/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/wedding/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/wedding/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2-9b/wedding/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2-9b/wedding/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/GGB_65k/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/GGB_65k/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/GGB_65k/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/GGB_65k/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/GGB_65k/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/GGB_65k/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/anger/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/anger/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/anger/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/anger/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/anger/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/anger/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/anger/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/anger/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/christian_evangelist/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/christian_evangelist/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/christian_evangelist/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/christian_evangelist/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/christian_evangelist/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/christian_evangelist/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/christian_evangelist/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/christian_evangelist/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/citations/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/citations/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/citations/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/citations/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/citations/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/citations/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/citations/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/citations/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/conspiracy/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/conspiracy/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/conspiracy/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/conspiracy/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/conspiracy/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/conspiracy/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/conspiracy/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/conspiracy/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/french/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/french/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/french/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/french/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/french/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/french/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/french/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/french/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/french/prompts copy.json: -------------------------------------------------------------------------------- 1 | [ 2 | "I think" 3 | ] -------------------------------------------------------------------------------- /steer_cfgs/gemma2/london/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/london/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/london/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/london/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/london/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/london/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/london/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/london/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/london_65k/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/london_65k/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/london_65k/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/london_65k/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/london_65k/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/london_65k/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/love/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/love/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/love/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/love/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/love/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/love/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/love/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/love/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/praise/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/praise/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/praise/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/praise/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/praise/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/praise/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/praise/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/praise/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/want_to_die/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/want_to_die/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/want_to_die/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/want_to_die/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/want_to_die/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/want_to_die/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/want_to_die/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/want_to_die/optimised_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/wedding/act_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/wedding/act_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/wedding/criteria.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/wedding/criteria.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/wedding/feature_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/wedding/feature_steer.json -------------------------------------------------------------------------------- /steer_cfgs/gemma2/wedding/optimised_steer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slavachalnev/SAE-TS/HEAD/steer_cfgs/gemma2/wedding/optimised_steer.json --------------------------------------------------------------------------------