├── .gitattributes ├── .gitignore ├── .python-version ├── LICENSE ├── README.md ├── axbench ├── README.md ├── __init__.py ├── concept10 │ ├── prod_2b_l10_v1 │ │ ├── generate │ │ │ ├── generate_state.pkl │ │ │ ├── metadata.jsonl │ │ │ └── train_data.parquet │ │ └── inference │ │ │ ├── latent_eval_data.parquet │ │ │ └── latent_inference_state.pkl │ ├── prod_2b_l20_v1 │ │ ├── generate │ │ │ ├── generate_state.pkl │ │ │ ├── metadata.jsonl │ │ │ └── train_data.parquet │ │ └── inference │ │ │ └── latent_eval_data.parquet │ ├── prod_9b_l20_v1 │ │ ├── generate │ │ │ ├── generate_state.pkl │ │ │ ├── metadata.jsonl │ │ │ └── train_data.parquet │ │ └── inference │ │ │ ├── latent_eval_data.parquet │ │ │ └── latent_inference_state.pkl │ └── prod_9b_l31_v1 │ │ ├── generate │ │ ├── generate_state.pkl │ │ ├── metadata.jsonl │ │ └── train_data.parquet │ │ └── inference │ │ ├── latent_eval_data.parquet │ │ └── latent_inference_state.pkl ├── concept16k │ └── README.md ├── concept16k_v2 │ └── README.md ├── concept500 │ ├── prod_2b_l10_v1 │ │ ├── generate │ │ │ ├── generate_state.pkl │ │ │ ├── metadata.jsonl │ │ │ └── train_data.parquet │ │ └── inference │ │ │ ├── latent_eval_data.parquet │ │ │ └── latent_inference_state.pkl │ ├── prod_2b_l20_v1 │ │ ├── generate │ │ │ ├── generate_state.pkl │ │ │ ├── metadata.jsonl │ │ │ └── train_data.parquet │ │ └── inference │ │ │ ├── latent_eval_data.parquet │ │ │ └── latent_inference_state.pkl │ ├── prod_9b_l20_v1 │ │ ├── generate │ │ │ ├── generate_state.pkl │ │ │ ├── metadata.jsonl │ │ │ └── train_data.parquet │ │ └── inference │ │ │ ├── latent_eval_data.parquet │ │ │ └── latent_inference_state.pkl │ └── prod_9b_l31_v1 │ │ ├── generate │ │ ├── generate_state.pkl │ │ ├── metadata.jsonl │ │ └── train_data.parquet │ │ └── inference │ │ ├── latent_eval_data.parquet │ │ └── latent_inference_state.pkl ├── conceptFD │ ├── README.md │ ├── prod_2b_l10_v1 │ │ ├── generate │ │ │ ├── metadata.jsonl │ │ │ └── train_data.parquet │ │ └── inference │ │ │ └── latent_eval_data.parquet │ └── prod_2b_l20_v1 │ │ ├── generate │ │ ├── metadata.jsonl │ │ └── train_data.parquet │ │ └── inference │ │ └── latent_eval_data.parquet ├── data │ ├── download-2b.sh │ ├── download-9b.sh │ ├── download-alpaca.sh │ ├── download-others.sh │ ├── download-pls.sh │ ├── download-seed-sentences.py │ └── process-feature-description.py ├── demo │ ├── concepts.csv │ ├── demo.sh │ ├── hypersteer_demo.sh │ └── sweep │ │ ├── hypersteer_simple.yaml │ │ └── simple.yaml ├── evaluators │ ├── __init__.py │ ├── aucroc.py │ ├── evaluator.py │ ├── hard_negative.py │ ├── latent_stats.py │ ├── lm_judge.py │ ├── ppl.py │ ├── prompt_templates.py │ ├── rule_judge.py │ └── winrate.py ├── examples │ ├── 2b_l20_diffmean.json │ ├── 2b_l20_lsreft.json │ ├── 9b_l20_diffmean.json │ ├── 9b_l20_lsreft.json │ ├── basics.ipynb │ ├── lang>subspace.ipynb │ ├── platonic.ipynb │ ├── subspace_gazer.ipynb │ └── tutorial.ipynb ├── experiment_commands.txt ├── models │ ├── __init__.py │ ├── bow.py │ ├── concept_lora.py │ ├── concept_model.py │ ├── concept_reft.py │ ├── concept_vector.py │ ├── hypernet │ │ ├── __init__.py │ │ ├── configuration_hypernet.py │ │ ├── layers.py │ │ ├── modeling_hypernet.py │ │ └── utils.py │ ├── hypersteer.py │ ├── ig.py │ ├── interventions.py │ ├── language_models.py │ ├── lora.py │ ├── lsreft.py │ ├── mean.py │ ├── model.py │ ├── preference_lora.py │ ├── preference_model.py │ ├── preference_reft.py │ ├── preference_vector.py │ ├── probe.py │ ├── prompt.py │ ├── random.py │ ├── reft.py │ ├── sae.py │ ├── sft.py │ └── steering_vector.py ├── rm_demo.sh ├── scripts │ ├── __init__.py │ ├── analyse.ipynb │ ├── args │ │ ├── __init__.py │ │ ├── dataset_args.py │ │ ├── eval_args.py │ │ └── training_args.py │ ├── evaluate.py │ ├── generate.py │ ├── human_eval.py │ ├── inference.py │ ├── reps_analyse.ipynb │ └── train.py ├── sweep │ ├── aryaman │ │ ├── sae_selection_2b_l10.yaml │ │ ├── sae_selection_2b_l10_concept10.yaml │ │ ├── sae_selection_2b_l20.yaml │ │ ├── sae_selection_9b_l20.yaml │ │ ├── sae_selection_9b_l31.yaml │ │ └── simple.yaml │ └── wuzhengx │ │ ├── 2b │ │ ├── l10 │ │ │ ├── bow.yaml │ │ │ ├── gemmascope_axbench_max_act.yaml │ │ │ ├── gemmascope_clamp.yaml │ │ │ ├── gemmascope_fd.yaml │ │ │ ├── gemmascope_min_clamp.yaml │ │ │ ├── ig.yaml │ │ │ ├── lora.yaml │ │ │ ├── loreft.yaml │ │ │ ├── lsreft.yaml │ │ │ ├── lsreft_fd.yaml │ │ │ ├── lsreft_scaling_law.yaml │ │ │ ├── lsreft_synergy.yaml │ │ │ ├── no_grad.yaml │ │ │ ├── probe.yaml │ │ │ ├── prompt_detection.yaml │ │ │ ├── sft.yaml │ │ │ ├── simple_prompt_steering.yaml │ │ │ └── steering_vec.yaml │ │ └── l20 │ │ │ ├── 16k_diffmean.yaml │ │ │ ├── 16k_diffmean_crossfit.yaml │ │ │ ├── 16k_lsreft.yaml │ │ │ ├── 16k_lsreft_crossfit.yaml │ │ │ ├── bow.yaml │ │ │ ├── gemmascope_axbench_max_act.yaml │ │ │ ├── gemmascope_clamp.yaml │ │ │ ├── gemmascope_fd.yaml │ │ │ ├── gemmascope_min_clamp.yaml │ │ │ ├── ig.yaml │ │ │ ├── lora.yaml │ │ │ ├── loreft.yaml │ │ │ ├── lsreft.yaml │ │ │ ├── lsreft_fd.yaml │ │ │ ├── lsreft_scaling_law.yaml │ │ │ ├── lsreft_synergy.yaml │ │ │ ├── no_grad.yaml │ │ │ ├── probe.yaml │ │ │ ├── prompt_detection.yaml │ │ │ ├── sft.yaml │ │ │ ├── simple_prompt_steering.yaml │ │ │ └── steering_vec.yaml │ │ ├── 9b │ │ ├── l20 │ │ │ ├── 16k_diffmean.yaml │ │ │ ├── 16k_diffmean_crossfit.yaml │ │ │ ├── 16k_lsreft.yaml │ │ │ ├── 16k_lsreft_crossfit.yaml │ │ │ ├── bow.yaml │ │ │ ├── gemmascope_axbench_max_act.yaml │ │ │ ├── gemmascope_clamp.yaml │ │ │ ├── gemmascope_min_clamp.yaml │ │ │ ├── ig.yaml │ │ │ ├── lora.yaml │ │ │ ├── loreft.yaml │ │ │ ├── lsreft.yaml │ │ │ ├── lsreft_scaling_law.yaml │ │ │ ├── lsreft_synergy.yaml │ │ │ ├── no_grad.yaml │ │ │ ├── probe.yaml │ │ │ ├── prompt_detection.yaml │ │ │ ├── simple_prompt_steering.yaml │ │ │ └── steering_vec.yaml │ │ └── l31 │ │ │ ├── bow.yaml │ │ │ ├── gemmascope_axbench_max_act.yaml │ │ │ ├── gemmascope_clamp.yaml │ │ │ ├── gemmascope_min_clamp.yaml │ │ │ ├── ig.yaml │ │ │ ├── lora.yaml │ │ │ ├── loreft.yaml │ │ │ ├── lsreft.yaml │ │ │ ├── lsreft_scaling_law.yaml │ │ │ ├── lsreft_synergy.yaml │ │ │ ├── no_grad.yaml │ │ │ ├── probe.yaml │ │ │ ├── prompt_detection.yaml │ │ │ ├── simple_prompt_steering.yaml │ │ │ └── steering_vec.yaml │ │ ├── llama_8b │ │ └── l20 │ │ │ └── lsreft.yaml │ │ ├── others │ │ ├── prod_2b_l10_fd_v1.yaml │ │ ├── prod_2b_l10_v1.yaml │ │ ├── prod_2b_l20_fd_v1.yaml │ │ ├── prod_2b_l20_v1.yaml │ │ ├── prod_9b_l20_v1.yaml │ │ └── prod_9b_l31_v1.yaml │ │ ├── pls │ │ ├── prod_2b_l20_gemma_65k.yaml │ │ ├── prod_8b_l20_llama_131k.yaml │ │ └── prod_9b_l20_gemma_131k.yaml │ │ └── reps │ │ ├── README.md │ │ ├── dataset │ │ ├── concept100.yaml │ │ └── concept500.yaml │ │ └── experiments │ │ ├── c_lora_g2-2b_axbench.yaml │ │ ├── c_lora_g2-2b_axbench_suppress.yaml │ │ ├── c_lora_g2-9b_axbench.yaml │ │ ├── c_lora_g2-9b_axbench_suppress.yaml │ │ ├── c_lora_g3-12b_axbench.yaml │ │ ├── c_lora_g3-12b_concept100.yaml │ │ ├── c_lora_g3-12b_concept100_suppress.yaml │ │ ├── c_lora_g3-27b_axbench.yaml │ │ ├── c_lora_g3-27b_concept100.yaml │ │ ├── c_lora_g3-27b_concept100_suppress.yaml │ │ ├── c_loreft_g2-2b_axbench.yaml │ │ ├── c_loreft_g2-2b_axbench_suppress.yaml │ │ ├── c_loreft_g2-9b_axbench.yaml │ │ ├── c_loreft_g2-9b_axbench_suppress.yaml │ │ ├── c_loreft_g3-12b_axbench.yaml │ │ ├── c_loreft_g3-12b_concept100.yaml │ │ ├── c_loreft_g3-12b_concept100_suppress.yaml │ │ ├── c_loreft_g3-27b_axbench.yaml │ │ ├── c_loreft_g3-27b_concept100.yaml │ │ ├── c_loreft_g3-27b_concept100_suppress.yaml │ │ ├── c_vector_g2-2b_axbench.yaml │ │ ├── c_vector_g2-2b_axbench_attack.yaml │ │ ├── c_vector_g2-2b_axbench_overwrite_append.yaml │ │ ├── c_vector_g2-2b_axbench_overwrite_prepend.yaml │ │ ├── c_vector_g2-2b_axbench_suppress.yaml │ │ ├── c_vector_g2-2b_axbench_suppress_rule.yaml │ │ ├── c_vector_g2-2b_nfs_axbench.yaml │ │ ├── c_vector_g2-9b_axbench.yaml │ │ ├── c_vector_g2-9b_axbench_attack.yaml │ │ ├── c_vector_g2-9b_axbench_overwrite_append.yaml │ │ ├── c_vector_g2-9b_axbench_overwrite_prepend.yaml │ │ ├── c_vector_g2-9b_axbench_suppress.yaml │ │ ├── c_vector_g2-9b_axbench_suppress_rule.yaml │ │ ├── c_vector_g2-9b_nfs_axbench.yaml │ │ ├── c_vector_g3-12b_axbench.yaml │ │ ├── c_vector_g3-12b_axbench_overwrite_append.yaml │ │ ├── c_vector_g3-12b_axbench_overwrite_prepend.yaml │ │ ├── c_vector_g3-12b_axbench_suppress.yaml │ │ ├── c_vector_g3-12b_concept100.yaml │ │ ├── c_vector_g3-12b_concept100_suppress.yaml │ │ ├── c_vector_g3-27b_axbench.yaml │ │ ├── c_vector_g3-27b_axbench_overwrite_append.yaml │ │ ├── c_vector_g3-27b_axbench_overwrite_prepend.yaml │ │ ├── c_vector_g3-27b_axbench_suppress.yaml │ │ ├── c_vector_g3-27b_axbench_suppress_overwrite_prepend.yaml │ │ ├── c_vector_g3-27b_concept100.yaml │ │ ├── c_vector_g3-27b_concept100_suppress.yaml │ │ ├── p_embedding_dps_g2-2b_axbench.yaml │ │ ├── p_embedding_dps_g2-9b_axbench.yaml │ │ ├── p_lora_bipo_g2-2b_axbench.yaml │ │ ├── p_lora_bipo_g2-9b_axbench.yaml │ │ ├── p_lora_dps_g2-2b_axbench.yaml │ │ ├── p_lora_dps_g2-2b_axbench_suppress.yaml │ │ ├── p_lora_dps_g2-2b_nfs_axbench.yaml │ │ ├── p_lora_dps_g2-9b_axbench.yaml │ │ ├── p_lora_dps_g2-9b_axbench_suppress.yaml │ │ ├── p_lora_dps_g2-9b_nfs_axbench.yaml │ │ ├── p_lora_dps_g3-12b_axbench.yaml │ │ ├── p_lora_dps_g3-12b_concept100.yaml │ │ ├── p_lora_dps_g3-12b_concept100_suppress.yaml │ │ ├── p_lora_dps_g3-27b_axbench.yaml │ │ ├── p_lora_dps_g3-27b_concept100.yaml │ │ ├── p_lora_dps_g3-27b_concept100_suppress.yaml │ │ ├── p_loreft_bipo_g2-2b_axbench.yaml │ │ ├── p_loreft_bipo_g2-9b_axbench.yaml │ │ ├── p_loreft_dps_g2-2b_axbench.yaml │ │ ├── p_loreft_dps_g2-2b_axbench_suppress.yaml │ │ ├── p_loreft_dps_g2-2b_nfs_axbench.yaml │ │ ├── p_loreft_dps_g2-9b_axbench.yaml │ │ ├── p_loreft_dps_g2-9b_axbench_suppress.yaml │ │ ├── p_loreft_dps_g2-9b_nfs_axbench.yaml │ │ ├── p_loreft_dps_g3-12b_axbench.yaml │ │ ├── p_loreft_dps_g3-12b_concept100.yaml │ │ ├── p_loreft_dps_g3-12b_concept100_suppress.yaml │ │ ├── p_loreft_dps_g3-27b_axbench.yaml │ │ ├── p_loreft_dps_g3-27b_concept100.yaml │ │ ├── p_loreft_dps_g3-27b_concept100_suppress.yaml │ │ ├── p_prefix_dps_g2-2b_axbench.yaml │ │ ├── p_prefix_dps_g2-9b_axbench.yaml │ │ ├── p_vector_bipo_g2-2b_axbench.yaml │ │ ├── p_vector_bipo_g2-2b_lf_axbench.yaml │ │ ├── p_vector_bipo_g2-9b_axbench.yaml │ │ ├── p_vector_bipo_g2-9b_lf_axbench.yaml │ │ ├── p_vector_dps_g2-2b_axbench.yaml │ │ ├── p_vector_dps_g2-2b_axbench_attack.yaml │ │ ├── p_vector_dps_g2-2b_axbench_suppress.yaml │ │ ├── p_vector_dps_g2-2b_axbench_suppress_rule.yaml │ │ ├── p_vector_dps_g2-2b_nfs_axbench.yaml │ │ ├── p_vector_dps_g2-9b_axbench.yaml │ │ ├── p_vector_dps_g2-9b_axbench_attack.yaml │ │ ├── p_vector_dps_g2-9b_axbench_suppress.yaml │ │ ├── p_vector_dps_g2-9b_axbench_suppress_rule.yam │ │ ├── p_vector_dps_g2-9b_axbench_suppress_rule.yaml │ │ ├── p_vector_dps_g2-9b_nfs_axbench.yaml │ │ ├── p_vector_dps_g3-12b_axbench.yaml │ │ ├── p_vector_dps_g3-12b_axbench_suppress_rule.yaml │ │ ├── p_vector_dps_g3-12b_concept100.yaml │ │ ├── p_vector_dps_g3-27b_axbench.yaml │ │ ├── p_vector_dps_g3-27b_axbench_suppress.yaml │ │ ├── p_vector_dps_g3-27b_axbench_suppress_rule.yaml │ │ ├── p_vector_dps_g3-27b_concept100.yaml │ │ ├── p_vector_g2-2b_axbench_overwrite_append.yaml │ │ ├── p_vector_g2-2b_axbench_overwrite_prepend.yaml │ │ ├── p_vector_g2-9b_axbench_overwrite_append.yaml │ │ ├── p_vector_g2-9b_axbench_overwrite_prepend.yaml │ │ ├── p_vector_g3-12b_axbench_overwrite_append.yaml │ │ ├── p_vector_g3-12b_axbench_overwrite_prepend.yaml │ │ ├── p_vector_g3-27b_axbench_overwrite_append.yaml │ │ ├── p_vector_g3-27b_axbench_overwrite_prepend.yaml │ │ ├── prompt_steering_g2-2b_concept20_suppress.yaml │ │ ├── prompt_steering_g2-2b_concept20_suppress_overwrite_append.yaml │ │ ├── prompt_steering_g2-2b_concept20_suppress_overwrite_prepend.yaml │ │ ├── prompt_steering_g2-2b_concept500_suppress.yaml │ │ ├── prompt_steering_g2-2b_prompt_rule.yaml │ │ ├── prompt_steering_g2-9b_concept20_suppress.yaml │ │ ├── prompt_steering_g2-9b_concept20_suppress_overwrite_append.yaml │ │ ├── prompt_steering_g2-9b_concept20_suppress_overwrite_prepend.yaml │ │ ├── prompt_steering_g2-9b_concept500_suppress.yaml │ │ ├── prompt_steering_g3-12b_concept100.yaml │ │ ├── prompt_steering_g3-12b_concept20_suppress.yaml │ │ ├── prompt_steering_g3-12b_concept20_suppress_overwrite_append.yaml │ │ ├── prompt_steering_g3-12b_concept20_suppress_overwrite_prepend.yaml │ │ ├── prompt_steering_g3-12b_concept20_suppress_rule.yaml │ │ ├── prompt_steering_g3-27b_concept100.yaml │ │ ├── prompt_steering_g3-27b_concept20_suppress.yaml │ │ ├── prompt_steering_g3-27b_concept20_suppress_overwrite_append.yaml │ │ ├── prompt_steering_g3-27b_concept20_suppress_overwrite_prepend.yaml │ │ └── prompt_steering_g3-27b_concept20_suppress_rule.yaml ├── templates │ ├── __init__.py │ ├── html_templates.py │ └── prompt_templates.py ├── tests │ ├── README.md │ ├── test_released_artifacts.py │ └── unit_tests │ │ ├── test_dataset.py │ │ ├── test_prompt_utils.py │ │ └── test_sae.py └── utils │ ├── __init__.py │ ├── constants.py │ ├── data_utils.py │ ├── dataset.py │ ├── model_utils.py │ ├── plot_utils.py │ └── prompt_utils.py ├── hypersteer_requirement.txt ├── pyproject.toml └── uv.lock /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/.gitignore -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/README.md -------------------------------------------------------------------------------- /axbench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/README.md -------------------------------------------------------------------------------- /axbench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/__init__.py -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l10_v1/generate/generate_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l10_v1/generate/generate_state.pkl -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l10_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l10_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l10_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l10_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l10_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l10_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l10_v1/inference/latent_inference_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l10_v1/inference/latent_inference_state.pkl -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l20_v1/generate/generate_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l20_v1/generate/generate_state.pkl -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l20_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l20_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l20_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l20_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/concept10/prod_2b_l20_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_2b_l20_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l20_v1/generate/generate_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l20_v1/generate/generate_state.pkl -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l20_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l20_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l20_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l20_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l20_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l20_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l20_v1/inference/latent_inference_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l20_v1/inference/latent_inference_state.pkl -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l31_v1/generate/generate_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l31_v1/generate/generate_state.pkl -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l31_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l31_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l31_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l31_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l31_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l31_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/concept10/prod_9b_l31_v1/inference/latent_inference_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept10/prod_9b_l31_v1/inference/latent_inference_state.pkl -------------------------------------------------------------------------------- /axbench/concept16k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept16k/README.md -------------------------------------------------------------------------------- /axbench/concept16k_v2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept16k_v2/README.md -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l10_v1/generate/generate_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l10_v1/generate/generate_state.pkl -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l10_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l10_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l10_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l10_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l10_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l10_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l10_v1/inference/latent_inference_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l10_v1/inference/latent_inference_state.pkl -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l20_v1/generate/generate_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l20_v1/generate/generate_state.pkl -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l20_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l20_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l20_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l20_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l20_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l20_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/concept500/prod_2b_l20_v1/inference/latent_inference_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_2b_l20_v1/inference/latent_inference_state.pkl -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l20_v1/generate/generate_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l20_v1/generate/generate_state.pkl -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l20_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l20_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l20_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l20_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l20_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l20_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l20_v1/inference/latent_inference_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l20_v1/inference/latent_inference_state.pkl -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l31_v1/generate/generate_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l31_v1/generate/generate_state.pkl -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l31_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l31_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l31_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l31_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l31_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l31_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/concept500/prod_9b_l31_v1/inference/latent_inference_state.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/concept500/prod_9b_l31_v1/inference/latent_inference_state.pkl -------------------------------------------------------------------------------- /axbench/conceptFD/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/conceptFD/README.md -------------------------------------------------------------------------------- /axbench/conceptFD/prod_2b_l10_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/conceptFD/prod_2b_l10_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/conceptFD/prod_2b_l10_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/conceptFD/prod_2b_l10_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/conceptFD/prod_2b_l10_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/conceptFD/prod_2b_l10_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/conceptFD/prod_2b_l20_v1/generate/metadata.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/conceptFD/prod_2b_l20_v1/generate/metadata.jsonl -------------------------------------------------------------------------------- /axbench/conceptFD/prod_2b_l20_v1/generate/train_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/conceptFD/prod_2b_l20_v1/generate/train_data.parquet -------------------------------------------------------------------------------- /axbench/conceptFD/prod_2b_l20_v1/inference/latent_eval_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/conceptFD/prod_2b_l20_v1/inference/latent_eval_data.parquet -------------------------------------------------------------------------------- /axbench/data/download-2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/data/download-2b.sh -------------------------------------------------------------------------------- /axbench/data/download-9b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/data/download-9b.sh -------------------------------------------------------------------------------- /axbench/data/download-alpaca.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/data/download-alpaca.sh -------------------------------------------------------------------------------- /axbench/data/download-others.sh: -------------------------------------------------------------------------------- 1 | git clone https://github.com/yoavgur/Feature-Descriptions.git -------------------------------------------------------------------------------- /axbench/data/download-pls.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/data/download-pls.sh -------------------------------------------------------------------------------- /axbench/data/download-seed-sentences.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/data/download-seed-sentences.py -------------------------------------------------------------------------------- /axbench/data/process-feature-description.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/data/process-feature-description.py -------------------------------------------------------------------------------- /axbench/demo/concepts.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/demo/concepts.csv -------------------------------------------------------------------------------- /axbench/demo/demo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/demo/demo.sh -------------------------------------------------------------------------------- /axbench/demo/hypersteer_demo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/demo/hypersteer_demo.sh -------------------------------------------------------------------------------- /axbench/demo/sweep/hypersteer_simple.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/demo/sweep/hypersteer_simple.yaml -------------------------------------------------------------------------------- /axbench/demo/sweep/simple.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/demo/sweep/simple.yaml -------------------------------------------------------------------------------- /axbench/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/evaluators/aucroc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/aucroc.py -------------------------------------------------------------------------------- /axbench/evaluators/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/evaluator.py -------------------------------------------------------------------------------- /axbench/evaluators/hard_negative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/hard_negative.py -------------------------------------------------------------------------------- /axbench/evaluators/latent_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/latent_stats.py -------------------------------------------------------------------------------- /axbench/evaluators/lm_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/lm_judge.py -------------------------------------------------------------------------------- /axbench/evaluators/ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/ppl.py -------------------------------------------------------------------------------- /axbench/evaluators/prompt_templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/prompt_templates.py -------------------------------------------------------------------------------- /axbench/evaluators/rule_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/rule_judge.py -------------------------------------------------------------------------------- /axbench/evaluators/winrate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/evaluators/winrate.py -------------------------------------------------------------------------------- /axbench/examples/2b_l20_diffmean.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/2b_l20_diffmean.json -------------------------------------------------------------------------------- /axbench/examples/2b_l20_lsreft.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/2b_l20_lsreft.json -------------------------------------------------------------------------------- /axbench/examples/9b_l20_diffmean.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/9b_l20_diffmean.json -------------------------------------------------------------------------------- /axbench/examples/9b_l20_lsreft.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/9b_l20_lsreft.json -------------------------------------------------------------------------------- /axbench/examples/basics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/basics.ipynb -------------------------------------------------------------------------------- /axbench/examples/lang>subspace.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/lang>subspace.ipynb -------------------------------------------------------------------------------- /axbench/examples/platonic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/platonic.ipynb -------------------------------------------------------------------------------- /axbench/examples/subspace_gazer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/subspace_gazer.ipynb -------------------------------------------------------------------------------- /axbench/examples/tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/examples/tutorial.ipynb -------------------------------------------------------------------------------- /axbench/experiment_commands.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/experiment_commands.txt -------------------------------------------------------------------------------- /axbench/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/models/bow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/bow.py -------------------------------------------------------------------------------- /axbench/models/concept_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/concept_lora.py -------------------------------------------------------------------------------- /axbench/models/concept_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/concept_model.py -------------------------------------------------------------------------------- /axbench/models/concept_reft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/concept_reft.py -------------------------------------------------------------------------------- /axbench/models/concept_vector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/concept_vector.py -------------------------------------------------------------------------------- /axbench/models/hypernet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/models/hypernet/configuration_hypernet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/hypernet/configuration_hypernet.py -------------------------------------------------------------------------------- /axbench/models/hypernet/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/hypernet/layers.py -------------------------------------------------------------------------------- /axbench/models/hypernet/modeling_hypernet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/hypernet/modeling_hypernet.py -------------------------------------------------------------------------------- /axbench/models/hypernet/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/hypernet/utils.py -------------------------------------------------------------------------------- /axbench/models/hypersteer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/hypersteer.py -------------------------------------------------------------------------------- /axbench/models/ig.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/ig.py -------------------------------------------------------------------------------- /axbench/models/interventions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/interventions.py -------------------------------------------------------------------------------- /axbench/models/language_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/language_models.py -------------------------------------------------------------------------------- /axbench/models/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/lora.py -------------------------------------------------------------------------------- /axbench/models/lsreft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/lsreft.py -------------------------------------------------------------------------------- /axbench/models/mean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/mean.py -------------------------------------------------------------------------------- /axbench/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/model.py -------------------------------------------------------------------------------- /axbench/models/preference_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/preference_lora.py -------------------------------------------------------------------------------- /axbench/models/preference_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/preference_model.py -------------------------------------------------------------------------------- /axbench/models/preference_reft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/preference_reft.py -------------------------------------------------------------------------------- /axbench/models/preference_vector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/preference_vector.py -------------------------------------------------------------------------------- /axbench/models/probe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/probe.py -------------------------------------------------------------------------------- /axbench/models/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/prompt.py -------------------------------------------------------------------------------- /axbench/models/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/random.py -------------------------------------------------------------------------------- /axbench/models/reft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/reft.py -------------------------------------------------------------------------------- /axbench/models/sae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/sae.py -------------------------------------------------------------------------------- /axbench/models/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/sft.py -------------------------------------------------------------------------------- /axbench/models/steering_vector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/models/steering_vector.py -------------------------------------------------------------------------------- /axbench/rm_demo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/rm_demo.sh -------------------------------------------------------------------------------- /axbench/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/scripts/analyse.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/analyse.ipynb -------------------------------------------------------------------------------- /axbench/scripts/args/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/scripts/args/dataset_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/args/dataset_args.py -------------------------------------------------------------------------------- /axbench/scripts/args/eval_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/args/eval_args.py -------------------------------------------------------------------------------- /axbench/scripts/args/training_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/args/training_args.py -------------------------------------------------------------------------------- /axbench/scripts/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/evaluate.py -------------------------------------------------------------------------------- /axbench/scripts/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/generate.py -------------------------------------------------------------------------------- /axbench/scripts/human_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/human_eval.py -------------------------------------------------------------------------------- /axbench/scripts/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/inference.py -------------------------------------------------------------------------------- /axbench/scripts/reps_analyse.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/reps_analyse.ipynb -------------------------------------------------------------------------------- /axbench/scripts/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/scripts/train.py -------------------------------------------------------------------------------- /axbench/sweep/aryaman/sae_selection_2b_l10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/aryaman/sae_selection_2b_l10.yaml -------------------------------------------------------------------------------- /axbench/sweep/aryaman/sae_selection_2b_l10_concept10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/aryaman/sae_selection_2b_l10_concept10.yaml -------------------------------------------------------------------------------- /axbench/sweep/aryaman/sae_selection_2b_l20.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/aryaman/sae_selection_2b_l20.yaml -------------------------------------------------------------------------------- /axbench/sweep/aryaman/sae_selection_9b_l20.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/aryaman/sae_selection_9b_l20.yaml -------------------------------------------------------------------------------- /axbench/sweep/aryaman/sae_selection_9b_l31.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/aryaman/sae_selection_9b_l31.yaml -------------------------------------------------------------------------------- /axbench/sweep/aryaman/simple.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/aryaman/simple.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/bow.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/bow.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/gemmascope_axbench_max_act.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/gemmascope_axbench_max_act.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/gemmascope_clamp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/gemmascope_clamp.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/gemmascope_fd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/gemmascope_fd.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/gemmascope_min_clamp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/gemmascope_min_clamp.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/ig.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/ig.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/lora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/lora.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/loreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/loreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/lsreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/lsreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/lsreft_fd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/lsreft_fd.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/lsreft_scaling_law.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/lsreft_scaling_law.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/lsreft_synergy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/lsreft_synergy.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/no_grad.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/no_grad.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/probe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/probe.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/prompt_detection.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/prompt_detection.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/sft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/sft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/simple_prompt_steering.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/simple_prompt_steering.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l10/steering_vec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l10/steering_vec.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/16k_diffmean.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/16k_diffmean.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/16k_diffmean_crossfit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/16k_diffmean_crossfit.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/16k_lsreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/16k_lsreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/16k_lsreft_crossfit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/16k_lsreft_crossfit.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/bow.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/bow.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/gemmascope_axbench_max_act.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/gemmascope_axbench_max_act.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/gemmascope_clamp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/gemmascope_clamp.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/gemmascope_fd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/gemmascope_fd.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/gemmascope_min_clamp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/gemmascope_min_clamp.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/ig.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/ig.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/lora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/lora.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/loreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/loreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/lsreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/lsreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/lsreft_fd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/lsreft_fd.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/lsreft_scaling_law.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/lsreft_scaling_law.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/lsreft_synergy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/lsreft_synergy.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/no_grad.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/no_grad.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/probe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/probe.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/prompt_detection.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/prompt_detection.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/sft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/sft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/simple_prompt_steering.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/simple_prompt_steering.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/2b/l20/steering_vec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/2b/l20/steering_vec.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/16k_diffmean.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/16k_diffmean.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/16k_diffmean_crossfit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/16k_diffmean_crossfit.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/16k_lsreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/16k_lsreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/16k_lsreft_crossfit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/16k_lsreft_crossfit.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/bow.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/bow.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/gemmascope_axbench_max_act.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/gemmascope_axbench_max_act.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/gemmascope_clamp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/gemmascope_clamp.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/gemmascope_min_clamp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/gemmascope_min_clamp.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/ig.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/ig.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/lora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/lora.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/loreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/loreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/lsreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/lsreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/lsreft_scaling_law.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/lsreft_scaling_law.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/lsreft_synergy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/lsreft_synergy.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/no_grad.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/no_grad.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/probe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/probe.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/prompt_detection.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/prompt_detection.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/simple_prompt_steering.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/simple_prompt_steering.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l20/steering_vec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l20/steering_vec.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/bow.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/bow.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/gemmascope_axbench_max_act.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/gemmascope_axbench_max_act.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/gemmascope_clamp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/gemmascope_clamp.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/gemmascope_min_clamp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/gemmascope_min_clamp.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/ig.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/ig.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/lora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/lora.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/loreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/loreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/lsreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/lsreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/lsreft_scaling_law.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/lsreft_scaling_law.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/lsreft_synergy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/lsreft_synergy.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/no_grad.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/no_grad.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/probe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/probe.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/prompt_detection.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/prompt_detection.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/simple_prompt_steering.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/simple_prompt_steering.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/9b/l31/steering_vec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/9b/l31/steering_vec.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/llama_8b/l20/lsreft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/llama_8b/l20/lsreft.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/others/prod_2b_l10_fd_v1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/others/prod_2b_l10_fd_v1.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/others/prod_2b_l10_v1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/others/prod_2b_l10_v1.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/others/prod_2b_l20_fd_v1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/others/prod_2b_l20_fd_v1.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/others/prod_2b_l20_v1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/others/prod_2b_l20_v1.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/others/prod_9b_l20_v1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/others/prod_9b_l20_v1.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/others/prod_9b_l31_v1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/others/prod_9b_l31_v1.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/pls/prod_2b_l20_gemma_65k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/pls/prod_2b_l20_gemma_65k.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/pls/prod_8b_l20_llama_131k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/pls/prod_8b_l20_llama_131k.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/pls/prod_9b_l20_gemma_131k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/pls/prod_9b_l20_gemma_131k.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/README.md -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/dataset/concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/dataset/concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/dataset/concept500.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/dataset/concept500.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g2-2b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g2-2b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g2-9b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g2-9b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-12b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-12b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-12b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-12b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-12b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-12b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-27b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-27b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-27b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-27b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-27b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_lora_g3-27b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g2-2b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g2-2b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g2-9b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g2-9b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-12b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-12b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-12b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-12b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-12b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-12b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-27b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-27b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-27b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-27b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-27b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_loreft_g3-27b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_attack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_attack.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_suppress_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_axbench_suppress_rule.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_nfs_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-2b_nfs_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_attack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_attack.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_suppress_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_axbench_suppress_rule.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_nfs_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g2-9b_nfs_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_axbench_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_axbench_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_axbench_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_axbench_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-12b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_axbench_suppress_overwrite_prepend.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/c_vector_g3-27b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_embedding_dps_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_embedding_dps_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_embedding_dps_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_embedding_dps_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_bipo_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_bipo_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_bipo_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_bipo_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-2b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-2b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-2b_nfs_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-2b_nfs_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-9b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-9b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-9b_nfs_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g2-9b_nfs_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-12b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-12b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-12b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-12b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-12b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-12b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-27b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-27b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-27b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-27b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-27b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_lora_dps_g3-27b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_bipo_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_bipo_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_bipo_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_bipo_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-2b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-2b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-2b_nfs_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-2b_nfs_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-9b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-9b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-9b_nfs_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g2-9b_nfs_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-12b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-12b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-12b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-12b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-12b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-12b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-27b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-27b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-27b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-27b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-27b_concept100_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_loreft_dps_g3-27b_concept100_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_prefix_dps_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_prefix_dps_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_prefix_dps_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_prefix_dps_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_bipo_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_bipo_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_bipo_g2-2b_lf_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_bipo_g2-2b_lf_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_bipo_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_bipo_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_bipo_g2-9b_lf_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_bipo_g2-9b_lf_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_axbench_attack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_axbench_attack.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_axbench_suppress_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_axbench_suppress_rule.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_nfs_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-2b_nfs_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench_attack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench_attack.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench_suppress_rule.yam: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench_suppress_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_axbench_suppress_rule.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_nfs_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g2-9b_nfs_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-12b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-12b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-12b_axbench_suppress_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-12b_axbench_suppress_rule.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-12b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-12b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-27b_axbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-27b_axbench.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-27b_axbench_suppress.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-27b_axbench_suppress_rule.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-27b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_dps_g3-27b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_g2-2b_axbench_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_g2-2b_axbench_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_g2-2b_axbench_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_g2-2b_axbench_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_g2-9b_axbench_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_g2-9b_axbench_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_g2-9b_axbench_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_g2-9b_axbench_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_g3-12b_axbench_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_g3-12b_axbench_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_g3-12b_axbench_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_g3-12b_axbench_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_g3-27b_axbench_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_g3-27b_axbench_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/p_vector_g3-27b_axbench_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/p_vector_g3-27b_axbench_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_concept20_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_concept20_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_concept20_suppress_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_concept20_suppress_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_concept20_suppress_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_concept20_suppress_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_concept500_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_concept500_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_prompt_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-2b_prompt_rule.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-9b_concept20_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-9b_concept20_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-9b_concept20_suppress_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-9b_concept20_suppress_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-9b_concept20_suppress_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-9b_concept20_suppress_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-9b_concept500_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g2-9b_concept500_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept20_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept20_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept20_suppress_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept20_suppress_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept20_suppress_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept20_suppress_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept20_suppress_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-12b_concept20_suppress_rule.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept100.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept20_suppress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept20_suppress.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept20_suppress_overwrite_append.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept20_suppress_overwrite_append.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept20_suppress_overwrite_prepend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept20_suppress_overwrite_prepend.yaml -------------------------------------------------------------------------------- /axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept20_suppress_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/sweep/wuzhengx/reps/experiments/prompt_steering_g3-27b_concept20_suppress_rule.yaml -------------------------------------------------------------------------------- /axbench/templates/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/templates/html_templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/templates/html_templates.py -------------------------------------------------------------------------------- /axbench/templates/prompt_templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/templates/prompt_templates.py -------------------------------------------------------------------------------- /axbench/tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/tests/README.md -------------------------------------------------------------------------------- /axbench/tests/test_released_artifacts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/tests/test_released_artifacts.py -------------------------------------------------------------------------------- /axbench/tests/unit_tests/test_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/tests/unit_tests/test_dataset.py -------------------------------------------------------------------------------- /axbench/tests/unit_tests/test_prompt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/tests/unit_tests/test_prompt_utils.py -------------------------------------------------------------------------------- /axbench/tests/unit_tests/test_sae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/tests/unit_tests/test_sae.py -------------------------------------------------------------------------------- /axbench/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /axbench/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/utils/constants.py -------------------------------------------------------------------------------- /axbench/utils/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/utils/data_utils.py -------------------------------------------------------------------------------- /axbench/utils/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/utils/dataset.py -------------------------------------------------------------------------------- /axbench/utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/utils/model_utils.py -------------------------------------------------------------------------------- /axbench/utils/plot_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/utils/plot_utils.py -------------------------------------------------------------------------------- /axbench/utils/prompt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/axbench/utils/prompt_utils.py -------------------------------------------------------------------------------- /hypersteer_requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/hypersteer_requirement.txt -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/pyproject.toml -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/axbench/HEAD/uv.lock --------------------------------------------------------------------------------