├── README.md ├── assets └── teaser.svg ├── datasets ├── __init__.py └── activations.py ├── dictionary_learning ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── buffer.py ├── config.py ├── dictionary.py ├── evaluation.py ├── grad_pursuit.py ├── interp.py ├── pretrained_dictionary_downloader.sh ├── requirements.txt ├── tests │ └── test_end_to_end.py ├── trainers │ ├── __init__.py │ ├── batch_top_k.py │ ├── gated_anneal.py │ ├── gdm.py │ ├── jumprelu.py │ ├── matroyshka_batch_top_k.py │ ├── p_anneal.py │ ├── standard.py │ ├── top_k.py │ └── trainer.py ├── training.py └── utils.py ├── encode_images.py ├── find_hai_indices.py ├── imagenet_subset.py ├── images └── white.png ├── inat_depth.py ├── metric.py ├── metric_benchmark.tar.gz ├── models ├── clip.py ├── dino.py ├── llava.py └── siglip.py ├── requirements.txt ├── sae_train.py ├── save_activations.py ├── scripts ├── matryoshka_hierarchy.sh ├── mllm_steering.sh └── monosemanticity_score.sh ├── similarity_baseline.py ├── steering_qualitative.py ├── steering_score.py ├── uniqueness.py ├── utils.py └── visualize_neurons.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/README.md -------------------------------------------------------------------------------- /assets/teaser.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/assets/teaser.svg -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/datasets/activations.py -------------------------------------------------------------------------------- /dictionary_learning/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/.gitignore -------------------------------------------------------------------------------- /dictionary_learning/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/LICENSE -------------------------------------------------------------------------------- /dictionary_learning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/README.md -------------------------------------------------------------------------------- /dictionary_learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/__init__.py -------------------------------------------------------------------------------- /dictionary_learning/buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/buffer.py -------------------------------------------------------------------------------- /dictionary_learning/config.py: -------------------------------------------------------------------------------- 1 | # debugging flag for use in other scripts 2 | DEBUG = False -------------------------------------------------------------------------------- /dictionary_learning/dictionary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/dictionary.py -------------------------------------------------------------------------------- /dictionary_learning/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/evaluation.py -------------------------------------------------------------------------------- /dictionary_learning/grad_pursuit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/grad_pursuit.py -------------------------------------------------------------------------------- /dictionary_learning/interp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/interp.py -------------------------------------------------------------------------------- /dictionary_learning/pretrained_dictionary_downloader.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/pretrained_dictionary_downloader.sh -------------------------------------------------------------------------------- /dictionary_learning/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/requirements.txt -------------------------------------------------------------------------------- /dictionary_learning/tests/test_end_to_end.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/tests/test_end_to_end.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/__init__.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/batch_top_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/batch_top_k.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/gated_anneal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/gated_anneal.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/gdm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/gdm.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/jumprelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/jumprelu.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/matroyshka_batch_top_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/matroyshka_batch_top_k.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/p_anneal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/p_anneal.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/standard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/standard.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/top_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/top_k.py -------------------------------------------------------------------------------- /dictionary_learning/trainers/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/trainers/trainer.py -------------------------------------------------------------------------------- /dictionary_learning/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/training.py -------------------------------------------------------------------------------- /dictionary_learning/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/dictionary_learning/utils.py -------------------------------------------------------------------------------- /encode_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/encode_images.py -------------------------------------------------------------------------------- /find_hai_indices.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/find_hai_indices.py -------------------------------------------------------------------------------- /imagenet_subset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/imagenet_subset.py -------------------------------------------------------------------------------- /images/white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/images/white.png -------------------------------------------------------------------------------- /inat_depth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/inat_depth.py -------------------------------------------------------------------------------- /metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/metric.py -------------------------------------------------------------------------------- /metric_benchmark.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/metric_benchmark.tar.gz -------------------------------------------------------------------------------- /models/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/models/clip.py -------------------------------------------------------------------------------- /models/dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/models/dino.py -------------------------------------------------------------------------------- /models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/models/llava.py -------------------------------------------------------------------------------- /models/siglip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/models/siglip.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/requirements.txt -------------------------------------------------------------------------------- /sae_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/sae_train.py -------------------------------------------------------------------------------- /save_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/save_activations.py -------------------------------------------------------------------------------- /scripts/matryoshka_hierarchy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/scripts/matryoshka_hierarchy.sh -------------------------------------------------------------------------------- /scripts/mllm_steering.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/scripts/mllm_steering.sh -------------------------------------------------------------------------------- /scripts/monosemanticity_score.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/scripts/monosemanticity_score.sh -------------------------------------------------------------------------------- /similarity_baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/similarity_baseline.py -------------------------------------------------------------------------------- /steering_qualitative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/steering_qualitative.py -------------------------------------------------------------------------------- /steering_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/steering_score.py -------------------------------------------------------------------------------- /uniqueness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/uniqueness.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/utils.py -------------------------------------------------------------------------------- /visualize_neurons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/sae-for-vlm/HEAD/visualize_neurons.py --------------------------------------------------------------------------------