├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .github └── workflows │ ├── checks.yml │ ├── gh-pages.yml │ └── release.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode ├── cspell.json ├── extensions.json └── settings.json ├── LICENSE ├── README.md ├── docs ├── content │ ├── SUMMARY.md │ ├── citation.md │ ├── contributing.md │ ├── css │ │ ├── custom_formatting.css │ │ └── material_extra.css │ ├── demo.ipynb │ ├── index.md │ ├── javascript │ │ ├── custom_formatting.js │ │ └── mathjax.js │ └── pre-process-datasets.ipynb └── gen_ref_pages.py ├── mkdocs.yml ├── poetry.lock ├── pyproject.toml └── sparse_autoencoder ├── __init__.py ├── activation_resampler ├── __init__.py ├── activation_resampler.py ├── tests │ └── test_activation_resampler.py └── utils │ ├── __init__.py │ └── component_slice_tensor.py ├── activation_store ├── __init__.py ├── base_store.py ├── tensor_store.py └── tests │ └── test_tensor_store.py ├── autoencoder ├── __init__.py ├── components │ ├── __init__.py │ ├── linear_encoder.py │ ├── tests │ │ ├── __snapshots__ │ │ │ └── test_linear_encoder.ambr │ │ ├── test_compare_neel_implementation.py │ │ ├── test_linear_encoder.py │ │ ├── test_tied_bias.py │ │ └── test_unit_norm_decoder.py │ ├── tied_bias.py │ └── unit_norm_decoder.py ├── lightning.py ├── model.py ├── tests │ ├── __snapshots__ │ │ └── test_model.ambr │ └── test_model.py └── types.py ├── metrics ├── __init__.py ├── loss │ ├── __init__.py │ ├── l1_absolute_loss.py │ ├── l2_reconstruction_loss.py │ ├── sae_loss.py │ └── tests │ │ ├── test_l1_absolute_loss.py │ │ ├── test_l2_reconstruction_loss.py │ │ └── test_sae_loss.py ├── train │ ├── __init__.py │ ├── capacity.py │ ├── feature_density.py │ ├── l0_norm.py │ ├── neuron_activity.py │ ├── neuron_fired_count.py │ └── tests │ │ ├── test_feature_density.py │ │ ├── test_l0_norm.py │ │ ├── test_neuron_activity.py │ │ └── test_neuron_fired_count.py ├── validate │ ├── __init__.py │ ├── reconstruction_score.py │ └── tests │ │ └── __snapshots__ │ │ └── test_model_reconstruction_score.ambr └── wrappers │ ├── __init__.py │ ├── classwise.py │ └── tests │ └── test_classwise.py ├── optimizer ├── __init__.py ├── adam_with_reset.py └── tests │ └── test_adam_with_reset.py ├── source_data ├── __init__.py ├── abstract_dataset.py ├── mock_dataset.py ├── pretokenized_dataset.py ├── tests │ ├── test_abstract_dataset.py │ ├── test_mock_dataset.py │ ├── test_pretokenized_dataset.py │ └── test_text_dataset.py └── text_dataset.py ├── source_model ├── __init__.py ├── replace_activations_hook.py ├── reshape_activations.py ├── store_activations_hook.py ├── tests │ ├── test_replace_activations_hook.py │ ├── test_store_activations_hook.py │ └── test_zero_ablate_hook.py └── zero_ablate_hook.py ├── tensor_types.py ├── train ├── __init__.py ├── join_sweep.py ├── pipeline.py ├── sweep.py ├── sweep_config.py ├── tests │ ├── __snapshots__ │ │ └── test_sweep.ambr │ ├── test_pipeline.py │ └── test_sweep.py └── utils │ ├── __init__.py │ ├── get_model_device.py │ ├── round_down.py │ ├── tests │ ├── test_get_model_device.py │ └── test_wandb_sweep_types.py │ └── wandb_sweep_types.py ├── training_runs ├── __init__.py └── gpt2.py └── utils ├── __init__.py ├── data_parallel.py └── tensor_shape.py /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.devcontainer/Dockerfile -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.devcontainer/devcontainer.json -------------------------------------------------------------------------------- /.github/workflows/checks.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.github/workflows/checks.yml -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.github/workflows/gh-pages.yml -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.github/workflows/release.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.vscode/cspell.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.vscode/cspell.json -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.vscode/extensions.json -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/README.md -------------------------------------------------------------------------------- /docs/content/SUMMARY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/SUMMARY.md -------------------------------------------------------------------------------- /docs/content/citation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/citation.md -------------------------------------------------------------------------------- /docs/content/contributing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/contributing.md -------------------------------------------------------------------------------- /docs/content/css/custom_formatting.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/css/custom_formatting.css -------------------------------------------------------------------------------- /docs/content/css/material_extra.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/css/material_extra.css -------------------------------------------------------------------------------- /docs/content/demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/demo.ipynb -------------------------------------------------------------------------------- /docs/content/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/index.md -------------------------------------------------------------------------------- /docs/content/javascript/custom_formatting.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/javascript/custom_formatting.js -------------------------------------------------------------------------------- /docs/content/javascript/mathjax.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/javascript/mathjax.js -------------------------------------------------------------------------------- /docs/content/pre-process-datasets.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/content/pre-process-datasets.ipynb -------------------------------------------------------------------------------- /docs/gen_ref_pages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/docs/gen_ref_pages.py -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/mkdocs.yml -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/pyproject.toml -------------------------------------------------------------------------------- /sparse_autoencoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/__init__.py -------------------------------------------------------------------------------- /sparse_autoencoder/activation_resampler/__init__.py: -------------------------------------------------------------------------------- 1 | """Activation Resampler.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/activation_resampler/activation_resampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/activation_resampler/activation_resampler.py -------------------------------------------------------------------------------- /sparse_autoencoder/activation_resampler/tests/test_activation_resampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/activation_resampler/tests/test_activation_resampler.py -------------------------------------------------------------------------------- /sparse_autoencoder/activation_resampler/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Activation resampler utils.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/activation_resampler/utils/component_slice_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/activation_resampler/utils/component_slice_tensor.py -------------------------------------------------------------------------------- /sparse_autoencoder/activation_store/__init__.py: -------------------------------------------------------------------------------- 1 | """Activation Stores.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/activation_store/base_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/activation_store/base_store.py -------------------------------------------------------------------------------- /sparse_autoencoder/activation_store/tensor_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/activation_store/tensor_store.py -------------------------------------------------------------------------------- /sparse_autoencoder/activation_store/tests/test_tensor_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/activation_store/tests/test_tensor_store.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | """Sparse autoencoder model & components.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/__init__.py: -------------------------------------------------------------------------------- 1 | """Sparse autoencoder components.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/linear_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/components/linear_encoder.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/tests/__snapshots__/test_linear_encoder.ambr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/components/tests/__snapshots__/test_linear_encoder.ambr -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/tests/test_compare_neel_implementation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/components/tests/test_compare_neel_implementation.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/tests/test_linear_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/components/tests/test_linear_encoder.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/tests/test_tied_bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/components/tests/test_tied_bias.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/tests/test_unit_norm_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/components/tests/test_unit_norm_decoder.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/tied_bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/components/tied_bias.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/components/unit_norm_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/components/unit_norm_decoder.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/lightning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/lightning.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/model.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/tests/__snapshots__/test_model.ambr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/tests/__snapshots__/test_model.ambr -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/tests/test_model.py -------------------------------------------------------------------------------- /sparse_autoencoder/autoencoder/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/autoencoder/types.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/__init__.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/loss/__init__.py: -------------------------------------------------------------------------------- 1 | """Loss metrics.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/loss/l1_absolute_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/loss/l1_absolute_loss.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/loss/l2_reconstruction_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/loss/l2_reconstruction_loss.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/loss/sae_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/loss/sae_loss.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/loss/tests/test_l1_absolute_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/loss/tests/test_l1_absolute_loss.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/loss/tests/test_l2_reconstruction_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/loss/tests/test_l2_reconstruction_loss.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/loss/tests/test_sae_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/loss/tests/test_sae_loss.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/__init__.py: -------------------------------------------------------------------------------- 1 | """Train step metrics.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/capacity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/capacity.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/feature_density.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/feature_density.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/l0_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/l0_norm.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/neuron_activity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/neuron_activity.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/neuron_fired_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/neuron_fired_count.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/tests/test_feature_density.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/tests/test_feature_density.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/tests/test_l0_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/tests/test_l0_norm.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/tests/test_neuron_activity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/tests/test_neuron_activity.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/train/tests/test_neuron_fired_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/train/tests/test_neuron_fired_count.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/validate/__init__.py: -------------------------------------------------------------------------------- 1 | """Validate step metrics.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/validate/reconstruction_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/validate/reconstruction_score.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/validate/tests/__snapshots__/test_model_reconstruction_score.ambr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/validate/tests/__snapshots__/test_model_reconstruction_score.ambr -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | """Metric wrappers.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/wrappers/classwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/wrappers/classwise.py -------------------------------------------------------------------------------- /sparse_autoencoder/metrics/wrappers/tests/test_classwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/metrics/wrappers/tests/test_classwise.py -------------------------------------------------------------------------------- /sparse_autoencoder/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | """Optimizers for Sparse Autoencoders.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/optimizer/adam_with_reset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/optimizer/adam_with_reset.py -------------------------------------------------------------------------------- /sparse_autoencoder/optimizer/tests/test_adam_with_reset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/optimizer/tests/test_adam_with_reset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/__init__.py: -------------------------------------------------------------------------------- 1 | """Source Data.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/abstract_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_data/abstract_dataset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/mock_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_data/mock_dataset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/pretokenized_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_data/pretokenized_dataset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/tests/test_abstract_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_data/tests/test_abstract_dataset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/tests/test_mock_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_data/tests/test_mock_dataset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/tests/test_pretokenized_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_data/tests/test_pretokenized_dataset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/tests/test_text_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_data/tests/test_text_dataset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_data/text_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_data/text_dataset.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_model/__init__.py: -------------------------------------------------------------------------------- 1 | """Source Model.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/source_model/replace_activations_hook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_model/replace_activations_hook.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_model/reshape_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_model/reshape_activations.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_model/store_activations_hook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_model/store_activations_hook.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_model/tests/test_replace_activations_hook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_model/tests/test_replace_activations_hook.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_model/tests/test_store_activations_hook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_model/tests/test_store_activations_hook.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_model/tests/test_zero_ablate_hook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_model/tests/test_zero_ablate_hook.py -------------------------------------------------------------------------------- /sparse_autoencoder/source_model/zero_ablate_hook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/source_model/zero_ablate_hook.py -------------------------------------------------------------------------------- /sparse_autoencoder/tensor_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/tensor_types.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/__init__.py: -------------------------------------------------------------------------------- 1 | """Train.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/train/join_sweep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/join_sweep.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/pipeline.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/sweep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/sweep.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/sweep_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/sweep_config.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/tests/__snapshots__/test_sweep.ambr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/tests/__snapshots__/test_sweep.ambr -------------------------------------------------------------------------------- /sparse_autoencoder/train/tests/test_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/tests/test_pipeline.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/tests/test_sweep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/tests/test_sweep.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Train Utils.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/train/utils/get_model_device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/utils/get_model_device.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/utils/round_down.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/utils/round_down.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/utils/tests/test_get_model_device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/utils/tests/test_get_model_device.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/utils/tests/test_wandb_sweep_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/utils/tests/test_wandb_sweep_types.py -------------------------------------------------------------------------------- /sparse_autoencoder/train/utils/wandb_sweep_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/train/utils/wandb_sweep_types.py -------------------------------------------------------------------------------- /sparse_autoencoder/training_runs/__init__.py: -------------------------------------------------------------------------------- 1 | """Training runs.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/training_runs/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/training_runs/gpt2.py -------------------------------------------------------------------------------- /sparse_autoencoder/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Shared utils.""" 2 | -------------------------------------------------------------------------------- /sparse_autoencoder/utils/data_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/utils/data_parallel.py -------------------------------------------------------------------------------- /sparse_autoencoder/utils/tensor_shape.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-safety-foundation/sparse_autoencoder/HEAD/sparse_autoencoder/utils/tensor_shape.py --------------------------------------------------------------------------------