├── .gitignore ├── README.md ├── activation_dataset.py ├── autoencoders ├── __init__.py ├── ensemble.py ├── ica.py ├── learned_dict.py ├── mlp_tests.py ├── nmf.py ├── pca.py ├── residual_denoising_autoencoder.py ├── rica.py ├── sae_ensemble.py ├── semilinear_autoencoder.py └── topk_encoder.py ├── basic_l1_sweep.py ├── big_sweep.py ├── big_sweep_experiments.py ├── case_studies_loop.ipynb ├── cluster_runs.py ├── cmdutil.py ├── config.py ├── do_ioi_multiple_layers.sh ├── experiments ├── check_l0_tokens.py ├── deep_ae_testing.py ├── huge_batch_size.py ├── inter_layer_comparison.ipynb ├── interp_moment_corrs.py ├── investigate.py └── pca_perplexity.py ├── generate_test_data.py ├── inter_dict_connections.ipynb ├── interp_notebooks ├── dict_across_time.ipynb ├── dict_compare.ipynb ├── feature_interp.ipynb └── interpreting_sparse_dictionaries.ipynb ├── interpret.py ├── minimal_feature_interp.ipynb ├── plotting ├── __init__.py ├── bottleneck_plot.py ├── erasure_plot.py ├── fvu_sparsity_plot.py ├── fvu_sparsity_plot_gpt2sm.py ├── fvu_sparsity_plot_mlp_center.py ├── num_dead_plot.py ├── plot_autointerp_across_chunks.py ├── plot_autointerp_across_size.py ├── plot_autointerp_violins.py ├── plot_autointerp_vs_baselines.py ├── plot_autointerp_vs_topk_baselines.py ├── plot_kl_div.py ├── plot_n_active.py ├── plot_n_active_big_70m.py ├── plot_n_active_gpt2sm.py ├── plot_n_active_long.py ├── plot_n_active_over_time.py ├── plot_n_active_summary.py └── plot_sweep_results.py ├── replicate_toy_models.py ├── requirements.txt ├── sc_datasets ├── __init__.py └── random_dataset.py ├── standard_metrics.py ├── sweep_baselines.py ├── test ├── test_end_to_end.py ├── test_ica.py ├── test_interpret.py └── test_stats_batched.py ├── test_datasets ├── __init__.py ├── induction.py ├── ioi.py ├── ioi_counterfact.py └── preprocess_gender_dataset.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/README.md -------------------------------------------------------------------------------- /activation_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/activation_dataset.py -------------------------------------------------------------------------------- /autoencoders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoencoders/ensemble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/ensemble.py -------------------------------------------------------------------------------- /autoencoders/ica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/ica.py -------------------------------------------------------------------------------- /autoencoders/learned_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/learned_dict.py -------------------------------------------------------------------------------- /autoencoders/mlp_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/mlp_tests.py -------------------------------------------------------------------------------- /autoencoders/nmf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/nmf.py -------------------------------------------------------------------------------- /autoencoders/pca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/pca.py -------------------------------------------------------------------------------- /autoencoders/residual_denoising_autoencoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/residual_denoising_autoencoder.py -------------------------------------------------------------------------------- /autoencoders/rica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/rica.py -------------------------------------------------------------------------------- /autoencoders/sae_ensemble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/sae_ensemble.py -------------------------------------------------------------------------------- /autoencoders/semilinear_autoencoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/semilinear_autoencoder.py -------------------------------------------------------------------------------- /autoencoders/topk_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/autoencoders/topk_encoder.py -------------------------------------------------------------------------------- /basic_l1_sweep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/basic_l1_sweep.py -------------------------------------------------------------------------------- /big_sweep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/big_sweep.py -------------------------------------------------------------------------------- /big_sweep_experiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/big_sweep_experiments.py -------------------------------------------------------------------------------- /case_studies_loop.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/case_studies_loop.ipynb -------------------------------------------------------------------------------- /cluster_runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/cluster_runs.py -------------------------------------------------------------------------------- /cmdutil.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/cmdutil.py -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/config.py -------------------------------------------------------------------------------- /do_ioi_multiple_layers.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/do_ioi_multiple_layers.sh -------------------------------------------------------------------------------- /experiments/check_l0_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/experiments/check_l0_tokens.py -------------------------------------------------------------------------------- /experiments/deep_ae_testing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/experiments/deep_ae_testing.py -------------------------------------------------------------------------------- /experiments/huge_batch_size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/experiments/huge_batch_size.py -------------------------------------------------------------------------------- /experiments/inter_layer_comparison.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/experiments/inter_layer_comparison.ipynb -------------------------------------------------------------------------------- /experiments/interp_moment_corrs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/experiments/interp_moment_corrs.py -------------------------------------------------------------------------------- /experiments/investigate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/experiments/investigate.py -------------------------------------------------------------------------------- /experiments/pca_perplexity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/experiments/pca_perplexity.py -------------------------------------------------------------------------------- /generate_test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/generate_test_data.py -------------------------------------------------------------------------------- /inter_dict_connections.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/inter_dict_connections.ipynb -------------------------------------------------------------------------------- /interp_notebooks/dict_across_time.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/interp_notebooks/dict_across_time.ipynb -------------------------------------------------------------------------------- /interp_notebooks/dict_compare.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/interp_notebooks/dict_compare.ipynb -------------------------------------------------------------------------------- /interp_notebooks/feature_interp.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/interp_notebooks/feature_interp.ipynb -------------------------------------------------------------------------------- /interp_notebooks/interpreting_sparse_dictionaries.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/interp_notebooks/interpreting_sparse_dictionaries.ipynb -------------------------------------------------------------------------------- /interpret.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/interpret.py -------------------------------------------------------------------------------- /minimal_feature_interp.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/minimal_feature_interp.ipynb -------------------------------------------------------------------------------- /plotting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plotting/bottleneck_plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/bottleneck_plot.py -------------------------------------------------------------------------------- /plotting/erasure_plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/erasure_plot.py -------------------------------------------------------------------------------- /plotting/fvu_sparsity_plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/fvu_sparsity_plot.py -------------------------------------------------------------------------------- /plotting/fvu_sparsity_plot_gpt2sm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/fvu_sparsity_plot_gpt2sm.py -------------------------------------------------------------------------------- /plotting/fvu_sparsity_plot_mlp_center.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/fvu_sparsity_plot_mlp_center.py -------------------------------------------------------------------------------- /plotting/num_dead_plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/num_dead_plot.py -------------------------------------------------------------------------------- /plotting/plot_autointerp_across_chunks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_autointerp_across_chunks.py -------------------------------------------------------------------------------- /plotting/plot_autointerp_across_size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_autointerp_across_size.py -------------------------------------------------------------------------------- /plotting/plot_autointerp_violins.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_autointerp_violins.py -------------------------------------------------------------------------------- /plotting/plot_autointerp_vs_baselines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_autointerp_vs_baselines.py -------------------------------------------------------------------------------- /plotting/plot_autointerp_vs_topk_baselines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_autointerp_vs_topk_baselines.py -------------------------------------------------------------------------------- /plotting/plot_kl_div.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_kl_div.py -------------------------------------------------------------------------------- /plotting/plot_n_active.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_n_active.py -------------------------------------------------------------------------------- /plotting/plot_n_active_big_70m.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_n_active_big_70m.py -------------------------------------------------------------------------------- /plotting/plot_n_active_gpt2sm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_n_active_gpt2sm.py -------------------------------------------------------------------------------- /plotting/plot_n_active_long.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_n_active_long.py -------------------------------------------------------------------------------- /plotting/plot_n_active_over_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_n_active_over_time.py -------------------------------------------------------------------------------- /plotting/plot_n_active_summary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_n_active_summary.py -------------------------------------------------------------------------------- /plotting/plot_sweep_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/plotting/plot_sweep_results.py -------------------------------------------------------------------------------- /replicate_toy_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/replicate_toy_models.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/requirements.txt -------------------------------------------------------------------------------- /sc_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sc_datasets/random_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/sc_datasets/random_dataset.py -------------------------------------------------------------------------------- /standard_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/standard_metrics.py -------------------------------------------------------------------------------- /sweep_baselines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/sweep_baselines.py -------------------------------------------------------------------------------- /test/test_end_to_end.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/test/test_end_to_end.py -------------------------------------------------------------------------------- /test/test_ica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/test/test_ica.py -------------------------------------------------------------------------------- /test/test_interpret.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/test/test_interpret.py -------------------------------------------------------------------------------- /test/test_stats_batched.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/test/test_stats_batched.py -------------------------------------------------------------------------------- /test_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_datasets/induction.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_datasets/ioi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/test_datasets/ioi.py -------------------------------------------------------------------------------- /test_datasets/ioi_counterfact.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/test_datasets/ioi_counterfact.py -------------------------------------------------------------------------------- /test_datasets/preprocess_gender_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/test_datasets/preprocess_gender_dataset.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HoagyC/sparse_coding/HEAD/utils.py --------------------------------------------------------------------------------