├── .gitignore ├── LICENSE ├── README.md ├── activations.py ├── analysis ├── __init__.py ├── activations.py ├── correlations.py ├── entropy_neurons.py ├── heuristic_explanation.py ├── neuron_df.py ├── plots.py ├── prediction_neurons.py ├── sequence_features.py ├── vocab_df.py └── weights.py ├── attention_deactivation.py ├── attention_deactivation_qpos.py ├── correlations.py ├── correlations_fast.py ├── correlations_parallel.py ├── dataframes ├── interpretable_neurons │ ├── pythia-160m │ │ └── universal.csv │ ├── stanford-gpt2-medium-a │ │ ├── prediction_neurons.csv │ │ └── universal.csv │ └── stanford-gpt2-small-a │ │ ├── high_excess_correlation.csv │ │ ├── sub_gaussian_activation_kurtosis.csv │ │ └── universal.csv ├── neuron_dfs │ ├── pythia-160m.csv │ ├── stanford-gpt2-medium-a.csv │ └── stanford-gpt2-small-a.csv └── vocab_dfs │ ├── gpt2.csv │ ├── gpt2_topics.csv │ └── pythia.csv ├── entropy_intervention.py ├── explain.py ├── intervention.py ├── make_dataset.py ├── paper_notebooks ├── alphabet_neurons.ipynb ├── bos_signal_neurons.ipynb ├── entropy_neurons.ipynb ├── family_count.ipynb ├── mysteries.ipynb ├── position_neurons.ipynb ├── prediction_neurons.ipynb ├── previous_token_neurons.ipynb ├── properties_of_universal_neurons.ipynb ├── syntax_neurons.ipynb ├── topic_neurons.ipynb └── unigram_neurons.ipynb ├── requirements.txt ├── slurm ├── attention_deactivation_exp.sh ├── compute_attention_deactivation.sh ├── compute_correlation.sh ├── compute_correlation_fast.sh ├── compute_correlation_parallel.sh ├── correlation_error_experiment.sh ├── correlation_exp_fast.sh ├── correlation_exp_parallel.sh ├── correlation_experiment.sh ├── correlation_subset_experiment.sh ├── entropy_intervention.sh ├── entropy_neuron_experiment.sh ├── intervention.sh ├── intervention_experiment.sh ├── run_all_summary.sh ├── run_explanation.sh ├── run_explanation_all.sh ├── run_summary.sh ├── run_summary_all_data.sh ├── run_weight_summary.sh ├── save_neuron_acts.sh └── test_gpu.sh ├── summary.py ├── summary_viewer.py ├── utils.py └── weights.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/README.md -------------------------------------------------------------------------------- /activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/activations.py -------------------------------------------------------------------------------- /analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /analysis/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/activations.py -------------------------------------------------------------------------------- /analysis/correlations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/correlations.py -------------------------------------------------------------------------------- /analysis/entropy_neurons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/entropy_neurons.py -------------------------------------------------------------------------------- /analysis/heuristic_explanation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/heuristic_explanation.py -------------------------------------------------------------------------------- /analysis/neuron_df.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/neuron_df.py -------------------------------------------------------------------------------- /analysis/plots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/plots.py -------------------------------------------------------------------------------- /analysis/prediction_neurons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/prediction_neurons.py -------------------------------------------------------------------------------- /analysis/sequence_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/sequence_features.py -------------------------------------------------------------------------------- /analysis/vocab_df.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/vocab_df.py -------------------------------------------------------------------------------- /analysis/weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/analysis/weights.py -------------------------------------------------------------------------------- /attention_deactivation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/attention_deactivation.py -------------------------------------------------------------------------------- /attention_deactivation_qpos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/attention_deactivation_qpos.py -------------------------------------------------------------------------------- /correlations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/correlations.py -------------------------------------------------------------------------------- /correlations_fast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/correlations_fast.py -------------------------------------------------------------------------------- /correlations_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/correlations_parallel.py -------------------------------------------------------------------------------- /dataframes/interpretable_neurons/pythia-160m/universal.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/interpretable_neurons/pythia-160m/universal.csv -------------------------------------------------------------------------------- /dataframes/interpretable_neurons/stanford-gpt2-medium-a/prediction_neurons.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/interpretable_neurons/stanford-gpt2-medium-a/prediction_neurons.csv -------------------------------------------------------------------------------- /dataframes/interpretable_neurons/stanford-gpt2-medium-a/universal.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/interpretable_neurons/stanford-gpt2-medium-a/universal.csv -------------------------------------------------------------------------------- /dataframes/interpretable_neurons/stanford-gpt2-small-a/high_excess_correlation.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/interpretable_neurons/stanford-gpt2-small-a/high_excess_correlation.csv -------------------------------------------------------------------------------- /dataframes/interpretable_neurons/stanford-gpt2-small-a/sub_gaussian_activation_kurtosis.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/interpretable_neurons/stanford-gpt2-small-a/sub_gaussian_activation_kurtosis.csv -------------------------------------------------------------------------------- /dataframes/interpretable_neurons/stanford-gpt2-small-a/universal.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/interpretable_neurons/stanford-gpt2-small-a/universal.csv -------------------------------------------------------------------------------- /dataframes/neuron_dfs/pythia-160m.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/neuron_dfs/pythia-160m.csv -------------------------------------------------------------------------------- /dataframes/neuron_dfs/stanford-gpt2-medium-a.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/neuron_dfs/stanford-gpt2-medium-a.csv -------------------------------------------------------------------------------- /dataframes/neuron_dfs/stanford-gpt2-small-a.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/neuron_dfs/stanford-gpt2-small-a.csv -------------------------------------------------------------------------------- /dataframes/vocab_dfs/gpt2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/vocab_dfs/gpt2.csv -------------------------------------------------------------------------------- /dataframes/vocab_dfs/gpt2_topics.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/vocab_dfs/gpt2_topics.csv -------------------------------------------------------------------------------- /dataframes/vocab_dfs/pythia.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/dataframes/vocab_dfs/pythia.csv -------------------------------------------------------------------------------- /entropy_intervention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/entropy_intervention.py -------------------------------------------------------------------------------- /explain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/explain.py -------------------------------------------------------------------------------- /intervention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/intervention.py -------------------------------------------------------------------------------- /make_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/make_dataset.py -------------------------------------------------------------------------------- /paper_notebooks/alphabet_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/alphabet_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/bos_signal_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/bos_signal_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/entropy_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/entropy_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/family_count.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/family_count.ipynb -------------------------------------------------------------------------------- /paper_notebooks/mysteries.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/mysteries.ipynb -------------------------------------------------------------------------------- /paper_notebooks/position_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/position_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/prediction_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/prediction_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/previous_token_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/previous_token_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/properties_of_universal_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/properties_of_universal_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/syntax_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/syntax_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/topic_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/topic_neurons.ipynb -------------------------------------------------------------------------------- /paper_notebooks/unigram_neurons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/paper_notebooks/unigram_neurons.ipynb -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/requirements.txt -------------------------------------------------------------------------------- /slurm/attention_deactivation_exp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/attention_deactivation_exp.sh -------------------------------------------------------------------------------- /slurm/compute_attention_deactivation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/compute_attention_deactivation.sh -------------------------------------------------------------------------------- /slurm/compute_correlation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/compute_correlation.sh -------------------------------------------------------------------------------- /slurm/compute_correlation_fast.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/compute_correlation_fast.sh -------------------------------------------------------------------------------- /slurm/compute_correlation_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/compute_correlation_parallel.sh -------------------------------------------------------------------------------- /slurm/correlation_error_experiment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/correlation_error_experiment.sh -------------------------------------------------------------------------------- /slurm/correlation_exp_fast.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/correlation_exp_fast.sh -------------------------------------------------------------------------------- /slurm/correlation_exp_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/correlation_exp_parallel.sh -------------------------------------------------------------------------------- /slurm/correlation_experiment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/correlation_experiment.sh -------------------------------------------------------------------------------- /slurm/correlation_subset_experiment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/correlation_subset_experiment.sh -------------------------------------------------------------------------------- /slurm/entropy_intervention.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/entropy_intervention.sh -------------------------------------------------------------------------------- /slurm/entropy_neuron_experiment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/entropy_neuron_experiment.sh -------------------------------------------------------------------------------- /slurm/intervention.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/intervention.sh -------------------------------------------------------------------------------- /slurm/intervention_experiment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/intervention_experiment.sh -------------------------------------------------------------------------------- /slurm/run_all_summary.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/run_all_summary.sh -------------------------------------------------------------------------------- /slurm/run_explanation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/run_explanation.sh -------------------------------------------------------------------------------- /slurm/run_explanation_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/run_explanation_all.sh -------------------------------------------------------------------------------- /slurm/run_summary.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/run_summary.sh -------------------------------------------------------------------------------- /slurm/run_summary_all_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/run_summary_all_data.sh -------------------------------------------------------------------------------- /slurm/run_weight_summary.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/run_weight_summary.sh -------------------------------------------------------------------------------- /slurm/save_neuron_acts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/save_neuron_acts.sh -------------------------------------------------------------------------------- /slurm/test_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/slurm/test_gpu.sh -------------------------------------------------------------------------------- /summary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/summary.py -------------------------------------------------------------------------------- /summary_viewer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/summary_viewer.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/utils.py -------------------------------------------------------------------------------- /weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wesg52/universal-neurons/HEAD/weights.py --------------------------------------------------------------------------------