├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── notebooks ├── 1 descriptive │ ├── explorative │ │ ├── logit differences.ipynb │ │ └── minK logP.ipynb │ └── token pertubation │ │ ├── token pertubation (batched).ipynb │ │ └── token pertubation (single).ipynb ├── 2 localizing │ ├── activation patching │ │ ├── activ patching full.ipynb │ │ └── activ patching src trg.ipynb │ ├── attention head analysis │ │ ├── KQ attention head analysis.ipynb │ │ ├── rare token hypothesis.ipynb │ │ └── unigram frequency attention correlation.ipynb │ └── gradient-based attribution │ │ ├── activation gradients │ │ ├── cosine distance.ipynb │ │ ├── perturbed activation gradients.ipynb │ │ └── single sequence activation gradients.ipynb │ │ └── parameter gradients │ │ ├── contrastive grads.ipynb │ │ ├── max weight search.ipynb │ │ └── mem range grads.ipynb └── 3 editing │ ├── fine-tuning plots.ipynb │ ├── fine-tuning.ipynb │ └── noise intervention.ipynb ├── paragraphs └── gpt-neo-125M │ ├── mem_strings.csv │ ├── perturbed │ ├── mem_toks.pt │ └── perturbed_mem_toks.pt │ └── preds │ ├── 0_10_preds.pt │ ├── 11_29_preds.pt │ ├── 30_49_preds.pt │ └── 50_50_preds.pt ├── setup.sh └── utils ├── dataLoaders.py ├── evaluation.py ├── gradient.py ├── intervening.py ├── localizing.py ├── modelHandlers.py └── patching.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | data 3 | results 4 | 5 | .ipynb_checkpoints/ -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/README.md -------------------------------------------------------------------------------- /notebooks/1 descriptive/explorative/logit differences.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/1 descriptive/explorative/logit differences.ipynb -------------------------------------------------------------------------------- /notebooks/1 descriptive/explorative/minK logP.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/1 descriptive/explorative/minK logP.ipynb -------------------------------------------------------------------------------- /notebooks/1 descriptive/token pertubation/token pertubation (batched).ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/1 descriptive/token pertubation/token pertubation (batched).ipynb -------------------------------------------------------------------------------- /notebooks/1 descriptive/token pertubation/token pertubation (single).ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/1 descriptive/token pertubation/token pertubation (single).ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/activation patching/activ patching full.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/activation patching/activ patching full.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/activation patching/activ patching src trg.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/activation patching/activ patching src trg.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/attention head analysis/KQ attention head analysis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/attention head analysis/KQ attention head analysis.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/attention head analysis/rare token hypothesis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/attention head analysis/rare token hypothesis.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/attention head analysis/unigram frequency attention correlation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/attention head analysis/unigram frequency attention correlation.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/gradient-based attribution/activation gradients/cosine distance.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/gradient-based attribution/activation gradients/cosine distance.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/gradient-based attribution/activation gradients/perturbed activation gradients.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/gradient-based attribution/activation gradients/perturbed activation gradients.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/gradient-based attribution/activation gradients/single sequence activation gradients.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/gradient-based attribution/activation gradients/single sequence activation gradients.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/gradient-based attribution/parameter gradients/contrastive grads.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/gradient-based attribution/parameter gradients/contrastive grads.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/gradient-based attribution/parameter gradients/max weight search.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/gradient-based attribution/parameter gradients/max weight search.ipynb -------------------------------------------------------------------------------- /notebooks/2 localizing/gradient-based attribution/parameter gradients/mem range grads.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/2 localizing/gradient-based attribution/parameter gradients/mem range grads.ipynb -------------------------------------------------------------------------------- /notebooks/3 editing/fine-tuning plots.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/3 editing/fine-tuning plots.ipynb -------------------------------------------------------------------------------- /notebooks/3 editing/fine-tuning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/3 editing/fine-tuning.ipynb -------------------------------------------------------------------------------- /notebooks/3 editing/noise intervention.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/notebooks/3 editing/noise intervention.ipynb -------------------------------------------------------------------------------- /paragraphs/gpt-neo-125M/mem_strings.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/paragraphs/gpt-neo-125M/mem_strings.csv -------------------------------------------------------------------------------- /paragraphs/gpt-neo-125M/perturbed/mem_toks.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/paragraphs/gpt-neo-125M/perturbed/mem_toks.pt -------------------------------------------------------------------------------- /paragraphs/gpt-neo-125M/perturbed/perturbed_mem_toks.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/paragraphs/gpt-neo-125M/perturbed/perturbed_mem_toks.pt -------------------------------------------------------------------------------- /paragraphs/gpt-neo-125M/preds/0_10_preds.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/paragraphs/gpt-neo-125M/preds/0_10_preds.pt -------------------------------------------------------------------------------- /paragraphs/gpt-neo-125M/preds/11_29_preds.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/paragraphs/gpt-neo-125M/preds/11_29_preds.pt -------------------------------------------------------------------------------- /paragraphs/gpt-neo-125M/preds/30_49_preds.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/paragraphs/gpt-neo-125M/preds/30_49_preds.pt -------------------------------------------------------------------------------- /paragraphs/gpt-neo-125M/preds/50_50_preds.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/paragraphs/gpt-neo-125M/preds/50_50_preds.pt -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/setup.sh -------------------------------------------------------------------------------- /utils/dataLoaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/utils/dataLoaders.py -------------------------------------------------------------------------------- /utils/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/utils/evaluation.py -------------------------------------------------------------------------------- /utils/gradient.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/utils/gradient.py -------------------------------------------------------------------------------- /utils/intervening.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/utils/intervening.py -------------------------------------------------------------------------------- /utils/localizing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/utils/localizing.py -------------------------------------------------------------------------------- /utils/modelHandlers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/utils/modelHandlers.py -------------------------------------------------------------------------------- /utils/patching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/localizing-paragraph-memorization/HEAD/utils/patching.py --------------------------------------------------------------------------------