├── .codecov.yml ├── .coveragerc ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── pre-merge.yml │ ├── pre-release.yml │ └── publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .vscode └── settings.json ├── CITATION.cff ├── Dockerfile ├── LICENSE ├── README.md ├── docs ├── .gitignore ├── Makefile ├── make.bat └── source │ ├── _static │ └── .gitkeep │ ├── _templates │ └── autosummary │ │ └── module.rst │ ├── conf.py │ ├── index.rst │ └── tutorials │ ├── combining_with_transformer_lens.ipynb │ ├── loading_pretrained_lenses.rst │ ├── maintainers_guide.rst │ ├── prediction_trajectories.ipynb │ └── training_and_evaluating_lenses.rst ├── notebooks └── interactive.ipynb ├── pyproject.toml ├── setup.cfg ├── tests ├── __init__.py ├── conftest.py ├── plotting │ ├── test_prediction_trajectory.py │ ├── test_token_formatter.py │ └── test_trajectory_plotting.py ├── scripts │ ├── __init__.py │ └── test_integration.py ├── test_data.py ├── test_data │ └── pile_text.jsonl ├── test_distance.py ├── test_lenses.py ├── test_load_artifact.py ├── test_model_surgery.py ├── test_stats.py ├── test_subspaces.py ├── test_unembed.py └── test_utils.py └── tuned_lens ├── __init__.py ├── __main__.py ├── causal ├── __init__.py ├── ablation.py ├── subspaces.py └── utils.py ├── data.py ├── load_artifacts.py ├── model_surgery.py ├── nn ├── __init__.py ├── lenses.py └── unembed.py ├── plotting ├── __init__.py ├── prediction_trajectory.py ├── token_formatter.py └── trajectory_plotting.py ├── scripts ├── __init__.py ├── eval_loop.py ├── ingredients.py └── train_loop.py ├── stats ├── __init__.py ├── distance.py └── logit_stats.py └── utils.py /.codecov.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.codecov.yml -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.coveragerc -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/workflows/pre-merge.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.github/workflows/pre-merge.yml -------------------------------------------------------------------------------- /.github/workflows/pre-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.github/workflows/pre-release.yml -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/CITATION.cff -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/README.md -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | source/_api/ 2 | build 3 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/source/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/module.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/source/_templates/autosummary/module.rst -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/tutorials/combining_with_transformer_lens.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/source/tutorials/combining_with_transformer_lens.ipynb -------------------------------------------------------------------------------- /docs/source/tutorials/loading_pretrained_lenses.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/source/tutorials/loading_pretrained_lenses.rst -------------------------------------------------------------------------------- /docs/source/tutorials/maintainers_guide.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/source/tutorials/maintainers_guide.rst -------------------------------------------------------------------------------- /docs/source/tutorials/prediction_trajectories.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/source/tutorials/prediction_trajectories.ipynb -------------------------------------------------------------------------------- /docs/source/tutorials/training_and_evaluating_lenses.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/docs/source/tutorials/training_and_evaluating_lenses.rst -------------------------------------------------------------------------------- /notebooks/interactive.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/notebooks/interactive.ipynb -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/pyproject.toml -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/plotting/test_prediction_trajectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/plotting/test_prediction_trajectory.py -------------------------------------------------------------------------------- /tests/plotting/test_token_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/plotting/test_token_formatter.py -------------------------------------------------------------------------------- /tests/plotting/test_trajectory_plotting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/plotting/test_trajectory_plotting.py -------------------------------------------------------------------------------- /tests/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/scripts/test_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/scripts/test_integration.py -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_data.py -------------------------------------------------------------------------------- /tests/test_data/pile_text.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_data/pile_text.jsonl -------------------------------------------------------------------------------- /tests/test_distance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_distance.py -------------------------------------------------------------------------------- /tests/test_lenses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_lenses.py -------------------------------------------------------------------------------- /tests/test_load_artifact.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_load_artifact.py -------------------------------------------------------------------------------- /tests/test_model_surgery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_model_surgery.py -------------------------------------------------------------------------------- /tests/test_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_stats.py -------------------------------------------------------------------------------- /tests/test_subspaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_subspaces.py -------------------------------------------------------------------------------- /tests/test_unembed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_unembed.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /tuned_lens/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/__init__.py -------------------------------------------------------------------------------- /tuned_lens/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/__main__.py -------------------------------------------------------------------------------- /tuned_lens/causal/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/causal/__init__.py -------------------------------------------------------------------------------- /tuned_lens/causal/ablation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/causal/ablation.py -------------------------------------------------------------------------------- /tuned_lens/causal/subspaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/causal/subspaces.py -------------------------------------------------------------------------------- /tuned_lens/causal/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/causal/utils.py -------------------------------------------------------------------------------- /tuned_lens/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/data.py -------------------------------------------------------------------------------- /tuned_lens/load_artifacts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/load_artifacts.py -------------------------------------------------------------------------------- /tuned_lens/model_surgery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/model_surgery.py -------------------------------------------------------------------------------- /tuned_lens/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/nn/__init__.py -------------------------------------------------------------------------------- /tuned_lens/nn/lenses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/nn/lenses.py -------------------------------------------------------------------------------- /tuned_lens/nn/unembed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/nn/unembed.py -------------------------------------------------------------------------------- /tuned_lens/plotting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/plotting/__init__.py -------------------------------------------------------------------------------- /tuned_lens/plotting/prediction_trajectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/plotting/prediction_trajectory.py -------------------------------------------------------------------------------- /tuned_lens/plotting/token_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/plotting/token_formatter.py -------------------------------------------------------------------------------- /tuned_lens/plotting/trajectory_plotting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/plotting/trajectory_plotting.py -------------------------------------------------------------------------------- /tuned_lens/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Implementations of subcommands.""" 2 | -------------------------------------------------------------------------------- /tuned_lens/scripts/eval_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/scripts/eval_loop.py -------------------------------------------------------------------------------- /tuned_lens/scripts/ingredients.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/scripts/ingredients.py -------------------------------------------------------------------------------- /tuned_lens/scripts/train_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/scripts/train_loop.py -------------------------------------------------------------------------------- /tuned_lens/stats/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/stats/__init__.py -------------------------------------------------------------------------------- /tuned_lens/stats/distance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/stats/distance.py -------------------------------------------------------------------------------- /tuned_lens/stats/logit_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/stats/logit_stats.py -------------------------------------------------------------------------------- /tuned_lens/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlignmentResearch/tuned-lens/HEAD/tuned_lens/utils.py --------------------------------------------------------------------------------