├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── datasets.md ├── mypy.ini ├── neuron_explainer ├── __init__.py ├── activation_server │ ├── README.md │ ├── derived_scalar_computation.py │ ├── dst_helpers.py │ ├── explainer_routes.py │ ├── explanation_datasets.py │ ├── inference_routes.py │ ├── interactive_model.py │ ├── load_neurons.py │ ├── main.py │ ├── neuron_datasets.py │ ├── read_routes.py │ ├── requests_and_responses.py │ └── tdb_conversions.py ├── activations │ ├── activation_records.py │ ├── activations.py │ ├── attention_utils.py │ ├── derived_scalars │ │ ├── README.md │ │ ├── __init__.py │ │ ├── activations_and_metadata.py │ │ ├── attention.py │ │ ├── autoencoder.py │ │ ├── config.py │ │ ├── derived_scalar_store.py │ │ ├── derived_scalar_types.py │ │ ├── direct_effects.py │ │ ├── edge_activation.py │ │ ├── edge_attribution.py │ │ ├── indexing.py │ │ ├── least_common_tokens.py │ │ ├── locations.py │ │ ├── logprobs.py │ │ ├── make_scalar_derivers.py │ │ ├── mlp.py │ │ ├── multi_group.py │ │ ├── multi_pass_scalar_deriver.py │ │ ├── node_write.py │ │ ├── postprocessing.py │ │ ├── raw_activations.py │ │ ├── reconstituted.py │ │ ├── reconstituter_class.py │ │ ├── residual.py │ │ ├── scalar_deriver.py │ │ ├── tests │ │ │ ├── test_attention.py │ │ │ ├── test_derived_scalar_store.py │ │ │ ├── test_derived_scalar_types.py │ │ │ └── utils.py │ │ ├── tokens.py │ │ ├── utils.py │ │ └── write_tensors.py │ ├── hook_graph.py │ └── test_attention_utils.py ├── api_client.py ├── explanations │ ├── __init__.py │ ├── attention_head_scoring.py │ ├── calibrated_simulator.py │ ├── explainer.py │ ├── explanations.py │ ├── few_shot_examples.py │ ├── prompt_builder.py │ ├── scoring.py │ ├── simulator.py │ ├── test_explainer.py │ └── test_simulator.py ├── fast_dataclasses │ ├── __init__.py │ ├── fast_dataclasses.py │ └── test_fast_dataclasses.py ├── file_utils.py ├── models │ ├── README.md │ ├── __init__.py │ ├── autoencoder.py │ ├── autoencoder_context.py │ ├── hooks.py │ ├── inference_engine_type_registry.py │ ├── model_component_registry.py │ ├── model_context.py │ ├── model_registry.py │ └── transformer.py ├── pydantic │ ├── __init__.py │ ├── camel_case_base_model.py │ ├── hashable_base_model.py │ └── immutable.py ├── scripts │ ├── create_hf_test_data.py │ └── download_from_hf.py └── tests │ ├── conftest.py │ ├── test_activation_reconstituter.py │ ├── test_against_data.py │ ├── test_all_dsts.py │ ├── test_emb_dsts.py │ ├── test_hooks.py │ ├── test_interactive_model.py │ ├── test_model_context_get_weight.py │ ├── test_offline_autoencoder_dsts.py │ ├── test_online_autoencoder_dsts.py │ ├── test_postprocessing.py │ ├── test_reconstituted_gradients.py │ ├── test_serialization_of_model_config_from_model_context.py │ ├── test_trace_through_v.py │ └── test_transformer.py ├── neuron_viewer ├── .gitignore ├── .parcelrc ├── .postcssrc ├── .prettierrc ├── README.md ├── package-lock.json ├── package.json ├── prepend_autogen_comments.sh ├── public │ ├── favicon.ico │ ├── logo192.png │ ├── logo512.png │ ├── manifest.json │ └── robots.txt ├── src │ ├── App.css │ ├── App.tsx │ ├── TransformerDebugger │ │ ├── TransformerDebugger.tsx │ │ ├── cards │ │ │ ├── BySequenceTokenDisplay.tsx │ │ │ ├── DisplayOptions.tsx │ │ │ ├── LayerDisplay.tsx │ │ │ ├── LogitsDisplay.tsx │ │ │ ├── SparsityMetricsDisplay.tsx │ │ │ ├── TokenTable.tsx │ │ │ ├── inference_params │ │ │ │ ├── AblateNodeSpecs.tsx │ │ │ │ ├── InferenceParamsDisplay.tsx │ │ │ │ ├── TokenLabel.tsx │ │ │ │ ├── 
TraceUpstreamNodeSpec.tsx │ │ │ │ └── inferenceParams.ts │ │ │ ├── node_table │ │ │ │ ├── NodeTable.tsx │ │ │ │ └── TopTokensDisplay.tsx │ │ │ └── prompt │ │ │ │ ├── MultiTokenInput.tsx │ │ │ │ ├── PromptAndTokensOfInterest.tsx │ │ │ │ └── swap.png │ │ ├── common │ │ │ ├── ExplanatoryTooltip.tsx │ │ │ └── JsonModal.tsx │ │ ├── requests │ │ │ ├── explanationFetcher.ts │ │ │ ├── inferenceDataFetcher.ts │ │ │ └── inferenceResponseUtils.tsx │ │ └── utils │ │ │ ├── explanations.ts │ │ │ ├── nodes.tsx │ │ │ ├── numbers.tsx │ │ │ └── urlParams.ts │ ├── client │ │ ├── core │ │ │ ├── ApiError.ts │ │ │ ├── ApiRequestOptions.ts │ │ │ ├── ApiResult.ts │ │ │ ├── CancelablePromise.ts │ │ │ ├── OpenAPI.ts │ │ │ └── request.ts │ │ ├── index.ts │ │ ├── models │ │ │ ├── AblationSpec.ts │ │ │ ├── ActivationLocationType.ts │ │ │ ├── AttentionHeadRecordResponse.ts │ │ │ ├── AttentionTraceType.ts │ │ │ ├── AttributedScoredExplanation.ts │ │ │ ├── BatchedRequest.ts │ │ │ ├── BatchedResponse.ts │ │ │ ├── BatchedTdbRequest.ts │ │ │ ├── ComponentTypeForAttention.ts │ │ │ ├── ComponentTypeForMlp.ts │ │ │ ├── DerivedAttentionScalarsRequest.ts │ │ │ ├── DerivedAttentionScalarsRequestSpec.ts │ │ │ ├── DerivedAttentionScalarsResponse.ts │ │ │ ├── DerivedAttentionScalarsResponseData.ts │ │ │ ├── DerivedScalarType.ts │ │ │ ├── DerivedScalarsRequest.ts │ │ │ ├── DerivedScalarsRequestSpec.ts │ │ │ ├── DerivedScalarsResponse.ts │ │ │ ├── DerivedScalarsResponseData.ts │ │ │ ├── Dimension.ts │ │ │ ├── ExistingExplanationsRequest.ts │ │ │ ├── ExplanationResult.ts │ │ │ ├── GroupId.ts │ │ │ ├── HTTPValidationError.ts │ │ │ ├── InferenceAndTokenData.ts │ │ │ ├── InferenceRequestSpec.ts │ │ │ ├── InferenceResponse.ts │ │ │ ├── InferenceResponseAndResponseDict.ts │ │ │ ├── InferenceSubRequest.ts │ │ │ ├── LossFnConfig.ts │ │ │ ├── LossFnName.ts │ │ │ ├── MirroredActivationIndex.ts │ │ │ ├── MirroredNodeIndex.ts │ │ │ ├── MirroredTraceConfig.ts │ │ │ ├── ModelInfoResponse.ts │ │ │ ├── MultipleTopKDerivedScalarsRequest.ts │ │ │ ├── MultipleTopKDerivedScalarsRequestSpec.ts │ │ │ ├── MultipleTopKDerivedScalarsResponse.ts │ │ │ ├── MultipleTopKDerivedScalarsResponseData.ts │ │ │ ├── NeuronDatasetMetadata.ts │ │ │ ├── NeuronRecordResponse.ts │ │ │ ├── NodeAblation.ts │ │ │ ├── NodeIdAndDatasets.ts │ │ │ ├── NodeToTrace.ts │ │ │ ├── NodeType.ts │ │ │ ├── PassType.ts │ │ │ ├── PreOrPostAct.ts │ │ │ ├── ProcessingResponseDataType.ts │ │ │ ├── ScoreRequest.ts │ │ │ ├── ScoreResult.ts │ │ │ ├── ScoredTokensRequestSpec.ts │ │ │ ├── ScoredTokensResponseData.ts │ │ │ ├── TdbRequestSpec.ts │ │ │ ├── Tensor0D.ts │ │ │ ├── Tensor1D.ts │ │ │ ├── Tensor2D.ts │ │ │ ├── Tensor3D.ts │ │ │ ├── TensorType.ts │ │ │ ├── TokenAndAttentionScalars.ts │ │ │ ├── TokenAndScalar.ts │ │ │ ├── TokenPairAttributionRequestSpec.ts │ │ │ ├── TokenPairAttributionResponseData.ts │ │ │ ├── TokenScoringType.ts │ │ │ ├── TopTokens.ts │ │ │ ├── TopTokensAttendedTo.ts │ │ │ └── ValidationError.ts │ │ └── services │ │ │ ├── ExplainerService.ts │ │ │ ├── HelloWorldService.ts │ │ │ ├── InferenceService.ts │ │ │ ├── MemoryService.ts │ │ │ └── ReadService.ts │ ├── colors.ts │ ├── commonUiComponents.tsx │ ├── heatmapGrid.tsx │ ├── heatmapGrid2d.tsx │ ├── images.d.ts │ ├── index.css │ ├── index.html │ ├── index.tsx │ ├── modelInteractions.tsx │ ├── navigation.tsx │ ├── nodePage.tsx │ ├── panes │ │ ├── activationsForPrompt.tsx │ │ ├── datasetExamples.tsx │ │ ├── explanation.tsx │ │ ├── fetchAndDisplayPane.tsx │ │ ├── index.ts │ │ ├── logitLens.tsx │ │ └── scoreExplanation.tsx │ ├── 
plots.tsx │ ├── requests │ │ ├── explainerRequests.ts │ │ ├── inferenceRequests.ts │ │ ├── paths.ts │ │ └── readRequests.ts │ ├── tokenHeatmap.tsx │ ├── tokenHeatmap2d.tsx │ ├── tokenRendering.tsx │ ├── types.ts │ └── welcome.tsx ├── tailwind.config.js └── tsconfig.json ├── pyproject.toml ├── pytest.ini ├── setup.py └── terminology.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Cached user explanations
132 | cached_explanations/
133 |

--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------

1 | [settings]
2 | profile = black
3 |
4 | known_first_party =
5 |     neuron_explainer
6 |     neuron_viewer
7 |
8 | line_length = 100

--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------

1 | repos:
2 |   - repo: https://github.com/astral-sh/ruff-pre-commit
3 |     rev: v0.1.9
4 |     hooks:
5 |       - id: ruff
6 |         args: [--fix, --unsafe-fixes, --fix-only, --exit-non-zero-on-fix]
7 |         files: neuron_explainer
8 |
9 |   - repo: https://github.com/hauntsaninja/black-pre-commit-mirror
10 |     rev: 23.10.0
11 |     hooks:
12 |       - id: black
13 |         args: [--line-length=100, --exclude="", --workers=6]
14 |
15 |   - repo: https://github.com/pycqa/isort
16 |     rev: 5.12.0
17 |     hooks:
18 |       - id: isort
19 |         args: [--line-length=100, --profile=black, --settings-path=.isort.cfg]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

1 | MIT License
2 |
3 | Copyright (c) 2024 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

1 | # Transformer Debugger
2 |
3 | Transformer Debugger (TDB) is a tool developed by OpenAI's [Superalignment
4 | team](https://openai.com/blog/introducing-superalignment) with the goal of
5 | supporting investigations into specific behaviors of small language models.
The tool combines
6 | [automated interpretability](https://openai.com/research/language-models-can-explain-neurons-in-language-models)
7 | techniques with [sparse autoencoders](https://transformer-circuits.pub/2023/monosemantic-features).
8 |
9 | TDB enables rapid exploration before needing to write code, with the ability to intervene in the
10 | forward pass and see how it affects a particular behavior. It can be used to answer questions like,
11 | "Why does the model output token A instead of token B for this prompt?" or "Why does attention head
12 | H attend to token T for this prompt?" It does so by identifying specific components (neurons,
13 | attention heads, autoencoder latents) that contribute to the behavior, showing automatically
14 | generated explanations of what causes those components to activate most strongly, and tracing
15 | connections between components to help discover circuits.
16 |
17 | These videos give an overview of TDB and show how it can be used to investigate [indirect object
18 | identification in GPT-2 small](https://arxiv.org/abs/2211.00593):
19 |
20 | - [Introduction](https://www.loom.com/share/721244075f12439496db5d53439d2f84?sid=8445200e-c49e-4028-8b8e-3ea8d361dec0)
21 | - [Neuron viewer pages](https://www.loom.com/share/21b601b8494b40c49b8dc7bfd1dc6829?sid=ee23c00a-9ede-4249-b9d7-c2ba15993556)
22 | - [Example: Investigating name mover heads, part 1](https://www.loom.com/share/3478057cec484a1b85471585fef10811?sid=b9c3be4b-7117-405a-8d31-0f9e541dcfb6)
23 | - [Example: Investigating name mover heads, part 2](https://www.loom.com/share/6bd8c6bde84b42a98f9a26a969d4a3ad?sid=4a09ac29-58a2-433e-b55d-762414d9a7fa)
24 |
25 | ## What's in the release?
26 |
27 | - [Neuron viewer](neuron_viewer/README.md): A React app that hosts TDB as well as pages with information about individual model components (MLP neurons, attention heads and autoencoder latents for both).
28 | - [Activation server](neuron_explainer/activation_server/README.md): A backend server that performs inference on a subject model to provide data for TDB. It also reads and serves data from public Azure buckets.
29 | - [Models](neuron_explainer/models/README.md): A simple inference library for GPT-2 models and their autoencoders, with hooks to grab activations.
30 | - [Collated activation datasets](datasets.md): top-activating dataset examples for MLP neurons, attention heads and autoencoder latents.
31 |
32 | ## Setup
33 |
34 | Follow these steps to install the repo. You'll first need python/pip, as well as node/npm.
35 |
36 | Though optional, we recommend you use a virtual environment or equivalent:
37 |
38 | ```sh
39 | # If you're already in a venv, deactivate it.
40 | deactivate
41 | # Create a new venv.
42 | python -m venv ~/.virtualenvs/transformer-debugger
43 | # Activate the new venv.
44 | source ~/.virtualenvs/transformer-debugger/bin/activate
45 | ```
46 |
47 | Once your environment is set up, run the following:
48 | ```sh
49 | git clone git@github.com:openai/transformer-debugger.git
50 | cd transformer-debugger
51 |
52 | # Install neuron_explainer
53 | pip install -e .
54 |
55 | # Set up the pre-commit hooks.
56 | pre-commit install
57 |
58 | # Install neuron_viewer.
59 | cd neuron_viewer
60 | npm install
61 | cd ..
62 | ```
63 |
64 | To run the TDB app, you'll then need to follow the instructions to set up the [activation server backend](neuron_explainer/activation_server/README.md) and [neuron viewer frontend](neuron_viewer/README.md).
65 |
66 | ## Making changes
67 |
68 | To validate changes:
69 |
70 | - Run `pytest`
71 | - Run `mypy --config=mypy.ini .`
72 | - Run activation server and neuron viewer and confirm that basic functionality like TDB and neuron
73 |   viewer pages is still working
74 |
75 |
76 | ## Links
77 |
78 | - [Terminology](terminology.md)
79 |
80 | ## How to cite
81 |
82 | Please cite as:
83 |
84 | ```
85 | Mossing, et al., “Transformer Debugger”, GitHub, 2024.
86 | ```
87 |
88 | BibTex citation:
89 |
90 | ```
91 | @misc{mossing2024tdb,
92 |   title={Transformer Debugger},
93 |   author={Mossing, Dan and Bills, Steven and Tillman, Henk and Dupré la Tour, Tom and Cammarata, Nick and Gao, Leo and Achiam, Joshua and Yeh, Catherine and Leike, Jan and Wu, Jeff and Saunders, William},
94 |   year={2024},
95 |   publisher={GitHub},
96 |   howpublished={\url{https://github.com/openai/transformer-debugger}},
97 | }
98 | ```
99 |

--------------------------------------------------------------------------------
/datasets.md:
--------------------------------------------------------------------------------

1 | # Collated activation datasets
2 |
3 | This document lists the collated activation datasets that are compatible with the Transformer Debugger. These datasets contain some top-activating examples for each MLP neuron, attention head, and autoencoder latent, as well as the corresponding activations for each token (or token pair) in the example. They provide a way to visualize what each neuron, attention head, or autoencoder latent is selective for (obviously in an incomplete way). These activation datasets are used by the [neuron viewer](neuron_viewer/README.md) to display the top-activating examples for each component, and are also typically used for [automated interpretability](https://openai.com/research/language-models-can-explain-neurons-in-language-models).
4 |
5 | The activation datasets are located on Azure Blob Storage, for example accessible via the [`blobfile`](https://github.com/blobfile/blobfile) library.
6 |
7 | # GPT-2 small
8 |
9 | Collated activation datasets are available for both the MLP neurons and the attention heads. MLP neuron activations are recorded for each token, while attention head activations are recorded for each token pair.
10 |
11 | The datasets are located at the following paths:
12 | > - MLP neurons: `https://openaipublic.blob.core.windows.net/neuron-explainer/gpt2_small_data/collated-activations/{layer_index}/{neuron_index}.json`
13 | > - Attention heads: `https://openaipublic.blob.core.windows.net/neuron-explainer/gpt2_small/attn_write_norm/collated-activations-by-token-pair/{layer_index}/{head_index}.json`
14 |
15 | with the following parameters:
16 | - `layer_index` is in range(12)
17 | - `neuron_index` is in range(3072)
18 | - `head_index` is in range(12)
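As a quick illustration of how these path templates compose, here is a minimal sketch that downloads a single MLP neuron record using only the standard library. The layer and neuron indices are arbitrary picks within the ranges above, and the JSON schema is printed rather than assumed:

```python
import json
import urllib.request

BASE = "https://openaipublic.blob.core.windows.net/neuron-explainer/gpt2_small_data/collated-activations"
layer_index, neuron_index = 5, 100  # illustrative values; any pair within the documented ranges works

with urllib.request.urlopen(f"{BASE}/{layer_index}/{neuron_index}.json") as response:
    record = json.load(response)

# Print the top-level structure rather than assuming a schema.
print(sorted(record.keys()) if isinstance(record, dict) else type(record))
```

Note that the `.pt` paths in the autoencoder sections below are `torch`-serialized rather than JSON, so they need `torch.load` on a file-like object instead of `json.load`.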
19 |
20 |
21 | ## GPT-2 small - MLP autoencoders
22 |
23 | MLP autoencoders were trained either on the MLP neurons (after the activation function), or on the MLP-layer output that is written to the residual stream. See [Autoencoders for GPT-2 small](neuron_explainer/models/README.md#sparse-autoencoder) for more details.
24 |
25 | The datasets are located at the following paths:
26 |
27 | > - MLP latents: `https://openaipublic.blob.core.windows.net/neuron-explainer/gpt2-small/autoencoder_latent/{autoencoder_input}{version}/collated-activations/{layer_index}/{latent_index}.pt`
28 |
29 | with the following parameters:
30 | - `autoencoder_input` is in ["mlp_post_act", "resid_delta_mlp"]
31 | - `version` is in ["", "_v4"]. (The `_v4` versions use slightly different hyperparameters, and should be preferred.)
32 | - `layer_index` is in range(12)
33 | - `latent_index` is in range(32768)
34 |
35 | ## GPT-2 small - Attention autoencoders
36 |
37 | Attention autoencoders were trained on the attention-layer output that is written to the residual stream. See [Autoencoders for GPT-2 small](neuron_explainer/models/README.md#sparse-autoencoder) for more details. The `collated-activations` dataset contains autoencoder latent activations for each token, while the `collated-activations-by-token-pair` dataset contains autoencoder latent *attribution* for each token pair. To compute the attribution given an autoencoder latent `L` and a token pair `(T1, T2)`, we multiply the attention pattern `A(T1, T2)` by the gradient of `L` with respect to the attention pattern: `attribution_L(T1, T2) = A(T1, T2) * ∂L/∂A(T1, T2)`.
38 |
39 | The datasets are located at the following paths:
40 |
41 | > - Attention latents (by token): `https://openaipublic.blob.core.windows.net/neuron-explainer/gpt2-small/autoencoder_latent/resid_delta_attn_v4/collated-activations/{layer_index}/{latent_index}.pt`
42 | > - Attention latents (by token pair): `https://openaipublic.blob.core.windows.net/neuron-explainer/gpt2-small/autoencoder_latent/resid_delta_attn_v4/collated-activations-by-token-pair/{layer_index}/{latent_index}.pt`
43 |
44 | with the following parameters:
45 | - `layer_index` is in range(12)
46 | - `latent_index` is in range(10240)
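The attribution formula above falls out directly from autograd. Here is a minimal sketch with toy tensors; the shapes and the stand-in latent are illustrative placeholders, not the repo's actual pipeline, which computes latents from the attention-layer residual write:

```python
import torch

n_tokens = 4
# Toy lower-triangular attention pattern A(T1, T2).
attn_probs = torch.rand(n_tokens, n_tokens).tril().requires_grad_(True)

# Stand-in for an autoencoder latent L that depends on the attention pattern.
latent = (attn_probs * torch.randn(n_tokens, n_tokens)).sum()

(grad,) = torch.autograd.grad(latent, attn_probs)
attribution = attn_probs.detach() * grad  # attribution_L(T1, T2) = A(T1, T2) * ∂L/∂A(T1, T2)
```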
47 |
48 |
49 |
50 | # GPT-2 xl
51 |
52 | For GPT-2 xl, only the MLP neuron activations are available. The datasets are located at the following paths:
53 | > - MLP neurons: `https://openaipublic.blob.core.windows.net/neuron-explainer/data/collated-activations/{layer_index}/{neuron_index}.json`
54 |
55 | with the following parameters:
56 | - `layer_index` is in range(48)
57 | - `neuron_index` is in range(6400)
58 |

--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------

1 | [mypy]
2 |
3 | ; Not all dependencies have type annotations; ignore this.
4 | ignore_missing_imports=True
5 | namespace_packages=True
6 | explicit_package_bases = True
7 |
8 | ; Be strict about certain rules.
9 | strict_equality=True
10 | warn_unused_configs=True
11 | no_implicit_optional=True
12 | strict_optional=True
13 | warn_redundant_casts=True
14 | warn_unused_ignores=True
15 | check_untyped_defs=True
16 |
17 | [mypy-neuron_explainer.*]
18 | ignore_errors=False
19 | disallow_untyped_defs=True
20 |
21 | [mypy-neuron_explainer.api_client]
22 | ignore_errors=True
23 |
24 | [mypy-neuron_explainer.models.hooks]
25 | ignore_errors=True
26 |
27 | [mypy-neuron_explainer.models.transformer]
28 | ignore_errors=True
29 |
30 | [mypy-neuron_explainer.tests.test_hooks]
31 | ignore_errors=True
32 |
33 | [mypy-neuron_explainer.tests.test_transformer]
34 | ignore_errors=True

--------------------------------------------------------------------------------
/neuron_explainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openai/transformer-debugger/87e6db7b7e73ded5037eeeff05deb5e81548a10a/neuron_explainer/__init__.py

--------------------------------------------------------------------------------
/neuron_explainer/activation_server/dst_helpers.py:
--------------------------------------------------------------------------------

1 | # Small helper functions for working with derived scalars in the context of activation server
2 | # request handling.
3 |
4 | import math
5 | from typing import Any, Callable, TypeVar
6 |
7 | import torch
8 |
9 | from neuron_explainer.activation_server.requests_and_responses import *
10 | from neuron_explainer.activations.derived_scalars.derived_scalar_store import DerivedScalarStore
11 | from neuron_explainer.activations.derived_scalars.derived_scalar_types import DerivedScalarType
12 | from neuron_explainer.activations.derived_scalars.indexing import (
13 |     DerivedScalarIndex,
14 |     MirroredNodeIndex,
15 | )
16 | from neuron_explainer.models.model_component_registry import Dimension
17 |
18 | T = TypeVar("T")
19 |
20 |
21 | def _float_tensor_to_list(x: torch.Tensor) -> list[float]:
22 |     return [x if math.isfinite(x) else -999 for x in x.tolist()]
23 |
24 |
25 | def _torch_to_tensor_nd(x: torch.Tensor) -> TensorND:
26 |     ndim = x.ndim
27 |     if ndim == 0:
28 |         return Tensor0D(value=x.item())
29 |     elif ndim == 1:
30 |         return Tensor1D(value=_float_tensor_to_list(x))
31 |     elif ndim == 2:
32 |         return Tensor2D(value=[_float_tensor_to_list(row) for row in x])
33 |     elif ndim == 3:
34 |         return Tensor3D(value=[[_float_tensor_to_list(row) for row in matrix] for matrix in x])
35 |     else:
36 |         raise NotImplementedError(f"Unknown ndim: {ndim}")
37 |
38 |
39 | def _get_dims_to_keep(
40 |     dst: DerivedScalarType, keep_dimension_fn: Callable[[Dimension], bool]
41 | ) -> list[Dimension]:
42 |     return [dim for dim in dst.shape_spec_per_token_sequence if keep_dimension_fn(dim)]
43 |
44 |
45 | def _sum_dst(
46 |     ds_store: DerivedScalarStore,
47 |     dst: DerivedScalarType,
48 |     keep_dimension_fn: Callable[[Dimension], bool],
49 |     abs_mode: bool,
50 | ) -> torch.Tensor:
51 |     dims_to_keep = _get_dims_to_keep(dst, keep_dimension_fn)
52 |     store_for_dst = ds_store.filter_dsts([dst])
53 |     activations_and_metadata = next(
54 |         iter(store_for_dst.activations_and_metadata_by_dst_and_pass_type.values())
55 |     )
56 |     ndim_before_sum = len(activations_and_metadata.shape)
57 |     if abs_mode:
58 |         sum_for_dst = store_for_dst.sum_abs(dims_to_keep=dims_to_keep)
59 |     else:
60 |         sum_for_dst = store_for_dst.sum(dims_to_keep=dims_to_keep)
61 |     assert len(sum_for_dst.shape) == len(
62 |         dims_to_keep
63 |     ), f"{sum_for_dst.shape=}, 
{ndim_before_sum=}, {dims_to_keep=}" 64 | return sum_for_dst 65 | 66 | 67 | def get_intermediate_sum_by_dst( 68 | ds_store: DerivedScalarStore, 69 | keep_dimension_fn: Callable[[Dimension], bool], 70 | abs_mode: bool = False, 71 | ) -> dict[DerivedScalarType, TensorND]: 72 | dict_of_torch_tensors = { 73 | dst: _sum_dst(ds_store, dst, keep_dimension_fn, abs_mode=abs_mode) for dst in ds_store.dsts 74 | } 75 | return {dst: _torch_to_tensor_nd(x) for dst, x in dict_of_torch_tensors.items()} 76 | 77 | 78 | def get_ds_index_from_node_index( 79 | node_index: MirroredNodeIndex, 80 | dsts: list[DerivedScalarType], 81 | ) -> DerivedScalarIndex: 82 | """ 83 | Converts from a MirroredNodeIndex (more general, e.g. defined by a NodeType such as MLP neurons) 84 | to a DerivedScalarIndex (more specific, e.g. defined by a DerivedScalarType such as MLP write 85 | norm) conditional on the given derived scalar types, which are assumed to be unique for each 86 | NodeType. 87 | """ 88 | dsts_matching_node_type = [dst for dst in dsts if dst.node_type == node_index.node_type] 89 | assert len(dsts_matching_node_type) == 1, ( 90 | f"Expected exactly one derived scalar type to have node type {node_index.node_type}, " 91 | f"but found {dsts_matching_node_type} in {dsts}" 92 | ) 93 | return DerivedScalarIndex.from_node_index( 94 | node_index=node_index, 95 | dst=dsts_matching_node_type[0], 96 | ) 97 | 98 | 99 | def assert_tensor(tensor: Any) -> torch.Tensor: 100 | # for mypy 101 | assert isinstance(tensor, torch.Tensor) 102 | return tensor 103 | -------------------------------------------------------------------------------- /neuron_explainer/activation_server/explanation_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from neuron_explainer.activation_server.load_neurons import convert_dataset_path_to_short_name 4 | 5 | # Maps from neuron dataset path to explanation dataset path. 6 | AZURE_EXPLANATION_DATASET_REGISTRY = { 7 | "https://openaipublic.blob.core.windows.net/neuron-explainer/data/collated-activations/": "https://openaipublic.blob.core.windows.net/neuron-explainer/data/explanations/", 8 | "https://openaipublic.blob.core.windows.net/neuron-explainer/gpt2_small_data/collated-activations/": "https://openaipublic.blob.core.windows.net/neuron-explainer/gpt2_small_data/explanations/", 9 | } 10 | 11 | 12 | def get_local_cached_explanation_directory(dataset_path: str) -> str: 13 | root_project_directory = os.path.dirname( 14 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 15 | ) 16 | dataset_short_name = convert_dataset_path_to_short_name(dataset_path) 17 | return f"{root_project_directory}/cached_explanations/{dataset_short_name}" 18 | 19 | 20 | async def get_all_explanation_datasets(neuron_dataset: str) -> list[str]: 21 | """ 22 | Get all explanation datasets for a given neuron dataset. Search the public azure bucket and also 23 | the local filesystem cache. Returns a list of paths to the explanation datasets. 24 | Path can be an azure path (beginning with `https://`) or a local path. 25 | """ 26 | datasets = [] 27 | if neuron_dataset in AZURE_EXPLANATION_DATASET_REGISTRY: 28 | datasets.append(AZURE_EXPLANATION_DATASET_REGISTRY[neuron_dataset]) 29 | local_cache_dir = get_local_cached_explanation_directory(neuron_dataset) 30 | # Iterate through folders to get a list of dirs. 
31 | # There will be different local cache directories if the user generates scored explanations for 32 | # the same neuron dataset using different neuron/attention explainer registry entries (i.e. so 33 | # that AttentionExplainAndScoreMethodId or NeuronExplainAndScoreMethodId differ). 34 | if os.path.exists(local_cache_dir) and os.path.isdir(local_cache_dir): 35 | for entry in os.listdir(local_cache_dir): 36 | candidate_path = os.path.join(local_cache_dir, entry) 37 | if os.path.isdir(candidate_path): 38 | datasets.append(candidate_path) 39 | return datasets 40 | -------------------------------------------------------------------------------- /neuron_explainer/activation_server/inference_routes.py: -------------------------------------------------------------------------------- 1 | """Routes / endpoints related to performing inference on the subject model.""" 2 | 3 | from fastapi import FastAPI, HTTPException 4 | 5 | from neuron_explainer.activation_server.interactive_model import InteractiveModel 6 | from neuron_explainer.activation_server.requests_and_responses import ( 7 | BatchedRequest, 8 | BatchedResponse, 9 | BatchedTdbRequest, 10 | DerivedAttentionScalarsRequest, 11 | DerivedAttentionScalarsResponse, 12 | DerivedScalarsRequest, 13 | DerivedScalarsResponse, 14 | ModelInfoResponse, 15 | MultipleTopKDerivedScalarsRequest, 16 | MultipleTopKDerivedScalarsResponse, 17 | ) 18 | 19 | 20 | def define_inference_routes( 21 | app: FastAPI, 22 | model: InteractiveModel | None, 23 | mlp_autoencoder_name: str | None, 24 | attn_autoencoder_name: str | None, 25 | ) -> None: 26 | def assert_model() -> None: 27 | if model is None: 28 | raise HTTPException( 29 | status_code=500, 30 | detail="Inference model not running. Restart the activation server with run_model=True to use inference endpoints.", 31 | ) 32 | 33 | @app.post("/derived_scalars", response_model=DerivedScalarsResponse, tags=["inference"]) 34 | async def derived_scalars(request: DerivedScalarsRequest) -> DerivedScalarsResponse: 35 | assert_model() 36 | assert model is not None # redundant; needed for mypy 37 | return await model.get_derived_scalars(request) 38 | 39 | @app.post( 40 | "/derived_attention_scalars", 41 | response_model=DerivedAttentionScalarsResponse, 42 | tags=["inference"], 43 | ) 44 | async def derived_attention_scalars( 45 | request: DerivedAttentionScalarsRequest, 46 | ) -> DerivedAttentionScalarsResponse: 47 | assert_model() 48 | assert model is not None # redundant; needed for mypy 49 | return await model.get_derived_attention_scalars(request) 50 | 51 | @app.post( 52 | "/multiple_top_k_derived_scalars", 53 | response_model=MultipleTopKDerivedScalarsResponse, 54 | tags=["inference"], 55 | ) 56 | async def multiple_top_k_derived_scalars( 57 | request: MultipleTopKDerivedScalarsRequest, 58 | ) -> MultipleTopKDerivedScalarsResponse: 59 | assert_model() 60 | assert model is not None # redundant; needed for mypy 61 | return await model.get_multiple_top_k_derived_scalars(request) 62 | 63 | @app.post("/batched", response_model=BatchedResponse, tags=["inference"]) 64 | async def batched(request: BatchedRequest) -> BatchedResponse: 65 | assert_model() 66 | assert model is not None # redundant; needed for mypy 67 | return await model.handle_batched_request(request) 68 | 69 | @app.post("/batched_tdb", response_model=BatchedResponse, tags=["inference"]) 70 | async def batched_tdb(request: BatchedTdbRequest) -> BatchedResponse: 71 | assert_model() 72 | assert model is not None # redundant; needed for mypy 73 | return await 
model.handle_batched_tdb_request(request) 74 | 75 | @app.post("/model_info", response_model=ModelInfoResponse, tags=["inference"]) 76 | def model_info() -> ModelInfoResponse: 77 | assert_model() 78 | assert model is not None # redundant; needed for mypy 79 | return model.get_model_info( 80 | mlp_autoencoder_name=mlp_autoencoder_name, attn_autoencoder_name=attn_autoencoder_name 81 | ) 82 | -------------------------------------------------------------------------------- /neuron_explainer/activation_server/load_neurons.py: -------------------------------------------------------------------------------- 1 | from fastapi import HTTPException 2 | 3 | from neuron_explainer.activation_server.neuron_datasets import ( 4 | NEURON_DATASET_METADATA_REGISTRY, 5 | get_neuron_dataset_metadata_by_short_name_and_dst, 6 | ) 7 | from neuron_explainer.activations.activations import NeuronRecord, load_neuron_async 8 | from neuron_explainer.activations.derived_scalars import DerivedScalarType 9 | from neuron_explainer.pydantic import CamelCaseBaseModel, immutable 10 | 11 | 12 | @immutable 13 | class NodeIdAndDatasets(CamelCaseBaseModel): 14 | dst: DerivedScalarType 15 | layer_index: int 16 | activation_index: int 17 | datasets: list[str] 18 | """A list of dataset paths or short names.""" 19 | 20 | 21 | def resolve_neuron_dataset(dataset: str, dst: DerivedScalarType) -> str: 22 | if dataset.startswith("https://"): 23 | return dataset 24 | else: 25 | # It's the short name for a dataset, like "gpt2-small". We have to look up the metadata. 26 | dataset_metadata = get_neuron_dataset_metadata_by_short_name_and_dst(dataset, dst) 27 | return dataset_metadata.neuron_dataset_path 28 | 29 | 30 | def convert_dataset_path_to_short_name(dataset_path: str) -> str: 31 | assert dataset_path.startswith("https://") 32 | short_name = None 33 | for metadata in NEURON_DATASET_METADATA_REGISTRY.values(): 34 | if metadata.neuron_dataset_path == dataset_path: 35 | short_name = metadata.short_name 36 | break 37 | assert ( 38 | short_name is not None 39 | ), f"Could not find short name for {dataset_path}. If you're trying to use a custom dataset, ensure that you have added it to neuron_datasets.py:NEURON_DATASET_METADATA_REGISTRY." 40 | return short_name 41 | 42 | 43 | async def load_neuron_from_datasets( 44 | node_id_and_datasets: NodeIdAndDatasets, 45 | ) -> tuple[str, NeuronRecord]: 46 | """ 47 | Load a neuron record of the specified dst (e.g. DerivedScalarType.MLP_POST_ACT) from a list of 48 | datasets, returning the data from the first dataset that has the neuron. 49 | 50 | Used to allow first trying a dataset that only covers a subset of neurons for a model, 51 | with a fallback to another dataset that covers all neurons. 
52 | """ 53 | dst = node_id_and_datasets.dst 54 | datasets = node_id_and_datasets.datasets 55 | dataset_paths = [resolve_neuron_dataset(dataset, dst) for dataset in datasets] 56 | layer_index = node_id_and_datasets.layer_index 57 | activation_index = node_id_and_datasets.activation_index 58 | for dataset_path in dataset_paths: 59 | try: 60 | return dataset_path, await load_neuron_async( 61 | dataset_path, layer_index, activation_index 62 | ) 63 | except FileNotFoundError: 64 | pass 65 | raise HTTPException( 66 | status_code=404, 67 | detail=f"Could not find {dst} {layer_index}:{activation_index} in {dataset_paths}", 68 | ) 69 | -------------------------------------------------------------------------------- /neuron_explainer/activations/derived_scalars/__init__.py: -------------------------------------------------------------------------------- 1 | from .derived_scalar_types import DerivedScalarType 2 | -------------------------------------------------------------------------------- /neuron_explainer/activations/derived_scalars/edge_activation.py: -------------------------------------------------------------------------------- 1 | """This file defines ScalarDerivers for efficiently computing the direct effect of a single upstream node 2 | on many downstream nodes.""" 3 | 4 | from typing import Callable 5 | 6 | from neuron_explainer.activations.derived_scalars.derived_scalar_types import DerivedScalarType 7 | from neuron_explainer.activations.derived_scalars.node_write import make_node_write_scalar_source 8 | from neuron_explainer.activations.derived_scalars.reconstituter_class import ActivationReconstituter 9 | from neuron_explainer.activations.derived_scalars.scalar_deriver import ( 10 | DstConfig, 11 | ScalarDeriver, 12 | ScalarSource, 13 | ) 14 | from neuron_explainer.models.model_component_registry import ActivationLocationType 15 | from neuron_explainer.models.model_context import StandardModelContext 16 | 17 | 18 | def convert_node_write_scalar_deriver_to_in_edge_activation( 19 | node_write_scalar_source: ScalarSource, 20 | output_dst: DerivedScalarType, 21 | dst_config: DstConfig, 22 | downstream_activation_location_type: ActivationLocationType, 23 | downstream_q_or_k: ActivationLocationType | None, 24 | ) -> ScalarDeriver: 25 | """Converts a scalar deriver for a write vector from some upstream node type to a scalar deriver for 26 | in edge activation for downstream nodes of some type (MLP, autoencoder, or attention head). 
In the 27 | case of attention heads, this is split up by subnode (Q or K).""" 28 | 29 | model_context = dst_config.get_model_context() 30 | autoencoder_context = dst_config.get_autoencoder_context() 31 | assert isinstance(model_context, StandardModelContext) 32 | transformer = model_context.get_or_create_model() 33 | reconstituter = ActivationReconstituter.from_activation_location_type( 34 | transformer=transformer, 35 | autoencoder_context=autoencoder_context, 36 | activation_location_type=downstream_activation_location_type, 37 | q_or_k=downstream_q_or_k, 38 | ) 39 | return reconstituter.make_jvp_scalar_deriver( 40 | write_scalar_source=node_write_scalar_source, 41 | dst_config=dst_config, 42 | output_dst=output_dst, 43 | ) 44 | 45 | 46 | def make_in_edge_activation_scalar_deriver_factory( 47 | activation_location_type: ActivationLocationType, 48 | q_or_k: ActivationLocationType | None = None, 49 | ) -> Callable[[DstConfig], ScalarDeriver]: 50 | """Returns a function that creates a scalar deriver for the edge attribution from arbitrary node 51 | to the specified downstream activation location type / sub activation location type (MLP post act, 52 | autoencoder latent, attention head Q or K). 53 | """ 54 | 55 | sub_node_type_to_output_dst = { 56 | (ActivationLocationType.MLP_POST_ACT, None): DerivedScalarType.MLP_IN_EDGE_ACTIVATION, 57 | ( 58 | ActivationLocationType.ONLINE_AUTOENCODER_LATENT, 59 | None, 60 | ): DerivedScalarType.ONLINE_AUTOENCODER_IN_EDGE_ACTIVATION, 61 | ( 62 | ActivationLocationType.ATTN_QK_PROBS, 63 | ActivationLocationType.ATTN_QUERY, 64 | ): DerivedScalarType.ATTN_QUERY_IN_EDGE_ACTIVATION, 65 | ( 66 | ActivationLocationType.ATTN_QK_PROBS, 67 | ActivationLocationType.ATTN_KEY, 68 | ): DerivedScalarType.ATTN_KEY_IN_EDGE_ACTIVATION, 69 | } 70 | 71 | output_dst = sub_node_type_to_output_dst[(activation_location_type, q_or_k)] 72 | 73 | def make_in_edge_activation_scalar_deriver(dst_config: DstConfig) -> ScalarDeriver: 74 | node_write_scalar_source = make_node_write_scalar_source(dst_config) 75 | return convert_node_write_scalar_deriver_to_in_edge_activation( 76 | node_write_scalar_source=node_write_scalar_source, 77 | output_dst=output_dst, 78 | dst_config=dst_config, 79 | downstream_activation_location_type=activation_location_type, 80 | downstream_q_or_k=q_or_k, 81 | ) 82 | 83 | return make_in_edge_activation_scalar_deriver 84 | -------------------------------------------------------------------------------- /neuron_explainer/activations/derived_scalars/tests/test_attention.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from neuron_explainer.activations.derived_scalars.attention import ( 5 | flatten_lower_triangle, 6 | unflatten_lower_triangle, 7 | unflatten_lower_triangle_and_sum_columns, 8 | ) 9 | 10 | 11 | @pytest.mark.parametrize("extra_dim", [[], [2], [2, 3]]) 12 | @pytest.mark.parametrize("N", [63, 64, 65]) 13 | def test_unflatten_lower_triangle(extra_dim: list[int], N: int) -> None: 14 | """Test that unflatten_lower_triangle is the inverse of flatten_lower_triangle.""" 15 | # Create a random tensor of shape ... 
x M x N 16 | M = 64 17 | original_tensor = torch.rand(extra_dim + [M, N]) 18 | 19 | # Set all elements above the lower triangular to 0 20 | lower_triangular_mask = torch.tril(torch.ones(M, N)).bool() 21 | original_tensor[..., ~lower_triangular_mask] = 0 22 | 23 | # Apply flatten_lower_triangle to the original tensor 24 | flattened = flatten_lower_triangle(original_tensor) 25 | assert flattened.shape == tuple(extra_dim + [lower_triangular_mask.sum()]) 26 | 27 | # Apply unflatten_lower_triangle to the flattened tensor 28 | reconstructed_tensor = unflatten_lower_triangle(flattened, M, N) 29 | assert torch.allclose(original_tensor, reconstructed_tensor) 30 | 31 | 32 | @pytest.mark.parametrize("extra_dim", [[], [2], [2, 3]]) 33 | @pytest.mark.parametrize("N", [63, 64, 65]) 34 | def test_unflatten_lower_triangle_and_sum_columns(extra_dim: list[int], N: int) -> None: 35 | """Test unflatten_lower_triangle_and_sum_columns(...) is equal to unflatten_lower_triangle(...).sum(-1).""" 36 | # Create a random flattened tensor 37 | M = 64 38 | num_elements = int(torch.tril(torch.ones(M, N)).bool().sum().item()) 39 | flattened = torch.rand(extra_dim + [num_elements]) 40 | 41 | # apply unflatten_lower_triangle_and_sum_columns 42 | result = unflatten_lower_triangle_and_sum_columns(flattened, M, N) 43 | 44 | # apply unflatten_lower_triangle and sum(-1) 45 | reconstructed = unflatten_lower_triangle(flattened, M, N) 46 | reference = reconstructed.sum(dim=-1) 47 | assert torch.allclose(result, reference) 48 | -------------------------------------------------------------------------------- /neuron_explainer/activations/derived_scalars/tests/utils.py: -------------------------------------------------------------------------------- 1 | from neuron_explainer.activations.derived_scalars.derived_scalar_types import DerivedScalarType 2 | from neuron_explainer.models.model_component_registry import Dimension 3 | from neuron_explainer.models.model_context import ModelContext, get_default_device 4 | 5 | get_testing_device = get_default_device # keep for compatibility 6 | 7 | 8 | def get_autoencoder_test_path( 9 | dst: DerivedScalarType, 10 | ) -> str: 11 | """Return the path to a test autoencoder.""" 12 | 13 | name = f"{dst.value}.pt" 14 | return f"https://openaipublic.blob.core.windows.net/neuron-explainer/test-data/autoencoder_test_state_dicts/{name}" 15 | 16 | 17 | def get_activation_shape( 18 | dst: DerivedScalarType, 19 | model_context: ModelContext, 20 | n_tokens: int = 10, 21 | n_latents: int | None = None, 22 | ) -> tuple[int, ...]: 23 | """Return the shape of activations""" 24 | activation_shape = [] 25 | assert dst.shape_spec_per_token_sequence[0].is_sequence_token_dimension 26 | if dst in [ 27 | DerivedScalarType.ATTN_WRITE_NORM, 28 | DerivedScalarType.FLATTENED_ATTN_POST_SOFTMAX, 29 | DerivedScalarType.ATTN_ACT_TIMES_GRAD, 30 | DerivedScalarType.ATTN_WRITE_TO_FINAL_RESIDUAL_GRAD, 31 | ]: 32 | # first dimension is token pairs 33 | activation_shape.append(n_tokens * (n_tokens + 1) // 2) 34 | else: 35 | activation_shape.append(n_tokens) 36 | for dimension in dst.shape_spec_per_token_sequence[1:]: 37 | if dimension == Dimension.SINGLETON: 38 | activation_shape.append(1) 39 | elif dimension.is_model_intrinsic: 40 | activation_shape.append(model_context.get_dim_size(dimension)) 41 | elif dimension.is_sequence_token_dimension: 42 | activation_shape.append(n_tokens) 43 | elif dimension.is_parameterized_dimension: 44 | assert n_latents is not None 45 | activation_shape.append(n_latents) 46 | else: 47 | raise 
ValueError(f"Unsupported dimension: {dimension}") 48 | 49 | print(f"{dst}: {activation_shape}") 50 | return tuple(activation_shape) 51 | -------------------------------------------------------------------------------- /neuron_explainer/activations/derived_scalars/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def detach_and_clone(x: torch.Tensor, requires_grad: bool) -> torch.Tensor: 5 | """In some cases, a derived scalar may be computed by applying a function to 6 | some activations, and running .backward() on the output, with some tensors 7 | desired to be backprop'ed through and some not. This function is for that: 8 | it detaches and clones the input tensor such that it doesn't interfere with 9 | other places those activations are used, and so that the gradient information 10 | is cleared. It then sets requires_grad to the desired value based on whether this 11 | activation should be backprop'ed through.""" 12 | return x.detach().clone().requires_grad_(requires_grad) 13 | -------------------------------------------------------------------------------- /neuron_explainer/activations/derived_scalars/write_tensors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from neuron_explainer.activations.derived_scalars.derived_scalar_types import DerivedScalarType 4 | from neuron_explainer.models.autoencoder_context import ( 5 | AutoencoderContext, 6 | get_autoencoder_output_weight_by_layer_index, 7 | ) 8 | from neuron_explainer.models.model_component_registry import ( 9 | LayerIndex, 10 | NodeType, 11 | WeightLocationType, 12 | ) 13 | from neuron_explainer.models.model_context import ModelContext 14 | 15 | 16 | def get_attn_write_tensor_by_layer_index( 17 | model_context: ModelContext, 18 | layer_indices: list[int] | None, 19 | ) -> dict[LayerIndex, torch.Tensor]: 20 | """Returns a dictionary mapping layer index to the write weight matrix for that layer.""" 21 | if layer_indices is None: 22 | layer_indices = list(range(model_context.n_layers)) 23 | W_out_by_layer_index: dict[LayerIndex, torch.Tensor] = { 24 | layer_index: model_context.get_weight( 25 | location_type=WeightLocationType.ATTN_TO_RESIDUAL, 26 | layer=layer_index, 27 | device=model_context.device, 28 | ) # shape (n_heads, d_head, d_model) 29 | for layer_index in layer_indices 30 | } 31 | return W_out_by_layer_index 32 | 33 | 34 | def get_mlp_write_tensor_by_layer_index( 35 | model_context: ModelContext, layer_indices: list[int] | None 36 | ) -> dict[LayerIndex, torch.Tensor]: 37 | if layer_indices is None: 38 | layer_indices = list(range(model_context.n_layers)) 39 | W_out_location_type = WeightLocationType.MLP_TO_RESIDUAL 40 | W_out_by_layer_index: dict[LayerIndex, torch.Tensor] = { 41 | layer_index: model_context.get_weight( 42 | location_type=W_out_location_type, 43 | layer=layer_index, 44 | device=model_context.device, 45 | ) # shape (d_ff, d_model) 46 | for layer_index in layer_indices 47 | } 48 | return W_out_by_layer_index 49 | 50 | 51 | def _assert_non_none(x: LayerIndex) -> int: 52 | assert x is not None 53 | return x 54 | 55 | 56 | def get_autoencoder_write_tensor_by_layer_index( 57 | autoencoder_context: AutoencoderContext, 58 | model_context: ModelContext, 59 | ) -> dict[LayerIndex, torch.Tensor]: 60 | if autoencoder_context.dst == DerivedScalarType.MLP_POST_ACT: 61 | autoencoder_output_weight_by_layer_index = get_autoencoder_output_weight_by_layer_index( 62 | autoencoder_context 63 | ) 64 | 
W_out_by_layer_index = get_mlp_write_tensor_by_layer_index_with_autoencoder_context( 65 | autoencoder_context, model_context 66 | ) 67 | return { 68 | _assert_non_none(layer_index): torch.einsum( 69 | "an,nd->ad", 70 | autoencoder_output_weight_by_layer_index[layer_index], 71 | W_out_by_layer_index[_assert_non_none(layer_index)], 72 | ) 73 | for layer_index in autoencoder_context.layer_indices 74 | } 75 | else: 76 | assert ( 77 | autoencoder_context.dst.node_type == NodeType.RESIDUAL_STREAM_CHANNEL 78 | ), autoencoder_context.dst 79 | return get_autoencoder_output_weight_by_layer_index(autoencoder_context) 80 | 81 | 82 | def get_mlp_write_tensor_by_layer_index_with_autoencoder_context( 83 | autoencoder_context: AutoencoderContext, 84 | model_context: ModelContext, 85 | ) -> dict[int, torch.Tensor]: 86 | assert all(layer_index is not None for layer_index in autoencoder_context.layer_indices) 87 | layer_indices: list[int] = list(autoencoder_context.layer_indices) # type: ignore 88 | write_tensor_by_layer_index = get_mlp_write_tensor_by_layer_index( 89 | model_context=model_context, layer_indices=layer_indices 90 | ) 91 | return { 92 | _assert_non_none(layer_index): write_tensor_by_layer_index[layer_index] 93 | for layer_index in autoencoder_context.layer_indices 94 | } 95 | -------------------------------------------------------------------------------- /neuron_explainer/activations/test_attention_utils.py: -------------------------------------------------------------------------------- 1 | from neuron_explainer.activations.attention_utils import ( 2 | _inverse_triangular_number, 3 | convert_flattened_index_to_unflattened_index, 4 | get_attended_to_sequence_length_per_sequence_token, 5 | get_max_num_attended_to_sequence_tokens, 6 | ) 7 | 8 | 9 | def _simulate_num_activations( 10 | num_sequence_tokens: int, max_num_attended_to_sequence_tokens: int 11 | ) -> int: 12 | num_activations_per_token = list(range(1, max_num_attended_to_sequence_tokens + 1)) + [ 13 | max_num_attended_to_sequence_tokens 14 | for _ in range(num_sequence_tokens - max_num_attended_to_sequence_tokens) 15 | ] 16 | num_activations = sum(num_activations_per_token) 17 | return num_activations 18 | 19 | 20 | def test_inverse_triangular_number() -> None: 21 | for m in range(5): 22 | n = m * (m + 1) // 2 23 | assert _inverse_triangular_number(n) == m 24 | 25 | 26 | def test_get_max_num_attended_to_sequence_tokens() -> None: 27 | num_sequence_tokens = 100 28 | for max_num_attended_to_sequence_tokens in [50, 100]: 29 | num_activations = _simulate_num_activations( 30 | num_sequence_tokens, max_num_attended_to_sequence_tokens 31 | ) 32 | assert ( 33 | get_max_num_attended_to_sequence_tokens(num_sequence_tokens, num_activations) 34 | == max_num_attended_to_sequence_tokens 35 | ) 36 | 37 | attended_to_sequence_lengths = get_attended_to_sequence_length_per_sequence_token( 38 | num_sequence_tokens, max_num_attended_to_sequence_tokens 39 | ) 40 | assert sum(attended_to_sequence_lengths) == num_activations, ( 41 | sum(attended_to_sequence_lengths), 42 | num_activations, 43 | ) 44 | 45 | 46 | def test_convert_flattened_index_to_unflattened_index() -> None: 47 | possible_max_num_attended_to_sequence_tokens = 9 48 | num_sequence_tokens = 17 49 | assert possible_max_num_attended_to_sequence_tokens < num_sequence_tokens 50 | for max_num_attended_to_sequence_tokens in [ 51 | possible_max_num_attended_to_sequence_tokens, 52 | num_sequence_tokens, 53 | ]: 54 | attended_to_sequence_lengths = get_attended_to_sequence_length_per_sequence_token( 55 | 
num_sequence_tokens, max_num_attended_to_sequence_tokens 56 | ) 57 | num_activations = sum(attended_to_sequence_lengths) 58 | 59 | flat_indices = list(range(num_activations)) 60 | flat_indices_split_by_sequence_token = [] 61 | for attended_to_sequence_length in attended_to_sequence_lengths: 62 | flat_indices_split_by_sequence_token.append(flat_indices[:attended_to_sequence_length]) 63 | flat_indices = flat_indices[attended_to_sequence_length:] 64 | 65 | for flat_index in list(range(num_activations)): 66 | if max_num_attended_to_sequence_tokens == num_sequence_tokens: 67 | unflattened_i, unflattened_j = convert_flattened_index_to_unflattened_index( 68 | flat_index 69 | ) 70 | else: 71 | unflattened_i, unflattened_j = convert_flattened_index_to_unflattened_index( 72 | flat_index, 73 | num_sequence_tokens=num_sequence_tokens, 74 | num_activations=num_activations, 75 | ) 76 | assert unflattened_i < num_sequence_tokens 77 | assert unflattened_j < len(flat_indices_split_by_sequence_token[unflattened_i]) 78 | assert ( 79 | flat_indices_split_by_sequence_token[unflattened_i][unflattened_j] == flat_index 80 | ), ( 81 | flat_indices_split_by_sequence_token[unflattened_i][unflattened_j], 82 | flat_index, 83 | ) 84 | -------------------------------------------------------------------------------- /neuron_explainer/explanations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/transformer-debugger/87e6db7b7e73ded5037eeeff05deb5e81548a10a/neuron_explainer/explanations/__init__.py -------------------------------------------------------------------------------- /neuron_explainer/explanations/test_explainer.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Any 3 | 4 | from neuron_explainer.explanations.explainer import TokenActivationPairExplainer 5 | from neuron_explainer.explanations.few_shot_examples import TEST_EXAMPLES, FewShotExampleSet 6 | from neuron_explainer.explanations.prompt_builder import ChatMessage, PromptFormat, Role 7 | 8 | 9 | def setup_module(unused_module: Any) -> None: 10 | # Make sure we have an event loop, since the attempt to create the Semaphore in 11 | # ApiClient will fail without it. 12 | loop = asyncio.new_event_loop() 13 | asyncio.set_event_loop(loop) 14 | 15 | 16 | def test_if_formatting() -> None: 17 | expected_prompt = """We're studying neurons in a neural network. Each neuron looks for some particular thing in a short document. Look at the parts of the document the neuron activates for and summarize in a single sentence what the neuron is looking for. Don't list examples of words. 18 | 19 | The activation format is tokenactivation. Activation values range from 0 to 10. A neuron finding what it's looking for is represented by a non-zero activation value. The higher the activation value, the stronger the match. 20 | 21 | Neuron 1 22 | Activations: 23 | 24 | a 10 25 | b 0 26 | c 0 27 | 28 | 29 | d 0 30 | e 10 31 | f 0 32 | 33 | 34 | Explanation of neuron 1 behavior: this neuron activates for vowels. 
35 | 36 | Neuron 2 37 | Activations: 38 | 39 | a 10 40 | b 0 41 | c 0 42 | 43 | 44 | d 0 45 | e 10 46 | f 0 47 | 48 | 49 | Explanation of neuron 2 behavior:<|endofprompt|> this neuron activates for""" 50 | 51 | explainer = TokenActivationPairExplainer( 52 | model_name="gpt-4o", 53 | prompt_format=PromptFormat.INSTRUCTION_FOLLOWING, 54 | few_shot_example_set=FewShotExampleSet.TEST, 55 | ) 56 | prompt = explainer.make_explanation_prompt( 57 | all_activations=TEST_EXAMPLES[0].activation_records, 58 | max_activation=1.0, 59 | max_tokens_for_completion=20, 60 | ) 61 | 62 | assert prompt == expected_prompt 63 | 64 | 65 | def test_chat_format() -> None: 66 | expected_prompt = [ 67 | ChatMessage( 68 | role=Role.SYSTEM, 69 | content="""We're studying neurons in a neural network. Each neuron looks for some particular thing in a short document. Look at the parts of the document the neuron activates for and summarize in a single sentence what the neuron is looking for. Don't list examples of words. 70 | 71 | The activation format is tokenactivation. Activation values range from 0 to 10. A neuron finding what it's looking for is represented by a non-zero activation value. The higher the activation value, the stronger the match.""", 72 | ), 73 | ChatMessage( 74 | role=Role.USER, 75 | content=""" 76 | 77 | Neuron 1 78 | Activations: 79 | 80 | a 10 81 | b 0 82 | c 0 83 | 84 | 85 | d 0 86 | e 10 87 | f 0 88 | 89 | 90 | Explanation of neuron 1 behavior: this neuron activates for""", 91 | ), 92 | ChatMessage( 93 | role=Role.ASSISTANT, 94 | content=" vowels.", 95 | ), 96 | ChatMessage( 97 | role=Role.USER, 98 | content=""" 99 | 100 | Neuron 2 101 | Activations: 102 | 103 | a 10 104 | b 0 105 | c 0 106 | 107 | 108 | d 0 109 | e 10 110 | f 0 111 | 112 | 113 | Explanation of neuron 2 behavior: this neuron activates for""", 114 | ), 115 | ] 116 | 117 | explainer = TokenActivationPairExplainer( 118 | model_name="gpt-4o", 119 | prompt_format=PromptFormat.CHAT_MESSAGES, 120 | few_shot_example_set=FewShotExampleSet.TEST, 121 | ) 122 | prompt = explainer.make_explanation_prompt( 123 | all_activations=TEST_EXAMPLES[0].activation_records, 124 | max_activation=1.0, 125 | max_tokens_for_completion=20, 126 | ) 127 | 128 | assert isinstance(prompt, list) 129 | assert isinstance(prompt[0], dict) # Really a ChatMessage 130 | for actual_message, expected_message in zip(prompt, expected_prompt): 131 | assert actual_message["role"] == expected_message["role"] 132 | assert actual_message["content"] == expected_message["content"] 133 | assert prompt == expected_prompt 134 | -------------------------------------------------------------------------------- /neuron_explainer/fast_dataclasses/__init__.py: -------------------------------------------------------------------------------- 1 | from .fast_dataclasses import FastDataclass, dumps, loads, register_dataclass 2 | 3 | __all__ = ["FastDataclass", "dumps", "loads", "register_dataclass"] 4 | -------------------------------------------------------------------------------- /neuron_explainer/file_utils.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import urllib.request 4 | from io import IOBase 5 | 6 | import aiohttp 7 | 8 | 9 | def file_exists(filepath: str) -> bool: 10 | if filepath.startswith("https://"): 11 | try: 12 | urllib.request.urlopen(filepath) 13 | return True 14 | except urllib.error.HTTPError: 15 | return False 16 | else: 17 | # It's a local file. 
18 | return os.path.exists(filepath) 19 | 20 | 21 | class CustomFileHandler: 22 | def __init__(self, filepath: str, mode: str) -> None: 23 | self.filepath = filepath 24 | self.mode = mode 25 | self.file = None 26 | 27 | def __enter__(self) -> IOBase: 28 | assert not self.filepath.startswith("az://"), "Azure blob storage is not supported" 29 | if self.filepath.startswith("https://"): 30 | assert self.mode in ["r", "rb"], "Only read mode is supported for remote files" 31 | remote_data = urllib.request.urlopen(self.filepath) 32 | if "b" in self.mode: 33 | # Read the content into a BytesIO object for binary mode 34 | self.file = io.BytesIO(remote_data.read()) 35 | else: 36 | # Decode the content and use StringIO for text mode (less common for torch.load) 37 | self.file = io.StringIO(remote_data.read().decode()) 38 | else: 39 | # Create the subdirectories if they don't exist 40 | directory = os.path.dirname(self.filepath) 41 | os.makedirs(directory, exist_ok=True) 42 | self.file = open(self.filepath, self.mode) 43 | if "b" in self.mode: 44 | # Ensure the file is seekable; if not, read into a BytesIO object 45 | try: 46 | self.file.seek(0) 47 | except io.UnsupportedOperation: 48 | self.file.close() 49 | with open(self.filepath, self.mode) as f: 50 | self.file = io.BytesIO(f.read()) 51 | return self.file 52 | 53 | def __exit__(self, exc_type, exc_val, exc_tb) -> bool: 54 | # Close the file if it's open 55 | if self.file is not None: 56 | self.file.close() 57 | # Propagate exceptions 58 | return False 59 | 60 | 61 | async def read_single_async(filepath: str) -> bytes: 62 | if filepath.startswith("https://"): 63 | async with aiohttp.ClientSession() as session: 64 | async with session.get(filepath) as response: 65 | return await response.read() 66 | else: 67 | with open(filepath, "rb") as f: 68 | return f.read() 69 | 70 | 71 | def copy_to_local_cache(src: str, dst: str) -> None: 72 | if not os.path.exists(os.path.dirname(dst)): 73 | os.makedirs(os.path.dirname(dst), exist_ok=True) 74 | if src.startswith("https://"): 75 | with urllib.request.urlopen(src) as response, open(dst, "wb") as out_file: 76 | data = response.read() # Consider chunked reading for large files 77 | out_file.write(data) 78 | else: 79 | with open(src, "rb") as in_file, open(dst, "wb") as out_file: 80 | data = in_file.read() 81 | out_file.write(data) 82 | -------------------------------------------------------------------------------- /neuron_explainer/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .autoencoder import Autoencoder 2 | from .hooks import Hooks, TransformerHooks 3 | from .transformer import Transformer, TransformerConfig 4 | -------------------------------------------------------------------------------- /neuron_explainer/pydantic/__init__.py: -------------------------------------------------------------------------------- 1 | from .camel_case_base_model import CamelCaseBaseModel 2 | from .hashable_base_model import HashableBaseModel 3 | from .immutable import immutable 4 | 5 | __all__ = ["CamelCaseBaseModel", "HashableBaseModel", "immutable"] 6 | -------------------------------------------------------------------------------- /neuron_explainer/pydantic/camel_case_base_model.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | def to_camel(string: str) -> str: 5 | return "".join(word.capitalize() if i > 0 else word for i, word in enumerate(string.split("_"))) 6 | 7 | 8 | class 
CamelCaseBaseModel(BaseModel): 9 | """ 10 | Base model that will automatically generate camelCase aliases for fields. Python code can use 11 | either snake_case or camelCase names. When Typescript code is generated, it will only use the 12 | camelCase names. 13 | """ 14 | 15 | class Config: 16 | alias_generator = to_camel 17 | allow_population_by_field_name = True 18 | -------------------------------------------------------------------------------- /neuron_explainer/pydantic/hashable_base_model.py: -------------------------------------------------------------------------------- 1 | from .camel_case_base_model import CamelCaseBaseModel 2 | 3 | 4 | class HashableBaseModel(CamelCaseBaseModel): 5 | def __hash__(self) -> int: 6 | values = tuple(getattr(self, name) for name in self.__annotations__.keys()) 7 | # Convert lists to tuples. 8 | values = tuple(value if not isinstance(value, list) else tuple(value) for value in values) 9 | return hash(values) 10 | -------------------------------------------------------------------------------- /neuron_explainer/pydantic/immutable.py: -------------------------------------------------------------------------------- 1 | from typing import TypeVar 2 | 3 | from pydantic import BaseConfig, BaseModel 4 | 5 | T = TypeVar("T", bound=BaseModel) 6 | 7 | 8 | def immutable(cls: type[T]) -> type[T]: 9 | """ 10 | Makes a Pydantic model immutable. 11 | 12 | Annotate a Pydantic class with `@immutable` to prevent the values of its fields from being 13 | changed after an instance is constructed. (This only guarantees shallow immutability of course: 14 | fields may have their internal state change.) 15 | """ 16 | 17 | class Config(BaseConfig): 18 | frozen: bool = True 19 | 20 | cls.Config = Config 21 | return cls 22 | -------------------------------------------------------------------------------- /neuron_explainer/scripts/create_hf_test_data.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import click 4 | import torch 5 | from transformers import GPT2Tokenizer 6 | 7 | from neuron_explainer.file_utils import copy_to_local_cache 8 | from neuron_explainer.scripts.download_from_hf import get_hf_model 9 | 10 | # ============================== 11 | # Reference models for testing 12 | # ============================== 13 | 14 | ALL_MODELS = [ 15 | "gpt2/small", 16 | "gpt2/medium", 17 | "gpt2/large", 18 | "gpt2/xl", 19 | ] 20 | 21 | # test prompts to sample at temperature zero from 22 | test_prompts = [ 23 | "this is a test", 24 | "I'm sorry Dave, I'm afraid", 25 | "We're not strangers to love. 
You know the rules, and", 26 | "in the beginning", 27 | "buy now!", 28 | "Why did the chicken cross the road?", 29 | ] 30 | 31 | 32 | # ======================================================= 33 | # Get the hf models and generate test data from those 34 | # ======================================================= 35 | 36 | 37 | def create_hf_test_data( 38 | models: list[str], 39 | test_prompts: list[str], 40 | num_examples: int, 41 | seq_len: int, 42 | sample_len: int, 43 | last_n: int, 44 | ) -> dict: 45 | # for GPT2 models, seq len maxes out at 1024 46 | seq_len = min(seq_len, 1024) 47 | 48 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2") 49 | prompts = [tokenizer.encode(p, return_tensors="pt") for p in test_prompts] 50 | 51 | test_data = {} 52 | for model_name in models: 53 | print(f"Creating test data for {model_name}") 54 | model_data: dict[str, Any] = {} 55 | 56 | # prepare model 57 | model = get_hf_model(model_name) 58 | model.cuda() 59 | print(f"...loaded {model_name}...") 60 | 61 | # make test inputs and get logits 62 | with torch.no_grad(): 63 | X = torch.randint(0, 50257, (num_examples, seq_len)).cuda() 64 | Y = model(X) 65 | X = X.cpu() 66 | logits = Y.logits.cpu() 67 | logits_at_inputs = logits.gather(-1, X.unsqueeze(-1)).squeeze(-1) 68 | logits_slice = logits[:, -last_n:].clone() 69 | model_data["inputs"] = X 70 | model_data["logits_at_inputs"] = logits_at_inputs 71 | model_data["logits_slice"] = logits_slice 72 | model_data["slice_last_n"] = last_n 73 | 74 | # generate temperature-zero samples 75 | samples = [] 76 | for op, p in zip(test_prompts, prompts): 77 | p = p.cuda() 78 | tok1 = model.generate(p, max_length=sample_len + len(p[0]), temperature=0) 79 | tok2 = model.generate(p, max_length=sample_len + len(p[0]), temperature=0) 80 | 81 | str1 = tokenizer.decode(tok1[0]) 82 | str2 = tokenizer.decode(tok2[0]) 83 | assert ( 84 | str1 == str2 85 | ), "HuggingFace temperature-zero generate was unexpectedly nondeterministic" 86 | 87 | # get tokens out as a list, then chop off the ones from the prompt 88 | tok1 = tok1[0].tolist() 89 | tok1 = tok1[len(p[0]) :] 90 | 91 | samples.append({"prompt": op, "completion": tokenizer.decode(tok1), "tokens": tok1}) 92 | 93 | model_data["samples"] = samples 94 | test_data[model_name] = model_data 95 | 96 | # free up GPU memory 97 | model.cpu() 98 | del model 99 | 100 | return test_data 101 | 102 | 103 | @click.command() 104 | @click.option( 105 | "-dir", 106 | "--savedir", 107 | type=str, 108 | default="https://openaipublic.blob.core.windows.net/neuron-explainer/data/test-reference-data", 109 | ) 110 | @click.option("-n", "--num_examples", type=int, default=4) 111 | @click.option("-m", "--sample_len", type=int, default=50) 112 | @click.option("-s", "--seq_len", type=int, default=1024) 113 | @click.option("-l", "--last_n", type=int, default=100) 114 | def make_and_save_test_data( 115 | savedir: str, num_examples: int, seq_len: int, sample_len: int, last_n: int 116 | ) -> None: 117 | test_data = create_hf_test_data( 118 | models=ALL_MODELS, 119 | test_prompts=test_prompts, 120 | num_examples=num_examples, 121 | seq_len=seq_len, 122 | sample_len=sample_len, 123 | last_n=last_n, 124 | ) 125 | torch.save(test_data, "test_data.pt") 126 | copy_to_local_cache(src="test_data.pt", dst="/".join([savedir, "test_data.pt"])) 127 | 128 | 129 | if __name__ == "__main__": 130 | make_and_save_test_data() 131 | -------------------------------------------------------------------------------- /neuron_explainer/scripts/download_from_hf.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import os.path as osp 3 | 4 | import click 5 | import torch 6 | from transformers import GPT2LMHeadModel 7 | 8 | from neuron_explainer.file_utils import CustomFileHandler 9 | from neuron_explainer.models.transformer import TransformerConfig 10 | 11 | EXCLUDES = [".attn.bias", ".attn.masked_bias"] 12 | 13 | ALL_MODELS = [ 14 | "gpt2/small", 15 | "gpt2/medium", 16 | "gpt2/large", 17 | "gpt2/xl", 18 | ] 19 | 20 | 21 | def get_hf_model(model_name: str) -> GPT2LMHeadModel: 22 | _, model_size = model_name.split("/") 23 | hf_name = "gpt2" if model_size == "small" else f"gpt2-{model_size}" 24 | model = GPT2LMHeadModel.from_pretrained(hf_name) 25 | return model 26 | 27 | 28 | # ==================================== 29 | # Conversion from HuggingFace format 30 | # ==================================== 31 | def convert(hf_sd: dict) -> dict: 32 | """convert state_dict from HuggingFace format to our format""" 33 | n_layers = max([int(k.split(".")[2]) for k in hf_sd.keys() if ".h." in k]) + 1 34 | 35 | hf_to_ours = dict() 36 | hf_to_ours["wte"] = "tok_embed" 37 | hf_to_ours["wpe"] = "pos_embed" 38 | hf_to_ours["ln_f"] = "final_ln" 39 | hf_to_ours["lm_head"] = "unembed" 40 | for i in range(n_layers): 41 | hf_to_ours[f"h.{i}"] = f"xf_layers.{i}" 42 | hf_to_ours["attn.c_attn"] = "attn.linear_qkv" 43 | hf_to_ours["attn.c_proj"] = "attn.out_proj" 44 | hf_to_ours["mlp.c_fc"] = "mlp.in_layer" 45 | hf_to_ours["mlp.c_proj"] = "mlp.out_layer" 46 | 47 | sd = dict() 48 | for k, v in hf_sd.items(): 49 | if any(x in k for x in EXCLUDES): 50 | continue 51 | if "weight" in k and ("attn" in k or "mlp" in k): 52 | v = v.T 53 | k = k.replace("transformer.", "") 54 | for hf_part, part in hf_to_ours.items(): 55 | k = k.replace(hf_part, part) 56 | if "attn.linear_qkv." 
in k: 57 | qproj, kproj, vproj = v.chunk(3, dim=0) 58 | sd[k.replace(".linear_qkv.", ".q_proj.")] = qproj 59 | sd[k.replace(".linear_qkv.", ".k_proj.")] = kproj 60 | sd[k.replace(".linear_qkv.", ".v_proj.")] = vproj 61 | else: 62 | sd[k] = v 63 | 64 | return sd 65 | 66 | 67 | def download(model_name: str, save_dir: str) -> None: 68 | assert model_name in ALL_MODELS, f"Must use valid model size, not {model_name=}" 69 | print(f"Downloading and converting model {model_name} to {save_dir}...") 70 | 71 | print(f"Getting HuggingFace model {model_name}...") 72 | model = get_hf_model(model_name) 73 | 74 | hf_config = model.config 75 | base_config = dict( 76 | enc="gpt2", 77 | ctx_window=1024, 78 | # attn 79 | m_attn=1, 80 | # mlp 81 | m_mlp=4, 82 | ) 83 | cfg = TransformerConfig( 84 | **base_config, # type: ignore 85 | d_model=hf_config.n_embd, 86 | n_layers=hf_config.n_layer, 87 | n_heads=hf_config.n_head, 88 | ) 89 | 90 | print("Converting state_dict...") 91 | sd = convert(model.state_dict()) 92 | 93 | print(f"Saving model to {save_dir}...") 94 | # save to file with config 95 | pieces_path = osp.join(save_dir, model_name, "model_pieces") 96 | for k, v in sd.items(): 97 | with CustomFileHandler(osp.join(pieces_path, f"{k}.pt"), "wb") as f: 98 | torch.save(v, f) 99 | 100 | fname_cfg = osp.join(save_dir, model_name, "config.json") 101 | with CustomFileHandler(fname_cfg, "w") as f: 102 | f.write(json.dumps(cfg.__dict__)) 103 | 104 | 105 | @click.command() 106 | @click.argument("save_dir", type=click.Path(exists=False, file_okay=False)) 107 | def download_all(save_dir: str) -> None: 108 | for model_size in ALL_MODELS: 109 | download(model_size, save_dir) 110 | 111 | 112 | if __name__ == "__main__": 113 | download_all() 114 | -------------------------------------------------------------------------------- /neuron_explainer/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # This file defines fixtures for model tests, with a focus on expensive objects that are used across 2 | # multiple test files. Fixtures are created once per session (i.e. `pytest` invocation), and are 3 | # available to and reused across all test cases in the session. Fixtures are evaluated lazily. 4 | # The filename uses the pytest convention. 
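#
# For example, a test can consume one of these fixtures just by declaring it as a
# parameter; pytest injects the session-scoped instance. (Illustrative sketch, not
# a test from this repo:)
#
#     def test_example(standard_model_context: StandardModelContext) -> None:
#         assert standard_model_context.n_layers > 0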
5 | 6 | import pytest 7 | 8 | from neuron_explainer.activations.derived_scalars import DerivedScalarType 9 | from neuron_explainer.activations.derived_scalars.tests.utils import get_autoencoder_test_path 10 | from neuron_explainer.models.autoencoder_context import AutoencoderConfig, AutoencoderContext 11 | from neuron_explainer.models.model_context import StandardModelContext, get_default_device 12 | 13 | AUTOENCODER_TEST_DST = DerivedScalarType.MLP_POST_ACT 14 | AUTOENCODER_TEST_PATH = get_autoencoder_test_path(AUTOENCODER_TEST_DST) 15 | 16 | 17 | @pytest.fixture(scope="session") 18 | def standard_model_context() -> StandardModelContext: 19 | standard_model_context = StandardModelContext.from_model_type( 20 | "gpt2-small", device=get_default_device() 21 | ) 22 | assert isinstance(standard_model_context, StandardModelContext) 23 | return standard_model_context 24 | 25 | 26 | @pytest.fixture(scope="session") 27 | def standard_autoencoder_context( 28 | standard_model_context: StandardModelContext, 29 | ) -> AutoencoderContext: 30 | autoencoder_config = AutoencoderConfig( 31 | dst=AUTOENCODER_TEST_DST, 32 | autoencoder_path_by_layer_index={ 33 | layer_index: AUTOENCODER_TEST_PATH 34 | for layer_index in range(standard_model_context.n_layers) 35 | }, 36 | ) 37 | return AutoencoderContext( 38 | autoencoder_config=autoencoder_config, 39 | device=standard_model_context.device, 40 | ) 41 | -------------------------------------------------------------------------------- /neuron_explainer/tests/test_model_context_get_weight.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Any, Callable 3 | 4 | import torch 5 | 6 | from neuron_explainer.models.inference_engine_type_registry import InferenceEngineType 7 | from neuron_explainer.models.model_component_registry import WeightLocationType 8 | from neuron_explainer.models.model_context import ModelContext 9 | 10 | 11 | def assert_all_eq( 12 | lst: list[Any], 13 | eq_fn: Callable[[Any, Any], bool] = lambda x, y: x == y, 14 | weight_location_type: WeightLocationType | None = None, 15 | ) -> Any: 16 | for i in range(1, len(lst)): 17 | assert eq_fn(lst[i], lst[0]), f"{lst[i]} != {lst[0]}; {weight_location_type=}; {i=}" 18 | return lst[0] 19 | 20 | 21 | def test_model_context_weights() -> None: 22 | for model_name in ["gpt2-small"]: 23 | contexts = [] 24 | standard_model_context = ModelContext.from_model_type( 25 | model_name, 26 | inference_engine_type=InferenceEngineType.STANDARD, 27 | device="cpu", 28 | ) 29 | contexts.append(("standard", standard_model_context)) 30 | 31 | standard_model_context_with_model = ModelContext.from_model_type( 32 | model_name, 33 | inference_engine_type=InferenceEngineType.STANDARD, 34 | device="cpu", 35 | ) 36 | standard_model_context_with_model.get_or_create_model(simplify=False) # type: ignore 37 | contexts.append(("standard_cached", standard_model_context_with_model)) 38 | 39 | for weight_location_type in WeightLocationType: 40 | if not weight_location_type.has_no_layers: 41 | # just test layer 0 for now 42 | layer_index: int | None = 0 43 | else: 44 | layer_index = None 45 | 46 | weights = [] 47 | for ctx_name, ctx in contexts: 48 | try: 49 | t = time.time() 50 | # Convert all weights to float32, since different contexts may use different 51 | # dtypes by default. torch.allclose requires dtypes to match. 
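# (Upcasting from float16/bfloat16 to float32 is exact, so weights that agree at
# their stored precision still compare equal under the tolerances used below.)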
52 | weight = ctx.get_weight(weight_location_type, layer_index).to(torch.float32) 53 | print(f"{ctx_name} {weight.shape=} loaded in {time.time() - t:.2f}s") 54 | weights.append(weight) 55 | except NotImplementedError: 56 | print(f"{weight_location_type} not implemented in {ctx_name} context") 57 | 58 | if len(weights): 59 | assert_all_eq( 60 | [weight.shape for weight in weights], lambda x, y: x == y, weight_location_type 61 | ) 62 | assert_all_eq( 63 | list(weights), 64 | lambda x, y: torch.allclose(x, y, atol=1e-5, rtol=1e-3), 65 | weight_location_type, 66 | ) 67 | else: 68 | print(f"no weights found for {weight_location_type}") 69 | -------------------------------------------------------------------------------- /neuron_explainer/tests/test_serialization_of_model_config_from_model_context.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from neuron_explainer.models.model_context import StandardModelContext 4 | 5 | 6 | def test_standard_model_context(standard_model_context: StandardModelContext) -> None: 7 | json.dumps(standard_model_context.get_model_config_as_dict()) 8 | -------------------------------------------------------------------------------- /neuron_explainer/tests/test_trace_through_v.py: -------------------------------------------------------------------------------- 1 | from neuron_explainer.activation_server.derived_scalar_computation import ( 2 | get_derived_scalars_for_prompt, 3 | maybe_construct_loss_fn_for_backward_pass, 4 | ) 5 | from neuron_explainer.activation_server.requests_and_responses import LossFnConfig, LossFnName 6 | from neuron_explainer.activations.derived_scalars import DerivedScalarType 7 | from neuron_explainer.activations.derived_scalars.derived_scalar_store import AttentionTraceType 8 | from neuron_explainer.activations.derived_scalars.indexing import ( 9 | NodeIndex, 10 | PreOrPostAct, 11 | TraceConfig, 12 | ) 13 | from neuron_explainer.activations.derived_scalars.scalar_deriver import DstConfig 14 | from neuron_explainer.models.autoencoder_context import AutoencoderContext 15 | from neuron_explainer.models.model_component_registry import NodeType, PassType 16 | from neuron_explainer.models.model_context import StandardModelContext 17 | 18 | DETACH_LAYER_NORM_SCALE_FOR_TEST = ( 19 | False # this sets whether to detach layer norm scale when computing these DSTs. 
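# Detaching treats the layer norm scale (the per-token normalization factor) as a
# constant during the backward pass, so gradients flow through the normalized
# activations but not through the normalization statistics themselves.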
20 | ) 21 | 22 | 23 | def test_trace_through_v( 24 | standard_model_context: StandardModelContext, 25 | standard_autoencoder_context: AutoencoderContext, 26 | ) -> None: 27 | prompt = "This is a test" 28 | loss_fn_for_backward_pass = maybe_construct_loss_fn_for_backward_pass( 29 | model_context=standard_model_context, 30 | config=LossFnConfig( 31 | name=LossFnName.LOGIT_DIFF, 32 | target_tokens=["."], 33 | distractor_tokens=["!"], 34 | ), 35 | ) 36 | 37 | for downstream_trace_config in [ 38 | None, 39 | TraceConfig( 40 | node_index=NodeIndex( 41 | node_type=NodeType.ATTENTION_HEAD, 42 | layer_index=5, 43 | pass_type=PassType.FORWARD, 44 | tensor_indices=(0, 0, 0), 45 | ), 46 | pre_or_post_act=PreOrPostAct.POST, 47 | detach_layer_norm_scale=DETACH_LAYER_NORM_SCALE_FOR_TEST, 48 | attention_trace_type=AttentionTraceType.K, 49 | ), 50 | ]: 51 | trace_config = TraceConfig( 52 | node_index=NodeIndex( 53 | node_type=NodeType.ATTENTION_HEAD, 54 | layer_index=3, 55 | pass_type=PassType.FORWARD, 56 | tensor_indices=(0, 0, 0), 57 | ), 58 | pre_or_post_act=PreOrPostAct.POST, 59 | detach_layer_norm_scale=DETACH_LAYER_NORM_SCALE_FOR_TEST, 60 | attention_trace_type=AttentionTraceType.V, 61 | downstream_trace_config=downstream_trace_config, 62 | ) 63 | dst_config = DstConfig( 64 | model_context=standard_model_context, 65 | autoencoder_context=standard_autoencoder_context, 66 | trace_config=trace_config, 67 | ) 68 | dst_list = [ 69 | DerivedScalarType.UNFLATTENED_ATTN_WRITE_TO_FINAL_RESIDUAL_GRAD, 70 | DerivedScalarType.ONLINE_AUTOENCODER_WRITE_TO_FINAL_RESIDUAL_GRAD, 71 | ] 72 | dst_and_config_list = [(dst, dst_config) for dst in dst_list] 73 | current_ds_store, _, raw_store = get_derived_scalars_for_prompt( 74 | model_context=standard_model_context, 75 | prompt=prompt, 76 | trace_config=trace_config, 77 | dst_and_config_list=dst_and_config_list, # type: ignore 78 | autoencoder_context=standard_autoencoder_context, 79 | loss_fn_for_backward_pass=loss_fn_for_backward_pass, 80 | ) 81 | -------------------------------------------------------------------------------- /neuron_viewer/.gitignore: -------------------------------------------------------------------------------- 1 | **/*.trace 2 | **/*.zip 3 | **/*.tar.gz 4 | **/*.tgz 5 | **/*.log 6 | .parcel-cache 7 | 8 | **/*.bun 9 | 10 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
11 | 12 | # dependencies 13 | /node_modules 14 | /.pnp 15 | .pnp.js 16 | 17 | # testing 18 | /coverage 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | .env.local 26 | .env.development.local 27 | .env.test.local 28 | .env.production.local 29 | 30 | npm-debug.log* 31 | yarn-debug.log* 32 | yarn-error.log* 33 | 34 | *.pyc 35 | dist/ 36 | 37 | .vscode 38 | -------------------------------------------------------------------------------- /neuron_viewer/.parcelrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@parcel/config-default", 3 | "transformers": { 4 | "*.{ts,tsx}": ["@parcel/transformer-typescript-tsc"] 5 | }, 6 | "validators": { 7 | "*.{ts,tsx}": ["@parcel/validator-typescript"] 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /neuron_viewer/.postcssrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": { 3 | "tailwindcss": {} 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /neuron_viewer/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 100, 3 | "useTabs": false, 4 | "semi": true, 5 | "tabWidth": 2, 6 | } 7 | -------------------------------------------------------------------------------- /neuron_viewer/README.md: -------------------------------------------------------------------------------- 1 | # Neuron viewer 2 | 3 | A React app that hosts TDB as well as pages with information about individual model components 4 | (MLP neurons, attention heads and autoencoder latents for both). 5 | 6 | 7 | ## Running the server locally 8 | 9 | First, install the app: 10 | 11 | ```sh 12 | npm install 13 | ``` 14 | 15 | Then run the frontend: 16 | 17 | ```sh 18 | npm start 19 | ``` 20 | 21 | - To open a Neuron Viewer page, navigate to `http://localhost:1234`. 22 | - To open TDB, navigate to `http://localhost:1234/gpt2-small/tdb_alpha`. 23 | - To open TDB with autoencoders, navigate to `http://localhost:1234/gpt2-small_ae-resid-delta-mlp-v4_ae-resid-delta-attn-v4/tdb_alpha` 24 | (where `ae-resid-delta-mlp-v4` and `ae-resid-delta-attn-v4` must match the autoencoder names that are used in the [activation server](../neuron_explainer/activation_server/README.md)). 25 | 26 | ## Formatting code 27 | 28 | To check whether the code is correctly formatted: 29 | 30 | ```sh 31 | npm run check-code-format 32 | ``` 33 | 34 | To format the code: 35 | 36 | ```sh 37 | npm run format-code 38 | ``` 39 | 40 | ## Code organization 41 | 42 | - [src/client](src/client/): Auto-generated code for interacting with the activation server (the neuron viewer's backend). Do not edit this code! Follow the instructions in [the activation server README](../neuron_explainer/activation_server/README.md) to regenerate this code if you make changes to the activation server. Use [src/requests](src/requests/) when calling the activation server. 43 | - [src/panes](src/panes/): UI elements that can be used as panes on a page: tokens+activations, similar neurons, etc. 44 | - [src/requests](src/requests/): Client libraries for making network requests to the activation server. 45 | - [src/TransformerDebugger](src/TransformerDebugger/): Code related to the Transformer Debugger. 46 | - [src](src/): Other code. 
47 | 48 | ## Using a remote activation server 49 | 50 | If you decide to run your activation server on a different host or port than the default, you can 51 | point neuron viewer at it by setting the `NEURON_VIEWER_ACTIVATION_SERVER_URL` environment variable: 52 | 53 | ```sh 54 | NEURON_VIEWER_ACTIVATION_SERVER_URL=https://some.url:port npm start 55 | ``` 56 | 57 | ## Making changes 58 | 59 | Be sure to run the following to validate any changes you make: 60 | 61 | ```sh 62 | npm run check-type-warnings && npm run check-code-format && npm run build 63 | ``` 64 | -------------------------------------------------------------------------------- /neuron_viewer/prepend_autogen_comments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_dir="./src/client" 4 | prepend_string="// Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it." 5 | 6 | # Find all files 7 | find "$start_dir" -type f | while read file; do 8 | # Create a temporary file 9 | temp_file=$(mktemp) 10 | 11 | # Write the string to the temporary file 12 | echo "$prepend_string\n" > "$temp_file" 13 | 14 | # Concatenate the original file to the temporary file 15 | cat "$file" >> "$temp_file" 16 | 17 | # Replace the original file with the temporary file 18 | mv "$temp_file" "$file" 19 | done 20 | -------------------------------------------------------------------------------- /neuron_viewer/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/transformer-debugger/87e6db7b7e73ded5037eeeff05deb5e81548a10a/neuron_viewer/public/favicon.ico -------------------------------------------------------------------------------- /neuron_viewer/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/transformer-debugger/87e6db7b7e73ded5037eeeff05deb5e81548a10a/neuron_viewer/public/logo192.png -------------------------------------------------------------------------------- /neuron_viewer/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/transformer-debugger/87e6db7b7e73ded5037eeeff05deb5e81548a10a/neuron_viewer/public/logo512.png -------------------------------------------------------------------------------- /neuron_viewer/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /neuron_viewer/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /neuron_viewer/src/App.css: 
-------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | .ag-theme-alpine { 6 | --ag-grid-size: 1px; 7 | --ag-list-item-height: 20px; 8 | } 9 | -------------------------------------------------------------------------------- /neuron_viewer/src/App.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { useNavigate, Route, Routes, Link } from "react-router-dom"; 3 | import "./App.css"; 4 | import TransformerDebugger from "./TransformerDebugger/TransformerDebugger"; 5 | import { NextUIProvider } from "@nextui-org/react"; 6 | import Welcome from "./welcome"; 7 | import NodePage from "./nodePage"; 8 | 9 | const NotFoundPage: React.FC = () => { 10 | return ( 11 |
<div>
 12 | <div>
 13 | <h1>Page Not Found</h1>
 14 | <p>
 15 | Sorry, the page you are looking for does not exist.
 16 | </p>
 17 | <Link to="/">
 21 | Go back home
 22 | </Link>
 23 | </div>
 24 | </div>
25 | ); 26 | }; 27 | 28 | const App: React.FC = () => { 29 | const navigate = useNavigate(); 30 | 31 | return ( 32 | 33 | 34 | {/* Actual substantive pages */} 35 | } /> 36 | } /> 37 | } /> 38 | 39 | {/* Catch-all for bogus URLs */} 40 | } /> 41 | 42 | 43 | ); 44 | }; 45 | 46 | export default App; 47 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/DisplayOptions.tsx: -------------------------------------------------------------------------------- 1 | import { Checkbox } from "@nextui-org/react"; 2 | import { ExplanatoryTooltip } from "../common/ExplanatoryTooltip"; 3 | 4 | const displayKeys: { [key: string]: { label: string; explanation: string } } = { 5 | logits: { 6 | label: "Show logits display", 7 | explanation: 8 | "Whether to show a table listing top candidates for the next token with their logits.", 9 | }, 10 | bySequenceToken: { 11 | label: "Show token effect display", 12 | explanation: 13 | "Whether to show the prompt, with each token colored by the estimated total effect summed over all nodes of a same type (MLP neurons, attention heads, embeddings).", 14 | }, 15 | node: { 16 | label: "Show node table", 17 | explanation: 18 | "Whether to show a table of nodes (MLP neurons, attention heads, autoencoder latents, etc.) and their effect on the direction of interest.", 19 | }, 20 | }; 21 | 22 | const DisplayOptions = ({ 23 | displaySettings, 24 | toggleDisplay, 25 | }: { 26 | displaySettings: Map; 27 | toggleDisplay: (key: string) => void; 28 | }) => { 29 | return ( 30 | <> 31 | {Object.keys(displayKeys).map((key) => ( 32 | 33 |
34 | <Checkbox
 isSelected={displaySettings.get(key)}
 onChange={() => toggleDisplay(key)} 38 | > 39 | {displayKeys[key].label} 40 | </Checkbox> 41 | </div>
 42 | </ExplanatoryTooltip>
43 | ))} 44 | 45 | ); 46 | }; 47 | 48 | export default DisplayOptions; 49 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/LayerDisplay.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/transformer-debugger/87e6db7b7e73ded5037eeeff05deb5e81548a10a/neuron_viewer/src/TransformerDebugger/cards/LayerDisplay.tsx -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/SparsityMetricsDisplay.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/transformer-debugger/87e6db7b7e73ded5037eeeff05deb5e81548a10a/neuron_viewer/src/TransformerDebugger/cards/SparsityMetricsDisplay.tsx -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/TokenTable.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { formatToken } from "../../tokenRendering"; 3 | 4 | type TokenTableProps = { 5 | leftTokens: string[]; 6 | rightTokens?: string[]; 7 | }; 8 | 9 | const TokenTable: React.FC = ({ leftTokens, rightTokens }) => { 10 | const tableStyle: React.CSSProperties = { 11 | maxWidth: "800px", 12 | margin: "0 auto", 13 | borderCollapse: "collapse", 14 | borderColor: "#f0f0f0", 15 | }; 16 | 17 | const cellStyle: React.CSSProperties = { 18 | textAlign: "center", 19 | padding: "5px", 20 | border: "1px solid #f0f0f0", 21 | fontFamily: "monospace", 22 | }; 23 | 24 | const indexMismatchStyle: React.CSSProperties = { 25 | ...cellStyle, 26 | backgroundColor: "#ffcccc", // Light red background for mismatched indices 27 | }; 28 | 29 | const rowNameStyle: React.CSSProperties = { 30 | ...cellStyle, 31 | fontWeight: "bold", 32 | fontFamily: "sans-serif", 33 | }; 34 | 35 | const isMismatchAtIndex = (index: number) => { 36 | return rightTokens && leftTokens[index] !== rightTokens[index]; 37 | }; 38 | 39 | return ( 40 | 41 | 42 | 43 | 44 | {leftTokens.map((token, i) => ( 45 | 48 | ))} 49 | 50 | {rightTokens && ( 51 | 52 | 53 | {rightTokens.map((token, i) => ( 54 | 57 | ))} 58 | 59 | )} 60 | 61 | 62 | {leftTokens.map((_, i) => ( 63 | 66 | ))} 67 | 68 | 69 |
{rightTokens ? "Left token" : "Token"} 46 | {formatToken(token)} 47 |
Right token 55 | {formatToken(token)} 56 |
Index 64 | {i} 65 |
70 | ); 71 | }; 72 | 73 | export default TokenTable; 74 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/inference_params/AblateNodeSpecs.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | Table, 3 | TableHeader, 4 | TableColumn, 5 | TableBody, 6 | TableRow, 7 | TableCell, 8 | Button, 9 | Divider, 10 | } from "@nextui-org/react"; 11 | import { makeNodeName } from "../../utils/nodes"; 12 | import { NodeType, InferenceAndTokenData } from "../../../client"; 13 | import { TokenLabel } from "./TokenLabel"; 14 | import { PromptInferenceParams } from "./inferenceParams"; 15 | 16 | type AblateNodeSpecsProps = { 17 | leftPromptInferenceParams: PromptInferenceParams; 18 | setLeftPromptInferenceParams: React.Dispatch>; 19 | inferenceAndTokenData: InferenceAndTokenData | null; 20 | twoPromptsMode: boolean; 21 | }; 22 | export const AblateNodeSpecs: React.FC = ({ 23 | leftPromptInferenceParams, 24 | setLeftPromptInferenceParams, 25 | inferenceAndTokenData, 26 | twoPromptsMode, 27 | }) => { 28 | const nodeAblations = leftPromptInferenceParams.nodeAblations; 29 | if (nodeAblations.length === 0) { 30 | return null; 31 | } 32 | return ( 33 |
34 |
35 | Active ablations 36 |
37 |
38 | {twoPromptsMode ? ( 39 | Ablations affect both prompts 40 | ) : ( 41 | Left pane shows the ablated version; right pane shows the baseline version. 42 | )} 43 |
44 | 45 | 46 | Name 47 | Pass type 48 | Token attended to 49 | Token attended from 50 | Ablated to value 51 | Remove 52 | 53 | 54 | {nodeAblations.map((spec, index) => ( 55 | 56 | {makeNodeName(spec.nodeIndex)} 57 | {spec.nodeIndex.passType} 58 | 59 | {spec.nodeIndex.nodeType === NodeType.ATTENTION_HEAD ? ( 60 | 64 | ) : ( 65 | "" 66 | )} 67 | 68 | 69 | 73 | 74 | {spec.value} 75 | 76 | 89 | 90 | 91 | ))} 92 | 93 |
94 | 95 |
96 | ); 97 | }; 98 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/inference_params/TokenLabel.tsx: -------------------------------------------------------------------------------- 1 | // Displays a token index and optional token string. 2 | import React from "react"; 3 | import { InferenceAndTokenData } from "../../../client"; 4 | import { renderToken } from "../../../tokenRendering"; 5 | 6 | export const TokenLabel: React.FC<{ 7 | index: number; 8 | tokenString?: string; 9 | inferenceAndTokenData: InferenceAndTokenData | null; 10 | }> = ({ index, tokenString, inferenceAndTokenData }) => { 11 | const currentTokenString = inferenceAndTokenData?.tokensAsStrings[index] || tokenString || ""; 12 | return ( 13 | <> 14 | {renderToken(currentTokenString)} ({index}) 15 | 16 | ); 17 | }; 18 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/inference_params/inferenceParams.ts: -------------------------------------------------------------------------------- 1 | import type { 2 | ComponentTypeForMlp, 3 | ComponentTypeForAttention, 4 | NodeAblation, 5 | NodeToTrace, 6 | } from "../../../client"; 7 | 8 | // Prompt-specific parameters. If there are two prompts, all of these can vary between them. 9 | // (Note that we've temporarily forced ablations to match between prompts; they're stored 10 | // exclusively on the left prompt's params.) 11 | export type PromptInferenceParams = { 12 | prompt: string; 13 | targetTokens: string[]; 14 | distractorTokens: string[]; 15 | nodeAblations: NodeAblation[]; 16 | upstreamNodeToTrace: NodeToTrace | null; 17 | downstreamNodeToTrace: NodeToTrace | null; 18 | }; 19 | 20 | // Non-prompt-specific parameters. If there are two prompts, these are shared between them. 21 | export type CommonInferenceParams = { 22 | componentTypeForMlp: ComponentTypeForMlp; 23 | componentTypeForAttention: ComponentTypeForAttention; 24 | topAndBottomKForNodeTable: number; 25 | hideEarlyLayersWhenAblating: boolean; 26 | }; 27 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/node_table/TopTokensDisplay.tsx: -------------------------------------------------------------------------------- 1 | import { TopTokens, TokenAndScalar } from "../../../client"; 2 | import { Tooltip } from "@nextui-org/react"; 3 | import { ExplanatoryTooltip } from "../../common/ExplanatoryTooltip"; 4 | import { renderTokenOnGray } from "../../../tokenRendering"; 5 | 6 | const renderTokenList = ( 7 | title: string, 8 | explanation: string, 9 | tokens: TokenAndScalar[], 10 | maxTokens?: number 11 | ) => ( 12 | 13 | 14 | 15 | 16 | {title}: 17 | 18 | 19 | 20 | {tokens.slice(0, maxTokens).map((token, idx) => { 21 | return ( 22 | 23 | {renderTokenOnGray(token.token, idx)} 24 | 25 | ); 26 | })} 27 | 28 | ); 29 | 30 | function whichSidesToDisplay( 31 | leftSideData: TopTokens | null, 32 | rightSideData: TopTokens | null, 33 | maxTokens?: number 34 | ): { displayLeftSide: boolean; displayRightSide: boolean } { 35 | // Display both sides unless all the tokens are the same. 
If they are the same, then show the side 36 | // with larger magnitude on the first token (to avoid showing a side with all 0s) 37 | let displayLeftSide = leftSideData !== null; 38 | let displayRightSide = rightSideData !== null; 39 | if (leftSideData && rightSideData) { 40 | const leftTopToken = leftSideData.top[0]; 41 | const rightTopToken = rightSideData.top[0]; 42 | if (Math.abs(leftTopToken.scalar) <= 0.01) { 43 | return { displayLeftSide: false, displayRightSide: true }; 44 | } 45 | if (Math.abs(rightTopToken.scalar) <= 0.01) { 46 | return { displayLeftSide: true, displayRightSide: false }; 47 | } 48 | const leftTopTokens = leftSideData.top.slice(0, maxTokens).map((token) => token.token); 49 | const rightTopTokens = rightSideData.top.slice(0, maxTokens).map((token) => token.token); 50 | const leftBottomTokens = leftSideData.bottom.slice(0, maxTokens).map((token) => token.token); 51 | const rightBottomTokens = rightSideData.bottom.slice(0, maxTokens).map((token) => token.token); 52 | const topTokensAreEqual = 53 | leftTopTokens.length === rightTopTokens.length && 54 | leftTopTokens.every((token, index) => token === rightTopTokens[index]); 55 | const bottomTokensAreEqual = 56 | leftBottomTokens.length === rightBottomTokens.length && 57 | leftBottomTokens.every((token, index) => token === rightBottomTokens[index]); 58 | if (topTokensAreEqual && bottomTokensAreEqual) { 59 | displayLeftSide = Math.abs(leftTopToken.scalar) > Math.abs(rightTopToken.scalar); 60 | displayRightSide = !displayLeftSide; 61 | } 62 | return { displayLeftSide, displayRightSide }; 63 | } 64 | return { displayLeftSide, displayRightSide }; 65 | } 66 | 67 | export const TopTokensDisplay: React.FC<{ 68 | leftSideData: TopTokens | null; 69 | rightSideData: TopTokens | null; 70 | label: string; 71 | explanations: { increase: string; decrease: string }; 72 | }> = ({ leftSideData, rightSideData, label, explanations }) => { 73 | const { displayLeftSide, displayRightSide } = whichSidesToDisplay(leftSideData, rightSideData); 74 | const leftTopTokens = leftSideData?.top; 75 | const leftBottomTokens = leftSideData?.bottom; 76 | const rightTopTokens = rightSideData?.top; 77 | const rightBottomTokens = rightSideData?.bottom; 78 | const leftTitlePrefix = "Left "; 79 | const rightTitlePrefix = "Right "; 80 | return ( 81 |
<div>
 82 | {displayLeftSide && ( 83 | <div>
 84 | {leftTopTokens && 85 | renderTokenList( 86 | `${leftTitlePrefix}${label} top`, 87 | explanations.increase, 88 | leftTopTokens, 89 | 10 90 | )} 91 | {leftBottomTokens && 92 | renderTokenList("bottom", explanations.decrease, leftBottomTokens, 10)} 93 | </div>
 94 | )} 95 | {displayRightSide && ( 96 | <div>
 97 | {rightTopTokens && 98 | renderTokenList( 99 | `${rightTitlePrefix}${label} top`, 100 | explanations.increase, 101 | rightTopTokens, 102 | 10 103 | )} 104 | {rightBottomTokens && 105 | renderTokenList("bottom", explanations.decrease, rightBottomTokens, 10)} 106 | </div>
 107 | )} 108 | </div>
109 | ); 110 | }; 111 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/prompt/MultiTokenInput.tsx: -------------------------------------------------------------------------------- 1 | import { Button, Input } from "@nextui-org/react"; 2 | 3 | export const MultiTokenInput: React.FC<{ 4 | tokens: string[]; 5 | onChange: (tokens: string[]) => void; 6 | className?: string; 7 | allowLengthZero?: boolean; 8 | }> = ({ tokens, onChange, className, allowLengthZero }) => { 9 | // display a row of text inputs with one token per input, + button to add more tokens, - button to remove last token 10 | // when token is changed, call onChange with new tokens 11 | const allowRemovingTokens = tokens.length > 1 || (allowLengthZero && tokens.length === 1); 12 | 13 | return ( 14 |
<div className={className}>
 15 | {tokens.map((token, index) => ( 16 | <Input
 key={index}
 value={token}
 onValueChange={(value) => { 23 | const newTokens = [...tokens]; 24 | newTokens[index] = value; 25 | onChange(newTokens); 26 | }} 27 | /> 28 | ))} 29 | <Button onPress={() => onChange([...tokens, ""])}>+</Button> 37 | <Button isDisabled={!allowRemovingTokens} onPress={() => onChange(tokens.slice(0, -1))}>-</Button> 50 | </div>
51 | ); 52 | }; 53 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/cards/prompt/swap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/transformer-debugger/87e6db7b7e73ded5037eeeff05deb5e81548a10a/neuron_viewer/src/TransformerDebugger/cards/prompt/swap.png -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/common/ExplanatoryTooltip.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Tooltip } from "@nextui-org/react"; 3 | 4 | // This component will result in its entire contents being tooltipped, such that hovering over any 5 | // part of it will show the explanation. 6 | // 7 | // Usage: 8 | // 9 | //

<ExplanatoryTooltip explanation="..."><div>Some contents</div>
 10 | // </ExplanatoryTooltip>
11 | export const ExplanatoryTooltip: React.FC<{ 12 | explanation: string; 13 | children: React.ReactNode; 14 | }> = ({ explanation, children }) => { 15 | return ( 16 | 17 | {children} 18 | 19 | ); 20 | }; 21 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/common/JsonModal.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | Modal, 3 | ModalContent, 4 | ModalHeader, 5 | ModalBody, 6 | ModalFooter, 7 | Button, 8 | useDisclosure, 9 | } from "@nextui-org/react"; 10 | import ReactJson, { OnCopyProps } from "@microlink/react-json-view"; 11 | import { MagnifyingGlassIcon } from "@heroicons/react/24/solid"; 12 | import { ExplanatoryTooltip } from "./ExplanatoryTooltip"; 13 | 14 | type JsonModalProps = { 15 | jsonData: any; 16 | buttonLabel?: string | JSX.Element; 17 | collapsed?: number; 18 | }; 19 | 20 | const copyOrDownload = (copy: OnCopyProps) => { 21 | const jsonAsString = JSON.stringify(copy.src, null, 2); 22 | if (navigator.clipboard) { 23 | navigator.clipboard.writeText(jsonAsString).catch((err) => { 24 | console.error("Error in copying text: ", err); 25 | }); 26 | } else { 27 | const blob = new Blob([jsonAsString], { 28 | type: "application/json", 29 | }); 30 | const href = URL.createObjectURL(blob); 31 | const link = document.createElement("a"); 32 | link.href = href; 33 | link.download = "data.json"; 34 | document.body.appendChild(link); 35 | link.click(); 36 | document.body.removeChild(link); 37 | URL.revokeObjectURL(href); 38 | } 39 | }; 40 | 41 | export default function JsonModal({ 42 | jsonData, 43 | buttonLabel = , 44 | collapsed = 2, 45 | }: JsonModalProps) { 46 | const { isOpen, onOpen, onOpenChange } = useDisclosure(); 47 | 48 | return ( 49 | <> 50 | 51 | 52 | 53 | 54 | 55 | {(onClose) => ( 56 | <> 57 | JSON Data 58 | 59 |
60 | 66 |
67 |
68 | 69 | 72 | 73 | 74 | )} 75 |
76 |
77 | 78 | ); 79 | } 80 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/requests/inferenceResponseUtils.tsx: -------------------------------------------------------------------------------- 1 | import type { InferenceAndTokenData, InferenceResponseAndResponseDict } from "../../client"; 2 | 3 | export function getSubResponse( 4 | responseData: InferenceResponseAndResponseDict | null, 5 | requestSpecName: string 6 | ): T | null { 7 | if (!responseData) { 8 | return null; 9 | } 10 | return responseData.processingResponseDataByName![requestSpecName] as T; 11 | } 12 | 13 | export function getInferenceAndTokenData( 14 | responseData: InferenceResponseAndResponseDict | null 15 | ): InferenceAndTokenData | null { 16 | if (!responseData) { 17 | return null; 18 | } 19 | return responseData.inferenceResponse.inferenceAndTokenData; 20 | } 21 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/utils/explanations.ts: -------------------------------------------------------------------------------- 1 | // TODO: Make this explanation clearer. Does this only cover the direct effect as opposed to indirect effects? 2 | export const WRITE_MAGNITUDE_EXPLANATION = 3 | "Magnitude of the write vector to the direction of interest produced by the component."; 4 | 5 | export const ACTIVATION_EXPLANATION = 6 | "MLP post-activation, attention post-softmax, or autoencoder latent activation."; 7 | 8 | // TODO: Make this explanation clearer. Is this a magnitude? Does this only cover the direct effect as opposed to indirect effects? 9 | export const DIRECTION_WRITE_EXPLANATION = 10 | "Direction write: Value of the write to the direction of interest."; 11 | 12 | export const ACT_TIMES_GRAD_EXPLANATION = 13 | "Activation * gradient: Estimate of the total effect of the component on the activation of the direction of interest, including indirect effects through other components."; 14 | 15 | export const TOKEN_ATTENDED_TO_EXPLANATION = 16 | "Token attended-to, for attention heads only, where activations are specific to a token pair. This is the least recent token in the token pair."; 17 | 18 | export const TOKEN_ATTRIBUTED_TO_EXPLANATION = 19 | "Token attended-to, for attention-write autoencoder latents only. This is the token with the most positive attribution to the latent activation."; 20 | 21 | export const TOKEN_ATTENDED_FROM_EXPLANATION = 22 | "Current token, for all components. For MLP neurons and MLP latents, this is the token where the component activates. For attention heads, where activations are specific to a token pair, this is the most recent token in the token pair."; 23 | -------------------------------------------------------------------------------- /neuron_viewer/src/TransformerDebugger/utils/numbers.tsx: -------------------------------------------------------------------------------- 1 | import { IRowNode } from "ag-grid-community"; 2 | 3 | export const formatFloat = (value: any, numDecimalPlaces: number = 2) => { 4 | return value !== undefined ? parseFloat(value).toFixed(numDecimalPlaces) : ""; 5 | }; 6 | 7 | export const formatFloatWithZeroPoint = ( 8 | value: any, 9 | zeroPoint: number, 10 | numDecimalPlaces: number = 2 11 | ) => { 12 | return value !== undefined ? 
(parseFloat(value) - zeroPoint).toFixed(numDecimalPlaces) : ""; 13 | }; 14 | 15 | export const diffOptionalNumbers = (a: number | undefined, b: number | undefined) => { 16 | if (a === undefined) { 17 | a = 0; 18 | } 19 | if (b === undefined) { 20 | b = 0; 21 | } 22 | return a - b; 23 | }; 24 | 25 | export const compareWithUndefinedAsZero = ( 26 | a: number | undefined, 27 | b: number | undefined, 28 | unusedNodeA: IRowNode, 29 | unusedNodeB: IRowNode, 30 | // The grid itself handles inverting the order, so the comparator doesn't need to use it. 31 | unusedIsDescending: boolean 32 | ) => { 33 | if (a === undefined) { 34 | a = 0; 35 | } 36 | if (b === undefined) { 37 | b = 0; 38 | } 39 | return a - b; 40 | }; 41 | 42 | export const compareWithUndefinedLast = ( 43 | a: number | undefined, 44 | b: number | undefined, 45 | unusedNodeA: IRowNode, 46 | unusedNodeB: IRowNode, 47 | isDescending: boolean 48 | ) => { 49 | if (a === undefined) { 50 | a = isDescending ? -Infinity : Infinity; 51 | } 52 | if (b === undefined) { 53 | b = isDescending ? -Infinity : Infinity; 54 | } 55 | return a - b; 56 | }; 57 | -------------------------------------------------------------------------------- /neuron_viewer/src/client/core/ApiError.ts: -------------------------------------------------------------------------------- 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it. 2 | 3 | /* istanbul ignore file */ 4 | /* tslint:disable */ 5 | /* eslint-disable */ 6 | import type { ApiRequestOptions } from "./ApiRequestOptions"; 7 | import type { ApiResult } from "./ApiResult"; 8 | 9 | export class ApiError extends Error { 10 | public readonly url: string; 11 | public readonly status: number; 12 | public readonly statusText: string; 13 | public readonly body: any; 14 | public readonly request: ApiRequestOptions; 15 | 16 | constructor(request: ApiRequestOptions, response: ApiResult, message: string) { 17 | super(message); 18 | 19 | this.name = "ApiError"; 20 | this.url = response.url; 21 | this.status = response.status; 22 | this.statusText = response.statusText; 23 | this.body = response.body; 24 | this.request = request; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /neuron_viewer/src/client/core/ApiRequestOptions.ts: -------------------------------------------------------------------------------- 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it. 2 | 3 | /* istanbul ignore file */ 4 | /* tslint:disable */ 5 | /* eslint-disable */ 6 | export type ApiRequestOptions = { 7 | readonly method: "GET" | "PUT" | "POST" | "DELETE" | "OPTIONS" | "HEAD" | "PATCH"; 8 | readonly url: string; 9 | readonly path?: Record; 10 | readonly cookies?: Record; 11 | readonly headers?: Record; 12 | readonly query?: Record; 13 | readonly formData?: Record; 14 | readonly body?: any; 15 | readonly mediaType?: string; 16 | readonly responseHeader?: string; 17 | readonly errors?: Record; 18 | }; 19 | -------------------------------------------------------------------------------- /neuron_viewer/src/client/core/ApiResult.ts: -------------------------------------------------------------------------------- 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it. 
2 | 3 | /* istanbul ignore file */ 4 | /* tslint:disable */ 5 | /* eslint-disable */ 6 | export type ApiResult = { 7 | readonly url: string; 8 | readonly ok: boolean; 9 | readonly status: number; 10 | readonly statusText: string; 11 | readonly body: any; 12 | }; 13 | -------------------------------------------------------------------------------- /neuron_viewer/src/client/core/CancelablePromise.ts: -------------------------------------------------------------------------------- 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it. 2 | 3 | /* istanbul ignore file */ 4 | /* tslint:disable */ 5 | /* eslint-disable */ 6 | export class CancelError extends Error { 7 | constructor(message: string) { 8 | super(message); 9 | this.name = "CancelError"; 10 | } 11 | 12 | public get isCancelled(): boolean { 13 | return true; 14 | } 15 | } 16 | 17 | export interface OnCancel { 18 | readonly isResolved: boolean; 19 | readonly isRejected: boolean; 20 | readonly isCancelled: boolean; 21 | 22 | (cancelHandler: () => void): void; 23 | } 24 | 25 | export class CancelablePromise implements Promise { 26 | #isResolved: boolean; 27 | #isRejected: boolean; 28 | #isCancelled: boolean; 29 | readonly #cancelHandlers: (() => void)[]; 30 | readonly #promise: Promise; 31 | #resolve?: (value: T | PromiseLike) => void; 32 | #reject?: (reason?: any) => void; 33 | 34 | constructor( 35 | executor: ( 36 | resolve: (value: T | PromiseLike) => void, 37 | reject: (reason?: any) => void, 38 | onCancel: OnCancel 39 | ) => void 40 | ) { 41 | this.#isResolved = false; 42 | this.#isRejected = false; 43 | this.#isCancelled = false; 44 | this.#cancelHandlers = []; 45 | this.#promise = new Promise((resolve, reject) => { 46 | this.#resolve = resolve; 47 | this.#reject = reject; 48 | 49 | const onResolve = (value: T | PromiseLike): void => { 50 | if (this.#isResolved || this.#isRejected || this.#isCancelled) { 51 | return; 52 | } 53 | this.#isResolved = true; 54 | this.#resolve?.(value); 55 | }; 56 | 57 | const onReject = (reason?: any): void => { 58 | if (this.#isResolved || this.#isRejected || this.#isCancelled) { 59 | return; 60 | } 61 | this.#isRejected = true; 62 | this.#reject?.(reason); 63 | }; 64 | 65 | const onCancel = (cancelHandler: () => void): void => { 66 | if (this.#isResolved || this.#isRejected || this.#isCancelled) { 67 | return; 68 | } 69 | this.#cancelHandlers.push(cancelHandler); 70 | }; 71 | 72 | Object.defineProperty(onCancel, "isResolved", { 73 | get: (): boolean => this.#isResolved, 74 | }); 75 | 76 | Object.defineProperty(onCancel, "isRejected", { 77 | get: (): boolean => this.#isRejected, 78 | }); 79 | 80 | Object.defineProperty(onCancel, "isCancelled", { 81 | get: (): boolean => this.#isCancelled, 82 | }); 83 | 84 | return executor(onResolve, onReject, onCancel as OnCancel); 85 | }); 86 | } 87 | 88 | get [Symbol.toStringTag]() { 89 | return "Cancellable Promise"; 90 | } 91 | 92 | public then( 93 | onFulfilled?: ((value: T) => TResult1 | PromiseLike) | null, 94 | onRejected?: ((reason: any) => TResult2 | PromiseLike) | null 95 | ): Promise { 96 | return this.#promise.then(onFulfilled, onRejected); 97 | } 98 | 99 | public catch( 100 | onRejected?: ((reason: any) => TResult | PromiseLike) | null 101 | ): Promise { 102 | return this.#promise.catch(onRejected); 103 | } 104 | 105 | public finally(onFinally?: (() => void) | null): Promise { 106 | return this.#promise.finally(onFinally); 107 | } 108 | 109 | public cancel(): void { 110 | if 
(this.#isResolved || this.#isRejected || this.#isCancelled) { 111 | return; 112 | } 113 | this.#isCancelled = true; 114 | if (this.#cancelHandlers.length) { 115 | try { 116 | for (const cancelHandler of this.#cancelHandlers) { 117 | cancelHandler(); 118 | } 119 | } catch (error) { 120 | console.warn("Cancellation threw an error", error); 121 | return; 122 | } 123 | } 124 | this.#cancelHandlers.length = 0; 125 | this.#reject?.(new CancelError("Request aborted")); 126 | } 127 | 128 | public get isCancelled(): boolean { 129 | return this.#isCancelled; 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /neuron_viewer/src/client/core/OpenAPI.ts: -------------------------------------------------------------------------------- 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it. 2 | 3 | /* istanbul ignore file */ 4 | /* tslint:disable */ 5 | /* eslint-disable */ 6 | import type { ApiRequestOptions } from "./ApiRequestOptions"; 7 | 8 | type Resolver = (options: ApiRequestOptions) => Promise; 9 | type Headers = Record; 10 | 11 | export type OpenAPIConfig = { 12 | BASE: string; 13 | VERSION: string; 14 | WITH_CREDENTIALS: boolean; 15 | CREDENTIALS: "include" | "omit" | "same-origin"; 16 | TOKEN?: string | Resolver; 17 | USERNAME?: string | Resolver; 18 | PASSWORD?: string | Resolver; 19 | HEADERS?: Headers | Resolver; 20 | ENCODE_PATH?: (path: string) => string; 21 | }; 22 | 23 | export const OpenAPI: OpenAPIConfig = { 24 | BASE: "", 25 | VERSION: "0.1.0", 26 | WITH_CREDENTIALS: false, 27 | CREDENTIALS: "include", 28 | TOKEN: undefined, 29 | USERNAME: undefined, 30 | PASSWORD: undefined, 31 | HEADERS: undefined, 32 | ENCODE_PATH: undefined, 33 | }; 34 | -------------------------------------------------------------------------------- /neuron_viewer/src/client/models/AblationSpec.ts: -------------------------------------------------------------------------------- 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it. 2 | 3 | /* istanbul ignore file */ 4 | /* tslint:disable */ 5 | /* eslint-disable */ 6 | 7 | import type { MirroredActivationIndex } from "./MirroredActivationIndex"; 8 | 9 | /** 10 | * A specification for performing ablation on a model. 11 | */ 12 | export type AblationSpec = { 13 | index: MirroredActivationIndex; 14 | value: number; 15 | }; 16 | -------------------------------------------------------------------------------- /neuron_viewer/src/client/models/ActivationLocationType.ts: -------------------------------------------------------------------------------- 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it. 2 | 3 | /* istanbul ignore file */ 4 | /* tslint:disable */ 5 | /* eslint-disable */ 6 | 7 | /** 8 | * These are the names of activations expected to be instantiated during a forward pass. All activations are 9 | * pre-layer norm unless otherwise specified (RESID_POST_XYZ_LAYER_NORM). 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/AblationSpec.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { MirroredActivationIndex } from "./MirroredActivationIndex";
8 | 
9 | /**
10 |  * A specification for performing ablation on a model.
11 |  */
12 | export type AblationSpec = {
13 |   index: MirroredActivationIndex;
14 |   value: number;
15 | };
16 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ActivationLocationType.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * These are the names of activations expected to be instantiated during a forward pass. All activations are
9 |  * pre-layer norm unless otherwise specified (RESID_POST_XYZ_LAYER_NORM).
10 |  */
11 | export enum ActivationLocationType {
12 |   RESID_POST_EMB = "resid.post_emb",
13 |   RESID_DELTA_ATTN = "resid.delta_attn",
14 |   RESID_POST_ATTN = "resid.post_attn",
15 |   RESID_DELTA_MLP = "resid.delta_mlp",
16 |   RESID_POST_MLP = "resid.post_mlp",
17 |   RESID_POST_MLP_LN = "resid.post_mlp_ln",
18 |   RESID_POST_ATTN_LN = "resid.post_attn_ln",
19 |   RESID_POST_LN_F = "resid.post_ln_f",
20 |   MLP_LN_SCALE = "mlp_ln.scale",
21 |   ATTN_LN_SCALE = "attn_ln.scale",
22 |   RESID_LN_F_SCALE = "resid.ln_f.scale",
23 |   ATTN_Q = "attn.q",
24 |   ATTN_K = "attn.k",
25 |   ATTN_V = "attn.v",
26 |   ATTN_QK_LOGITS = "attn.qk_logits",
27 |   ATTN_QK_PROBS = "attn.qk_probs",
28 |   ATTN_V_OUT = "attn.v_out",
29 |   MLP_PRE_ACT = "mlp.pre_act",
30 |   MLP_POST_ACT = "mlp.post_act",
31 |   LOGITS = "logits",
32 |   ONLINE_AUTOENCODER_LATENT = "online_autoencoder_latent",
33 |   ONLINE_MLP_AUTOENCODER_LATENT = "online_mlp_autoencoder_latent",
34 |   ONLINE_ATTENTION_AUTOENCODER_LATENT = "online_attention_autoencoder_latent",
35 |   ONLINE_MLP_AUTOENCODER_ERROR = "online_mlp_autoencoder_error",
36 |   ONLINE_RESIDUAL_MLP_AUTOENCODER_ERROR = "online_residual_mlp_autoencoder_error",
37 |   ONLINE_RESIDUAL_ATTENTION_AUTOENCODER_ERROR = "online_residual_attention_autoencoder_error",
38 | }
39 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/AttentionHeadRecordResponse.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { TokenAndAttentionScalars } from "./TokenAndAttentionScalars";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type AttentionHeadRecordResponse = {
15 |   dataset: string;
16 |   maxAttentionActivation: number;
17 |   mostPositiveTokenSequences: Array<Array<TokenAndAttentionScalars>>;
18 |   randomTokenSequences: Array<Array<TokenAndAttentionScalars>>;
19 | };
20 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/AttentionTraceType.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * An enumeration.
9 |  */
10 | export enum AttentionTraceType {
11 |   Q = "Q",
12 |   K = "K",
13 |   QK = "QK",
14 |   V = "V",
15 | }
16 | 
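Combining the models above: an AblationSpec pins a single activation to a fixed value during inference. A hypothetical sketch (the tensorIndices layout, layer, token, and neuron indices are made up for illustration):

import type { AblationSpec } from "./AblationSpec";
import { ActivationLocationType } from "./ActivationLocationType";
import { PassType } from "./PassType";

// Zero-ablate MLP neuron 123 at token position 4 in layer 7.
const ablation: AblationSpec = {
  index: {
    activationLocationType: ActivationLocationType.MLP_POST_ACT,
    tensorIndices: [4, 123], // hypothetical layout: [token index, neuron index]
    layerIndex: 7,
    passType: PassType.FORWARD,
  },
  value: 0,
};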
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/AttributedScoredExplanation.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type AttributedScoredExplanation = {
13 |   explanation: string;
14 |   score?: number;
15 |   datasetName: string;
16 | };
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/BatchedRequest.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { InferenceSubRequest } from "./InferenceSubRequest";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type BatchedRequest = {
15 |   inferenceSubRequests: Array<InferenceSubRequest>;
16 | };
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/BatchedResponse.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { InferenceResponseAndResponseDict } from "./InferenceResponseAndResponseDict";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type BatchedResponse = {
15 |   inferenceSubResponses: Array<InferenceResponseAndResponseDict>;
16 | };
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/BatchedTdbRequest.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { TdbRequestSpec } from "./TdbRequestSpec";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type BatchedTdbRequest = {
15 |   subRequests: Array<TdbRequestSpec>;
16 | };
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ComponentTypeForAttention.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * The type of component / fundamental unit to use for Attention layers.
9 |  *
10 |  * This determines which types of node appear in the node table to represent the Attention layers.
11 |  * Heads are the fundamental unit of Attention layers, but autoencoder latents are more interpretable.
12 |  */
13 | export enum ComponentTypeForAttention {
14 |   ATTENTION_HEAD = "attention_head",
15 |   AUTOENCODER_LATENT = "autoencoder_latent",
16 | }
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ComponentTypeForMlp.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * The type of component / fundamental unit to use for MLP layers.
9 |  *
10 |  * This determines which types of node appear in the node table to represent the MLP layers.
11 |  * Neurons are the fundamental unit of MLP layers, but autoencoder latents are more interpretable.
12 |  */
13 | export enum ComponentTypeForMlp {
14 |   NEURON = "neuron",
15 |   AUTOENCODER_LATENT = "autoencoder_latent",
16 | }
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/DerivedAttentionScalarsRequest.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedAttentionScalarsRequestSpec } from "./DerivedAttentionScalarsRequestSpec";
8 | import type { InferenceRequestSpec } from "./InferenceRequestSpec";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type DerivedAttentionScalarsRequest = {
16 |   inferenceRequestSpec: InferenceRequestSpec;
17 |   derivedAttentionScalarsRequestSpec: DerivedAttentionScalarsRequestSpec;
18 | };
19 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/DerivedAttentionScalarsRequestSpec.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedScalarType } from "./DerivedScalarType";
8 | import type { NodeIdAndDatasets } from "./NodeIdAndDatasets";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type DerivedAttentionScalarsRequestSpec = {
16 |   specType?: DerivedAttentionScalarsRequestSpec.specType;
17 |   dst: DerivedScalarType;
18 |   layerIndex?: number;
19 |   activationIndex: number;
20 |   normalizeActivationsUsingNeuronRecord?: NodeIdAndDatasets;
21 | };
22 | 
23 | export namespace DerivedAttentionScalarsRequestSpec {
24 |   export enum specType {
25 |     DERIVED_ATTENTION_SCALARS_REQUEST_SPEC = "derived_attention_scalars_request_spec",
26 |   }
27 | }
28 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/DerivedAttentionScalarsResponse.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedAttentionScalarsResponseData } from "./DerivedAttentionScalarsResponseData";
8 | import type { InferenceAndTokenData } from "./InferenceAndTokenData";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type DerivedAttentionScalarsResponse = {
16 |   inferenceAndTokenData: InferenceAndTokenData;
17 |   derivedAttentionScalarsResponseData: DerivedAttentionScalarsResponseData;
18 | };
19 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/DerivedAttentionScalarsResponseData.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { ProcessingResponseDataType } from "./ProcessingResponseDataType";
8 | import type { TokenAndAttentionScalars } from "./TokenAndAttentionScalars";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type DerivedAttentionScalarsResponseData = {
16 |   responseDataType?: ProcessingResponseDataType;
17 |   tokenAndAttentionScalarsList: Array<TokenAndAttentionScalars>;
18 | };
19 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/DerivedScalarsRequest.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedScalarsRequestSpec } from "./DerivedScalarsRequestSpec";
8 | import type { InferenceRequestSpec } from "./InferenceRequestSpec";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type DerivedScalarsRequest = {
16 |   inferenceRequestSpec: InferenceRequestSpec;
17 |   derivedScalarsRequestSpec: DerivedScalarsRequestSpec;
18 | };
19 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/DerivedScalarsRequestSpec.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedScalarType } from "./DerivedScalarType";
8 | import type { NodeIdAndDatasets } from "./NodeIdAndDatasets";
9 | import type { PassType } from "./PassType";
10 | 
11 | /**
12 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
13 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
14 |  * camelCase names.
15 |  */
16 | export type DerivedScalarsRequestSpec = {
17 |   specType?: DerivedScalarsRequestSpec.specType;
18 |   dst: DerivedScalarType;
19 |   layerIndex?: number;
20 |   activationIndex: number;
21 |   normalizeActivationsUsingNeuronRecord?: NodeIdAndDatasets;
22 |   passType?: PassType;
23 |   numTopTokens?: number;
24 | };
25 | 
26 | export namespace DerivedScalarsRequestSpec {
27 |   export enum specType {
28 |     DERIVED_SCALARS_REQUEST_SPEC = "derived_scalars_request_spec",
29 |   }
30 | }
31 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/DerivedScalarsResponse.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedScalarsResponseData } from "./DerivedScalarsResponseData";
8 | import type { InferenceAndTokenData } from "./InferenceAndTokenData";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type DerivedScalarsResponse = {
16 |   inferenceAndTokenData: InferenceAndTokenData;
17 |   derivedScalarsResponseData: DerivedScalarsResponseData;
18 | };
19 | 
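A sketch of how these request models compose. The DerivedScalarType member name is an assumption (its generated file lives elsewhere in this package), and all indices are illustrative:

import type { DerivedScalarsRequest } from "./DerivedScalarsRequest";
import { DerivedScalarType } from "./DerivedScalarType";

// Ask for post-activation MLP scalars for neuron 2342 in layer 9 on a short prompt.
const request: DerivedScalarsRequest = {
  inferenceRequestSpec: { prompt: "The capital of France is" },
  derivedScalarsRequestSpec: {
    dst: DerivedScalarType.MLP_POST_ACT, // assumed enum member
    layerIndex: 9,
    activationIndex: 2342,
  },
};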
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/DerivedScalarsResponseData.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { MirroredNodeIndex } from "./MirroredNodeIndex";
8 | import type { ProcessingResponseDataType } from "./ProcessingResponseDataType";
9 | import type { TopTokens } from "./TopTokens";
10 | 
11 | /**
12 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
13 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
14 |  * camelCase names.
15 |  */
16 | export type DerivedScalarsResponseData = {
17 |   responseDataType?: ProcessingResponseDataType;
18 |   activations: Array<number>;
19 |   normalizedActivations?: Array<number>;
20 |   nodeIndices: Array<MirroredNodeIndex>;
21 |   topTokens?: TopTokens;
22 | };
23 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/Dimension.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Dimensions correspond to the names of dimensions of activation tensors, and can depend on the input,
9 |  * the model, or e.g. parameters of added subgraphs such as autoencoders.
10 |  * The dimensions below are taken to be 'per layer' wherever applicable.
11 |  * Dimensions associated with attention heads (e.g. value channels) are taken to be 'per attention head'.
12 |  */
13 | export enum Dimension {
14 |   SEQUENCE_TOKENS = "sequence_tokens",
15 |   ATTENDED_TO_SEQUENCE_TOKENS = "attended_to_sequence_tokens",
16 |   MAX_CONTEXT_LENGTH = "max_context_length",
17 |   RESIDUAL_STREAM_CHANNELS = "residual_stream_channels",
18 |   VOCAB_SIZE = "vocab_size",
19 |   ATTN_HEADS = "attn_heads",
20 |   QUERY_AND_KEY_CHANNELS = "query_and_key_channels",
21 |   VALUE_CHANNELS = "value_channels",
22 |   MLP_ACTS = "mlp_acts",
23 |   LAYERS = "layers",
24 |   SINGLETON = "singleton",
25 |   AUTOENCODER_LATENTS = "autoencoder_latents",
26 |   AUTOENCODER_LATENTS_BY_TOKEN_PAIR = "autoencoder_latents_by_token_pair",
27 | }
28 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ExistingExplanationsRequest.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedScalarType } from "./DerivedScalarType";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type ExistingExplanationsRequest = {
15 |   dst: DerivedScalarType;
16 |   layerIndex: number;
17 |   activationIndex: number;
18 |   explanationDatasets: Array<string>;
19 |   neuronDataset?: string;
20 | };
21 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ExplanationResult.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type ExplanationResult = {
13 |   explanations: Array<string>;
14 |   dataset: string;
15 | };
16 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/GroupId.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Identifiers for groups in multi-top-k requests.
9 |  */
10 | export enum GroupId {
11 |   ACT_TIMES_GRAD = "act_times_grad",
12 |   ACTIVATION = "activation",
13 |   DIRECT_WRITE_TO_GRAD = "direct_write_to_grad",
14 |   DIRECTION_WRITE = "direction_write",
15 |   LOGITS = "logits",
16 |   MLP_LAYER_WRITE = "mlp_layer_write",
17 |   SINGLETON = "singleton",
18 |   TOKEN_WRITE = "token_write",
19 |   TOKEN_READ = "token_read",
20 |   WRITE_NORM = "write_norm",
21 |   TOKEN_PAIR_ATTRIBUTION = "token_pair_attribution",
22 | }
23 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/HTTPValidationError.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { ValidationError } from "./ValidationError";
8 | 
9 | export type HTTPValidationError = {
10 |   detail?: Array<ValidationError>;
11 | };
12 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/InferenceAndTokenData.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type InferenceAndTokenData = {
13 |   inferenceTime: number;
14 |   memoryUsedBefore?: number;
15 |   memoryUsedAfter?: number;
16 |   log?: string;
17 |   loss?: number;
18 |   activationValueForBackwardPass?: number;
19 |   tokensAsInts: Array<number>;
20 |   tokensAsStrings: Array<string>;
21 | };
22 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/InferenceRequestSpec.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { AblationSpec } from "./AblationSpec";
8 | import type { LossFnConfig } from "./LossFnConfig";
9 | import type { MirroredActivationIndex } from "./MirroredActivationIndex";
10 | import type { MirroredTraceConfig } from "./MirroredTraceConfig";
11 | 
12 | /**
13 |  * The minimum specification for performing a forward and/or backward pass on a model, with hooks at some set of layers.
14 |  */
15 | export type InferenceRequestSpec = {
16 |   prompt: string;
17 |   ablationSpecs?: Array<AblationSpec>;
18 |   lossFnConfig?: LossFnConfig;
19 |   traceConfig?: MirroredTraceConfig;
20 |   activationIndexForWithinLayerGrad?: MirroredActivationIndex;
21 | };
22 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/InferenceResponse.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { InferenceAndTokenData } from "./InferenceAndTokenData";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type InferenceResponse = {
15 |   inferenceAndTokenData: InferenceAndTokenData;
16 | };
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/InferenceResponseAndResponseDict.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedAttentionScalarsResponseData } from "./DerivedAttentionScalarsResponseData";
8 | import type { DerivedScalarsResponseData } from "./DerivedScalarsResponseData";
9 | import type { InferenceResponse } from "./InferenceResponse";
10 | import type { MultipleTopKDerivedScalarsResponseData } from "./MultipleTopKDerivedScalarsResponseData";
11 | import type { ScoredTokensResponseData } from "./ScoredTokensResponseData";
12 | import type { TokenPairAttributionResponseData } from "./TokenPairAttributionResponseData";
13 | 
14 | /**
15 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
16 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
17 |  * camelCase names.
18 |  */
19 | export type InferenceResponseAndResponseDict = {
20 |   inferenceResponse: InferenceResponse;
21 |   processingResponseDataByName?: Record<
22 |     string,
23 |     | MultipleTopKDerivedScalarsResponseData
24 |     | DerivedScalarsResponseData
25 |     | DerivedAttentionScalarsResponseData
26 |     | ScoredTokensResponseData
27 |     | TokenPairAttributionResponseData
28 |   >;
29 | };
30 | 
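An InferenceRequestSpec needs only a prompt; the optional fields attach ablations and loss or trace configuration. A sketch with one hypothetical ablation (all indices are made up):

import type { InferenceRequestSpec } from "./InferenceRequestSpec";
import { ActivationLocationType } from "./ActivationLocationType";
import { PassType } from "./PassType";

// Run a forward pass with a single attention probability pinned to zero.
const spec: InferenceRequestSpec = {
  prompt: "When Mary and John went to the store, John gave a drink to",
  ablationSpecs: [
    {
      index: {
        activationLocationType: ActivationLocationType.ATTN_QK_PROBS,
        tensorIndices: [5, 14, 2], // hypothetical index layout
        layerIndex: 10,
        passType: PassType.FORWARD,
      },
      value: 0,
    },
  ],
};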
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/InferenceSubRequest.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedAttentionScalarsRequestSpec } from "./DerivedAttentionScalarsRequestSpec";
8 | import type { DerivedScalarsRequestSpec } from "./DerivedScalarsRequestSpec";
9 | import type { InferenceRequestSpec } from "./InferenceRequestSpec";
10 | import type { MultipleTopKDerivedScalarsRequestSpec } from "./MultipleTopKDerivedScalarsRequestSpec";
11 | import type { ScoredTokensRequestSpec } from "./ScoredTokensRequestSpec";
12 | import type { TokenPairAttributionRequestSpec } from "./TokenPairAttributionRequestSpec";
13 | 
14 | /**
15 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
16 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
17 |  * camelCase names.
18 |  */
19 | export type InferenceSubRequest = {
20 |   inferenceRequestSpec: InferenceRequestSpec;
21 |   processingRequestSpecByName?: Record<
22 |     string,
23 |     | MultipleTopKDerivedScalarsRequestSpec
24 |     | DerivedScalarsRequestSpec
25 |     | DerivedAttentionScalarsRequestSpec
26 |     | ScoredTokensRequestSpec
27 |     | TokenPairAttributionRequestSpec
28 |   >;
29 | };
30 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/LossFnConfig.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { LossFnName } from "./LossFnName";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type LossFnConfig = {
15 |   name: LossFnName;
16 |   targetTokens?: Array<string>;
17 |   distractorTokens?: Array<string>;
18 | };
19 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/LossFnName.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * An enumeration.
9 |  */
10 | export enum LossFnName {
11 |   LOGIT_DIFF = "logit_diff",
12 |   LOGIT_MINUS_MEAN = "logit_minus_mean",
13 |   PROBS = "probs",
14 |   ZERO = "zero",
15 | }
16 | 
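A backward pass needs a scalar loss, which LossFnConfig selects by name. A sketch of the common logit-diff case (the token strings are illustrative):

import type { LossFnConfig } from "./LossFnConfig";
import { LossFnName } from "./LossFnName";

// Loss = logit(" Mary") - logit(" John"): positive when the model prefers the target.
const lossFnConfig: LossFnConfig = {
  name: LossFnName.LOGIT_DIFF,
  targetTokens: [" Mary"],
  distractorTokens: [" John"],
};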
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/MirroredActivationIndex.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { ActivationLocationType } from "./ActivationLocationType";
8 | import type { PassType } from "./PassType";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type MirroredActivationIndex = {
16 |   activationLocationType: ActivationLocationType;
17 |   tensorIndices: Array<number>;
18 |   layerIndex?: number;
19 |   passType: PassType;
20 | };
21 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/MirroredNodeIndex.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { NodeType } from "./NodeType";
8 | import type { PassType } from "./PassType";
9 | 
10 | /**
11 |  * This class mirrors the fields of NodeIndex without default values.
12 |  */
13 | export type MirroredNodeIndex = {
14 |   nodeType: NodeType;
15 |   tensorIndices: Array<number>;
16 |   layerIndex?: number;
17 |   passType: PassType;
18 | };
19 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/MirroredTraceConfig.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { AttentionTraceType } from "./AttentionTraceType";
8 | import type { MirroredNodeIndex } from "./MirroredNodeIndex";
9 | import type { PreOrPostAct } from "./PreOrPostAct";
10 | 
11 | /**
12 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
13 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
14 |  * camelCase names.
15 |  */
16 | export type MirroredTraceConfig = {
17 |   nodeIndex: MirroredNodeIndex;
18 |   preOrPostAct: PreOrPostAct;
19 |   detachLayerNormScale: boolean;
20 |   attentionTraceType?: AttentionTraceType;
21 |   downstreamTraceConfig?: MirroredTraceConfig;
22 | };
23 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ModelInfoResponse.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type ModelInfoResponse = {
13 |   modelName?: string;
14 |   hasMlpAutoencoder: boolean;
15 |   mlpAutoencoderName?: string;
16 |   hasAttentionAutoencoder: boolean;
17 |   attentionAutoencoderName?: string;
18 |   nLayers: number;
19 | };
20 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/MultipleTopKDerivedScalarsRequest.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { InferenceRequestSpec } from "./InferenceRequestSpec";
8 | import type { MultipleTopKDerivedScalarsRequestSpec } from "./MultipleTopKDerivedScalarsRequestSpec";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type MultipleTopKDerivedScalarsRequest = {
16 |   inferenceRequestSpec: InferenceRequestSpec;
17 |   multipleTopKDerivedScalarsRequestSpec: MultipleTopKDerivedScalarsRequestSpec;
18 | };
19 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/MultipleTopKDerivedScalarsRequestSpec.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedScalarType } from "./DerivedScalarType";
8 | import type { Dimension } from "./Dimension";
9 | import type { PassType } from "./PassType";
10 | 
11 | /**
12 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
13 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
14 |  * camelCase names.
15 |  */
16 | export type MultipleTopKDerivedScalarsRequestSpec = {
17 |   specType?: MultipleTopKDerivedScalarsRequestSpec.specType;
18 |   dstListByGroupId: Record<string, Array<DerivedScalarType>>;
19 |   tokenIndex?: number;
20 |   topAndBottomK?: number;
21 |   passType?: PassType;
22 |   dimensionsToKeepForIntermediateSum?: Array<Dimension>;
23 | };
24 | 
25 | export namespace MultipleTopKDerivedScalarsRequestSpec {
26 |   export enum specType {
27 |     MULTIPLE_TOP_K_DERIVED_SCALARS_REQUEST_SPEC = "multiple_top_k_derived_scalars_request_spec",
28 |   }
29 | }
30 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/MultipleTopKDerivedScalarsResponse.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { InferenceAndTokenData } from "./InferenceAndTokenData";
8 | import type { MultipleTopKDerivedScalarsResponseData } from "./MultipleTopKDerivedScalarsResponseData";
9 | 
10 | /**
11 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
12 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
13 |  * camelCase names.
14 |  */
15 | export type MultipleTopKDerivedScalarsResponse = {
16 |   inferenceAndTokenData: InferenceAndTokenData;
17 |   multipleTopKDerivedScalarsResponseData: MultipleTopKDerivedScalarsResponseData;
18 | };
19 | 
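A sketch of a multi-top-k spec requesting two groups at once. The GroupId values are the convention for the record keys, and both DerivedScalarType member names here are assumptions:

import type { MultipleTopKDerivedScalarsRequestSpec } from "./MultipleTopKDerivedScalarsRequestSpec";
import { DerivedScalarType } from "./DerivedScalarType";
import { GroupId } from "./GroupId";

// Top and bottom 10 nodes by raw activation and by activation-times-gradient.
const spec: MultipleTopKDerivedScalarsRequestSpec = {
  dstListByGroupId: {
    [GroupId.ACTIVATION]: [DerivedScalarType.MLP_POST_ACT], // assumed member
    [GroupId.ACT_TIMES_GRAD]: [DerivedScalarType.MLP_ACT_TIMES_GRAD], // assumed member
  },
  topAndBottomK: 10,
};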
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/MultipleTopKDerivedScalarsResponseData.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { MirroredNodeIndex } from "./MirroredNodeIndex";
8 | import type { ProcessingResponseDataType } from "./ProcessingResponseDataType";
9 | import type { Tensor0D } from "./Tensor0D";
10 | import type { Tensor1D } from "./Tensor1D";
11 | import type { Tensor2D } from "./Tensor2D";
12 | import type { Tensor3D } from "./Tensor3D";
13 | 
14 | /**
15 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
16 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
17 |  * camelCase names.
18 |  */
19 | export type MultipleTopKDerivedScalarsResponseData = {
20 |   responseDataType?: ProcessingResponseDataType;
21 |   activationsByGroupId: Record<string, Array<number>>;
22 |   nodeIndices: Array<MirroredNodeIndex>;
23 |   vocabTokenStringsForIndices?: Array<string>;
24 |   intermediateSumActivationsByDstByGroupId: Record<
25 |     string,
26 |     Record<string, Tensor0D | Tensor1D | Tensor2D | Tensor3D>
27 |   >;
28 | };
29 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/NeuronDatasetMetadata.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type NeuronDatasetMetadata = {
13 |   shortName: string;
14 |   derivedScalarType: string;
15 |   userVisibleName: string;
16 |   neuronDatasetPath: string;
17 | };
18 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/NeuronRecordResponse.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { TokenAndScalar } from "./TokenAndScalar";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type NeuronRecordResponse = {
15 |   dataset: string;
16 |   maxActivation: number;
17 |   topActivations: Array<Array<TokenAndScalar>>;
18 |   randomSample: Array<Array<TokenAndScalar>>;
19 | };
20 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/NodeAblation.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { MirroredNodeIndex } from "./MirroredNodeIndex";
8 | 
9 | /**
10 |  * A specification for ablating a node.
11 |  *
12 |  * This data structure is used by the client. The server converts it to an AblationSpec.
13 |  */
14 | export type NodeAblation = {
15 |   nodeIndex: MirroredNodeIndex;
16 |   value: number;
17 | };
18 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/NodeIdAndDatasets.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedScalarType } from "./DerivedScalarType";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type NodeIdAndDatasets = {
15 |   dst: DerivedScalarType;
16 |   layerIndex: number;
17 |   activationIndex: number;
18 |   datasets: Array<string>;
19 | };
20 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/NodeToTrace.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { AttentionTraceType } from "./AttentionTraceType";
8 | import type { MirroredNodeIndex } from "./MirroredNodeIndex";
9 | import type { MirroredTraceConfig } from "./MirroredTraceConfig";
10 | 
11 | /**
12 |  * A specification for tracing a node.
13 |  *
14 |  * This data structure is used by the client. The server converts it to an activation index and
15 |  * an ablation spec.
16 |  *
17 |  * In the case of tracing through attention value, there can be up to two NodeToTrace
18 |  * objects: one upstream and one downstream. First, a gradient is computed with respect to the
19 |  * downstream node. Then, the direct effect of the upstream (attention) node on that downstream
20 |  * node is computed. Then, the gradient is computed with respect to that direct effect, propagated
21 |  * through V.
22 |  */
23 | export type NodeToTrace = {
24 |   nodeIndex: MirroredNodeIndex;
25 |   attentionTraceType?: AttentionTraceType;
26 |   downstreamTraceConfig?: MirroredTraceConfig;
27 | };
28 | 
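Following the comment above, tracing through attention V starts from an upstream attention node. A minimal sketch of that upstream half (all indices, and the tensorIndices layout, are made up for illustration):

import type { NodeToTrace } from "./NodeToTrace";
import { AttentionTraceType } from "./AttentionTraceType";
import { NodeType } from "./NodeType";
import { PassType } from "./PassType";

// Trace attention head 3 in layer 5 through its value pathway.
const upstream: NodeToTrace = {
  nodeIndex: {
    nodeType: NodeType.ATTENTION_HEAD,
    tensorIndices: [10, 2, 3], // hypothetical layout: [query token, key token, head]
    layerIndex: 5,
    passType: PassType.FORWARD,
  },
  attentionTraceType: AttentionTraceType.V,
};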
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/NodeType.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * A "node" is defined as a model component associated with a scalar activation per
9 |  * token or per token pair. The canonical example is an MLP neuron. An activation
10 |  * for which the NodeType is defined has the node as the last dimension of the
11 |  * activation tensor.
12 |  */
13 | export enum NodeType {
14 |   ATTENTION_HEAD = "attention_head",
15 |   QK_CHANNEL = "qk_channel",
16 |   V_CHANNEL = "v_channel",
17 |   MLP_NEURON = "mlp_neuron",
18 |   AUTOENCODER_LATENT = "autoencoder_latent",
19 |   MLP_AUTOENCODER_LATENT = "mlp_autoencoder_latent",
20 |   ATTENTION_AUTOENCODER_LATENT = "attention_autoencoder_latent",
21 |   AUTOENCODER_LATENT_BY_TOKEN_PAIR = "autoencoder_latent_by_token_pair",
22 |   LAYER = "layer",
23 |   RESIDUAL_STREAM_CHANNEL = "residual_stream_channel",
24 |   VOCAB_TOKEN = "vocab_token",
25 | }
26 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/PassType.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * An enumeration.
9 |  */
10 | export enum PassType {
11 |   FORWARD = "forward",
12 |   BACKWARD = "backward",
13 | }
14 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/PreOrPostAct.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Specifies whether to trace from pre- or post-nonlinearity
9 |  */
10 | export enum PreOrPostAct {
11 |   PRE = "pre",
12 |   POST = "post",
13 | }
14 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ProcessingResponseDataType.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * An enumeration.
9 |  */
10 | export enum ProcessingResponseDataType {
11 |   DERIVED_SCALARS_RESPONSE_DATA = "derived_scalars_response_data",
12 |   DERIVED_ATTENTION_SCALARS_RESPONSE_DATA = "derived_attention_scalars_response_data",
13 |   MULTIPLE_TOP_K_DERIVED_SCALARS_RESPONSE_DATA = "multiple_top_k_derived_scalars_response_data",
14 |   SCORED_TOKENS_RESPONSE_DATA = "scored_tokens_response_data",
15 |   TOKEN_PAIR_ATTRIBUTION_RESPONSE_DATA = "token_pair_attribution_response_data",
16 | }
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ScoreRequest.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { DerivedScalarType } from "./DerivedScalarType";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type ScoreRequest = {
15 |   dst: DerivedScalarType;
16 |   layerIndex: number;
17 |   activationIndex: number;
18 |   datasets: Array<string>;
19 |   explanation: string;
20 |   maxSequences?: number;
21 | };
22 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ScoreResult.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type ScoreResult = {
13 |   score: number;
14 |   datasetPath: string;
15 | };
16 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ScoredTokensRequestSpec.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { TokenScoringType } from "./TokenScoringType";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type ScoredTokensRequestSpec = {
15 |   specType?: ScoredTokensRequestSpec.specType;
16 |   tokenScoringType: TokenScoringType;
17 |   numTokens: number;
18 |   dependsOnSpecName: string;
19 | };
20 | 
21 | export namespace ScoredTokensRequestSpec {
22 |   export enum specType {
23 |     SCORED_TOKENS_REQUEST_SPEC = "scored_tokens_request_spec",
24 |   }
25 | }
26 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ScoredTokensResponseData.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { MirroredNodeIndex } from "./MirroredNodeIndex";
8 | import type { ProcessingResponseDataType } from "./ProcessingResponseDataType";
9 | import type { TopTokens } from "./TopTokens";
10 | 
11 | /**
12 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
13 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
14 |  * camelCase names.
15 |  */
16 | export type ScoredTokensResponseData = {
17 |   responseDataType?: ProcessingResponseDataType;
18 |   nodeIndices: Array<MirroredNodeIndex>;
19 |   topTokensList: Array<TopTokens>;
20 | };
21 | 
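A ScoreRequest bundles what the scorer needs to evaluate one explanation against stored activations. A sketch (the DerivedScalarType member is an assumption; the dataset path is a placeholder):

import type { ScoreRequest } from "./ScoreRequest";
import { DerivedScalarType } from "./DerivedScalarType";

const scoreRequest: ScoreRequest = {
  dst: DerivedScalarType.MLP_POST_ACT, // assumed enum member
  layerIndex: 9,
  activationIndex: 2342,
  datasets: ["https://example.com/path/to/neuron/dataset"], // placeholder path
  explanation: "fires on words related to France",
  maxSequences: 5,
};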
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TdbRequestSpec.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { ComponentTypeForAttention } from "./ComponentTypeForAttention";
8 | import type { ComponentTypeForMlp } from "./ComponentTypeForMlp";
9 | import type { NodeAblation } from "./NodeAblation";
10 | import type { NodeToTrace } from "./NodeToTrace";
11 | 
12 | /**
13 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
14 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
15 |  * camelCase names.
16 |  */
17 | export type TdbRequestSpec = {
18 |   specType?: TdbRequestSpec.specType;
19 |   prompt: string;
20 |   targetTokens: Array<string>;
21 |   distractorTokens: Array<string>;
22 |   componentTypeForMlp: ComponentTypeForMlp;
23 |   componentTypeForAttention: ComponentTypeForAttention;
24 |   topAndBottomKForNodeTable: number;
25 |   hideEarlyLayersWhenAblating: boolean;
26 |   nodeAblations?: Array<NodeAblation>;
27 |   upstreamNodeToTrace?: NodeToTrace;
28 |   downstreamNodeToTrace?: NodeToTrace;
29 | };
30 | 
31 | export namespace TdbRequestSpec {
32 |   export enum specType {
33 |     TDB_REQUEST_SPEC = "tdb_request_spec",
34 |   }
35 | }
36 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/Tensor0D.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { TensorType } from "./TensorType";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type Tensor0D = {
15 |   tensorType?: TensorType;
16 |   value: number;
17 | };
18 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/Tensor1D.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { TensorType } from "./TensorType";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type Tensor1D = {
15 |   tensorType?: TensorType;
16 |   value: Array<number>;
17 | };
18 | 
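TdbRequestSpec is the top-level payload a Transformer Debugger sub-request carries (see BatchedTdbRequest above). A minimal sketch with no ablations or tracing (the prompt and tokens are illustrative):

import type { TdbRequestSpec } from "./TdbRequestSpec";
import { ComponentTypeForAttention } from "./ComponentTypeForAttention";
import { ComponentTypeForMlp } from "./ComponentTypeForMlp";

const tdbRequest: TdbRequestSpec = {
  prompt: "When Mary and John went to the store, John gave a drink to",
  targetTokens: [" Mary"],
  distractorTokens: [" John"],
  componentTypeForMlp: ComponentTypeForMlp.NEURON,
  componentTypeForAttention: ComponentTypeForAttention.ATTENTION_HEAD,
  topAndBottomKForNodeTable: 10,
  hideEarlyLayersWhenAblating: false,
};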
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/Tensor2D.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { TensorType } from "./TensorType";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type Tensor2D = {
15 |   tensorType?: TensorType;
16 |   value: Array<Array<number>>;
17 | };
18 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/Tensor3D.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { TensorType } from "./TensorType";
8 | 
9 | /**
10 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
11 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
12 |  * camelCase names.
13 |  */
14 | export type Tensor3D = {
15 |   tensorType?: TensorType;
16 |   value: Array<Array<Array<number>>>;
17 | };
18 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TensorType.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * An enumeration.
9 |  */
10 | export enum TensorType {
11 |   TENSOR_0D = "tensor_0d",
12 |   TENSOR_1D = "tensor_1d",
13 |   TENSOR_2D = "tensor_2d",
14 |   TENSOR_3D = "tensor_3d",
15 | }
16 | 
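The four tensor models travel as a union discriminated by tensorType; because the tag is optional, handlers typically switch on it and cast. A sketch:

import type { Tensor0D } from "./Tensor0D";
import type { Tensor1D } from "./Tensor1D";
import type { Tensor2D } from "./Tensor2D";
import type { Tensor3D } from "./Tensor3D";
import { TensorType } from "./TensorType";

type AnyTensor = Tensor0D | Tensor1D | Tensor2D | Tensor3D;

// Sum every element regardless of rank.
function tensorSum(t: AnyTensor): number {
  switch (t.tensorType) {
    case TensorType.TENSOR_0D:
      return (t as Tensor0D).value;
    case TensorType.TENSOR_1D:
      return (t as Tensor1D).value.reduce((a, b) => a + b, 0);
    case TensorType.TENSOR_2D:
      return (t as Tensor2D).value.flat().reduce((a, b) => a + b, 0);
    case TensorType.TENSOR_3D:
      return (t as Tensor3D).value.flat(2).reduce((a, b) => a + b, 0);
    default:
      return 0; // tag absent: nothing to narrow on
  }
}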
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TokenAndAttentionScalars.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type TokenAndAttentionScalars = {
13 |   token: string;
14 |   scalars: Array<number>;
15 |   normalizedScalars: Array<number>;
16 |   totalScalarIn: number;
17 |   normalizedTotalScalarIn: number;
18 |   maxScalarIn: number;
19 |   normalizedMaxScalarIn: number;
20 |   totalScalarOut: number;
21 |   normalizedTotalScalarOut: number;
22 |   maxScalarOut: number;
23 |   normalizedMaxScalarOut: number;
24 | };
25 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TokenAndScalar.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type TokenAndScalar = {
13 |   token: string;
14 |   scalar: number;
15 |   normalizedScalar: number;
16 | };
17 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TokenPairAttributionRequestSpec.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type TokenPairAttributionRequestSpec = {
13 |   specType?: TokenPairAttributionRequestSpec.specType;
14 |   numTokensAttendedTo: number;
15 |   dependsOnSpecName: string;
16 | };
17 | 
18 | export namespace TokenPairAttributionRequestSpec {
19 |   export enum specType {
20 |     TOKEN_PAIR_ATTRIBUTION_REQUEST_SPEC = "token_pair_attribution_request_spec",
21 |   }
22 | }
23 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TokenPairAttributionResponseData.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | import type { MirroredNodeIndex } from "./MirroredNodeIndex";
8 | import type { ProcessingResponseDataType } from "./ProcessingResponseDataType";
9 | import type { TopTokensAttendedTo } from "./TopTokensAttendedTo";
10 | 
11 | /**
12 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
13 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
14 |  * camelCase names.
15 |  */
16 | export type TokenPairAttributionResponseData = {
17 |   responseDataType?: ProcessingResponseDataType;
18 |   nodeIndices: Array<MirroredNodeIndex>;
19 |   topTokensAttendedToList: Array<TopTokensAttendedTo>;
20 | };
21 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TokenScoringType.ts:
--------------------------------------------------------------------------------
1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
2 | 
3 | /* istanbul ignore file */
4 | /* tslint:disable */
5 | /* eslint-disable */
6 | 
7 | /**
8 |  * Methods by which vocab tokens may be scored.
9 |  */
10 | export enum TokenScoringType {
11 |   UPVOTED_OUTPUT_TOKENS = "upvoted_output_tokens",
12 |   INPUT_TOKENS_THAT_UPVOTE_MLP = "input_tokens_that_upvote_mlp",
13 |   INPUT_TOKENS_THAT_UPVOTE_ATTN_Q = "input_tokens_that_upvote_attn_q",
14 |   INPUT_TOKENS_THAT_UPVOTE_ATTN_K = "input_tokens_that_upvote_attn_k",
15 | }
16 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TokenScoringType.ts:
--------------------------------------------------------------------------------
 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
 2 | 
 3 | /* istanbul ignore file */
 4 | /* tslint:disable */
 5 | /* eslint-disable */
 6 | 
 7 | /**
 8 |  * Methods by which vocab tokens may be scored.
 9 |  */
10 | export enum TokenScoringType {
11 |   UPVOTED_OUTPUT_TOKENS = "upvoted_output_tokens",
12 |   INPUT_TOKENS_THAT_UPVOTE_MLP = "input_tokens_that_upvote_mlp",
13 |   INPUT_TOKENS_THAT_UPVOTE_ATTN_Q = "input_tokens_that_upvote_attn_q",
14 |   INPUT_TOKENS_THAT_UPVOTE_ATTN_K = "input_tokens_that_upvote_attn_k",
15 | }
16 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TopTokens.ts:
--------------------------------------------------------------------------------
 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
 2 | 
 3 | /* istanbul ignore file */
 4 | /* tslint:disable */
 5 | /* eslint-disable */
 6 | 
 7 | import type { TokenAndScalar } from "./TokenAndScalar";
 8 | 
 9 | /**
10 |  * Contains two lists of tokens and associated scalars: one for the highest-scoring tokens and one
11 |  * for the lowest-scoring tokens, according to some way of scoring tokens. For example, this could
12 |  * be used to represent the top upvoted and downvoted "logit lens" tokens. An instance of this
13 |  * class is scoped to a single node. The set of tokens eligible for scoring is typically just the
14 |  * model's entire vocabulary. Each list is sorted from largest to smallest absolute value for the
15 |  * associated scalar.
16 |  */
17 | export type TopTokens = {
18 |   top: Array<TokenAndScalar>;
19 |   bottom: Array<TokenAndScalar>;
20 | };
21 | 
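A made-up instance illustrating the sorting convention described in the comment above: each list is ordered by descending absolute value of the scalar:

```ts
import type { TopTokens } from "./TopTokens";

// Illustrative "logit lens"-style data for a single node.
const example: TopTokens = {
  top: [
    { token: " cat", scalar: 4.2, normalizedScalar: 1.0 },
    { token: " dog", scalar: 3.1, normalizedScalar: 0.74 },
  ],
  bottom: [
    { token: " the", scalar: -3.9, normalizedScalar: -0.93 },
    { token: " a", scalar: -2.0, normalizedScalar: -0.48 },
  ],
};
```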
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/TopTokensAttendedTo.ts:
--------------------------------------------------------------------------------
 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
 2 | 
 3 | /* istanbul ignore file */
 4 | /* tslint:disable */
 5 | /* eslint-disable */
 6 | 
 7 | /**
 8 |  * Base model that will automatically generate camelCase aliases for fields. Python code can use
 9 |  * either snake_case or camelCase names. When Typescript code is generated, it will only use the
10 |  * camelCase names.
11 |  */
12 | export type TopTokensAttendedTo = {
13 |   tokenIndices: Array<number>;
14 |   attributions: Array<number>;
15 | };
16 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/models/ValidationError.ts:
--------------------------------------------------------------------------------
 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
 2 | 
 3 | /* istanbul ignore file */
 4 | /* tslint:disable */
 5 | /* eslint-disable */
 6 | 
 7 | export type ValidationError = {
 8 |   loc: Array<string | number>;
 9 |   msg: string;
10 |   type: string;
11 | };
12 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/services/ExplainerService.ts:
--------------------------------------------------------------------------------
 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
 2 | 
 3 | /* istanbul ignore file */
 4 | /* tslint:disable */
 5 | /* eslint-disable */
 6 | import type { ExplanationResult } from "../models/ExplanationResult";
 7 | import type { NodeIdAndDatasets } from "../models/NodeIdAndDatasets";
 8 | import type { ScoreRequest } from "../models/ScoreRequest";
 9 | import type { ScoreResult } from "../models/ScoreResult";
10 | 
11 | import type { CancelablePromise } from "../core/CancelablePromise";
12 | import { OpenAPI } from "../core/OpenAPI";
13 | import { request as __request } from "../core/request";
14 | 
15 | export class ExplainerService {
16 |   /**
17 |    * Explain
18 |    * @param requestBody
19 |    * @returns ExplanationResult Successful Response
20 |    * @throws ApiError
21 |    */
22 |   public static explainerExplain(
23 |     requestBody: NodeIdAndDatasets
24 |   ): CancelablePromise<ExplanationResult> {
25 |     return __request(OpenAPI, {
26 |       method: "POST",
27 |       url: "/explain",
28 |       body: requestBody,
29 |       mediaType: "application/json",
30 |       errors: {
31 |         422: `Validation Error`,
32 |       },
33 |     });
34 |   }
35 | 
36 |   /**
37 |    * Score
38 |    * @param requestBody
39 |    * @returns ScoreResult Successful Response
40 |    * @throws ApiError
41 |    */
42 |   public static explainerScore(requestBody: ScoreRequest): CancelablePromise<ScoreResult> {
43 |     return __request(OpenAPI, {
44 |       method: "POST",
45 |       url: "/score",
46 |       body: requestBody,
47 |       mediaType: "application/json",
48 |       errors: {
49 |         422: `Validation Error`,
50 |       },
51 |     });
52 |   }
53 | }
54 | 
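A hedged usage sketch (not code from this repo): every generated service, including the HelloWorldService, InferenceService, MemoryService, and ReadService that follow, exposes the same static-method pattern. Assuming `OpenAPI.BASE` points at a running activation server (the address below is an assumption), a call looks like this; the request object itself is built elsewhere:

```ts
import type { NodeIdAndDatasets } from "../models/NodeIdAndDatasets";
import { OpenAPI } from "../core/OpenAPI";
import { ExplainerService } from "./ExplainerService";

// Assumed server address; adjust to wherever the activation server is running.
OpenAPI.BASE = "http://localhost:8000";

async function explainNode(request: NodeIdAndDatasets) {
  // Resolves to an ExplanationResult on success; rejects with an ApiError
  // (e.g. a 422 validation error) otherwise.
  return await ExplainerService.explainerExplain(request);
}
```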
--------------------------------------------------------------------------------
/neuron_viewer/src/client/services/HelloWorldService.ts:
--------------------------------------------------------------------------------
 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
 2 | 
 3 | /* istanbul ignore file */
 4 | /* tslint:disable */
 5 | /* eslint-disable */
 6 | import type { GroupId } from "../models/GroupId";
 7 | 
 8 | import type { CancelablePromise } from "../core/CancelablePromise";
 9 | import { OpenAPI } from "../core/OpenAPI";
10 | import { request as __request } from "../core/request";
11 | 
12 | export class HelloWorldService {
13 |   /**
14 |    * Read Root
15 |    * @returns string Successful Response
16 |    * @throws ApiError
17 |    */
18 |   public static helloWorldReadRoot(): CancelablePromise<Record<string, string>> {
19 |     return __request(OpenAPI, {
20 |       method: "GET",
21 |       url: "/",
22 |     });
23 |   }
24 | 
25 |   /**
26 |    * Force Client Code Generation
27 |    * @param groupId
28 |    * @returns any Successful Response
29 |    * @throws ApiError
30 |    */
31 |   public static helloWorldForceClientCodeGeneration(groupId: GroupId): CancelablePromise<any> {
32 |     return __request(OpenAPI, {
33 |       method: "GET",
34 |       url: "/force_client_code_generation",
35 |       query: {
36 |         group_id: groupId,
37 |       },
38 |       errors: {
39 |         422: `Validation Error`,
40 |       },
41 |     });
42 |   }
43 | }
44 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/services/InferenceService.ts:
--------------------------------------------------------------------------------
  1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
  2 | 
  3 | /* istanbul ignore file */
  4 | /* tslint:disable */
  5 | /* eslint-disable */
  6 | import type { BatchedRequest } from "../models/BatchedRequest";
  7 | import type { BatchedResponse } from "../models/BatchedResponse";
  8 | import type { BatchedTdbRequest } from "../models/BatchedTdbRequest";
  9 | import type { DerivedAttentionScalarsRequest } from "../models/DerivedAttentionScalarsRequest";
 10 | import type { DerivedAttentionScalarsResponse } from "../models/DerivedAttentionScalarsResponse";
 11 | import type { DerivedScalarsRequest } from "../models/DerivedScalarsRequest";
 12 | import type { DerivedScalarsResponse } from "../models/DerivedScalarsResponse";
 13 | import type { ModelInfoResponse } from "../models/ModelInfoResponse";
 14 | import type { MultipleTopKDerivedScalarsRequest } from "../models/MultipleTopKDerivedScalarsRequest";
 15 | import type { MultipleTopKDerivedScalarsResponse } from "../models/MultipleTopKDerivedScalarsResponse";
 16 | 
 17 | import type { CancelablePromise } from "../core/CancelablePromise";
 18 | import { OpenAPI } from "../core/OpenAPI";
 19 | import { request as __request } from "../core/request";
 20 | 
 21 | export class InferenceService {
 22 |   /**
 23 |    * Derived Scalars
 24 |    * @param requestBody
 25 |    * @returns DerivedScalarsResponse Successful Response
 26 |    * @throws ApiError
 27 |    */
 28 |   public static inferenceDerivedScalars(
 29 |     requestBody: DerivedScalarsRequest
 30 |   ): CancelablePromise<DerivedScalarsResponse> {
 31 |     return __request(OpenAPI, {
 32 |       method: "POST",
 33 |       url: "/derived_scalars",
 34 |       body: requestBody,
 35 |       mediaType: "application/json",
 36 |       errors: {
 37 |         422: `Validation Error`,
 38 |       },
 39 |     });
 40 |   }
 41 | 
 42 |   /**
 43 |    * Derived Attention Scalars
 44 |    * @param requestBody
 45 |    * @returns DerivedAttentionScalarsResponse Successful Response
 46 |    * @throws ApiError
 47 |    */
 48 |   public static inferenceDerivedAttentionScalars(
 49 |     requestBody: DerivedAttentionScalarsRequest
 50 |   ): CancelablePromise<DerivedAttentionScalarsResponse> {
 51 |     return __request(OpenAPI, {
 52 |       method: "POST",
 53 |       url: "/derived_attention_scalars",
 54 |       body: requestBody,
 55 |       mediaType: "application/json",
 56 |       errors: {
 57 |         422: `Validation Error`,
 58 |       },
 59 |     });
 60 |   }
 61 | 
 62 |   /**
 63 |    * Multiple Top K Derived Scalars
 64 |    * @param requestBody
 65 |    * @returns MultipleTopKDerivedScalarsResponse Successful Response
 66 |    * @throws ApiError
 67 |    */
 68 |   public static inferenceMultipleTopKDerivedScalars(
 69 |     requestBody: MultipleTopKDerivedScalarsRequest
 70 |   ): CancelablePromise<MultipleTopKDerivedScalarsResponse> {
 71 |     return __request(OpenAPI, {
 72 |       method: "POST",
 73 |       url: "/multiple_top_k_derived_scalars",
 74 |       body: requestBody,
 75 |       mediaType: "application/json",
 76 |       errors: {
 77 |         422: `Validation Error`,
 78 |       },
 79 |     });
 80 |   }
 81 | 
 82 |   /**
 83 |    * Batched
 84 |    * @param requestBody
 85 |    * @returns BatchedResponse Successful Response
 86 |    * @throws ApiError
 87 |    */
 88 |   public static inferenceBatched(requestBody: BatchedRequest): CancelablePromise<BatchedResponse> {
 89 |     return __request(OpenAPI, {
 90 |       method: "POST",
 91 |       url: "/batched",
 92 |       body: requestBody,
 93 |       mediaType: "application/json",
 94 |       errors: {
 95 |         422: `Validation Error`,
 96 |       },
 97 |     });
 98 |   }
 99 | 
100 |   /**
101 |    * Batched Tdb
102 |    * @param requestBody
103 |    * @returns BatchedResponse Successful Response
104 |    * @throws ApiError
105 |    */
106 |   public static inferenceBatchedTdb(
107 |     requestBody: BatchedTdbRequest
108 |   ): CancelablePromise<BatchedResponse> {
109 |     return __request(OpenAPI, {
110 |       method: "POST",
111 |       url: "/batched_tdb",
112 |       body: requestBody,
113 |       mediaType: "application/json",
114 |       errors: {
115 |         422: `Validation Error`,
116 |       },
117 |     });
118 |   }
119 | 
120 |   /**
121 |    * Model Info
122 |    * @returns ModelInfoResponse Successful Response
123 |    * @throws ApiError
124 |    */
125 |   public static inferenceModelInfo(): CancelablePromise<ModelInfoResponse> {
126 |     return __request(OpenAPI, {
127 |       method: "POST",
128 |       url: "/model_info",
129 |     });
130 |   }
131 | }
132 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/services/MemoryService.ts:
--------------------------------------------------------------------------------
 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
 2 | 
 3 | /* istanbul ignore file */
 4 | /* tslint:disable */
 5 | /* eslint-disable */
 6 | import type { CancelablePromise } from "../core/CancelablePromise";
 7 | import { OpenAPI } from "../core/OpenAPI";
 8 | import { request as __request } from "../core/request";
 9 | 
10 | export class MemoryService {
11 |   /**
12 |    * Dump Memory Snapshot
13 |    * @returns string Successful Response
14 |    * @throws ApiError
15 |    */
16 |   public static memoryDumpMemorySnapshot(): CancelablePromise<string> {
17 |     return __request(OpenAPI, {
18 |       method: "GET",
19 |       url: "/dump_memory_snapshot",
20 |     });
21 |   }
22 | }
23 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/client/services/ReadService.ts:
--------------------------------------------------------------------------------
 1 | // Auto-generated code. Do not edit! See neuron_explainer/activation_server/README.md to learn how to regenerate it.
 2 | 
 3 | /* istanbul ignore file */
 4 | /* tslint:disable */
 5 | /* eslint-disable */
 6 | import type { AttentionHeadRecordResponse } from "../models/AttentionHeadRecordResponse";
 7 | import type { AttributedScoredExplanation } from "../models/AttributedScoredExplanation";
 8 | import type { ExistingExplanationsRequest } from "../models/ExistingExplanationsRequest";
 9 | import type { NeuronDatasetMetadata } from "../models/NeuronDatasetMetadata";
10 | import type { NeuronRecordResponse } from "../models/NeuronRecordResponse";
11 | import type { NodeIdAndDatasets } from "../models/NodeIdAndDatasets";
12 | 
13 | import type { CancelablePromise } from "../core/CancelablePromise";
14 | import { OpenAPI } from "../core/OpenAPI";
15 | import { request as __request } from "../core/request";
16 | 
17 | export class ReadService {
18 |   /**
19 |    * Existing Explanations
20 |    * @param requestBody
21 |    * @returns AttributedScoredExplanation Successful Response
22 |    * @throws ApiError
23 |    */
24 |   public static readExistingExplanations(
25 |     requestBody: ExistingExplanationsRequest
26 |   ): CancelablePromise<Array<AttributedScoredExplanation>> {
27 |     return __request(OpenAPI, {
28 |       method: "POST",
29 |       url: "/existing_explanations",
30 |       body: requestBody,
31 |       mediaType: "application/json",
32 |       errors: {
33 |         422: `Validation Error`,
34 |       },
35 |     });
36 |   }
37 | 
38 |   /**
39 |    * Neuron Record
40 |    * @param requestBody
41 |    * @returns NeuronRecordResponse Successful Response
42 |    * @throws ApiError
43 |    */
44 |   public static readNeuronRecord(
45 |     requestBody: NodeIdAndDatasets
46 |   ): CancelablePromise<NeuronRecordResponse> {
47 |     return __request(OpenAPI, {
48 |       method: "POST",
49 |       url: "/neuron_record",
50 |       body: requestBody,
51 |       mediaType: "application/json",
52 |       errors: {
53 |         422: `Validation Error`,
54 |       },
55 |     });
56 |   }
57 | 
58 |   /**
59 |    * Attention Head Record
60 |    * @param requestBody
61 |    * @returns AttentionHeadRecordResponse Successful Response
62 |    * @throws ApiError
63 |    */
64 |   public static readAttentionHeadRecord(
65 |     requestBody: NodeIdAndDatasets
66 |   ): CancelablePromise<AttentionHeadRecordResponse> {
67 |     return __request(OpenAPI, {
68 |       method: "POST",
69 |       url: "/attention_head_record",
70 |       body: requestBody,
71 |       mediaType: "application/json",
72 |       errors: {
73 |         422: `Validation Error`,
74 |       },
75 |     });
76 |   }
77 | 
78 |   /**
79 |    * Neuron Datasets Metadata
80 |    * @returns NeuronDatasetMetadata Successful Response
81 |    * @throws ApiError
82 |    */
83 |   public static readNeuronDatasetsMetadata(): CancelablePromise<Array<NeuronDatasetMetadata>> {
84 |     return __request(OpenAPI, {
85 |       method: "POST",
86 |       url: "/neuron_datasets_metadata",
87 |     });
88 |   }
89 | }
90 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/colors.ts:
--------------------------------------------------------------------------------
 1 | export type Color = { r: number; g: number; b: number };
 2 | 
 3 | export function interpolateColor(colorLeft: Color, colorRight: Color, value: number): Color {
 4 |   const color = {
 5 |     r: Math.round(colorLeft.r + (colorRight.r - colorLeft.r) * value),
 6 |     g: Math.round(colorLeft.g + (colorRight.g - colorLeft.g) * value),
 7 |     b: Math.round(colorLeft.b + (colorRight.b - colorLeft.b) * value),
 8 |   };
 9 |   return color;
10 | }
11 | 
12 | export function getInterpolatedColor(colors: Color[], boundaries: number[], value: number): Color {
13 |   const index = boundaries.findIndex((boundary) => boundary >= value);
14 |   const colorIndex = Math.max(0, index - 1);
15 |   const colorLeft = colors[colorIndex];
16 |   const colorRight = colors[colorIndex + 1];
17 |   const boundaryLeft = boundaries[colorIndex];
18 |   const boundaryRight = boundaries[colorIndex + 1];
19 |   const ratio = (value - boundaryLeft) / (boundaryRight - boundaryLeft);
20 |   const color = interpolateColor(colorLeft, colorRight, ratio);
21 |   return color;
22 | }
23 | 
24 | export const BLANK_COLOR: Color = { r: 255, g: 255, b: 255 }; // white
25 | export const MAX_OUT_COLOR: Color = { r: 0, g: 255, b: 255 }; // cyan
26 | export const MAX_IN_COLOR: Color = { r: 255, g: 0, b: 255 }; // magenta
27 | 
28 | export function subtractiveMix(color1: Color, color2: Color) {
29 |   // Invert the colors
30 |   let inverted1 = { r: 255 - color1.r, g: 255 - color1.g, b: 255 - color1.b };
31 |   let inverted2 = { r: 255 - color2.r, g: 255 - color2.g, b: 255 - color2.b };
32 | 
33 |   // Mix them additively
34 |   let mixed = {
35 |     r: Math.min(inverted1.r + inverted2.r, 255),
36 |     g: Math.min(inverted1.g + inverted2.g, 255),
37 |     b: Math.min(inverted1.b + inverted2.b, 255),
38 |   };
39 | 
40 |   // Invert the result
41 |   return { r: 255 - mixed.r, g: 255 - mixed.g, b: 255 - mixed.b };
42 | }
43 | 
44 | export const DEFAULT_BOUNDARIES = [0, 1];
45 | 
46 | export const DEFAULT_COLORS: Color[] = [
47 |   { r: 255, g: 255, b: 255 },
48 |   { r: 0, g: 255, b: 0 },
49 | ];
50 | 
51 | export const POSITIVE_NEGATIVE_COLORS: Color[] = [
52 |   { r: 255, g: 0, b: 105 },
53 |   { r: 255, g: 255, b: 255 },
54 |   { r: 0, g: 255, b: 0 },
55 | ];
56 | 
57 | export const POSITIVE_NEGATIVE_BOUNDARIES = [0, 0.5, 1];
58 | 
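A small worked example of the interpolation helpers above: with `POSITIVE_NEGATIVE_COLORS` and boundaries `[0, 0.5, 1]`, the value 0.75 falls halfway through the white-to-green segment:

```ts
import {
  getInterpolatedColor,
  POSITIVE_NEGATIVE_BOUNDARIES,
  POSITIVE_NEGATIVE_COLORS,
} from "./colors";

// 0.75 is midway between the 0.5 and 1 boundaries, so the result is the
// midpoint of white and green: { r: 128, g: 255, b: 128 }.
const color = getInterpolatedColor(POSITIVE_NEGATIVE_COLORS, POSITIVE_NEGATIVE_BOUNDARIES, 0.75);
```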

--------------------------------------------------------------------------------
/neuron_viewer/src/commonUiComponents.tsx:
--------------------------------------------------------------------------------
 1 | import { ReactNode } from "react";
 2 | 
 3 | export const SectionTitle = ({ children }: { children: ReactNode }) => {
 4 |   return <div className="text-xl font-bold">{children}</div>;
 5 | };
 6 | 
 7 | export const defaultSmallButtonClasses =
 8 |   "text-black no-underline text-base border-black border font-sans bg-white font-small inline-block rounded " +
 9 |   "transition-all duration-200 ease-in-out hover:bg-gray-100 disabled:bg-gray-300 disabled:cursor-not-allowed px-1 py-0";
10 | 
11 | export const ShowAllOrFewerButton = ({
12 |   showAll,
13 |   setShowAll,
14 | }: {
15 |   showAll: boolean;
16 |   setShowAll: (showAll: boolean) => void;
17 | }) => {
18 |   return (
19 |     <button className={defaultSmallButtonClasses} onClick={() => setShowAll(!showAll)}>
20 |       {showAll ? "Show fewer" : "Show all"}
21 |     </button>
22 |   );
23 | };
24 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/heatmapGrid.tsx:
--------------------------------------------------------------------------------
 1 | import { TokenSequenceAndScalars } from "./types";
 2 | import TokenHeatmap from "./tokenHeatmap";
 3 | 
 4 | export type HeatmapGridProps = {
 5 |   tokenSequences: TokenSequenceAndScalars[] | null;
 6 |   expectedNumSequences: number;
 7 | };
 8 | 
 9 | const HeatmapGrid: React.FC<HeatmapGridProps> = ({ tokenSequences, expectedNumSequences }) => {
10 |   if (tokenSequences === null) {
11 |     // No tokens specified means that we're rendering a skeleton without any content in it. The
12 |     // width and minHeight specified below ensure that the skeleton is the same size as the actual
13 |     // heatmap grid. We specify an array of nulls here, which the TokenHeatmap component will
14 |     // handle gracefully.
15 |     tokenSequences = new Array(expectedNumSequences).fill(null);
16 |   }
17 |   return (
18 |     <div>
19 |       <div className="grid grid-cols-2 gap-2">
20 |         {tokenSequences.map((tokenSequence, i) => (
21 |           <div
22 |             key={i}
23 |             className="mb-4"
24 |             style={{ width: "450px", minHeight: "150px" }}
25 |           >
26 |             <TokenHeatmap tokenSequence={tokenSequence} />
27 |           </div>
28 |         ))}
29 |       </div>
30 |     </div>
31 |   );
32 | };
33 | 
34 | export default HeatmapGrid;
35 | 
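The null-skeleton convention used by HeatmapGrid (and by HeatmapGrid2d below) can be driven like this; the prop values here are placeholders:

```tsx
import HeatmapGrid from "./heatmapGrid";

// While data is still loading, pass null plus the expected count; the grid then
// renders equally sized placeholder cells so the layout doesn't shift on load.
export const LoadingState = () => (
  <HeatmapGrid tokenSequences={null} expectedNumSequences={6} />
);
```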
--------------------------------------------------------------------------------
/neuron_viewer/src/heatmapGrid2d.tsx:
--------------------------------------------------------------------------------
 1 | import TokenHeatmap2d from "./tokenHeatmap2d";
 2 | import { TokenSequenceAndAttentionScalars } from "./types";
 3 | 
 4 | export type HeatmapGrid2dProps = {
 5 |   tokenSequenceAndAttentionScalars: TokenSequenceAndAttentionScalars[] | null;
 6 |   expectedNumSequences: number;
 7 | };
 8 | 
 9 | const HeatmapGrid2d: React.FC<HeatmapGrid2dProps> = ({
10 |   tokenSequenceAndAttentionScalars,
11 |   expectedNumSequences,
12 | }) => {
13 |   console.log("in HeatmapGrid2d");
14 |   if (tokenSequenceAndAttentionScalars === null) {
15 |     // No tokens specified means that we're rendering a skeleton without any content in it. The
16 |     // width and minHeight specified below ensure that the skeleton is the same size as the actual
17 |     // heatmap grid. We specify an array of nulls here, which the TokenHeatmap component will
18 |     // handle gracefully.
19 |     tokenSequenceAndAttentionScalars = new Array(expectedNumSequences).fill(null);
20 |   }
21 |   return (
22 |     <div>
23 |       <div className="grid grid-cols-2 gap-2">
24 |         {tokenSequenceAndAttentionScalars.map((tokenSequenceAndAttentionScalars, i) => (
25 |           <div
26 |             key={i}
27 |             className="mb-4"
28 |             style={{ width: "450px", minHeight: "150px" }}
29 |           >
30 |             <TokenHeatmap2d tokenSequenceAndAttentionScalars={tokenSequenceAndAttentionScalars} />
31 |           </div>
32 |         ))}
33 |       </div>
34 |     </div>
35 |   );
36 | };
37 | 
38 | export default HeatmapGrid2d;
39 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/images.d.ts:
--------------------------------------------------------------------------------
1 | declare module "*.png" {
2 |   const value: any;
3 |   export = value;
4 | }
5 | 
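This ambient declaration is what lets the TypeScript components import PNG assets directly. A sketch using the swap.png that ships in the tree:

```ts
// Without images.d.ts this import would fail to type-check; with it, the
// imported value is the bundled asset reference (typed as `any`).
import swapIcon from "./TransformerDebugger/cards/prompt/swap.png";

console.log(swapIcon);
```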
--------------------------------------------------------------------------------
/neuron_viewer/src/index.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |   margin: 0;
 3 |   font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu",
 4 |     "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif;
 5 |   -webkit-font-smoothing: antialiased;
 6 |   -moz-osx-font-smoothing: grayscale;
 7 | }
 8 | 
 9 | code {
10 |   font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New", monospace;
11 | }
12 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1" />
 6 |     <link rel="icon" href="favicon.ico" />
 7 |     <meta name="theme-color" content="#000000" />
 8 |     <title>Neuron Viewer</title>
 9 |   </head>
10 |   <body>
11 | 
12 |     <div id="root"></div>
13 | 
23 |     <script type="module" src="index.tsx"></script>
24 |   </body>
25 | </html>
26 | 
27 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from "react";
 2 | import ReactDOM from "react-dom/client";
 3 | import "./index.css";
 4 | import App from "./App";
 5 | import { BrowserRouter } from "react-router-dom";
 6 | const root = ReactDOM.createRoot(document.getElementById("root")!);
 7 | 
 8 | root.render(
 9 |   <React.StrictMode>
10 |     <BrowserRouter>
11 |       <App />
12 |     </BrowserRouter>
13 |   </React.StrictMode>
14 | );
15 | 
--------------------------------------------------------------------------------
/neuron_viewer/src/modelInteractions.tsx:
--------------------------------------------------------------------------------
 1 | // React component that handles interactions with a subject or explainer model. Two interactions are
 2 | // currently supported:
 3 | // 1) Getting activations for a particular prompt from the subject model.
 4 | // 2) Scoring explanations using an explainer model.
 5 | 
 6 | import React, { ChangeEvent, KeyboardEvent } from "react";
 7 | import { SectionTitle, defaultSmallButtonClasses } from "./commonUiComponents";
 8 | 
 9 | type ModelInteractionsProps = {
10 |   onGetActivationsForPrompt: (value: string) => void;
11 |   // Scoring explanations is currently only possible for neurons and autoencoder latents. We don't
12 |   // show this option for attention heads.
13 |   onScoreExplanation?: (value: string) => void;
14 | };
15 | 
16 | const GET_ACTIVATIONS_FOR_PROMPT = "Get activations for prompt";
17 | const SCORE_EXPLANATION = "Score explanation";
18 | 
19 | const ModelInteractions: React.FC<ModelInteractionsProps> = ({
20 |   onGetActivationsForPrompt,
21 |   onScoreExplanation,
22 | }) => {
23 |   const [textboxValue, setTextboxValue] = React.useState("");
24 |   const toolkit = [GET_ACTIVATIONS_FOR_PROMPT];
25 |   if (onScoreExplanation) {
26 |     toolkit.push(SCORE_EXPLANATION);
27 |   }
28 | 
29 |   const [activeTool, setActiveTool] = React.useState(
30 |     toolkit.length === 0 ? null : toolkit[0]
31 |   );
32 |   if (toolkit.length === 0) {
33 |     return null;
34 |   }
35 | 
36 |   return (
37 |     <>
38 |       <SectionTitle>Interact with the model</SectionTitle>
39 |       <div>
40 |         <div className="flex gap-1">
41 |           {toolkit.map((tool, i) => (
42 |             <div key={i}>
43 |               {toolkit.length > 1 ? (
44 |                 <button className={defaultSmallButtonClasses} onClick={() => setActiveTool(tool)}>
45 |                   {tool}
46 |                 </button>
47 |               ) : (
48 |                 <span>{tool}</span> // If there's only one tool, don't make it a button.
49 |               )}
50 |             </div>
51 |           ))}
52 |         </div>
53 |         <input
54 |           type="text"