├── .gitignore ├── README.md ├── __init__.py ├── case_study_caught.ipynb ├── case_study_citations.ipynb ├── case_study_local_context.ipynb ├── greaterthan.ipynb ├── interp-comparison.ipynb ├── requirements.txt ├── restricted blind case studies.ipynb ├── sae_training ├── __init__.py ├── activations_store.py ├── config.py ├── geom_median │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── fig │ │ ├── gm.jpg │ │ └── illustration.png │ ├── pyproject.toml │ ├── setup.py │ └── src │ │ └── geom_median │ │ ├── __init__.py │ │ ├── numpy │ │ ├── __init__.py │ │ ├── main.py │ │ ├── utils.py │ │ ├── weiszfeld_array.py │ │ └── weiszfeld_list_of_array.py │ │ └── torch │ │ ├── __init__.py │ │ ├── main.py │ │ ├── utils.py │ │ ├── weiszfeld_array.py │ │ └── weiszfeld_list_of_array.py ├── optim.py ├── requirements.txt ├── sparse_autoencoder.py ├── train_sae_on_language_model.py └── utils.py ├── setup.sh ├── sweep.ipynb ├── train_transcoder.py ├── transcoder_circuits ├── __init__.py ├── circuit_analysis.py ├── feature_dashboards.py └── replacement_ctx.py └── walkthrough.ipynb /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /case_study_caught.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/case_study_caught.ipynb -------------------------------------------------------------------------------- /case_study_citations.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/case_study_citations.ipynb -------------------------------------------------------------------------------- /case_study_local_context.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/case_study_local_context.ipynb -------------------------------------------------------------------------------- /greaterthan.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/greaterthan.ipynb -------------------------------------------------------------------------------- /interp-comparison.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/interp-comparison.ipynb -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/requirements.txt -------------------------------------------------------------------------------- /restricted blind case studies.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/restricted blind case studies.ipynb -------------------------------------------------------------------------------- /sae_training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sae_training/activations_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/activations_store.py -------------------------------------------------------------------------------- /sae_training/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/config.py -------------------------------------------------------------------------------- /sae_training/geom_median/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/.gitignore -------------------------------------------------------------------------------- /sae_training/geom_median/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/LICENSE -------------------------------------------------------------------------------- /sae_training/geom_median/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/README.md -------------------------------------------------------------------------------- /sae_training/geom_median/__init__.py: -------------------------------------------------------------------------------- 1 | from .src.geom_median import numpy, torch 2 | -------------------------------------------------------------------------------- /sae_training/geom_median/fig/gm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/fig/gm.jpg -------------------------------------------------------------------------------- /sae_training/geom_median/fig/illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/fig/illustration.png -------------------------------------------------------------------------------- /sae_training/geom_median/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/pyproject.toml -------------------------------------------------------------------------------- /sae_training/geom_median/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/setup.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/numpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/numpy/__init__.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/numpy/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/numpy/main.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/numpy/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/numpy/utils.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/numpy/weiszfeld_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/numpy/weiszfeld_array.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/numpy/weiszfeld_list_of_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/numpy/weiszfeld_list_of_array.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/torch/__init__.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/torch/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/torch/main.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/torch/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/torch/utils.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/torch/weiszfeld_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/torch/weiszfeld_array.py -------------------------------------------------------------------------------- /sae_training/geom_median/src/geom_median/torch/weiszfeld_list_of_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/geom_median/src/geom_median/torch/weiszfeld_list_of_array.py -------------------------------------------------------------------------------- /sae_training/optim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/optim.py -------------------------------------------------------------------------------- /sae_training/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/requirements.txt -------------------------------------------------------------------------------- /sae_training/sparse_autoencoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/sparse_autoencoder.py -------------------------------------------------------------------------------- /sae_training/train_sae_on_language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/train_sae_on_language_model.py -------------------------------------------------------------------------------- /sae_training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sae_training/utils.py -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/setup.sh -------------------------------------------------------------------------------- /sweep.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/sweep.ipynb -------------------------------------------------------------------------------- /train_transcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/train_transcoder.py -------------------------------------------------------------------------------- /transcoder_circuits/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /transcoder_circuits/circuit_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/transcoder_circuits/circuit_analysis.py -------------------------------------------------------------------------------- /transcoder_circuits/feature_dashboards.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/transcoder_circuits/feature_dashboards.py -------------------------------------------------------------------------------- /transcoder_circuits/replacement_ctx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/transcoder_circuits/replacement_ctx.py -------------------------------------------------------------------------------- /walkthrough.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobdunefsky/transcoder_circuits/HEAD/walkthrough.ipynb --------------------------------------------------------------------------------