├── .gitignore ├── LICENSE ├── README.md ├── align_data ├── __init__.py ├── alignment_newsletter │ ├── __init__.py │ └── alignment_newsletter.py ├── analysis │ └── count_tokens.py ├── arbital │ ├── __init__.py │ └── arbital.py ├── arxiv_papers │ ├── __init__.py │ └── arxiv_papers.py ├── audio_transcripts │ ├── __init__.py │ └── audio_transcripts.py ├── blogs │ ├── __init__.py │ ├── gwern_blog.py │ ├── markdown_blogs.py │ ├── medium_blog.py │ ├── other_blog.py │ └── wp_blog.py ├── common │ ├── alignment_dataset.py │ └── utils.py ├── distill │ ├── __init__.py │ └── distill.py ├── ebooks │ ├── __init__.py │ ├── agentmodels.py │ ├── gdrive_ebooks.py │ └── mdebooks.py ├── gdocs │ ├── __init__.py │ └── gdocs.py ├── greaterwrong │ ├── __init__.py │ └── greaterwrong.py ├── nonarxiv_papers │ ├── __init__.py │ └── nonarxiv_papers.py ├── postprocess │ └── postprocess.py ├── reports │ ├── __init__.py │ └── reports.py └── stampy │ ├── __init__.py │ └── stampy.py ├── config.json ├── data └── raw │ ├── ai-alignment-papers.csv │ ├── alignment_newsletter.xlsx │ ├── distill_posts │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Discussion and Author Responses.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Learning from Incorrectly Labeled Data.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Robust Feature Leakage.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Two Examples of Useful, Non-Robust Features.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarial Example Researchers Need to Expand What is Meant by 'Robustness'.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarial Examples are Just Bugs, Too.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarially Robust Neural Style Transfer.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Learning from Incorrectly Labeled Data.html │ ├── A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Two Examples of Useful, Non-Robust Features.html │ ├── A Gentle Introduction to Graph Neural Networks.html │ ├── A Visual Exploration of Gaussian Processes.html │ ├── AI Safety Needs Social Scientists.html │ ├── Activation Atlas.html │ ├── Adversarial Reprogramming of Neural Cellular Automata.html │ ├── Adversarially Robust Neural Style Transfer.html │ ├── An Overview of Early Vision in InceptionV1.html │ ├── Attention and Augmented Recurrent Neural Networks.html │ ├── Branch Specialization.html │ ├── Communicating with Interactive Articles.html │ ├── Computing Receptive Fields of Convolutional Neural Networks.html │ ├── Curve Circuits.html │ ├── Curve Detectors.html │ ├── Deconvolution and Checkerboard Artifacts.html │ ├── Differentiable Image Parameterizations.html │ ├── Experiments in Handwriting with a Neural Network.html │ ├── Exploring Bayesian Optimization.html │ ├── Feature Visualization.html │ ├── Feature-wise transformations.html │ ├── Growing Neural Cellular Automata.html │ ├── High-Low Frequency Detectors.html │ ├── How to Use t-SNE Effectively.html │ ├── Multimodal Neurons in Artificial Neural Networks.html │ ├── Naturally Occurring Equivariance in Neural Networks.html │ ├── Open Questions about Generative Adversarial Networks.html │ ├── Research Debt.html │ ├── Robust Feature Leakage.html │ ├── Self-Organising Textures.html │ ├── Self-classifying MNIST Digits.html │ ├── Sequence Modeling with CTC.html │ ├── The Building Blocks of Interpretability.html │ ├── The Paths Perspective on Value Learning.html │ ├── Thread Differentiable Self-organizing Systems.html │ ├── Thread_Circuits.html │ ├── Understanding Convolutions on Graphs.html │ ├── Understanding RL Vision.html │ ├── Using Artificial Intelligence to Augment Human Intelligence.html │ ├── Visualizing Neural Networks with the Grand Tour.html │ ├── Visualizing Weights.html │ ├── Visualizing memorization in RNNs.html │ ├── Visualizing the Impact of Feature Attribution Baselines.html │ ├── Weight Banding.html │ ├── Why Momentum Really Works.html │ └── Zoom In An Introduction to Circuits.html │ └── stampy │ └── stampy.csv ├── imgs └── dataset_sources.PNG ├── main.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/README.md -------------------------------------------------------------------------------- /align_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/__init__.py -------------------------------------------------------------------------------- /align_data/alignment_newsletter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/alignment_newsletter/__init__.py -------------------------------------------------------------------------------- /align_data/alignment_newsletter/alignment_newsletter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/alignment_newsletter/alignment_newsletter.py -------------------------------------------------------------------------------- /align_data/analysis/count_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/analysis/count_tokens.py -------------------------------------------------------------------------------- /align_data/arbital/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/arbital/__init__.py -------------------------------------------------------------------------------- /align_data/arbital/arbital.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/arbital/arbital.py -------------------------------------------------------------------------------- /align_data/arxiv_papers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/arxiv_papers/__init__.py -------------------------------------------------------------------------------- /align_data/arxiv_papers/arxiv_papers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/arxiv_papers/arxiv_papers.py -------------------------------------------------------------------------------- /align_data/audio_transcripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/audio_transcripts/__init__.py -------------------------------------------------------------------------------- /align_data/audio_transcripts/audio_transcripts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/audio_transcripts/audio_transcripts.py -------------------------------------------------------------------------------- /align_data/blogs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/blogs/__init__.py -------------------------------------------------------------------------------- /align_data/blogs/gwern_blog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/blogs/gwern_blog.py -------------------------------------------------------------------------------- /align_data/blogs/markdown_blogs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/blogs/markdown_blogs.py -------------------------------------------------------------------------------- /align_data/blogs/medium_blog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/blogs/medium_blog.py -------------------------------------------------------------------------------- /align_data/blogs/other_blog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/blogs/other_blog.py -------------------------------------------------------------------------------- /align_data/blogs/wp_blog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/blogs/wp_blog.py -------------------------------------------------------------------------------- /align_data/common/alignment_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/common/alignment_dataset.py -------------------------------------------------------------------------------- /align_data/common/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/common/utils.py -------------------------------------------------------------------------------- /align_data/distill/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/distill/__init__.py -------------------------------------------------------------------------------- /align_data/distill/distill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/distill/distill.py -------------------------------------------------------------------------------- /align_data/ebooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/ebooks/__init__.py -------------------------------------------------------------------------------- /align_data/ebooks/agentmodels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/ebooks/agentmodels.py -------------------------------------------------------------------------------- /align_data/ebooks/gdrive_ebooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/ebooks/gdrive_ebooks.py -------------------------------------------------------------------------------- /align_data/ebooks/mdebooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/ebooks/mdebooks.py -------------------------------------------------------------------------------- /align_data/gdocs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/gdocs/__init__.py -------------------------------------------------------------------------------- /align_data/gdocs/gdocs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/gdocs/gdocs.py -------------------------------------------------------------------------------- /align_data/greaterwrong/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/greaterwrong/__init__.py -------------------------------------------------------------------------------- /align_data/greaterwrong/greaterwrong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/greaterwrong/greaterwrong.py -------------------------------------------------------------------------------- /align_data/nonarxiv_papers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/nonarxiv_papers/__init__.py -------------------------------------------------------------------------------- /align_data/nonarxiv_papers/nonarxiv_papers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/nonarxiv_papers/nonarxiv_papers.py -------------------------------------------------------------------------------- /align_data/postprocess/postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/postprocess/postprocess.py -------------------------------------------------------------------------------- /align_data/reports/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/reports/__init__.py -------------------------------------------------------------------------------- /align_data/reports/reports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/reports/reports.py -------------------------------------------------------------------------------- /align_data/stampy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/stampy/__init__.py -------------------------------------------------------------------------------- /align_data/stampy/stampy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/align_data/stampy/stampy.py -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/config.json -------------------------------------------------------------------------------- /data/raw/ai-alignment-papers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/ai-alignment-papers.csv -------------------------------------------------------------------------------- /data/raw/alignment_newsletter.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/alignment_newsletter.xlsx -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Discussion and Author Responses.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Discussion and Author Responses.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Learning from Incorrectly Labeled Data.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Learning from Incorrectly Labeled Data.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Robust Feature Leakage.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Robust Feature Leakage.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Two Examples of Useful, Non-Robust Features.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features' Two Examples of Useful, Non-Robust Features.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarial Example Researchers Need to Expand What is Meant by 'Robustness'.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarial Example Researchers Need to Expand What is Meant by 'Robustness'.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarial Examples are Just Bugs, Too.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarial Examples are Just Bugs, Too.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarially Robust Neural Style Transfer.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Adversarially Robust Neural Style Transfer.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Learning from Incorrectly Labeled Data.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Learning from Incorrectly Labeled Data.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Two Examples of Useful, Non-Robust Features.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Discussion of 'Adversarial Examples Are Not Bugs, They Are Features'_ Two Examples of Useful, Non-Robust Features.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Gentle Introduction to Graph Neural Networks.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Gentle Introduction to Graph Neural Networks.html -------------------------------------------------------------------------------- /data/raw/distill_posts/A Visual Exploration of Gaussian Processes.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/A Visual Exploration of Gaussian Processes.html -------------------------------------------------------------------------------- /data/raw/distill_posts/AI Safety Needs Social Scientists.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/AI Safety Needs Social Scientists.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Activation Atlas.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Activation Atlas.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Adversarial Reprogramming of Neural Cellular Automata.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Adversarial Reprogramming of Neural Cellular Automata.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Adversarially Robust Neural Style Transfer.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Adversarially Robust Neural Style Transfer.html -------------------------------------------------------------------------------- /data/raw/distill_posts/An Overview of Early Vision in InceptionV1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/An Overview of Early Vision in InceptionV1.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Attention and Augmented Recurrent Neural Networks.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Attention and Augmented Recurrent Neural Networks.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Branch Specialization.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Branch Specialization.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Communicating with Interactive Articles.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Communicating with Interactive Articles.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Computing Receptive Fields of Convolutional Neural Networks.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Computing Receptive Fields of Convolutional Neural Networks.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Curve Circuits.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Curve Circuits.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Curve Detectors.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Curve Detectors.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Deconvolution and Checkerboard Artifacts.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Deconvolution and Checkerboard Artifacts.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Differentiable Image Parameterizations.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Differentiable Image Parameterizations.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Experiments in Handwriting with a Neural Network.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Experiments in Handwriting with a Neural Network.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Exploring Bayesian Optimization.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Exploring Bayesian Optimization.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Feature Visualization.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Feature Visualization.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Feature-wise transformations.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Feature-wise transformations.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Growing Neural Cellular Automata.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Growing Neural Cellular Automata.html -------------------------------------------------------------------------------- /data/raw/distill_posts/High-Low Frequency Detectors.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/High-Low Frequency Detectors.html -------------------------------------------------------------------------------- /data/raw/distill_posts/How to Use t-SNE Effectively.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/How to Use t-SNE Effectively.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Multimodal Neurons in Artificial Neural Networks.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Multimodal Neurons in Artificial Neural Networks.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Naturally Occurring Equivariance in Neural Networks.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Naturally Occurring Equivariance in Neural Networks.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Open Questions about Generative Adversarial Networks.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Open Questions about Generative Adversarial Networks.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Research Debt.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Research Debt.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Robust Feature Leakage.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Robust Feature Leakage.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Self-Organising Textures.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Self-Organising Textures.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Self-classifying MNIST Digits.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Self-classifying MNIST Digits.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Sequence Modeling with CTC.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Sequence Modeling with CTC.html -------------------------------------------------------------------------------- /data/raw/distill_posts/The Building Blocks of Interpretability.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/The Building Blocks of Interpretability.html -------------------------------------------------------------------------------- /data/raw/distill_posts/The Paths Perspective on Value Learning.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/The Paths Perspective on Value Learning.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Thread Differentiable Self-organizing Systems.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Thread Differentiable Self-organizing Systems.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Thread_Circuits.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Thread_Circuits.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Understanding Convolutions on Graphs.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Understanding Convolutions on Graphs.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Understanding RL Vision.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Understanding RL Vision.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Using Artificial Intelligence to Augment Human Intelligence.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Using Artificial Intelligence to Augment Human Intelligence.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Visualizing Neural Networks with the Grand Tour.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Visualizing Neural Networks with the Grand Tour.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Visualizing Weights.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Visualizing Weights.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Visualizing memorization in RNNs.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Visualizing memorization in RNNs.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Visualizing the Impact of Feature Attribution Baselines.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Visualizing the Impact of Feature Attribution Baselines.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Weight Banding.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Weight Banding.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Why Momentum Really Works.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Why Momentum Really Works.html -------------------------------------------------------------------------------- /data/raw/distill_posts/Zoom In An Introduction to Circuits.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/distill_posts/Zoom In An Introduction to Circuits.html -------------------------------------------------------------------------------- /data/raw/stampy/stampy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/data/raw/stampy/stampy.csv -------------------------------------------------------------------------------- /imgs/dataset_sources.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/imgs/dataset_sources.PNG -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/main.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moirage/alignment-research-dataset/HEAD/setup.py --------------------------------------------------------------------------------