├── .gitignore ├── README.md ├── benchmarks ├── README.md ├── arc.py ├── datasets │ ├── math_test.csv │ └── mmlu.csv ├── gpqa.py ├── gsm8k.py ├── mathematics.py └── mmlu.py ├── honeycomb ├── README.md ├── critique.txt ├── prompt.txt ├── queries.csv ├── queries.ipynb ├── queries.py └── utils.py ├── langchain ├── .env.example ├── .gitignore ├── README.md ├── inspect_langchain.py ├── wikipedia.jsonl └── wikipedia.py ├── requirements.txt └── slides ├── images ├── inspect-honeycomb-validate.png ├── inspect-mathmatics.png ├── inspect-notebook-eval.png ├── inspect-view-answers.png ├── inspect-view-honeycomb-critique-score.png ├── inspect-view-honeycomb-critique.png ├── inspect-view-honeycomb-validate-prompt-2.png ├── inspect-view-honeycomb-validate-prompt.png ├── inspect-view-honeycomb-validate.png ├── inspect-view-messages.png ├── inspect-view-scoring.png ├── inspect-wikipedia-eval.png ├── inspect-wikipedia-explanation.png ├── inspect-wikipedia-messages.png ├── inspect-wikipedia-scoring.png └── log-incorrect.png ├── inspect.qmd └── intro-to-inspect.pdf /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/arc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/benchmarks/arc.py -------------------------------------------------------------------------------- /benchmarks/datasets/math_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/benchmarks/datasets/math_test.csv -------------------------------------------------------------------------------- /benchmarks/datasets/mmlu.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/benchmarks/datasets/mmlu.csv -------------------------------------------------------------------------------- /benchmarks/gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/benchmarks/gpqa.py -------------------------------------------------------------------------------- /benchmarks/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/benchmarks/gsm8k.py -------------------------------------------------------------------------------- /benchmarks/mathematics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/benchmarks/mathematics.py -------------------------------------------------------------------------------- /benchmarks/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/benchmarks/mmlu.py -------------------------------------------------------------------------------- /honeycomb/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/honeycomb/README.md -------------------------------------------------------------------------------- /honeycomb/critique.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/honeycomb/critique.txt -------------------------------------------------------------------------------- /honeycomb/prompt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/honeycomb/prompt.txt -------------------------------------------------------------------------------- /honeycomb/queries.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/honeycomb/queries.csv -------------------------------------------------------------------------------- /honeycomb/queries.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/honeycomb/queries.ipynb -------------------------------------------------------------------------------- /honeycomb/queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/honeycomb/queries.py -------------------------------------------------------------------------------- /honeycomb/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/honeycomb/utils.py -------------------------------------------------------------------------------- /langchain/.env.example: -------------------------------------------------------------------------------- 1 | TAVILY_API_KEY=your-tavily-api-key 2 | 3 | -------------------------------------------------------------------------------- /langchain/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .venv/ 3 | -------------------------------------------------------------------------------- /langchain/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/langchain/README.md -------------------------------------------------------------------------------- /langchain/inspect_langchain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/langchain/inspect_langchain.py -------------------------------------------------------------------------------- /langchain/wikipedia.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/langchain/wikipedia.jsonl -------------------------------------------------------------------------------- /langchain/wikipedia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/langchain/wikipedia.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/requirements.txt -------------------------------------------------------------------------------- /slides/images/inspect-honeycomb-validate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-honeycomb-validate.png -------------------------------------------------------------------------------- /slides/images/inspect-mathmatics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-mathmatics.png -------------------------------------------------------------------------------- /slides/images/inspect-notebook-eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-notebook-eval.png -------------------------------------------------------------------------------- /slides/images/inspect-view-answers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-view-answers.png -------------------------------------------------------------------------------- /slides/images/inspect-view-honeycomb-critique-score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-view-honeycomb-critique-score.png -------------------------------------------------------------------------------- /slides/images/inspect-view-honeycomb-critique.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-view-honeycomb-critique.png -------------------------------------------------------------------------------- /slides/images/inspect-view-honeycomb-validate-prompt-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-view-honeycomb-validate-prompt-2.png -------------------------------------------------------------------------------- /slides/images/inspect-view-honeycomb-validate-prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-view-honeycomb-validate-prompt.png -------------------------------------------------------------------------------- /slides/images/inspect-view-honeycomb-validate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-view-honeycomb-validate.png -------------------------------------------------------------------------------- /slides/images/inspect-view-messages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-view-messages.png -------------------------------------------------------------------------------- /slides/images/inspect-view-scoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-view-scoring.png -------------------------------------------------------------------------------- /slides/images/inspect-wikipedia-eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-wikipedia-eval.png -------------------------------------------------------------------------------- /slides/images/inspect-wikipedia-explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-wikipedia-explanation.png -------------------------------------------------------------------------------- /slides/images/inspect-wikipedia-messages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-wikipedia-messages.png -------------------------------------------------------------------------------- /slides/images/inspect-wikipedia-scoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/inspect-wikipedia-scoring.png -------------------------------------------------------------------------------- /slides/images/log-incorrect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/images/log-incorrect.png -------------------------------------------------------------------------------- /slides/inspect.qmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/inspect.qmd -------------------------------------------------------------------------------- /slides/intro-to-inspect.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjallaire/inspect-llm-workshop/HEAD/slides/intro-to-inspect.pdf --------------------------------------------------------------------------------