├── README.md ├── assets └── overall_fig.png ├── evaluation_set ├── flask_evaluation.jsonl └── flask_hard_evaluation.jsonl ├── gpt_review ├── aggregate_difficulty_skill.py ├── aggregate_domain.py ├── aggregate_skill.py ├── gpt4_eval.py ├── outputs │ ├── alpaca_13b.jsonl │ ├── bard_review.jsonl │ ├── chatgpt_review.jsonl │ ├── claude_v1_review.jsonl │ ├── davinci_003_review.jsonl │ ├── gpt4_review.jsonl │ ├── llama2_chat_13b.jsonl │ ├── llama2_chat_70b.jsonl │ ├── tulu_13b_review.jsonl │ ├── tulu_30b_review.jsonl │ ├── tulu_65b_review.jsonl │ ├── tulu_7b_review.jsonl │ ├── vicuna_13b.jsonl │ ├── vicuna_33b.jsonl │ └── wizardlm_13b.jsonl └── src │ ├── prompt.jsonl │ └── reviewer.jsonl ├── input_data └── flask_evaluation_raw.jsonl ├── metadata_annotation ├── difficulty │ ├── difficulty_annotation.py │ └── src │ │ ├── prompt.jsonl │ │ └── reviewer.jsonl ├── domain │ ├── domain_annotation.py │ └── src │ │ ├── domain.json │ │ ├── prompt.jsonl │ │ └── reviewer.jsonl └── skillset │ ├── skillset_annotation.py │ └── src │ ├── prompt.jsonl │ ├── reviewer.jsonl │ └── skillset_description.json ├── model_output ├── conversation.py ├── inference.py ├── load_model.py └── outputs │ ├── alpaca_13b.jsonl │ ├── bard.jsonl │ ├── chatgpt.jsonl │ ├── claude_v1.jsonl │ ├── davinci_003.jsonl │ ├── gpt4.jsonl │ ├── llama2_chat_13b.jsonl │ ├── llama2_chat_70b.jsonl │ ├── tulu_13b.jsonl │ ├── tulu_30b.jsonl │ ├── tulu_65b.jsonl │ ├── tulu_7b.jsonl │ ├── vicuna_13b.jsonl │ ├── vicuna_33b.jsonl │ └── wizardlm_13b.jsonl ├── openai_concurrent.py └── openai_info └── api_info.json /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/README.md -------------------------------------------------------------------------------- /assets/overall_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/assets/overall_fig.png -------------------------------------------------------------------------------- /evaluation_set/flask_evaluation.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/evaluation_set/flask_evaluation.jsonl -------------------------------------------------------------------------------- /evaluation_set/flask_hard_evaluation.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/evaluation_set/flask_hard_evaluation.jsonl -------------------------------------------------------------------------------- /gpt_review/aggregate_difficulty_skill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/aggregate_difficulty_skill.py -------------------------------------------------------------------------------- /gpt_review/aggregate_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/aggregate_domain.py -------------------------------------------------------------------------------- /gpt_review/aggregate_skill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/aggregate_skill.py -------------------------------------------------------------------------------- /gpt_review/gpt4_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/gpt4_eval.py -------------------------------------------------------------------------------- /gpt_review/outputs/alpaca_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/alpaca_13b.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/bard_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/bard_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/chatgpt_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/chatgpt_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/claude_v1_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/claude_v1_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/davinci_003_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/davinci_003_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/gpt4_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/gpt4_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/llama2_chat_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/llama2_chat_13b.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/llama2_chat_70b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/llama2_chat_70b.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/tulu_13b_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/tulu_13b_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/tulu_30b_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/tulu_30b_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/tulu_65b_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/tulu_65b_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/tulu_7b_review.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/tulu_7b_review.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/vicuna_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/vicuna_13b.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/vicuna_33b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/vicuna_33b.jsonl -------------------------------------------------------------------------------- /gpt_review/outputs/wizardlm_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/outputs/wizardlm_13b.jsonl -------------------------------------------------------------------------------- /gpt_review/src/prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/src/prompt.jsonl -------------------------------------------------------------------------------- /gpt_review/src/reviewer.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/gpt_review/src/reviewer.jsonl -------------------------------------------------------------------------------- /input_data/flask_evaluation_raw.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/input_data/flask_evaluation_raw.jsonl -------------------------------------------------------------------------------- /metadata_annotation/difficulty/difficulty_annotation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/difficulty/difficulty_annotation.py -------------------------------------------------------------------------------- /metadata_annotation/difficulty/src/prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/difficulty/src/prompt.jsonl -------------------------------------------------------------------------------- /metadata_annotation/difficulty/src/reviewer.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/difficulty/src/reviewer.jsonl -------------------------------------------------------------------------------- /metadata_annotation/domain/domain_annotation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/domain/domain_annotation.py -------------------------------------------------------------------------------- /metadata_annotation/domain/src/domain.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/domain/src/domain.json -------------------------------------------------------------------------------- /metadata_annotation/domain/src/prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/domain/src/prompt.jsonl -------------------------------------------------------------------------------- /metadata_annotation/domain/src/reviewer.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/domain/src/reviewer.jsonl -------------------------------------------------------------------------------- /metadata_annotation/skillset/skillset_annotation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/skillset/skillset_annotation.py -------------------------------------------------------------------------------- /metadata_annotation/skillset/src/prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/skillset/src/prompt.jsonl -------------------------------------------------------------------------------- /metadata_annotation/skillset/src/reviewer.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/skillset/src/reviewer.jsonl -------------------------------------------------------------------------------- /metadata_annotation/skillset/src/skillset_description.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/metadata_annotation/skillset/src/skillset_description.json -------------------------------------------------------------------------------- /model_output/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/conversation.py -------------------------------------------------------------------------------- /model_output/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/inference.py -------------------------------------------------------------------------------- /model_output/load_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/load_model.py -------------------------------------------------------------------------------- /model_output/outputs/alpaca_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/alpaca_13b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/bard.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/bard.jsonl -------------------------------------------------------------------------------- /model_output/outputs/chatgpt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/chatgpt.jsonl -------------------------------------------------------------------------------- /model_output/outputs/claude_v1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/claude_v1.jsonl -------------------------------------------------------------------------------- /model_output/outputs/davinci_003.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/davinci_003.jsonl -------------------------------------------------------------------------------- /model_output/outputs/gpt4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/gpt4.jsonl -------------------------------------------------------------------------------- /model_output/outputs/llama2_chat_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/llama2_chat_13b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/llama2_chat_70b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/llama2_chat_70b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/tulu_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/tulu_13b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/tulu_30b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/tulu_30b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/tulu_65b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/tulu_65b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/tulu_7b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/tulu_7b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/vicuna_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/vicuna_13b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/vicuna_33b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/vicuna_33b.jsonl -------------------------------------------------------------------------------- /model_output/outputs/wizardlm_13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/model_output/outputs/wizardlm_13b.jsonl -------------------------------------------------------------------------------- /openai_concurrent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/openai_concurrent.py -------------------------------------------------------------------------------- /openai_info/api_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaistAI/FLASK/HEAD/openai_info/api_info.json --------------------------------------------------------------------------------