├── .gitignore ├── README.md ├── environment.yml ├── evaluation_configs ├── base.json ├── check.json ├── exec.json └── pro.json ├── evaluators ├── __init__.py ├── codellama_evaluator.py ├── codellama_evaluator_py.py ├── openai_evaluator.py ├── openai_evaluator_py.py ├── oracle_evaluator.py └── oracle_evaluator_py.py ├── generation_configs ├── greedy.json └── temp_sampling.json ├── generators ├── __init__.py └── hf_generator.py ├── inference.py ├── inference_py.py ├── intrin_eval.py ├── intrin_eval_py.py ├── overview.png ├── planning_methods ├── __init__.py ├── greedy.py ├── greedy_py.py ├── iter_correction.py ├── iter_correction_py.py ├── mc_tot.py ├── mc_tot_py.py ├── rerank.py └── rerank_py.py ├── preprocess.py ├── preprocess_evaluator.py ├── preprocess_evaluator_cls.py ├── preprocess_evaluator_prompt.py ├── retrievers └── bm25.py ├── scripts ├── e2e_eval │ ├── e2e_eval_gsm8k_ic.sh │ ├── e2e_eval_gsm8k_oracle.sh │ ├── e2e_eval_gsm8k_rr.sh │ ├── e2e_eval_gsm8k_ts.sh │ ├── e2e_eval_text2sql_ic.sh │ ├── e2e_eval_text2sql_oracle.sh │ ├── e2e_eval_text2sql_rr.sh │ └── e2e_eval_text2sql_ts.sh ├── intrin_eval │ ├── intrin_eval_gsm8k.sh │ ├── intrin_eval_text2sql.sh │ └── intrin_eval_text2sql_ft.sh ├── preproc │ ├── preproc_evaluator.sh │ ├── preproc_evaluator_cls.sh │ ├── preproc_evaluator_prompt.sh │ └── preproc_raw.sh └── train_evaluator.sh ├── train_evaluator.py └── utils ├── __init__.py ├── constants.py ├── exec_eval.py ├── exec_py.py ├── inference_utils.py ├── normalize_sql.py ├── parse.py └── train_utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/README.md -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/environment.yml -------------------------------------------------------------------------------- /evaluation_configs/base.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluation_configs/base.json -------------------------------------------------------------------------------- /evaluation_configs/check.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluation_configs/check.json -------------------------------------------------------------------------------- /evaluation_configs/exec.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluation_configs/exec.json -------------------------------------------------------------------------------- /evaluation_configs/pro.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluation_configs/pro.json -------------------------------------------------------------------------------- /evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluators/codellama_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluators/codellama_evaluator.py -------------------------------------------------------------------------------- /evaluators/codellama_evaluator_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluators/codellama_evaluator_py.py -------------------------------------------------------------------------------- /evaluators/openai_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluators/openai_evaluator.py -------------------------------------------------------------------------------- /evaluators/openai_evaluator_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluators/openai_evaluator_py.py -------------------------------------------------------------------------------- /evaluators/oracle_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluators/oracle_evaluator.py -------------------------------------------------------------------------------- /evaluators/oracle_evaluator_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/evaluators/oracle_evaluator_py.py -------------------------------------------------------------------------------- /generation_configs/greedy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/generation_configs/greedy.json -------------------------------------------------------------------------------- /generation_configs/temp_sampling.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/generation_configs/temp_sampling.json -------------------------------------------------------------------------------- /generators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /generators/hf_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/generators/hf_generator.py -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/inference.py -------------------------------------------------------------------------------- /inference_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/inference_py.py -------------------------------------------------------------------------------- /intrin_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/intrin_eval.py -------------------------------------------------------------------------------- /intrin_eval_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/intrin_eval_py.py -------------------------------------------------------------------------------- /overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/overview.png -------------------------------------------------------------------------------- /planning_methods/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /planning_methods/greedy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/planning_methods/greedy.py -------------------------------------------------------------------------------- /planning_methods/greedy_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/planning_methods/greedy_py.py -------------------------------------------------------------------------------- /planning_methods/iter_correction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/planning_methods/iter_correction.py -------------------------------------------------------------------------------- /planning_methods/iter_correction_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/planning_methods/iter_correction_py.py -------------------------------------------------------------------------------- /planning_methods/mc_tot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/planning_methods/mc_tot.py -------------------------------------------------------------------------------- /planning_methods/mc_tot_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/planning_methods/mc_tot_py.py -------------------------------------------------------------------------------- /planning_methods/rerank.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/planning_methods/rerank.py -------------------------------------------------------------------------------- /planning_methods/rerank_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/planning_methods/rerank_py.py -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/preprocess.py -------------------------------------------------------------------------------- /preprocess_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/preprocess_evaluator.py -------------------------------------------------------------------------------- /preprocess_evaluator_cls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/preprocess_evaluator_cls.py -------------------------------------------------------------------------------- /preprocess_evaluator_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/preprocess_evaluator_prompt.py -------------------------------------------------------------------------------- /retrievers/bm25.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/retrievers/bm25.py -------------------------------------------------------------------------------- /scripts/e2e_eval/e2e_eval_gsm8k_ic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/e2e_eval/e2e_eval_gsm8k_ic.sh -------------------------------------------------------------------------------- /scripts/e2e_eval/e2e_eval_gsm8k_oracle.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/e2e_eval/e2e_eval_gsm8k_oracle.sh -------------------------------------------------------------------------------- /scripts/e2e_eval/e2e_eval_gsm8k_rr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/e2e_eval/e2e_eval_gsm8k_rr.sh -------------------------------------------------------------------------------- /scripts/e2e_eval/e2e_eval_gsm8k_ts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/e2e_eval/e2e_eval_gsm8k_ts.sh -------------------------------------------------------------------------------- /scripts/e2e_eval/e2e_eval_text2sql_ic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/e2e_eval/e2e_eval_text2sql_ic.sh -------------------------------------------------------------------------------- /scripts/e2e_eval/e2e_eval_text2sql_oracle.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/e2e_eval/e2e_eval_text2sql_oracle.sh -------------------------------------------------------------------------------- /scripts/e2e_eval/e2e_eval_text2sql_rr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/e2e_eval/e2e_eval_text2sql_rr.sh -------------------------------------------------------------------------------- /scripts/e2e_eval/e2e_eval_text2sql_ts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/e2e_eval/e2e_eval_text2sql_ts.sh -------------------------------------------------------------------------------- /scripts/intrin_eval/intrin_eval_gsm8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/intrin_eval/intrin_eval_gsm8k.sh -------------------------------------------------------------------------------- /scripts/intrin_eval/intrin_eval_text2sql.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/intrin_eval/intrin_eval_text2sql.sh -------------------------------------------------------------------------------- /scripts/intrin_eval/intrin_eval_text2sql_ft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/intrin_eval/intrin_eval_text2sql_ft.sh -------------------------------------------------------------------------------- /scripts/preproc/preproc_evaluator.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/preproc/preproc_evaluator.sh -------------------------------------------------------------------------------- /scripts/preproc/preproc_evaluator_cls.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/preproc/preproc_evaluator_cls.sh -------------------------------------------------------------------------------- /scripts/preproc/preproc_evaluator_prompt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/preproc/preproc_evaluator_prompt.sh -------------------------------------------------------------------------------- /scripts/preproc/preproc_raw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/preproc/preproc_raw.sh -------------------------------------------------------------------------------- /scripts/train_evaluator.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/scripts/train_evaluator.sh -------------------------------------------------------------------------------- /train_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/train_evaluator.py -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/utils/constants.py -------------------------------------------------------------------------------- /utils/exec_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/utils/exec_eval.py -------------------------------------------------------------------------------- /utils/exec_py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/utils/exec_py.py -------------------------------------------------------------------------------- /utils/inference_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/utils/inference_utils.py -------------------------------------------------------------------------------- /utils/normalize_sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/utils/normalize_sql.py -------------------------------------------------------------------------------- /utils/parse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/utils/parse.py -------------------------------------------------------------------------------- /utils/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSU-NLP-Group/llm-planning-eval/HEAD/utils/train_utils.py --------------------------------------------------------------------------------