├── .gitignore
├── LICENSE
├── README.md
├── assets
├── ALCE.png
└── moose.png
├── configs
├── asqa_alpaca-7b_shot1_ndoc3_gtr_default.yaml
├── asqa_alpaca-7b_shot2_ndoc3_gtr_light_inst.yaml
├── asqa_gpt4_shot2_ndoc10_gtr_default.yaml
├── asqa_gpt4_shot2_ndoc20_gtr_default.yaml
├── asqa_gpt4_shot2_ndoc5_gtr_default.yaml
├── asqa_llama-13b_shot2_ndoc10_gtr_extraction.yaml
├── asqa_llama-13b_shot2_ndoc10_gtr_summary.yaml
├── asqa_llama-13b_shot2_ndoc3_gtr_light_inst.yaml
├── asqa_llama-13b_shot2_ndoc3_gtr_oracle_light_inst.yaml
├── asqa_llama-30b_shot2_ndoc3_gtr_light_inst.yaml
├── asqa_llama-7b_shot1_ndoc3_gtr_default.yaml
├── asqa_llama-7b_shot2_ndoc3_gtr_light_inst.yaml
├── asqa_llama2_shot2_ndoc5_gtr_default.yaml
├── asqa_opt-6.7b_shot1_ndoc3_gtr_default.yaml
├── asqa_turbo_shot2_closedbook.yaml
├── asqa_turbo_shot2_ndoc0_gtr_interact_search_ret_all_forcecite.yaml
├── asqa_turbo_shot2_ndoc10_gtr_default.yaml
├── asqa_turbo_shot2_ndoc10_gtr_extraction.yaml
├── asqa_turbo_shot2_ndoc10_gtr_interact_doc_id_extraction.yaml
├── asqa_turbo_shot2_ndoc10_gtr_interact_doc_id_summary.yaml
├── asqa_turbo_shot2_ndoc10_gtr_interact_search_summary.yaml
├── asqa_turbo_shot2_ndoc10_gtr_interact_search_summary_ret_all.yaml
├── asqa_turbo_shot2_ndoc10_gtr_summary.yaml
├── asqa_turbo_shot2_ndoc3_gtr_default.yaml
├── asqa_turbo_shot2_ndoc5_dpr_default.yaml
├── asqa_turbo_shot2_ndoc5_gtr_default.yaml
├── asqa_turbo_shot2_ndoc5_gtr_extraction.yaml
├── asqa_turbo_shot2_ndoc5_gtr_interact_doc_id_extraction.yaml
├── asqa_turbo_shot2_ndoc5_gtr_interact_doc_id_summary.yaml
├── asqa_turbo_shot2_ndoc5_gtr_interact_search_extraction.yaml
├── asqa_turbo_shot2_ndoc5_gtr_interact_search_summary.yaml
├── asqa_turbo_shot2_ndoc5_gtr_light_inst.yaml
├── asqa_turbo_shot2_ndoc5_gtr_summary.yaml
├── asqa_turbo_shot2_ndoc5_oracle_default.yaml
├── eli5_gpt4_shot2_ndoc10_bm25_default.yaml
├── eli5_gpt4_shot2_ndoc20_bm25_default.yaml
├── eli5_gpt4_shot2_ndoc5_bm25_default.yaml
├── eli5_llama-13b_shot2_ndoc10_bm25_extraction.yaml
├── eli5_llama-13b_shot2_ndoc10_bm25_summary.yaml
├── eli5_llama-13b_shot2_ndoc3_bm25_light_inst.yaml
├── eli5_llama-13b_shot2_ndoc3_bm25_oracle_light_inst.yaml
├── eli5_llama2_shot2_ndoc5_bm25_default.yaml
├── eli5_turbo_shot2_closedbook.yaml
├── eli5_turbo_shot2_ndoc0_bm25_interact_search_ret_all_forcecite.yaml
├── eli5_turbo_shot2_ndoc10_bm25_default.yaml
├── eli5_turbo_shot2_ndoc10_bm25_extraction.yaml
├── eli5_turbo_shot2_ndoc10_bm25_interact_doc_id_extraction.yaml
├── eli5_turbo_shot2_ndoc10_bm25_interact_doc_id_summary.yaml
├── eli5_turbo_shot2_ndoc10_bm25_interact_search_extraction.yaml
├── eli5_turbo_shot2_ndoc10_bm25_summary.yaml
├── eli5_turbo_shot2_ndoc5_bm25_default.yaml
├── eli5_turbo_shot2_ndoc5_bm25_extraction.yaml
├── eli5_turbo_shot2_ndoc5_bm25_interact_doc_id_extraction.yaml
├── eli5_turbo_shot2_ndoc5_bm25_interact_doc_id_summary.yaml
├── eli5_turbo_shot2_ndoc5_bm25_interact_search_extraction.yaml
├── eli5_turbo_shot2_ndoc5_bm25_interact_search_summary.yaml
├── eli5_turbo_shot2_ndoc5_bm25_summary.yaml
├── eli5_turbo_shot2_ndoc5_oracle_default.yaml
├── qampari_gpt4_shot2_ndoc10_gtr_default.yaml
├── qampari_gpt4_shot2_ndoc20_gtr_default.yaml
├── qampari_gpt4_shot2_ndoc5_gtr_default.yaml
├── qampari_llama-13b_shot2_ndoc10_gtr_extraction.yaml
├── qampari_llama-13b_shot2_ndoc10_gtr_summary.yaml
├── qampari_llama-13b_shot2_ndoc3_gtr_light_inst.yaml
├── qampari_llama-13b_shot2_ndoc3_gtr_oracle_light_inst.yaml
├── qampari_llama2_shot2_ndoc5_gtr_default.yaml
├── qampari_turbo_shot2_closedbook.yaml
├── qampari_turbo_shot2_ndoc0_gtr_interact_search_ret_all_forcecite.yaml
├── qampari_turbo_shot2_ndoc10_gtr_extraction.yaml
├── qampari_turbo_shot2_ndoc10_gtr_interact_doc_id_extraction.yaml
├── qampari_turbo_shot2_ndoc10_gtr_interact_doc_id_summary.yaml
├── qampari_turbo_shot2_ndoc10_gtr_summary.yaml
├── qampari_turbo_shot2_ndoc5_gtr_default.yaml
├── qampari_turbo_shot2_ndoc5_gtr_extraction.yaml
├── qampari_turbo_shot2_ndoc5_gtr_interact_doc_id_extraction.yaml
├── qampari_turbo_shot2_ndoc5_gtr_interact_doc_id_summary.yaml
├── qampari_turbo_shot2_ndoc5_gtr_summary.yaml
└── qampari_turbo_shot2_ndoc5_oracle_default.yaml
├── download_data.sh
├── eval.py
├── human_eval
├── README.md
├── analyze.py
├── human_eval_citations_completed.json
└── human_eval_utility_completed.json
├── paper
└── ALCE.pdf
├── post_hoc_cite.py
├── prompts
├── asqa_closedbook.json
├── asqa_default.json
├── asqa_interact_doc_id.json
├── asqa_interact_search.json
├── asqa_light_inst.json
├── asqa_sum_or_ext.json
├── eli5_closedbook.json
├── eli5_default.json
├── eli5_interact_doc_id.json
├── eli5_interact_search.json
├── eli5_light_inst.json
├── eli5_sum_or_ext.json
├── qampari_closedbook.json
├── qampari_default.json
├── qampari_interact_doc_id.json
├── qampari_interact_search.json
├── qampari_light_inst.json
└── qampari_sum_or_ext.json
├── retrieval.py
├── run.py
├── searcher.py
├── tools
├── gen_summary.py
└── rerank_outputs.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 | data/
163 | result
164 | *.tar
165 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Princeton Natural Language Processing
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Enabling Large Language Models to Generate Text with Citations
2 |
3 |

*: ALCE is pronounced as /elk/ as ALCE is the Latin word for elk (Europe) or moose (North America).
4 |
5 |
6 |
7 |
8 | This repository contains the code and data for paper [Enabling Large Language Models to Generate Text with Citations](https://arxiv.org/abs/2305.14627).
9 | In this paper, we propose ALCE, a benchmark for **A**utomatic **L**LMs' **C**itation Evaluation.
10 | ALCE contains three datasets: ASQA, QAMPARI, and ELI5.
11 | We provide automatic evaluation code of LLM generations around three dimensions: fluency, correctness, and citation quality.
12 | This repository also includes code to reproduce the baselines in our paper.
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | ## Quick Links
22 |
23 | - [Requirements](#requirements)
24 | - [Data](#data)
25 | - [Code Structure](#code-structure)
26 | - [Reproducing Baselines](#reproducing-baselines)
27 | - [Evaluation](#evaluation)
28 | - [Human Evaluation](#human-evaluation)
29 | - [Bug or Questions](#bug-or-questions)
30 | - [Citation](#citation)
31 |
32 |
33 | ## Requirements
34 |
35 | Please install the latest versions of PyTorch (`torch`), HuggingFace Transformers (`transformers`), HuggingFace Accelerate (`accelerate`), and the OpenAI API package (`openai`). This codebase is tested on
36 | `torch==2.1.0.dev20230514+cu118`, `transformers==4.28.1`, `accelerate==0.17.1`, and `openai==0.27.4` with Python 3.9.7.
37 |
38 | ## Data
39 |
40 | You can download datasets (along with retrieval results) by running the following command:
41 |
42 | ```bash
43 | bash download_data.sh
44 | ```
45 |
46 | All the data will be stored in `data/`. Our data includes top-100 DPR/GTR retrieved results for ASQA and QAMPARI, and top-100 BM25 retrieved results for ELI5. We also provide reranked oracle retrieval results, where top-5 passages can achieve the same recall as the original top-100 recall.
47 |
48 | ### Retrieval
49 |
50 | You can reproduce the passage retrieval step with the following command:
51 | ```bash
52 | python retrieval.py --data {path/to/data} --retriever {bm25/gtr} --output_file {path/to/output}
53 | ```
54 |
55 | There are additional packages required for the retrieval steps.
56 | Specifically, you need to install `pyserini==0.21.0` (their github [repo](https://github.com/castorini/pyserini/tree/master) is helpful) and `sentence-transformers==2.2.2`.
57 |
58 | For the BM25 retrieval over Common Crawl using Sphere, you must first download the index from the Sphere [repo](https://github.com/facebookresearch/Sphere), and set the environmental variable `BM25_SPHERE_PATH` to the path of the downloaded index.
59 | Specifically, you can use the following command:
60 | ```bash
61 | wget -P faiss_index https://dl.fbaipublicfiles.com/sphere/sphere_sparse_index.tar.gz
62 | tar -xzvf faiss_index/sphere_sparse_index.tar.gz -C faiss_index
63 | export BM25_SPHERE_PATH=$PWD/faiss_index
64 | ```
65 | It's important to note that given the large size of the corpus, this step is extremely expensive and time-consuming. We found that larger CPU memory tends to help with the speed.
66 |
67 | For GTR, we first build an index using the DPR wikipedia snapshot, which you can obtain using the download script from the DPR [repo](https://github.com/facebookresearch/DPR), and then set the environmental variable `DPR_WIKI_TSV` to the path of the tsv file.
68 | Specifically, you can use the following command:
69 | ```bash
70 | wget https://dl.fbaipublicfiles.com/dpr/wikipedia_split/psgs_w100.tsv.gz
71 | gzip -d psgs_w100.tsv.gz
72 | export DPR_WIKI_TSV=$PWD/psgs_w100.tsv
73 | ```
74 | Then, you want to set `GTR_EMB` to the path of the GTR embeddings of the Wikipedia corpus, and running the retrieval script for the first time will automatically build and save the index.
75 | Building the dense index can be expensive for GPU memory (we use 80GB GPUs for this) and time-consuming; the entire index will take about 31GB.
76 | If you find this step to be too expensive, you can also download it using:
77 | ```bash
78 | wget https://huggingface.co/datasets/princeton-nlp/gtr-t5-xxl-wikipedia-psgs_w100-index/resolve/main/gtr_wikipedia_index.pkl
79 | export GTR_EMB=$PWD/gtr_wikipedia_index.pkl
80 | ```
81 |
82 | To reproduce the DPR retrieval, we refer to the DPR [repo](https://github.com/facebookresearch/DPR); we used the original DPR checkpoint trained on NQ.
83 |
84 | ## Code Structure
85 |
86 | * `run.py`: run file to reproduce our baseline generations.
87 | * `eval.py`: eval file to evaluate generations.
88 | * `prompts`: folder that contains all prompt files.
89 | * `configs/`: folder that contains all config files to reproduce baselines.
90 | * `tools/`: misc code (generate summaries/snippets, reranking, etc.)
91 |
92 |
93 | ## Reproducing Baselines
94 |
95 |
96 | You can reproduce baselines from our paper by
97 |
98 | ```bash
99 | python run.py --config configs/{config_name}
100 | ```
101 |
102 | You can also overwrite any arguments in the config file or add new arguments simply through command line:
103 | ```
104 | python run.py --config configs/{config_name} --seed 43 --model vicuna-13b
105 | ```
106 |
107 | The naming of config files follows the rule of `{dataset}_{LLM}_shot{#demos}_ndoc{#passages}_{retriever}_{method}.yaml`. Method names include:
108 | * `default` corresponds to the **Vanilla** model in our paper.
109 | * `summary` corresponds to the **Summary** model.
110 | * `extraction` corresponds to the **Snippet** model.
111 | * `interact_doc_id` corresponds to the **Interact** model.
112 | * `interact_search` corresponds to the **InlineSearch** model.
113 | * `closedbook` corresponds to the **ClosedBook** model.
114 |
115 | Our code supports both the OpenAI API and offline HuggingFace models:
116 |
117 | * For OpenAI models (for example, ChatGPT), you need to set the environment variable `OPENAI_API_KEY` and `OPENAI_ORG_ID`. If you are using the Azure OpenAI API, you need to set the environment variable of `OPENAI_API_KEY` and `OPENAI_API_BASE`. You also need to add the flag `--azure`.
118 | * Note that in Azure OpenAI API, ChatGPT's name is different and you should set it by `--model gpt-35-turbo`.
119 | * For the open-source models, you should set the model name equal to the input of HuggingFace models' `.from_pretrained` method. This could either be a local directory (e.g. for the older LLaMA models) or a path to the HuggingFace hub.
120 |
121 | For detailed argument usage, please refer to `run.py`.
122 |
123 | Model output along with gold answers and run configs will be stored in a json file in `result/`.
124 |
125 |
126 | ### Post-hoc citation
127 |
128 | For closed-book models, one can use `post_hoc_cite.py` to add citations in a post-hoc manner (using GTR-large). To run post-hoc citation, execute
129 | ```bash
130 | python post_hoc_cite.py --f result/{RESULT JSON FILE NAME} --external_docs data/{CORRESPONDING DATA}
131 | ```
132 |
133 | The output file with post-hoc citations will be stored in `result/`, with a suffix `post_hoc_cite.gtr-t5-large-external`.
134 |
135 | ## Evaluation
136 |
137 | ALCE evaluation is implemented in `eval.py`.
138 |
139 | For ASQA, use the following command
140 | ```bash
141 | python eval.py --f {path/to/result/file} --citations --qa --mauve
142 | ```
143 |
144 | For QAMPARI, use the following command
145 | ```bash
146 | python eval.py --f {path/to/result/file} --citations
147 | ```
148 |
149 | For ELI5, use the following command
150 | ```bash
151 | python eval.py --f {path/to/result/file} --citations --claims_nli --mauve
152 | ```
153 |
154 | The evaluation result will be saved in `result/`, with the same name as the input and a suffix `.score`.
155 |
156 | ## Human Evaluation
157 |
158 | The results from our human evaluation (Section 6) are located under the directory [`human_eval`](human_eval).
159 | Both the data and the analysis are available, please refer to the directory for details.
160 |
161 | ## Bug or Questions?
162 |
163 | If you have any questions related to the code or the paper, feel free to email Tianyu (`tianyug@cs.princeton.edu`). If you encounter any problems when using the code, or want to report a bug, you can open an issue. Please try to specify the problem with details so we can help you better and quicker!
164 |
165 |
166 |
167 | ## Citation
168 |
169 | Please cite our paper if you use ALCE in your work:
170 |
171 | ```bibtex
172 | @inproceedings{gao2023enabling,
173 | title={Enabling Large Language Models to Generate Text with Citations},
174 | author={Gao, Tianyu and Yen, Howard and Yu, Jiatong and Chen, Danqi},
175 | year={2023},
176 | booktitle={Empirical Methods in Natural Language Processing (EMNLP)},
177 | }
178 | ```
179 |
--------------------------------------------------------------------------------
/assets/ALCE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princeton-nlp/ALCE/246c476a4edfc564266b7346b6e29ef4861ae937/assets/ALCE.png
--------------------------------------------------------------------------------
/assets/moose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princeton-nlp/ALCE/246c476a4edfc564266b7346b6e29ef4861ae937/assets/moose.png
--------------------------------------------------------------------------------
/configs/asqa_alpaca-7b_shot1_ndoc3_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 1
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr
8 | model: alpaca-7b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_alpaca-7b_shot2_ndoc3_gtr_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_light_inst.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr_light_inst
8 | model: alpaca-7b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_gpt4_shot2_ndoc10_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: gtr
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_gpt4_shot2_ndoc20_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 20
6 | dataset_name: asqa
7 | tag: gtr
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_gpt4_shot2_ndoc5_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_llama-13b_shot2_ndoc10_gtr_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_sum_or_ext.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: extraction
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 | use_shorter: extraction
12 |
--------------------------------------------------------------------------------
/configs/asqa_llama-13b_shot2_ndoc10_gtr_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_sum_or_ext.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: summary
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 | use_shorter: summary
12 |
--------------------------------------------------------------------------------
/configs/asqa_llama-13b_shot2_ndoc3_gtr_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_light_inst.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr_light_inst
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_llama-13b_shot2_ndoc3_gtr_oracle_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_light_inst.json
3 | eval_file: data/asqa_eval_gtr_top100_reranked_oracle.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr_oracle_light_inst
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_llama-30b_shot2_ndoc3_gtr_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_light_inst.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr_light_inst
8 | model: llama-30b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_llama-7b_shot1_ndoc3_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 1
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr
8 | model: llama-7b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_llama-7b_shot2_ndoc3_gtr_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_light_inst.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr_light_inst
8 | model: llama-7b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_llama2_shot2_ndoc5_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr
8 | model: meta-llama/Llama-2-70b-chat-hf
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_opt-6.7b_shot1_ndoc3_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 1
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr
8 | model: facebook/opt-6.7b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_closedbook.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_closedbook.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 0
6 | dataset_name: asqa
7 | tag: closedbook
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc0_gtr_interact_search_ret_all_forcecite.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_search.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 0
6 | dataset_name: asqa
7 | tag: gtr_interact_search_ret_all
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | interactive: true
11 | interactive_query: search
12 | retriever: gtr-t5-large
13 | retrieve_in_all_docs: true
14 | force_cite_show: true
15 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc10_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: gtr
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc10_gtr_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_sum_or_ext.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: gtr_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc10_gtr_interact_doc_id_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_doc_id.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: gtr_interact_doc_id_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc10_gtr_interact_doc_id_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_doc_id.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: gtr_interact_doc_id_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc10_gtr_interact_search_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_search.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: gtr_interact_search_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: search
13 | retriever: gtr-t5-large
14 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc10_gtr_interact_search_summary_ret_all.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_search.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: gtr_interact_search_summary_ret_all
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: search
13 | retriever: gtr-t5-large
14 | retrieve_in_all_docs: true
15 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc10_gtr_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_sum_or_ext.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: asqa
7 | tag: gtr_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc3_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: asqa
7 | tag: gtr
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_dpr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_dpr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: dpr
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_gtr_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_sum_or_ext.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_gtr_interact_doc_id_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_doc_id.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr_interact_doc_id_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_gtr_interact_doc_id_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_doc_id.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr_interact_doc_id_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_gtr_interact_search_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_search.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr_interact_search_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: search
13 | retriever: gtr-t5-large
14 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_gtr_interact_search_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_interact_search.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr_interact_search_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: search
13 | retriever: gtr-t5-large
14 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_gtr_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_light_inst.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr_light_inst
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_gtr_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_sum_or_ext.json
3 | eval_file: data/asqa_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: gtr_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 |
--------------------------------------------------------------------------------
/configs/asqa_turbo_shot2_ndoc5_oracle_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/asqa_default.json
3 | eval_file: data/asqa_eval_gtr_top100_reranked_oracle.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: asqa
7 | tag: oracle
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/eli5_gpt4_shot2_ndoc10_bm25_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_default.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: bm25
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/eli5_gpt4_shot2_ndoc20_bm25_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_default.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 20
6 | dataset_name: eli5
7 | tag: bm25
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/eli5_gpt4_shot2_ndoc5_bm25_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_default.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/eli5_llama-13b_shot2_ndoc10_bm25_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_sum_or_ext.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: extraction
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 | use_shorter: extraction
12 |
--------------------------------------------------------------------------------
/configs/eli5_llama-13b_shot2_ndoc10_bm25_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_sum_or_ext.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: summary
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 | use_shorter: summary
12 |
--------------------------------------------------------------------------------
/configs/eli5_llama-13b_shot2_ndoc3_bm25_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_light_inst.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: eli5
7 | tag: bm25_light_inst
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/eli5_llama-13b_shot2_ndoc3_bm25_oracle_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_light_inst.json
3 | eval_file: data/eli5_eval_bm25_top100_reranked_oracle.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: eli5
7 | tag: bm25_oracle_light_inst
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/eli5_llama2_shot2_ndoc5_bm25_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_default.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25
8 | model: meta-llama/Llama-2-70b-chat-hf
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_closedbook.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_closedbook.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 0
6 | dataset_name: eli5
7 | tag: closedbook
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc0_bm25_interact_search_ret_all_forcecite.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_interact_search.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 0
6 | dataset_name: eli5
7 | tag: bm25_interact_search_ret_all
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | interactive: true
11 | interactive_query: search
12 | retriever: gtr-t5-large
13 | retrieve_in_all_docs: true
14 | force_cite_show: true
15 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc10_bm25_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_default.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: bm25
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc10_bm25_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_sum_or_ext.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: bm25_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc10_bm25_interact_doc_id_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_interact_doc_id.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: bm25_interact_doc_id_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc10_bm25_interact_doc_id_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_interact_doc_id.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: bm25_interact_doc_id_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc10_bm25_interact_search_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_interact_search.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: bm25_interact_search_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: search
13 | retriever: gtr-t5-large
14 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc10_bm25_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_sum_or_ext.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: eli5
7 | tag: bm25_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc5_bm25_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_default.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc5_bm25_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_sum_or_ext.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc5_bm25_interact_doc_id_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_interact_doc_id.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25_interact_doc_id_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc5_bm25_interact_doc_id_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_interact_doc_id.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25_interact_doc_id_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc5_bm25_interact_search_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_interact_search.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25_interact_search_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: search
13 | retriever: gtr-t5-large
14 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc5_bm25_interact_search_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_interact_search.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25_interact_search_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: search
13 | retriever: gtr-t5-large
14 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc5_bm25_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_sum_or_ext.json
3 | eval_file: data/eli5_eval_bm25_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: bm25_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 |
--------------------------------------------------------------------------------
/configs/eli5_turbo_shot2_ndoc5_oracle_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/eli5_default.json
3 | eval_file: data/eli5_eval_bm25_top100_reranked_oracle.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: eli5
7 | tag: oracle
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/qampari_gpt4_shot2_ndoc10_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_default.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: qampari
7 | tag: gtr
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/qampari_gpt4_shot2_ndoc20_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_default.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 20
6 | dataset_name: qampari
7 | tag: gtr
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/qampari_gpt4_shot2_ndoc5_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_default.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: qampari
7 | tag: gtr
8 | model: gpt-4-0613
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/qampari_llama-13b_shot2_ndoc10_gtr_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_sum_or_ext.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: qampari
7 | tag: extraction
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 | use_shorter: extraction
12 |
--------------------------------------------------------------------------------
/configs/qampari_llama-13b_shot2_ndoc10_gtr_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_sum_or_ext.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: qampari
7 | tag: summary
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 | use_shorter: summary
12 |
--------------------------------------------------------------------------------
/configs/qampari_llama-13b_shot2_ndoc3_gtr_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_light_inst.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: qampari
7 | tag: gtr_light_inst
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/qampari_llama-13b_shot2_ndoc3_gtr_oracle_light_inst.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_light_inst.json
3 | eval_file: data/qampari_eval_gtr_top100_reranked_oracle.json
4 | shot: 2
5 | ndoc: 3
6 | dataset_name: qampari
7 | tag: gtr_oracle_light_inst
8 | model: llama-13b
9 | temperature: 1.0
10 | top_p: 0.95
11 |
--------------------------------------------------------------------------------
/configs/qampari_llama2_shot2_ndoc5_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_default.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: qampari
7 | tag: gtr
8 | model: meta-llama/Llama-2-70b-chat-hf
9 | temperature: 1.0
10 | top_p: 0.95
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_closedbook.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_closedbook.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 0
6 | dataset_name: qampari
7 | tag: closedbook
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc0_gtr_interact_search_ret_all_forcecite.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_interact_search.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 0
6 | dataset_name: qampari
7 | tag: gtr_interact_search_ret_all
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | interactive: true
11 | interactive_query: search
12 | retriever: gtr-t5-large
13 | retrieve_in_all_docs: true
14 | force_cite_show: true
15 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc10_gtr_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_sum_or_ext.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: qampari
7 | tag: gtr_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc10_gtr_interact_doc_id_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_interact_doc_id.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: qampari
7 | tag: gtr_interact_doc_id_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc10_gtr_interact_doc_id_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_interact_doc_id.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: qampari
7 | tag: gtr_interact_doc_id_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc10_gtr_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_sum_or_ext.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 10
6 | dataset_name: qampari
7 | tag: gtr_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc5_gtr_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_default.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: qampari
7 | tag: gtr
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc5_gtr_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_sum_or_ext.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: qampari
7 | tag: gtr_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc5_gtr_interact_doc_id_extraction.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_interact_doc_id.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: qampari
7 | tag: gtr_interact_doc_id_extraction
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: extraction
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc5_gtr_interact_doc_id_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_interact_doc_id.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: qampari
7 | tag: gtr_interact_doc_id_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 | interactive: true
12 | interactive_query: doc_id
13 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc5_gtr_summary.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_sum_or_ext.json
3 | eval_file: data/qampari_eval_gtr_top100.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: qampari
7 | tag: gtr_summary
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 | use_shorter: summary
11 |
--------------------------------------------------------------------------------
/configs/qampari_turbo_shot2_ndoc5_oracle_default.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prompt_file: prompts/qampari_default.json
3 | eval_file: data/qampari_eval_gtr_top100_reranked_oracle.json
4 | shot: 2
5 | ndoc: 5
6 | dataset_name: qampari
7 | tag: oracle
8 | model: gpt-3.5-turbo-0301
9 | openai_api: true
10 |
--------------------------------------------------------------------------------
/download_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Download and unpack the ALCE benchmark data from Hugging Face, then
3 | # remove the tarball. Abort on any failure so a partial download is
4 | # never extracted and the tar file is never deleted prematurely.
5 | set -e
6 | wget https://huggingface.co/datasets/princeton-nlp/ALCE-data/resolve/main/ALCE-data.tar
7 | tar xvf ALCE-data.tar
8 | mv ALCE-data data
9 | echo "deleting tar file..."
10 | rm ALCE-data.tar
6 |
--------------------------------------------------------------------------------
/human_eval/README.md:
--------------------------------------------------------------------------------
1 | # Human Evaluation
2 |
3 | In this directory, you can find the human evaluation results in two files: `human_eval_utility_completed.json` and `human_eval_citations_completed.json`.
4 | We evaluated a sample of ASQA and ELI5 results from select models. For more details, please refer to the paper.
5 |
6 | ### Utility
7 | In `human_eval_utility_completed.json`, each model is linked to a dictionary where the key is the question id and the value is the model output. This is identical to the data you would find after running evaluation with one additional field -- `utility_score` is rated from 1-5.
8 |
9 | ### Citations
10 | In `human_eval_citations_completed.json`, each model is linked to a dictionary where the key is the question id and the value is the model output. This is identical to the data you would find after running evaluation with a few additional fields:
11 |
12 | - `citation_precision_score` can be found for every valid citation in every sentence. The instruction given to the annotator is: "Given the sentence and one of its cited documents, please rate if the document fully supports all claims in the sentence (2), the document partially supports the claims in the sentence (1), or if the document does not support any claims made in the sentence (0)."
13 | - `sentence_recall_score` can be found for every sentence. The instruction given to the annotator is: "Given the sentence and its cited documents, please rate if the model response is fully supported by the documents (1) or if the model response is not fully supported by the documents (0). If the response is fully supported, that means all factual claims made by the response are found in and supported by at least one of the documents. Otherwise, the response is not fully supported."
14 |
15 | We then calculate the human `overall_precision_score` and `overall_recall_score`. Furthermore, we included the automatic evaluation results for ease of comparison.
16 |
--------------------------------------------------------------------------------
/human_eval/analyze.py:
--------------------------------------------------------------------------------
1 | from sklearn.metrics import confusion_matrix, cohen_kappa_score
2 | import json
3 | import numpy as np
4 |
5 | with open("human_eval_citations_completed.json") as f:
6 | citation_data = json.load(f)
7 |
8 | # sent human recall, sent human prec, sent auto recall, sent auto prec, cite human prec, cite auto prec
9 | all_scores = [[],[],[],[],[],[]]
10 |
11 | print("model,cite recall (human),cite prec (human),cite recall (automatic),cite prec (automatic)\n")
12 | for dataset, models in citation_data.items():
13 | for model, items in models.items():
14 | model_scores = [[],[],[],[],[],[]]
15 |
16 | for id, item in items.items():
17 | if id == "overall_results":
18 | continue
19 | item_scores = [[],[],[],[],[],[]]
20 |
21 | for i, sent in enumerate(item["sentences"]):
22 |
23 | # these are all sentence level
24 | human_recall = sent["sentence_recall_score"]
25 | human_precision = sent["sentence_precision_score"]
26 | auto_recall = item["automatic_recall_scores"][i]
27 | auto_precision = item["automatic_precision_scores"][i]
28 |
29 | assert human_recall == 0 or human_recall == 1
30 | assert human_precision == 0 or human_precision == 1
31 | assert auto_recall == 0 or auto_recall == 1
32 | assert auto_precision == 0 or auto_precision == 1
33 | human_precision = min(human_recall, human_precision)
34 |
35 | # these are citation level scores
36 | human_citation_precision_scores = [x["citation_precision_score"] for x in sent["citations"]]
37 | # for precision, either 1 or 2 counts as support, so we use min
38 | human_citation_precision_scores = [0 if human_recall == 0 else min(x, 1) for x in human_citation_precision_scores]
39 | auto_citation_precision_scores = item["automatic_citation_precision_scores"][i]
40 | auto_citation_precision_scores = [0 if auto_recall == 0 else min(x, 1) for x in auto_citation_precision_scores]
41 |
42 | assert len(human_citation_precision_scores) == len(auto_citation_precision_scores)
43 |
44 | item_scores[0].append(human_recall)
45 | item_scores[1].append(human_precision)
46 | item_scores[2].append(auto_recall)
47 | item_scores[3].append(auto_precision)
48 | item_scores[4] += human_citation_precision_scores
49 | item_scores[5] += auto_citation_precision_scores
50 |
51 | for i, s in enumerate(all_scores):
52 | all_scores[i] += item_scores[i]
53 |
54 | item_scores = [x if len(x) > 0 else [0] for x in item_scores]
55 |
56 | for i, s in enumerate(model_scores):
57 | s.append(np.mean(item_scores[i]))
58 |
59 | print(f"{model},{np.mean(model_scores[0])*100:.01f},{np.mean(model_scores[4])*100:.01f},{np.mean(model_scores[2])*100:.01f},{np.mean(model_scores[5])*100:.01f}")
60 | print()
61 |
62 | print()
63 |
print("-----citation recall scores-----")
# Agreement between human (all_scores[0]) and automatic (all_scores[2])
# binary citation-recall labels.
cm = confusion_matrix(all_scores[0], all_scores[2], labels=[0, 1])
# BUG FIX: this literal was missing its f-prefix, so the program printed
# "{cm.sum()}" verbatim instead of the actual total (compare the precision
# block below, which already used an f-string).
print(f"recall cm (total = {cm.sum()})\n", cm)
tn, fp, fn, tp = cm.ravel()
acc = (tn + tp) / cm.sum()
# NOTE(review): precision/recall here put tn in the numerator, i.e. they treat
# label 0 ("not supported") as the positive class — presumably deliberate, to
# measure how well the automatic metric detects unsupported citations; confirm.
precision = tn / (tn + fn)
recall = tn / (tn + fp)
print(f"accuracy, recall, precision = {acc*100:.01f},{recall*100:.01f},{precision*100:.01f}")
print("recall cohens kappa between automatic and human evaluation:")
print(cohen_kappa_score(all_scores[0], all_scores[2]))
print()

print("-----citation prec scores-----")
# Same agreement analysis for citation-precision labels
# (human = all_scores[4], automatic = all_scores[5]).
cm = confusion_matrix(all_scores[4], all_scores[5], labels=[0, 1])
print(f"precision cm (total = {cm.sum()})\n", cm)
tn, fp, fn, tp = cm.ravel()
acc = (tn + tp) / cm.sum()
precision = tn / (tn + fn)
recall = tn / (tn + fp)
print(f"accuracy, recall, precision = {acc*100:.01f},{recall*100:.01f},{precision*100:.01f}")
print("precision cohens kappa between automatic and human evaluation:")
print(cohen_kappa_score(all_scores[4], all_scores[5]))
--------------------------------------------------------------------------------
/paper/ALCE.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princeton-nlp/ALCE/246c476a4edfc564266b7346b6e29ef4861ae937/paper/ALCE.pdf
--------------------------------------------------------------------------------
/post_hoc_cite.py:
--------------------------------------------------------------------------------
1 | import json
2 | import argparse
3 | from sklearn.feature_extraction.text import TfidfVectorizer
4 | from sklearn.metrics.pairwise import cosine_similarity
5 | from tqdm import tqdm
6 | from nltk import sent_tokenize
7 | import re
8 | import numpy as np
9 | import string
10 | import torch
11 | from searcher import SearcherWithinDocs
12 |
def remove_citations(sent):
    """Strip inline citation markers such as ``[3]`` (with or without a leading
    space) from *sent*, along with stray `` |`` separators and ``]`` brackets."""
    without_refs = re.sub(r" \[\d+", "", sent)
    without_refs = re.sub(r"\[\d+", "", without_refs)
    return without_refs.replace(" |", "").replace("]", "")
15 |
def main():
    """Add post-hoc citations to model outputs.

    For every sentence of every item's output, if the sentence has no citation
    (or --overwrite is set), retrieve the best-matching document from the item's
    doc list (or an external doc list) and prepend its 1-based index as ``[k]``.
    Writes the modified data to ``<input>.post_hoc_cite<tag>``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--f", type=str, help="Output data file")
    parser.add_argument("--retriever", type=str, default="gtr-t5-large", help="Retriever to use. Options: `tfidf`, `gtr-t5-large`")
    parser.add_argument("--retriever_device", type=str, default="cuda", help="Where to put the dense retriever if using. Options: `cuda`, `cpu`")
    parser.add_argument("--overwrite", action="store_true", help="Overwrite existing citations")
    parser.add_argument("--external_docs", type=str, default=None, help="Use external documents")

    args = parser.parse_args()

    with open(args.f) as fin:
        data = json.load(fin)
    new_data = []
    external = None
    if args.external_docs is not None:
        with open(args.external_docs) as fin:
            external = json.load(fin)

    # Load retrieval model.
    # BUG FIX: gtr_model was previously bound only inside the "gtr" branch, so
    # running with --retriever tfidf raised NameError at the searcher call below.
    gtr_model = None
    if "gtr" in args.retriever:
        from sentence_transformers import SentenceTransformer
        gtr_model = SentenceTransformer(f'sentence-transformers/{args.retriever}', device=args.retriever_device)

    for idx, item in enumerate(tqdm(data['data'])):
        doc_list = item['docs']
        if external is not None:
            # External docs must line up with the data items one-to-one.
            assert external[idx]['question'] == item['question']
            doc_list = external[idx]['docs']
        searcher = SearcherWithinDocs(doc_list, args.retriever, model=gtr_model, device=args.retriever_device)

        # Keep only the first line of the output, then strip the chat EOS marker.
        # BUG FIX: the second assignment previously restarted from item["output"],
        # silently discarding the first-line truncation its own comment promised.
        output = item["output"].strip().split("\n")[0]
        output = output.replace("<|im_end|>", "")
        if "qampari" in args.f:
            # QAMPARI outputs are comma-separated entities; prepend the question
            # so each fragment is a self-contained, retrievable "sentence".
            sents = [item['question'] + ' ' + x.strip() for x in item['output'].rstrip(".").split(",")]
        else:
            sents = sent_tokenize(output)

        new_output = ""
        for sent in sents:
            # Existing citations look like "[3]"; convert to 0-based doc indices.
            original_ref = [int(r[1:]) - 1 for r in re.findall(r"\[\d+", sent)]

            if len(original_ref) == 0 or args.overwrite:
                # No citation (or overwriting): search within the docs and
                # prepend the best match as a 1-based citation.
                print("\n-----")
                print("Original sentence:", sent)
                print("Original ref:", original_ref)
                sent = remove_citations(sent)
                best_doc_id = searcher.search(sent)
                print("New ref:", best_doc_id)
                sent = f"[{best_doc_id+1}] " + sent
                print("New sentence:", sent)

            # Re-assemble (identical in both the cited and newly-cited cases;
            # the original duplicated this if/else in both branches).
            if "qampari" in args.f:
                new_output += sent.replace(item['question'], '').strip() + ", "
            else:
                new_output += sent + " "

        item['output'] = new_output.rstrip().rstrip(",")
        print("Final output: " + item['output'])
        item['docs'] = doc_list
        new_data.append(item)

    data['data'] = new_data
    tag = f".{args.retriever}"
    if args.overwrite:
        tag += "-overwrite"
    if args.external_docs is not None:
        tag += "-external"

    with open(args.f + f".post_hoc_cite{tag}", 'w') as fout:
        json.dump(data, fout, indent=4)


if __name__ == "__main__":
    main()
89 |
--------------------------------------------------------------------------------
/prompts/asqa_closedbook.json:
--------------------------------------------------------------------------------
1 | {
2 | "instruction": "Instruction: Write an accurate, engaging, and concise answer for the given question. Use an unbiased and journalistic tone. ",
3 | "demo_sep": "\n\n\n",
4 | "demo_prompt": "{INST}\n\nQuestion: {Q}\n\nAnswer: {A}",
5 | "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6 | "demos": [
7 | {
8 | "question": "Which is the most rainy place on earth?",
9 | "answer": "Several places on Earth claim to be the most rainy, such as Lloró, Colombia, which reported an average annual rainfall of 12,717 mm between 1952 and 1989, and López de Micay, Colombia, which reported an annual 12,892 mm between 1960 and 2012. However, the official record is held by Mawsynram, India with an average annual rainfall of 11,872 mm, although nearby town Sohra, India, also known as Cherrapunji, holds the record for most rain in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861.",
10 | "docs": [
11 | {
12 | "title": "Cherrapunji",
13 | "text": "Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861, however: it received in"
14 | },
15 | {
16 | "title": "Cherrapunji",
17 | "text": "Radio relay station known as Akashvani Cherrapunji. It broadcasts on FM frequencies. Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall"
18 | },
19 | {
20 | "title": "Mawsynram",
21 | "text": "Mawsynram Mawsynram () is a village in the East Khasi Hills district of Meghalaya state in north-eastern India, 65 kilometres from Shillong. Mawsynram receives one of the highest rainfalls in India. It is reportedly the wettest place on Earth, with an average annual rainfall of 11,872 mm, but that claim is disputed by Lloró, Colombia, which reported an average yearly rainfall of 12,717 mm between 1952 and 1989 and López de Micay, also in Colombia, which reported an annual 12,892 mm per year between 1960 and 2012. According to the \"Guinness Book of World Records\", Mawsynram received of rainfall in 1985. Mawsynram is located at 25° 18′"
22 | },
23 | {
24 | "title": "Earth rainfall climatology",
25 | "text": "Pacific Northwest, and the Sierra Nevada range are the wetter portions of the nation, with average rainfall exceeding per year. The drier areas are the Desert Southwest, Great Basin, valleys of northeast Arizona, eastern Utah, central Wyoming, eastern Oregon and Washington and the northeast of the Olympic Peninsula. The Big Bog on the island of Maui receives, on average, every year, making it the wettest location in the US, and all of Oceania. The annual average rainfall maxima across the continent lie across the northwest from northwest Brazil into northern Peru, Colombia, and Ecuador, then along the Atlantic coast of"
26 | },
27 | {
28 | "title": "Going to Extremes",
29 | "text": "in the world. Oymyakon in Siberia, where the average winter temperature is −47 °F (− 44 °C). Arica in Chile, where there had been fourteen consecutive years without rain. Fog is the only local source of water. Mawsynram in India, where average annual rainfall is 14 meters, falling within a four-month period in the monsoon season. The rainfall is approximately equal to that of its neighbor Cherrapunji. Dallol in Ethiopia, known as the 'Hell-hole of creation' where the temperature averages 94 °F (34 °C) over the year. In his second series, Middleton visited places without permanent towns, locations where \"survival\""
30 | }
31 | ]
32 | },
33 | {
34 | "question": "When did the us break away from england?",
35 | "answer": "The United States took the first step towards gaining independence from Great Britain when it declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, 1776, the date when the Declaration of Independence was officially adopted by Congress). The Treaty of Paris was later signed on September 3, 1783, formally separating the United States from the British Empire.",
36 | "docs": [
37 | {
38 | "title": "United States withdrawal from Saudi Arabia",
39 | "text": "United States withdrawal from Saudi Arabia Beginning during Operation Desert Shield in August 1990, while preparing for the Gulf War, the United States sent a large troop contingent to Saudi Arabia. After the war, remnant troops, primarily U.S. Air Force personnel, augmented by a smaller number of coordinating and training personnel from the U.S. Navy, U.S. Army and U.S. Marine Corps remained in Saudi Arabia under the aegis of Joint Task Force Southwest Asia (JTF-SWA), as part of Operation Southern Watch (OSW). The United Kingdom and France also maintained a small contingent of Royal Air Force and French Air Force"
40 | },
41 | {
42 | "title": "Decolonization of the Americas",
43 | "text": "and France has fully \"integrated\" most of its former colonies as fully constituent \"departments\" of France. The United States of America declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, the date when the Declaration of Independence was officially adopted by Congress), in so doing becoming the first independent, foreign-recognized nation in the Americas and the first European colonial entity to break from its mother country. Britain formally acknowledged American independence in 1783 after its defeat in the American Revolutionary War. Although initially occupying only the land east of the Mississippi"
44 | },
45 | {
46 | "title": "American Revolution",
47 | "text": "second British army at Yorktown in the fall of 1781, effectively ending the war. The Treaty of Paris was signed September 3, 1783, formally ending the conflict and confirming the new nation's complete separation from the British Empire. The United States took possession of nearly all the territory east of the Mississippi River and south of the Great Lakes, with the British retaining control of Canada and Spain taking Florida. Among the significant results of the revolution was the creation of the United States Constitution, establishing a relatively strong federal national government that included an executive, a national judiciary, and"
48 | },
49 | {
50 | "title": "Decolonization",
51 | "text": "accelerate decolonialization and bring an end to the colonial empires of its Western allies, most importantly during the 1956 Suez Crisis, but American military bases were established around the world and direct and indirect interventions continued in Korea, Indochina, Latin America (\"inter alia\", the 1965 occupation of the Dominican Republic), Africa, and the Middle East to oppose Communist invasions and insurgencies. Since the dissolution of the Soviet Union, the United States has been far less active in the Americas, but invaded Afghanistan and Iraq following the September 11 attacks in 2001, establishing army and air bases in Central Asia. Before"
52 | },
53 | {
54 | "title": "Decolonization",
55 | "text": "the responsibility of the United Kingdom (with a copy of the new constitution annexed), and finally, if approved, issuance of an Order of Council fixing the exact date of independence. After World War I, several former German and Ottoman territories in the Middle East, Africa, and the Pacific were governed by the UK as League of Nations mandates. Some were administered directly by the UK, and others by British dominions – Nauru and the Territory of New Guinea by Australia, South West Africa by the Union of South Africa, and Western Samoa by New Zealand. Egypt became independent in 1922,"
56 | }
57 | ]
58 | },
59 | {
60 | "question": "Who set the record for longest field goal?",
61 | "answer": "The record for the longest field goal in an NFL game was set by Matt Prater at 64 yards, but the record for the longest field goal at any level was 69 yards, kicked by collegiate kicker Ove Johansson in a 1976 Abilene Christian University football game against East Texas State University.",
62 | "docs": [
63 | {
64 | "title": "Field goal",
65 | "text": "toward its own end. The longest field goal kick in NFL history is 64 yards, a record set by Matt Prater on December 8, 2013. The previous record was 63, originally set by Tom Dempsey (1970) and then matched by Jason Elam (1998), Sebastian Janikowski (2011), David Akers (2012), and Graham Gano (2018). High school, college and most professional football leagues offer only a three-point field goal; however, some professional leagues have encouraged more rare kicks through \"four-point field goals\". NFL Europe encouraged long field goals of 50 yards or more by making those worth four points instead of three"
66 | },
67 | {
68 | "title": "Field goal range",
69 | "text": "35 and 40 yard lines (closer in a crosswind) often will go for the more risky fourth down conversion rather than risk either the touchback or the missed field goal. The longest field goal in recorded football history was 69 yards, set by collegiate kicker Ove Johansson, who was born in Sweden, in a 1976 Abilene Christian University football game against East Texas State University (now Texas A&M Commerce) at Shotwell Stadium in Abilene. The longest successful field goal in the NFL was 64 yards and was completed by Matt Prater in 2013. The NCAA record is 67 yards held"
70 | },
71 | {
72 | "title": "Field goal",
73 | "text": "both end zones) is only 66 yards. Scaccia, while playing indoor football, attempted a 64-yard kick that was inches short of success, hitting the crossbar. Longer field goals have been attempted at times; the longest attempt in the NFL, which was well short and was kicked into the wind, was 76 yards, attempted by Sebastian Janikowski of the Oakland Raiders, in a September 28, 2008 game against the San Diego Chargers. NFL Europe rewarded kickers that successfully kicked a field goal of longer than 50 yards with a bonus point, making such field goals worth 4 points instead of 3;"
74 | },
75 | {
76 | "title": "Field goal",
77 | "text": "this accomplishment is not the official record. All of the above kicks were successful with the use of a kicking tee, which was banned by the NCAA after the 1988 season. The longest known drop-kicked field goal in college football was a 62-yard kick from Pat O'Dea, an Australian kicker who played on the Wisconsin Badgers football team. O'Dea's kick took place in a blizzard against Northwestern on November 15, 1898. The longest field goal in U Sports football history is 59 yards, by Niko Difonte of Calgary Dinos, playing against the UBC Thunderbirds on November 11, 2017. The field"
78 | },
79 | {
80 | "title": "Field goal range",
81 | "text": "NFL and have been banned from NCAA since 1989) is 68 yards held by Fabrizio Scaccia, and the high school record 68 yards held by Dirk Borgognone; high school has wider goal posts and treats a field goal attempt that lands short in the field of play the same as a punt, making longer attempts much less risky. The indoor football record, with narrower and higher goal posts, is 63 yards (set by Aaron Mills), which is practically as long of a field goal as is possible in that variant of the sport, since the field in indoor football (including"
82 | }
83 | ]
84 | },
85 | {
86 | "question": "Who played galen in planet of the apes?",
87 | "answer": "In the 1968 film Planet of the Apes, Galen was played by Wright King. And in the tv series Planet of the Apes, Galen was played by Roddy McDowall.",
88 | "docs": [
89 | {
90 | "title": "Planet of the Apes",
91 | "text": "installment. Jacobs died on June 27, 1973, bringing an end to the APJAC Productions era of the \"Planet of the Apes\" franchise. Former Fox executive Stan Hough took over as producer for the television project, titled \"Planet of the Apes\". CBS picked up the series for its 1974 autumn lineup. Ron Harper and James Naughton played Alan Virdon and Peter Burke, two 20th-century American astronauts who pass through a time warp to a future where apes subjugate humans (unlike the original film, the humans can speak). Roddy McDowall returned to the franchise as Galen, a chimpanzee who joins the astronauts."
92 | },
93 | {
94 | "title": "Planet of the Apes (1968 film)",
95 | "text": "chimpanzees: animal psychologist Zira (Kim Hunter) and surgeon Galen (Wright King). While unable to speak as his throat wound is healing, called \"Bright Eyes\" by Zira and placed with one of the captive primitive humans he later names \"Nova\", Taylor observes the enhanced society of talking apes and in a strict caste system: the gorillas being the military police, hunters and workers; the orangutans overseeing the affairs of government, science, and religion; and intellectual chimpanzees being mostly scientists. While their society is a theocracy similar to the beginnings of the human Industrial Era, the apes consider the primitive humans as"
96 | },
97 | {
98 | "title": "Planet of the Apes (1968 film)",
99 | "text": "Planet of the Apes (1968 film) Planet of the Apes is a 1968 American science fiction film directed by Franklin J. Schaffner. It stars Charlton Heston, Roddy McDowall, Kim Hunter, Maurice Evans, James Whitmore, James Daly and Linda Harrison. The screenplay by Michael Wilson and Rod Serling was loosely based on the 1963 French novel \"La Plan\u00e8te des Singes\" by Pierre Boulle. Jerry Goldsmith composed the groundbreaking avant-garde score. It was the first in a series of five films made between 1968 and 1973, all produced by Arthur P. Jacobs and released by 20th Century Fox. The film tells the"
100 | },
101 | {
102 | "title": "Planet of the Apes",
103 | "text": "Rupert Wyatt. To portray ape characters realistically, the production avoided practical effects in favor of performance capture acting, partnering with New Zealand visual effects company Weta Digital. Wyatt cast James Franco as Will Rodman, while veteran performance capture actor Andy Serkis signed on to star as Caesar. \"Rise\" debuted on August 5, 2011. Critics reviewed it positively, especially praising the visual effects and Serkis's performance. It was a major box office hit, taking in $482 million globally, more than five times its $93 million budget. Weta's special effects earned the film two Visual Effects Society Awards and an Oscar nomination"
104 | },
105 | {
106 | "title": "Planet of the Apes",
107 | "text": "film stars Mark Wahlberg as astronaut Leo Davidson, who accidentally travels through a wormhole to a distant planet where talking apes enslave humans. He leads a human revolt and upends ape civilization by discovering that the apes evolved from the normal earth primates who had accompanied his mission, and arrived years before. Helena Bonham Carter played chimpanzee Ari, while Tim Roth played the human-hating chimpanzee General Thade. The film received mixed reviews; most critics believed it failed to compare to the original. Much of the negative commentary focused on the confusing plot and twist ending, though many reviewers praised the"
108 | }
109 | ]
110 | }
111 | ]
112 | }
113 |
--------------------------------------------------------------------------------
/prompts/asqa_default.json:
--------------------------------------------------------------------------------
1 | {
2 | "instruction": "Instruction: Write an accurate, engaging, and concise answer for the given question using only the provided search results (some of which might be irrelevant) and cite them properly. Use an unbiased and journalistic tone. Always cite for any factual claim. When citing several search results, use [1][2][3]. Cite at least one document and at most three documents in each sentence. If multiple documents support the sentence, only cite a minimum sufficient subset of the documents.",
3 | "demo_sep": "\n\n\n",
4 | "demo_prompt": "{INST}\n\nQuestion: {Q}\n\n{D}\nAnswer: {A}",
5 | "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6 | "demos": [
7 | {
8 | "question": "Which is the most rainy place on earth?",
9 | "answer": "Several places on Earth claim to be the most rainy, such as Lloró, Colombia, which reported an average annual rainfall of 12,717 mm between 1952 and 1989, and López de Micay, Colombia, which reported an annual 12,892 mm between 1960 and 2012 [3]. However, the official record is held by Mawsynram, India with an average annual rainfall of 11,872 mm [3], although nearby town Sohra, India, also known as Cherrapunji, holds the record for most rain in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861 [1].",
10 | "docs": [
11 | {
12 | "title": "Cherrapunji",
13 | "text": "Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861, however: it received in"
14 | },
15 | {
16 | "title": "Cherrapunji",
17 | "text": "Radio relay station known as Akashvani Cherrapunji. It broadcasts on FM frequencies. Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall"
18 | },
19 | {
20 | "title": "Mawsynram",
21 | "text": "Mawsynram Mawsynram () is a village in the East Khasi Hills district of Meghalaya state in north-eastern India, 65 kilometres from Shillong. Mawsynram receives one of the highest rainfalls in India. It is reportedly the wettest place on Earth, with an average annual rainfall of 11,872 mm, but that claim is disputed by Lloró, Colombia, which reported an average yearly rainfall of 12,717 mm between 1952 and 1989 and López de Micay, also in Colombia, which reported an annual 12,892 mm per year between 1960 and 2012. According to the \"Guinness Book of World Records\", Mawsynram received of rainfall in 1985. Mawsynram is located at 25° 18′"
22 | },
23 | {
24 | "title": "Earth rainfall climatology",
25 | "text": "Pacific Northwest, and the Sierra Nevada range are the wetter portions of the nation, with average rainfall exceeding per year. The drier areas are the Desert Southwest, Great Basin, valleys of northeast Arizona, eastern Utah, central Wyoming, eastern Oregon and Washington and the northeast of the Olympic Peninsula. The Big Bog on the island of Maui receives, on average, every year, making it the wettest location in the US, and all of Oceania. The annual average rainfall maxima across the continent lie across the northwest from northwest Brazil into northern Peru, Colombia, and Ecuador, then along the Atlantic coast of"
26 | },
27 | {
28 | "title": "Going to Extremes",
29 | "text": "in the world. Oymyakon in Siberia, where the average winter temperature is −47 °F (− 44 °C). Arica in Chile, where there had been fourteen consecutive years without rain. Fog is the only local source of water. Mawsynram in India, where average annual rainfall is 14 meters, falling within a four-month period in the monsoon season. The rainfall is approximately equal to that of its neighbor Cherrapunji. Dallol in Ethiopia, known as the 'Hell-hole of creation' where the temperature averages 94 °F (34 °C) over the year. In his second series, Middleton visited places without permanent towns, locations where \"survival\""
30 | }
31 | ]
32 | },
33 | {
34 | "question": "When did the us break away from england?",
35 | "answer": "The United States took the first step towards gaining independence from Great Britain when it declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, 1776, the date when the Declaration of Independence was officially adopted by Congress) [2]. The Treaty of Paris was later signed on September 3, 1783, formally separating the United States from the British Empire [3].",
36 | "docs": [
37 | {
38 | "title": "United States withdrawal from Saudi Arabia",
39 | "text": "United States withdrawal from Saudi Arabia Beginning during Operation Desert Shield in August 1990, while preparing for the Gulf War, the United States sent a large troop contingent to Saudi Arabia. After the war, remnant troops, primarily U.S. Air Force personnel, augmented by a smaller number of coordinating and training personnel from the U.S. Navy, U.S. Army and U.S. Marine Corps remained in Saudi Arabia under the aegis of Joint Task Force Southwest Asia (JTF-SWA), as part of Operation Southern Watch (OSW). The United Kingdom and France also maintained a small contingent of Royal Air Force and French Air Force"
40 | },
41 | {
42 | "title": "Decolonization of the Americas",
43 | "text": "and France has fully \"integrated\" most of its former colonies as fully constituent \"departments\" of France. The United States of America declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, the date when the Declaration of Independence was officially adopted by Congress), in so doing becoming the first independent, foreign-recognized nation in the Americas and the first European colonial entity to break from its mother country. Britain formally acknowledged American independence in 1783 after its defeat in the American Revolutionary War. Although initially occupying only the land east of the Mississippi"
44 | },
45 | {
46 | "title": "American Revolution",
47 | "text": "second British army at Yorktown in the fall of 1781, effectively ending the war. The Treaty of Paris was signed September 3, 1783, formally ending the conflict and confirming the new nation's complete separation from the British Empire. The United States took possession of nearly all the territory east of the Mississippi River and south of the Great Lakes, with the British retaining control of Canada and Spain taking Florida. Among the significant results of the revolution was the creation of the United States Constitution, establishing a relatively strong federal national government that included an executive, a national judiciary, and"
48 | },
49 | {
50 | "title": "Decolonization",
51 | "text": "accelerate decolonialization and bring an end to the colonial empires of its Western allies, most importantly during the 1956 Suez Crisis, but American military bases were established around the world and direct and indirect interventions continued in Korea, Indochina, Latin America (\"inter alia\", the 1965 occupation of the Dominican Republic), Africa, and the Middle East to oppose Communist invasions and insurgencies. Since the dissolution of the Soviet Union, the United States has been far less active in the Americas, but invaded Afghanistan and Iraq following the September 11 attacks in 2001, establishing army and air bases in Central Asia. Before"
52 | },
53 | {
54 | "title": "Decolonization",
55 | "text": "the responsibility of the United Kingdom (with a copy of the new constitution annexed), and finally, if approved, issuance of an Order of Council fixing the exact date of independence. After World War I, several former German and Ottoman territories in the Middle East, Africa, and the Pacific were governed by the UK as League of Nations mandates. Some were administered directly by the UK, and others by British dominions – Nauru and the Territory of New Guinea by Australia, South West Africa by the Union of South Africa, and Western Samoa by New Zealand. Egypt became independent in 1922,"
56 | }
57 | ]
58 | },
59 | {
60 | "question": "Who set the record for longest field goal?",
61 | "answer": "The record for the longest field goal in an NFL game was set by Matt Prater at 64 yards [1], but the record for the longest field goal at any level was 69 yards, kicked by collegiate kicker Ove Johansson in a 1976 Abilene Christian University football game against East Texas State University [2].",
62 | "docs": [
63 | {
64 | "title": "Field goal",
65 | "text": "toward its own end. The longest field goal kick in NFL history is 64 yards, a record set by Matt Prater on December 8, 2013. The previous record was 63, originally set by Tom Dempsey (1970) and then matched by Jason Elam (1998), Sebastian Janikowski (2011), David Akers (2012), and Graham Gano (2018). High school, college and most professional football leagues offer only a three-point field goal; however, some professional leagues have encouraged more rare kicks through \"four-point field goals\". NFL Europe encouraged long field goals of 50 yards or more by making those worth four points instead of three"
66 | },
67 | {
68 | "title": "Field goal range",
69 | "text": "35 and 40 yard lines (closer in a crosswind) often will go for the more risky fourth down conversion rather than risk either the touchback or the missed field goal. The longest field goal in recorded football history was 69 yards, set by collegiate kicker Ove Johansson, who was born in Sweden, in a 1976 Abilene Christian University football game against East Texas State University (now Texas A&M Commerce) at Shotwell Stadium in Abilene. The longest successful field goal in the NFL was 64 yards and was completed by Matt Prater in 2013. The NCAA record is 67 yards held"
70 | },
71 | {
72 | "title": "Field goal",
73 | "text": "both end zones) is only 66 yards. Scaccia, while playing indoor football, attempted a 64-yard kick that was inches short of success, hitting the crossbar. Longer field goals have been attempted at times; the longest attempt in the NFL, which was well short and was kicked into the wind, was 76 yards, attempted by Sebastian Janikowski of the Oakland Raiders, in a September 28, 2008 game against the San Diego Chargers. NFL Europe rewarded kickers that successfully kicked a field goal of longer than 50 yards with a bonus point, making such field goals worth 4 points instead of 3;"
74 | },
75 | {
76 | "title": "Field goal",
77 | "text": "this accomplishment is not the official record. All of the above kicks were successful with the use of a kicking tee, which was banned by the NCAA after the 1988 season. The longest known drop-kicked field goal in college football was a 62-yard kick from Pat O'Dea, an Australian kicker who played on the Wisconsin Badgers football team. O'Dea's kick took place in a blizzard against Northwestern on November 15, 1898. The longest field goal in U Sports football history is 59 yards, by Niko Difonte of Calgary Dinos, playing against the UBC Thunderbirds on November 11, 2017. The field"
78 | },
79 | {
80 | "title": "Field goal range",
81 | "text": "NFL and have been banned from NCAA since 1989) is 68 yards held by Fabrizio Scaccia, and the high school record 68 yards held by Dirk Borgognone; high school has wider goal posts and treats a field goal attempt that lands short in the field of play the same as a punt, making longer attempts much less risky. The indoor football record, with narrower and higher goal posts, is 63 yards (set by Aaron Mills), which is practically as long of a field goal as is possible in that variant of the sport, since the field in indoor football (including"
82 | }
83 | ]
84 | },
85 | {
86 | "question": "Who played galen in planet of the apes?",
87 | "answer": "In the 1968 film Planet of the Apes, Galen was played by Wright King [2]. And in the tv series Planet of the Apes, Galen was played by Roddy McDowall [1].",
88 | "docs": [
89 | {
90 | "title": "Planet of the Apes",
91 | "text": "installment. Jacobs died on June 27, 1973, bringing an end to the APJAC Productions era of the \"Planet of the Apes\" franchise. Former Fox executive Stan Hough took over as producer for the television project, titled \"Planet of the Apes\". CBS picked up the series for its 1974 autumn lineup. Ron Harper and James Naughton played Alan Virdon and Peter Burke, two 20th-century American astronauts who pass through a time warp to a future where apes subjugate humans (unlike the original film, the humans can speak). Roddy McDowall returned to the franchise as Galen, a chimpanzee who joins the astronauts."
92 | },
93 | {
94 | "title": "Planet of the Apes (1968 film)",
95 | "text": "chimpanzees: animal psychologist Zira (Kim Hunter) and surgeon Galen (Wright King). While unable to speak as his throat wound is healing, called \"Bright Eyes\" by Zira and placed with one of the captive primitive humans he later names \"Nova\", Taylor observes the enhanced society of talking apes and in a strict caste system: the gorillas being the military police, hunters and workers; the orangutans overseeing the affairs of government, science, and religion; and intellectual chimpanzees being mostly scientists. While their society is a theocracy similar to the beginnings of the human Industrial Era, the apes consider the primitive humans as"
96 | },
97 | {
98 | "title": "Planet of the Apes (1968 film)",
99 | "text": "Planet of the Apes (1968 film) Planet of the Apes is a 1968 American science fiction film directed by Franklin J. Schaffner. It stars Charlton Heston, Roddy McDowall, Kim Hunter, Maurice Evans, James Whitmore, James Daly and Linda Harrison. The screenplay by Michael Wilson and Rod Serling was loosely based on the 1963 French novel \"La Plan\u00e8te des Singes\" by Pierre Boulle. Jerry Goldsmith composed the groundbreaking avant-garde score. It was the first in a series of five films made between 1968 and 1973, all produced by Arthur P. Jacobs and released by 20th Century Fox. The film tells the"
100 | },
101 | {
102 | "title": "Planet of the Apes",
103 | "text": "Rupert Wyatt. To portray ape characters realistically, the production avoided practical effects in favor of performance capture acting, partnering with New Zealand visual effects company Weta Digital. Wyatt cast James Franco as Will Rodman, while veteran performance capture actor Andy Serkis signed on to star as Caesar. \"Rise\" debuted on August 5, 2011. Critics reviewed it positively, especially praising the visual effects and Serkis's performance. It was a major box office hit, taking in $482 million globally, more than five times its $93 million budget. Weta's special effects earned the film two Visual Effects Society Awards and an Oscar nomination"
104 | },
105 | {
106 | "title": "Planet of the Apes",
107 | "text": "film stars Mark Wahlberg as astronaut Leo Davidson, who accidentally travels through a wormhole to a distant planet where talking apes enslave humans. He leads a human revolt and upends ape civilization by discovering that the apes evolved from the normal earth primates who had accompanied his mission, and arrived years before. Helena Bonham Carter played chimpanzee Ari, while Tim Roth played the human-hating chimpanzee General Thade. The film received mixed reviews; most critics believed it failed to compare to the original. Much of the negative commentary focused on the confusing plot and twist ending, though many reviewers praised the"
108 | }
109 | ]
110 | }
111 | ]
112 | }
--------------------------------------------------------------------------------
/prompts/asqa_light_inst.json:
--------------------------------------------------------------------------------
1 | {
2 | "instruction": "Instruction: Write a high-quality answer for the given question using only the provided search results and cite them properly using [1][2][3].",
3 | "demo_sep": "\n\n\n",
4 | "demo_prompt": "{INST}\n\nQuestion: {Q}\n\n{D}\nAnswer: {A}",
5 | "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6 | "demos": [
7 | {
8 | "question": "Which is the most rainy place on earth?",
9 | "answer": "Several places on Earth claim to be the most rainy, such as Lloró, Colombia, which reported an average annual rainfall of 12,717 mm between 1952 and 1989, and López de Micay, Colombia, which reported an annual 12,892 mm between 1960 and 2012 [3]. However, the official record is held by Mawsynram, India with an average annual rainfall of 11,872 mm [3], although nearby town Sohra, India, also known as Cherrapunji, holds the record for most rain in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861 [1].",
10 | "docs": [
11 | {
12 | "title": "Cherrapunji",
13 | "text": "Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861, however: it received in"
14 | },
15 | {
16 | "title": "Cherrapunji",
17 | "text": "Radio relay station known as Akashvani Cherrapunji. It broadcasts on FM frequencies. Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall"
18 | },
19 | {
20 | "title": "Mawsynram",
21 | "text": "Mawsynram Mawsynram () is a village in the East Khasi Hills district of Meghalaya state in north-eastern India, 65 kilometres from Shillong. Mawsynram receives one of the highest rainfalls in India. It is reportedly the wettest place on Earth, with an average annual rainfall of 11,872 mm, but that claim is disputed by Lloró, Colombia, which reported an average yearly rainfall of 12,717 mm between 1952 and 1989 and López de Micay, also in Colombia, which reported an annual 12,892 mm per year between 1960 and 2012. According to the \"Guinness Book of World Records\", Mawsynram received of rainfall in 1985. Mawsynram is located at 25° 18′"
22 | },
23 | {
24 | "title": "Earth rainfall climatology",
25 | "text": "Pacific Northwest, and the Sierra Nevada range are the wetter portions of the nation, with average rainfall exceeding per year. The drier areas are the Desert Southwest, Great Basin, valleys of northeast Arizona, eastern Utah, central Wyoming, eastern Oregon and Washington and the northeast of the Olympic Peninsula. The Big Bog on the island of Maui receives, on average, every year, making it the wettest location in the US, and all of Oceania. The annual average rainfall maxima across the continent lie across the northwest from northwest Brazil into northern Peru, Colombia, and Ecuador, then along the Atlantic coast of"
26 | },
27 | {
28 | "title": "Going to Extremes",
29 | "text": "in the world. Oymyakon in Siberia, where the average winter temperature is −47 °F (− 44 °C). Arica in Chile, where there had been fourteen consecutive years without rain. Fog is the only local source of water. Mawsynram in India, where average annual rainfall is 14 meters, falling within a four-month period in the monsoon season. The rainfall is approximately equal to that of its neighbor Cherrapunji. Dallol in Ethiopia, known as the 'Hell-hole of creation' where the temperature averages 94 °F (34 °C) over the year. In his second series, Middleton visited places without permanent towns, locations where \"survival\""
30 | }
31 | ]
32 | },
33 | {
34 | "question": "When did the us break away from england?",
35 | "answer": "The United States took the first step towards gaining independence from Great Britain when it declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, 1776, the date when the Declaration of Independence was officially adopted by Congress) [2]. The Treaty of Paris was later signed on September 3, 1783, formally separating the United States from the British Empire [3].",
36 | "docs": [
37 | {
38 | "title": "United States withdrawal from Saudi Arabia",
39 | "text": "United States withdrawal from Saudi Arabia Beginning during Operation Desert Shield in August 1990, while preparing for the Gulf War, the United States sent a large troop contingent to Saudi Arabia. After the war, remnant troops, primarily U.S. Air Force personnel, augmented by a smaller number of coordinating and training personnel from the U.S. Navy, U.S. Army and U.S. Marine Corps remained in Saudi Arabia under the aegis of Joint Task Force Southwest Asia (JTF-SWA), as part of Operation Southern Watch (OSW). The United Kingdom and France also maintained a small contingent of Royal Air Force and French Air Force"
40 | },
41 | {
42 | "title": "Decolonization of the Americas",
43 | "text": "and France has fully \"integrated\" most of its former colonies as fully constituent \"departments\" of France. The United States of America declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, the date when the Declaration of Independence was officially adopted by Congress), in so doing becoming the first independent, foreign-recognized nation in the Americas and the first European colonial entity to break from its mother country. Britain formally acknowledged American independence in 1783 after its defeat in the American Revolutionary War. Although initially occupying only the land east of the Mississippi"
44 | },
45 | {
46 | "title": "American Revolution",
47 | "text": "second British army at Yorktown in the fall of 1781, effectively ending the war. The Treaty of Paris was signed September 3, 1783, formally ending the conflict and confirming the new nation's complete separation from the British Empire. The United States took possession of nearly all the territory east of the Mississippi River and south of the Great Lakes, with the British retaining control of Canada and Spain taking Florida. Among the significant results of the revolution was the creation of the United States Constitution, establishing a relatively strong federal national government that included an executive, a national judiciary, and"
48 | },
49 | {
50 | "title": "Decolonization",
51 | "text": "accelerate decolonialization and bring an end to the colonial empires of its Western allies, most importantly during the 1956 Suez Crisis, but American military bases were established around the world and direct and indirect interventions continued in Korea, Indochina, Latin America (\"inter alia\", the 1965 occupation of the Dominican Republic), Africa, and the Middle East to oppose Communist invasions and insurgencies. Since the dissolution of the Soviet Union, the United States has been far less active in the Americas, but invaded Afghanistan and Iraq following the September 11 attacks in 2001, establishing army and air bases in Central Asia. Before"
52 | },
53 | {
54 | "title": "Decolonization",
55 | "text": "the responsibility of the United Kingdom (with a copy of the new constitution annexed), and finally, if approved, issuance of an Order of Council fixing the exact date of independence. After World War I, several former German and Ottoman territories in the Middle East, Africa, and the Pacific were governed by the UK as League of Nations mandates. Some were administered directly by the UK, and others by British dominions – Nauru and the Territory of New Guinea by Australia, South West Africa by the Union of South Africa, and Western Samoa by New Zealand. Egypt became independent in 1922,"
56 | }
57 | ]
58 | },
59 | {
60 | "question": "Who set the record for longest field goal?",
61 | "answer": "The record for the longest field goal in an NFL game was set by Matt Prater at 64 yards [1], but the record for the longest field goal at any level was 69 yards, kicked by collegiate kicker Ove Johansson in a 1976 Abilene Christian University football game against East Texas State University [2].",
62 | "docs": [
63 | {
64 | "title": "Field goal",
65 | "text": "toward its own end. The longest field goal kick in NFL history is 64 yards, a record set by Matt Prater on December 8, 2013. The previous record was 63, originally set by Tom Dempsey (1970) and then matched by Jason Elam (1998), Sebastian Janikowski (2011), David Akers (2012), and Graham Gano (2018). High school, college and most professional football leagues offer only a three-point field goal; however, some professional leagues have encouraged more rare kicks through \"four-point field goals\". NFL Europe encouraged long field goals of 50 yards or more by making those worth four points instead of three"
66 | },
67 | {
68 | "title": "Field goal range",
69 | "text": "35 and 40 yard lines (closer in a crosswind) often will go for the more risky fourth down conversion rather than risk either the touchback or the missed field goal. The longest field goal in recorded football history was 69 yards, set by collegiate kicker Ove Johansson, who was born in Sweden, in a 1976 Abilene Christian University football game against East Texas State University (now Texas A&M Commerce) at Shotwell Stadium in Abilene. The longest successful field goal in the NFL was 64 yards and was completed by Matt Prater in 2013. The NCAA record is 67 yards held"
70 | },
71 | {
72 | "title": "Field goal",
73 | "text": "both end zones) is only 66 yards. Scaccia, while playing indoor football, attempted a 64-yard kick that was inches short of success, hitting the crossbar. Longer field goals have been attempted at times; the longest attempt in the NFL, which was well short and was kicked into the wind, was 76 yards, attempted by Sebastian Janikowski of the Oakland Raiders, in a September 28, 2008 game against the San Diego Chargers. NFL Europe rewarded kickers that successfully kicked a field goal of longer than 50 yards with a bonus point, making such field goals worth 4 points instead of 3;"
74 | },
75 | {
76 | "title": "Field goal",
77 | "text": "this accomplishment is not the official record. All of the above kicks were successful with the use of a kicking tee, which was banned by the NCAA after the 1988 season. The longest known drop-kicked field goal in college football was a 62-yard kick from Pat O'Dea, an Australian kicker who played on the Wisconsin Badgers football team. O'Dea's kick took place in a blizzard against Northwestern on November 15, 1898. The longest field goal in U Sports football history is 59 yards, by Niko Difonte of Calgary Dinos, playing against the UBC Thunderbirds on November 11, 2017. The field"
78 | },
79 | {
80 | "title": "Field goal range",
81 | "text": "NFL and have been banned from NCAA since 1989) is 68 yards held by Fabrizio Scaccia, and the high school record 68 yards held by Dirk Borgognone; high school has wider goal posts and treats a field goal attempt that lands short in the field of play the same as a punt, making longer attempts much less risky. The indoor football record, with narrower and higher goal posts, is 63 yards (set by Aaron Mills), which is practically as long of a field goal as is possible in that variant of the sport, since the field in indoor football (including"
82 | }
83 | ]
84 | },
85 | {
86 | "question": "Who played galen in planet of the apes?",
87 | "answer": "In the 1968 film Planet of the Apes, Galen was played by Wright King [2]. And in the tv series Planet of the Apes, Galen was played by Roddy McDowall [1].",
88 | "docs": [
89 | {
90 | "title": "Planet of the Apes",
91 | "text": "installment. Jacobs died on June 27, 1973, bringing an end to the APJAC Productions era of the \"Planet of the Apes\" franchise. Former Fox executive Stan Hough took over as producer for the television project, titled \"Planet of the Apes\". CBS picked up the series for its 1974 autumn lineup. Ron Harper and James Naughton played Alan Virdon and Peter Burke, two 20th-century American astronauts who pass through a time warp to a future where apes subjugate humans (unlike the original film, the humans can speak). Roddy McDowall returned to the franchise as Galen, a chimpanzee who joins the astronauts."
92 | },
93 | {
94 | "title": "Planet of the Apes (1968 film)",
95 | "text": "chimpanzees: animal psychologist Zira (Kim Hunter) and surgeon Galen (Wright King). While unable to speak as his throat wound is healing, called \"Bright Eyes\" by Zira and placed with one of the captive primitive humans he later names \"Nova\", Taylor observes the enhanced society of talking apes and in a strict caste system: the gorillas being the military police, hunters and workers; the orangutans overseeing the affairs of government, science, and religion; and intellectual chimpanzees being mostly scientists. While their society is a theocracy similar to the beginnings of the human Industrial Era, the apes consider the primitive humans as"
96 | },
97 | {
98 | "title": "Planet of the Apes (1968 film)",
99 | "text": "Planet of the Apes (1968 film) Planet of the Apes is a 1968 American science fiction film directed by Franklin J. Schaffner. It stars Charlton Heston, Roddy McDowall, Kim Hunter, Maurice Evans, James Whitmore, James Daly and Linda Harrison. The screenplay by Michael Wilson and Rod Serling was loosely based on the 1963 French novel \"La Plan\u00e8te des Singes\" by Pierre Boulle. Jerry Goldsmith composed the groundbreaking avant-garde score. It was the first in a series of five films made between 1968 and 1973, all produced by Arthur P. Jacobs and released by 20th Century Fox. The film tells the"
100 | },
101 | {
102 | "title": "Planet of the Apes",
103 | "text": "Rupert Wyatt. To portray ape characters realistically, the production avoided practical effects in favor of performance capture acting, partnering with New Zealand visual effects company Weta Digital. Wyatt cast James Franco as Will Rodman, while veteran performance capture actor Andy Serkis signed on to star as Caesar. \"Rise\" debuted on August 5, 2011. Critics reviewed it positively, especially praising the visual effects and Serkis's performance. It was a major box office hit, taking in $482 million globally, more than five times its $93 million budget. Weta's special effects earned the film two Visual Effects Society Awards and an Oscar nomination"
104 | },
105 | {
106 | "title": "Planet of the Apes",
107 | "text": "film stars Mark Wahlberg as astronaut Leo Davidson, who accidentally travels through a wormhole to a distant planet where talking apes enslave humans. He leads a human revolt and upends ape civilization by discovering that the apes evolved from the normal earth primates who had accompanied his mission, and arrived years before. Helena Bonham Carter played chimpanzee Ari, while Tim Roth played the human-hating chimpanzee General Thade. The film received mixed reviews; most critics believed it failed to compare to the original. Much of the negative commentary focused on the confusing plot and twist ending, though many reviewers praised the"
108 | }
109 | ]
110 | }
111 | ]
112 | }
113 |
--------------------------------------------------------------------------------
/prompts/eli5_closedbook.json:
--------------------------------------------------------------------------------
1 | {
2 | "instruction": "Instruction: Write an accurate, engaging, and concise answer for the given question. Use an unbiased and journalistic tone. ",
3 | "demo_sep": "\n\n\n",
4 | "demo_prompt": "{INST}\n\nQuestion: {Q}\n\nAnswer: {A}",
5 | "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6 | "demos": [
7 | {
8 | "question": "Why did New York City try to ban food donations to the poor?",
9 | "answer": "New York City, under Mayor Michael Bloomberg's administration, banned citizens from donating food directly to homeless shelters because the city could not assess the salt, fat, and fiber content. Bloomberg's administration was heavily criticized for losing their common sense by becoming too focused on what people eat.",
10 | "docs": [
11 | {
12 | "title": "The Future Of America",
13 | "text": "believe that they are \u201chelping\u201d the homeless by passing such laws. In New York City, Mayor Bloomberg has banned citizens from donating food directly to homeless shelters and he is actually convinced that it was the right thing to do for the homeless\u2026 Mayor Michael Bloomberg\u2019s food police have struck again! Outlawed are food donations to homeless shelters because the city can\u2019t assess their salt, fat and fiber content, reports CBS 2\u2019s Marcia Kramer. Glenn Richter arrived at a West Side synagogue on Monday to collect surplus bagels \u2014 fresh nutritious bagels \u2014 to donate to the poor."
14 | },
15 | {
16 | "title": "mayor bloomberg",
17 | "text": "Amuck: Bloomberg Bans Food Donations in New York City Food Might Be Salty or Too High in Calories, City Explains Washington, D.C. \u2013 New York Mayor Michael Bloomberg\u2019s administration is now banning all food being offered to the city\u2019s homeless shelters. New York City\u2019s bureaucrats have become so singularly focused on what people eat, says the National Center for Public Policy Research, that they\u2019ve lost their common sense. \u201cSo much for serving the homeless: The Bloomberg administration is now taking the term \u2018food police\u2019 to new depths, blocking food donations to all government-run facilities that serve the"
18 | },
19 | {
20 | "title": "New York City bans food donations - WND",
21 | "text": "New York City bans food donations - WND Front Page Health U.S. New York City bans food donations Inability to control 'nutritional content' cited as reason New York City homeless shelters have Mayor Michael Bloomberg to thank for a halt in food donations, for which hungry families are waiting, according to one public policy advocate. \"The Bloomberg administration is now taking the term 'food police' to new depths, blocking food donations to all government-run facilities that serve the city's homeless,\" says Jeff Stier, a National Center for Public Policy Research senior fellow. Currently, no food can be given to government-run, New York City facilities, despite hungry crowds perfectly"
22 | },
23 | {
24 | "title": "New York City bans food donations - WND",
25 | "text": "New York City bans food donations - WND Services didn't return WND calls. Stier told WND that he specifically was told by Diamond that the policy was tied to the nutritional guidelines set by the mayor. \"They can say that this ban on donations is a long-standing policy, but they can\u2019t document it,\" Stier told WND. \"I've also been told that there are numerous food shelves that have been accepting food donations, not just one.\" Stier is a member of a New York Synagogue that has donated food for over a decade. He is outraged that the DHS' response to his demand to know why the practice can"
26 | },
27 | {
28 | "title": "New York City bans food donations - WND",
29 | "text": "New York City bans food donations - WND ban on donated food. In fact, it thrives because of food donations. New York City Rescue Mission has been providing food, clothing, shelter and spiritual hope for needy New Yorkers since 1872. \"We feed over 500 people a day, all through donations,\" said James Varnhagen, NYCRM director. \"Boxed food, canned food, prepared food, we take any food,\" he told WND. \"We couldn't survive without donations,\" he said."
30 | }
31 | ]
32 | },
33 | {
34 | "question": "What's the difference between Shia vs. Sunni Islam?",
35 | "answer": "The main difference between Shia and Sunni Muslim is related to ideological heritage and issues of leadership. This difference is first formed after the death of the Prophet Muhammad in 632 A.D. The ideological practice of the Sunni branch strictly follows Prophet Muhammad and his teachings, while the Shia branch follows Prophet Muhammad's son-in-law Ali. Nowadays, Sunni and Shia are the major branches of Islam.",
36 | "docs": [
37 | {
38 | "title": "The Sunni vs Shia Divide - Explained - Globaloi",
39 | "text": "centuries-long strained relationship between Sunnis and Shias. As a scholar of Islam and a public educator, I often field questions about Sunnis, Shias and the sects of Islam. What exactly is the Shia-Sunni divide? And what is its history? History of divide Both Sunnis and Shias \u2013 drawing their faith and practice from the Qur\u2019an and the life of the Prophet Muhammad \u2013 agree on most of the fundamentals of Islam. The differences are related more to historical events, ideological heritage and issues of leadership. The first and central difference emerged after the death of Prophet Muhammad in A.D. 632."
40 | },
41 | {
42 | "title": "What\u2019s the difference between Sunni and Shia Islam? \u2013 Macrosnaps",
43 | "text": "What\u2019s the difference between Sunni and Shia Islam? Sunni and Shia identities (the 2 main branches of Islam) first formed around a dispute over leadership succession after the death of the Prophet Muhammad in 632 A.D. Sunni is the larger branch (estimated 85-90% of total world Muslim population) and it's adherents are referred to as \"people of the tradition of Muhammad\", while Shia are \"followers\" of Muhammad's son-in-law and cousin Ali. Sunnis rely heavily on the practice of the Prophet Muhammad and his teachings, the Shia view their ayatollahs as reflections of God on earth. What challenges does the anti-IS"
44 | },
45 | {
46 | "title": "Difference between Sunni and Shia Muslims | Sunni vs Shia Muslims",
47 | "text": "of Muhammad, the last prophet of God. A follower of Islam is known as a Muslim. Many Muslims believe that their sole purpose is to worship and serve God, for which they have established five pillars of Islam that guides a Muslim on almost every aspect of life and society. Due to differences, Muslims have been divided into two primary sects: The Sunnis and the Shias. These two sects have many similarities and both consider themselves are Muslims, following the will of God. However, they are also different from each other in certain aspects. Both the Sunnis and the Shias,"
48 | },
49 | {
50 | "title": "What is the difference between Shia and Sunni Islam? - Islam Stack Exchange",
51 | "text": "What is the difference between Shia and Sunni Islam? - Islam Stack Exchange between Mutah marriage and Misyar marriage? What theological and historical factors distinguish Ibadi Islam from either Shia or Sunni schools? What are the principle/fundamental differences between Sunni and Shia? Nikah between a Sunni girl and Shia boy What is the difference between \u201cMubtalat-of-Wudu\u201d of Shia and Sunni? How can the Hadith be reliable when Sunnis and Shia follow different points of reference? Rejection of Mutawatir Hadith in Sunni Islam and Shia Islam"
52 | },
53 | {
54 | "title": "What is the difference between Sunni and Shia Islam? | Patrick Syder Travel",
55 | "text": "What is the difference between Sunni and Shia Islam? | Patrick Syder Travel What is the difference between Sunni and Shia Islam? This Channel 4 link answers some of the key questions about the difference between Sunni and Shia Islam and alarmingly, the politics on what is happening and why, in Syria\u2026\u2026. http://www.channel4.com/news/sunni-shia-islam-muslim-syria-middle-east-key-questions \u2190 Ethiopia Appeal \u2013 Help sponsor a nurse to train and to help others G\u00f6bekli Tepe, Turkey: a new wonder of the ancient world by Jeremy Seal (Telegraph Travel Section 23/04/2013) \u2192"
56 | }
57 | ]
58 | },
59 | {
60 | "question": "What causes Bi-polar disorder?",
61 | "answer": "Bipolar disorder is an emotional disorder that causes extreme mood swings between excitement and depression. The spectrum of mood swing may span from days to months. We are still not certain of the exact factors that cause such disorder, but genetics is considered a major factor.",
62 | "docs": [
63 | {
64 | "title": "Bi-polar disorder | definition of Bi-polar disorder by Medical dictionary",
65 | "text": "bi-polar disorder | definition of bi-polar disorder by medical dictionary https://medical-dictionary.thefreedictionary.com/bi-polar+disorder (redirected from bi-polar disorder) related to bi-polar disorder: depression bipolar disorder, formerly known as manic depression, is a mood disorder that causes radical emotional changes and mood swings, from manic, restless highs to depressive, listless lows. most bipolar individuals experience alternating episodes of mania and depression. bipolar disorder is characterized by alternating manic episodes in which the individual feels abnormally euphoric, optimistic, and energetic and depressive periods in which the individual feels sad, hopeless, guilty, and sometimes suicidal. manic or depressive periods may last for days, weeks, or months"
66 | },
67 | {
68 | "title": "Mania and Bi-Polar",
69 | "text": "can go from depressed to \u201csuper happy\u201d all in one day, or even in a few days, does not have a bi-polar disorder Bi-polar looks different depending on the severity of the symptoms. Most bi-polar diagnoses that are made are for bi-polar 2, with bi-polar 1 being much more rare. Bi-polar 1 is so severe that the individual will have periods of such agitation, or such reckless and seemingly foolish behavior that they put themselves or those around them in danger. It is not completely clear what causes bi-polar, but genetics seem to have a large role. The biggest factor"
70 | },
71 | {
72 | "title": "Bi-Polar disorder",
73 | "text": "Bi-Polar disorder Bi-polar is generally a cyclic disease where individuals display depressive and elevated episodes at regular intervals. It is a disorder resulting from the imbalance of the chemicals in the brain that causes a lot of fluctuations of mood. It is a fact that we all experience happy and sad moods, but people with bi-polar disorder experience the changes in mood at an increased level. The cause of this disorder is not known completely. However, it is estimated that there are different factors responsible for it. It is often connected to a genetic component. People suffering from the Bi-polar disorder are"
74 | },
75 | {
76 | "title": "For Individuals \u2014 Adam Schwartz",
77 | "text": "For Individuals \u2014 Adam Schwartz The information is extensive and covers a huge range of topics. Some of the topics include the different types of bi-polar, what it feels like, signs and symptoms, treatments and more. Black Dog Institute bi-polar causes resource specifically covers the variety of areas that could potentially be a cause of bi-polar disorder. Including genetics, environmental factors, pregnancy, and more. Black Dog Institute bi-polar treatments resource specifically covers multiple potential treatments options for bi-polar. Including management, types of psychological treatment, lifestyle changes, and more. Black Dog Institute bi-polar self-test resource is a short self-test for people who may be concerned if"
78 | },
79 | {
80 | "title": "Depression Bi-polar Disorder Symptoms 2019 | Win Over Depression",
81 | "text": "Depression Bi-polar Disorder Symptoms 2019 | Win Over Depression signs and symptoms of bipolar disorder. Learn more about the common symptoms of bipolar depression that some patients may experience. Home \u00bb Trending Health News \u00bb 10 Warning Signs of Bipolar Disorder: Depression. One of the most serious symptoms of bipolar disorder is. Bi Polar Depression. SEVERE SWINGS What is bipolar disorder, is it the same as manic depression, what are the symptoms and is there a cure? Bipolar disorder, or manic depression, causes symptoms of mania and depression. Read about bipolar disorder treatment, medications, and causes of this. Learn more about the different types of bipolar disorder. Find out"
82 | }
83 | ]
84 | },
85 | {
86 | "question": "How do student loans affect getting a mortgage?",
87 | "answer": "When applying for a mortgage, student loans can affect the debt to income ratio, which is a key factor in determining the amount that an individual can afford to pay for the mortgage. While student loan repayments do not appear in an individual's credit history and do not affect credit scores, lenders do consider the amount of an individual's student loan repayments when assessing their mortgage application. Some 83% of non-homeowners say student loan debt is preventing them from buying a home, according to the National Association of Realtors. It is important to note that student loans do not prevent an individual from getting a mortgage.",
88 | "docs": [
89 | {
90 | "title": "Student Loans \u2013 How do they work? | The Financial Review",
91 | "text": "typical debt. Student loan repayments do not appear in an individual\u2019s credit history, therefore there are no implications whatsoever. This also extends to applications for credit cards \u2013 student \u2018loans\u2019 are not acknowledged. One noteworthy aspect that is affected by student loans however, is mortgage applications. Nevertheless, it does not prevent an individual from getting a mortgage. For example, lenders will consider the amount of an individual\u2019s student loan repayments in order to assess the debt to income ratio and therefore establish the amount that the individual can afford to pay for the mortgage. Just as they do with other"
92 | },
93 | {
94 | "title": "How Does Student Loan Debt Affect Buying a Home? | Experian",
95 | "text": "Rates & Affordability How Student Loans Affect Getting a Mortgage Student Loan Impact on Credit Scores Other Factors for Getting Approved for a Mortgage If you're a recent college grad and hope to become a homeowner in the near future, you should know that student loan debt could affect buying a home by making it more difficult to get a mortgage. Some 83% of non-homeowners say student loan debt is preventing them from buying a home, according to the National Association of Realtors (NAR). But while student loan payments can make it harder to save for a down payment on"
96 | },
97 | {
98 | "title": "Studentloanify - How your student loans affect your home mortgage prospects",
99 | "text": "Though it may not seem fair, your student loan situation impacts your home mortgage outlook. Many people carry student loan debt, but it\u2019s the amount of the loan and how you handle your student loan repayment plan that will influence your ability to get a home mortgage as well as what your interest rate will be. Here are some specific factors about your student loan that will affect your home mortgage prospects. On your mortgage loan application, you will have to report how much your monthly student loan payment is. This amount will be deducted from your monthly gross income"
100 | },
101 | {
102 | "title": "How do student loans affect your credit score? | Student Loan Planner",
103 | "text": "How do student loans affect your credit score? | Student Loan Planner Your credit score is the three-digit number that dictates a lot in your adult life. Whether you\u2019re applying for a mortgage or looking to get an auto loan, this seemingly arbitrary number determines whether you get approved for a loan and also affects your interest rate. If you\u2019re a student loan borrower you may wonder, \u201cDo student loans affect credit score?\u201d You might be especially curious if you\u2019re in the process of applying for a mortgage. Here\u2019s how student loans affect your credit score and what to know for big life events, like getting a mortgage. Do student loans affect"
104 | },
105 | {
106 | "title": "Does Student Loan Debt Affect Getting A Mortgage?",
107 | "text": "Does Student Loan Debt Affect Getting A Mortgage? Home \u00bb Does Student Loan Debt Affect Getting A Mortgage? Last year, I helped answer a reader\u2019s question about applying for a mortgage while on Income Based Repayment. However, over the last several months, I\u2019ve been getting bombarded with questions about how student loan debt impacts your ability to get a mortgage. Maybe it\u2019s because the housing market is improving, or maybe it\u2019s because people are finally taking their student loan debt seriously. Anyway, I wanted to share a few reader questions and then look at whether student loan debt affects getting a mortgage. Here are the reader questions I\u2019ve"
108 | }
109 | ]
110 | }
111 | ]
112 | }
113 |
--------------------------------------------------------------------------------
/prompts/eli5_light_inst.json:
--------------------------------------------------------------------------------
1 | {
2 | "instruction": "Instruction: Write a high-quality answer for the given question using only the provided search results and cite them properly using [1][2][3].",
3 | "demo_sep": "\n\n\n",
4 | "demo_prompt": "{INST}\n\nQuestion: {Q}\n\n{D}\nAnswer: {A}",
5 | "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6 | "demos": [
7 | {
8 | "question": "Why did New York City try to ban food donations to the poor?",
9 | "answer": "New York City, under Mayor Michael Bloomberg's administration, banned citizens from donating food directly to homeless shelters because the city could not assess the salt, fat, and fiber content [1][2][3]. Bloomberg's administration was heavily criticized for losing their common sense by becoming too focused on what people eat [2].",
10 | "docs": [
11 | {
12 | "title": "The Future Of America",
13 | "text": "believe that they are \u201chelping\u201d the homeless by passing such laws. In New York City, Mayor Bloomberg has banned citizens from donating food directly to homeless shelters and he is actually convinced that it was the right thing to do for the homeless\u2026 Mayor Michael Bloomberg\u2019s food police have struck again! Outlawed are food donations to homeless shelters because the city can\u2019t assess their salt, fat and fiber content, reports CBS 2\u2019s Marcia Kramer. Glenn Richter arrived at a West Side synagogue on Monday to collect surplus bagels \u2014 fresh nutritious bagels \u2014 to donate to the poor."
14 | },
15 | {
16 | "title": "mayor bloomberg",
17 | "text": "Amuck: Bloomberg Bans Food Donations in New York City Food Might Be Salty or Too High in Calories, City Explains Washington, D.C. \u2013 New York Mayor Michael Bloomberg\u2019s administration is now banning all food being offered to the city\u2019s homeless shelters. New York City\u2019s bureaucrats have become so singularly focused on what people eat, says the National Center for Public Policy Research, that they\u2019ve lost their common sense. \u201cSo much for serving the homeless: The Bloomberg administration is now taking the term \u2018food police\u2019 to new depths, blocking food donations to all government-run facilities that serve the"
18 | },
19 | {
20 | "title": "New York City bans food donations - WND",
21 | "text": "New York City bans food donations - WND Front Page Health U.S. New York City bans food donations Inability to control 'nutritional content' cited as reason New York City homeless shelters have Mayor Michael Bloomberg to thank for a halt in food donations, for which hungry families are waiting, according to one public policy advocate. \"The Bloomberg administration is now taking the term 'food police' to new depths, blocking food donations to all government-run facilities that serve the city's homeless,\" says Jeff Stier, a National Center for Public Policy Research senior fellow. Currently, no food can be given to government-run, New York City facilities, despite hungry crowds perfectly"
22 | },
23 | {
24 | "title": "New York City bans food donations - WND",
25 | "text": "New York City bans food donations - WND Services didn't return WND calls. Stier told WND that he specifically was told by Diamond that the policy was tied to the nutritional guidelines set by the mayor. \"They can say that this ban on donations is a long-standing policy, but they can\u2019t document it,\" Stier told WND. \"I've also been told that there are numerous food shelves that have been accepting food donations, not just one.\" Stier is a member of a New York Synagogue that has donated food for over a decade. He is outraged that the DHS' response to his demand to know why the practice can"
26 | },
27 | {
28 | "title": "New York City bans food donations - WND",
29 | "text": "New York City bans food donations - WND ban on donated food. In fact, it thrives because of food donations. New York City Rescue Mission has been providing food, clothing, shelter and spiritual hope for needy New Yorkers since 1872. \"We feed over 500 people a day, all through donations,\" said James Varnhagen, NYCRM director. \"Boxed food, canned food, prepared food, we take any food,\" he told WND. \"We couldn't survive without donations,\" he said."
30 | }
31 | ]
32 | },
33 | {
34 | "question": "What's the difference between Shia vs. Sunni Islam?",
35 | "answer": "The main difference between Shia and Sunni Islam is related to ideological heritage and issues of leadership [1]. This difference is first formed after the death of the Prophet Muhammad in 632 A.D. [1][2]. The ideological practice of the Sunni branch strictly follows Prophet Muhammad and his teachings, while the Shia branch follows Prophet Muhammad's son-in-law Ali [2]. Nowadays, Sunni and Shia are the major branches of Islam [3].",
36 | "docs": [
37 | {
38 | "title": "The Sunni vs Shia Divide - Explained - Globaloi",
39 | "text": "centuries-long strained relationship between Sunnis and Shias. As a scholar of Islam and a public educator, I often field questions about Sunnis, Shias and the sects of Islam. What exactly is the Shia-Sunni divide? And what is its history? History of divide Both Sunnis and Shias \u2013 drawing their faith and practice from the Qur\u2019an and the life of the Prophet Muhammad \u2013 agree on most of the fundamentals of Islam. The differences are related more to historical events, ideological heritage and issues of leadership. The first and central difference emerged after the death of Prophet Muhammad in A.D. 632."
40 | },
41 | {
42 | "title": "What\u2019s the difference between Sunni and Shia Islam? \u2013 Macrosnaps",
43 | "text": "What\u2019s the difference between Sunni and Shia Islam? Sunni and Shia identities (the 2 main branches of Islam) first formed around a dispute over leadership succession after the death of the Prophet Muhammad in 632 A.D. Sunni is the larger branch (estimated 85-90% of total world Muslim population) and it's adherents are referred to as \"people of the tradition of Muhammad\", while Shia are \"followers\" of Muhammad's son-in-law and cousin Ali. Sunnis rely heavily on the practice of the Prophet Muhammad and his teachings, the Shia view their ayatollahs as reflections of God on earth. What challenges does the anti-IS"
44 | },
45 | {
46 | "title": "Difference between Sunni and Shia Muslims | Sunni vs Shia Muslims",
47 | "text": "of Muhammad, the last prophet of God. A follower of Islam is known as a Muslim. Many Muslims believe that their sole purpose is to worship and serve God, for which they have established five pillars of Islam that guides a Muslim on almost every aspect of life and society. Due to differences, Muslims have been divided into two primary sects: The Sunnis and the Shias. These two sects have many similarities and both consider themselves are Muslims, following the will of God. However, they are also different from each other in certain aspects. Both the Sunnis and the Shias,"
48 | },
49 | {
50 | "title": "What is the difference between Shia and Sunni Islam? - Islam Stack Exchange",
51 | "text": "What is the difference between Shia and Sunni Islam? - Islam Stack Exchange between Mutah marriage and Misyar marriage? What theological and historical factors distinguish Ibadi Islam from either Shia or Sunni schools? What are the principle/fundamental differences between Sunni and Shia? Nikah between a Sunni girl and Shia boy What is the difference between \u201cMubtalat-of-Wudu\u201d of Shia and Sunni? How can the Hadith be reliable when Sunnis and Shia follow different points of reference? Rejection of Mutawatir Hadith in Sunni Islam and Shia Islam"
52 | },
53 | {
54 | "title": "What is the difference between Sunni and Shia Islam? | Patrick Syder Travel",
55 | "text": "What is the difference between Sunni and Shia Islam? | Patrick Syder Travel What is the difference between Sunni and Shia Islam? This Channel 4 link answers some of the key questions about the difference between Sunni and Shia Islam and alarmingly, the politics on what is happening and why, in Syria\u2026\u2026. http://www.channel4.com/news/sunni-shia-islam-muslim-syria-middle-east-key-questions \u2190 Ethiopia Appeal \u2013 Help sponsor a nurse to train and to help others G\u00f6bekli Tepe, Turkey: a new wonder of the ancient world by Jeremy Seal (Telegraph Travel Section 23/04/2013) \u2192"
56 | }
57 | ]
58 | },
59 | {
60 | "question": "What causes Bi-polar disorder?",
61 | "answer": "Bipolar disorder is an emotional disorder that causes extreme mood swings between excitement and depression [1][3]. The spectrum of mood swing may span from days to months [1][2]. We are still not certain of the exact factors that cause such disorder, but genetics is considered a major factor [2][3].",
62 | "docs": [
63 | {
64 | "title": "Bi-polar disorder | definition of Bi-polar disorder by Medical dictionary",
65 | "text": "bi-polar disorder | definition of bi-polar disorder by medical dictionary https://medical-dictionary.thefreedictionary.com/bi-polar+disorder (redirected from bi-polar disorder) related to bi-polar disorder: depression bipolar disorder, formerly known as manic depression, is a mood disorder that causes radical emotional changes and mood swings, from manic, restless highs to depressive, listless lows. most bipolar individuals experience alternating episodes of mania and depression. bipolar disorder is characterized by alternating manic episodes in which the individual feels abnormally euphoric, optimistic, and energetic and depressive periods in which the individual feels sad, hopeless, guilty, and sometimes suicidal. manic or depressive periods may last for days, weeks, or months"
66 | },
67 | {
68 | "title": "Mania and Bi-Polar",
69 | "text": "can go from depressed to \u201csuper happy\u201d all in one day, or even in a few days, does not have a bi-polar disorder Bi-polar looks different depending on the severity of the symptoms. Most bi-polar diagnoses that are made are for bi-polar 2, with bi-polar 1 being much more rare. Bi-polar 1 is so severe that the individual will have periods of such agitation, or such reckless and seemingly foolish behavior that they put themselves or those around them in danger. It is not completely clear what causes bi-polar, but genetics seem to have a large role. The biggest factor"
70 | },
71 | {
72 | "title": "Bi-Polar disorder",
73 | "text": "Bi-Polar disorder Bi-polar is generally a cyclic disease where individuals display depressive and elevated episodes at regular intervals. It is a disorder resulting from the imbalance of the chemicals in the brain that causes a lot of fluctuations of mood. It is a fact that we all experience happy and sad moods, but people with bi-polar disorder experience the changes in mood at an increased level. The cause of this disorder is not known completely. However, it is estimated that there are different factors responsible for it. It is often connected to a genetic component. People suffering from the Bi-polar disorder are"
74 | },
75 | {
76 | "title": "For Individuals \u2014 Adam Schwartz",
77 | "text": "For Individuals \u2014 Adam Schwartz The information is extensive and covers a huge range of topics. Some of the topics include the different types of bi-polar, what it feels like, signs and symptoms, treatments and more. Black Dog Institute bi-polar causes resource specifically covers the variety of areas that could potentially be a cause of bi-polar disorder. Including genetics, environmental factors, pregnancy, and more. Black Dog Institute bi-polar treatments resource specifically covers multiple potential treatments options for bi-polar. Including management, types of psychological treatment, lifestyle changes, and more. Black Dog Institute bi-polar self-test resource is a short self-test for people who may be concerned if"
78 | },
79 | {
80 | "title": "Depression Bi-polar Disorder Symptoms 2019 | Win Over Depression",
81 | "text": "Depression Bi-polar Disorder Symptoms 2019 | Win Over Depression signs and symptoms of bipolar disorder. Learn more about the common symptoms of bipolar depression that some patients may experience. Home \u00bb Trending Health News \u00bb 10 Warning Signs of Bipolar Disorder: Depression. One of the most serious symptoms of bipolar disorder is. Bi Polar Depression. SEVERE SWINGS What is bipolar disorder, is it the same as manic depression, what are the symptoms and is there a cure? Bipolar disorder, or manic depression, causes symptoms of mania and depression. Read about bipolar disorder treatment, medications, and causes of this. Learn more about the different types of bipolar disorder. Find out"
82 | }
83 | ]
84 | },
85 | {
86 | "question": "How do student loans affect getting a mortgage?",
87 | "answer": "When applying for a mortgage, student loans can affect the debt to income ratio, which is a key factor in determining the amount that an individual can afford to pay for the mortgage [1]. While student loan repayments do not appear in an individual's credit history and do not affect credit scores, lenders do consider the amount of an individual's student loan repayments when assessing their mortgage application [1][2][3]. Some 83% of non-homeowners say student loan debt is preventing them from buying a home, according to the National Association of Realtors [2]. It is important to note that student loans do not prevent an individual from getting a mortgage [1].",
88 | "docs": [
89 | {
90 | "title": "Student Loans \u2013 How do they work? | The Financial Review",
91 | "text": "typical debt. Student loan repayments do not appear in an individual\u2019s credit history, therefore there are no implications whatsoever. This also extends to applications for credit cards \u2013 student \u2018loans\u2019 are not acknowledged. One noteworthy aspect that is affected by student loans however, is mortgage applications. Nevertheless, it does not prevent an individual from getting a mortgage. For example, lenders will consider the amount of an individual\u2019s student loan repayments in order to assess the debt to income ratio and therefore establish the amount that the individual can afford to pay for the mortgage. Just as they do with other"
92 | },
93 | {
94 | "title": "How Does Student Loan Debt Affect Buying a Home? | Experian",
95 | "text": "Rates & Affordability How Student Loans Affect Getting a Mortgage Student Loan Impact on Credit Scores Other Factors for Getting Approved for a Mortgage If you're a recent college grad and hope to become a homeowner in the near future, you should know that student loan debt could affect buying a home by making it more difficult to get a mortgage. Some 83% of non-homeowners say student loan debt is preventing them from buying a home, according to the National Association of Realtors (NAR). But while student loan payments can make it harder to save for a down payment on"
96 | },
97 | {
98 | "title": "Studentloanify - How your student loans affect your home mortgage prospects",
99 | "text": "Though it may not seem fair, your student loan situation impacts your home mortgage outlook. Many people carry student loan debt, but it\u2019s the amount of the loan and how you handle your student loan repayment plan that will influence your ability to get a home mortgage as well as what your interest rate will be. Here are some specific factors about your student loan that will affect your home mortgage prospects. On your mortgage loan application, you will have to report how much your monthly student loan payment is. This amount will be deducted from your monthly gross income"
100 | },
101 | {
102 | "title": "How do student loans affect your credit score? | Student Loan Planner",
103 | "text": "How do student loans affect your credit score? | Student Loan Planner Your credit score is the three-digit number that dictates a lot in your adult life. Whether you\u2019re applying for a mortgage or looking to get an auto loan, this seemingly arbitrary number determines whether you get approved for a loan and also affects your interest rate. If you\u2019re a student loan borrower you may wonder, \u201cDo student loans affect credit score?\u201d You might be especially curious if you\u2019re in the process of applying for a mortgage. Here\u2019s how student loans affect your credit score and what to know for big life events, like getting a mortgage. Do student loans affect"
104 | },
105 | {
106 | "title": "Does Student Loan Debt Affect Getting A Mortgage?",
107 | "text": "Does Student Loan Debt Affect Getting A Mortgage? Home \u00bb Does Student Loan Debt Affect Getting A Mortgage? Last year, I helped answer a reader\u2019s question about applying for a mortgage while on Income Based Repayment. However, over the last several months, I\u2019ve been getting bombarded with questions about how student loan debt impacts your ability to get a mortgage. Maybe it\u2019s because the housing market is improving, or maybe it\u2019s because people are finally taking their student loan debt seriously. Anyway, I wanted to share a few reader questions and then look at whether student loan debt affects getting a mortgage. Here are the reader questions I\u2019ve"
108 | }
109 | ]
110 | }
111 | ]
112 | }
--------------------------------------------------------------------------------
/prompts/qampari_closedbook.json:
--------------------------------------------------------------------------------
1 | {
2 | "instruction": "Instruction: Provide a list of accurate answers for the given question. Separate answers by commas. For questions that have more than 5 answers, write at least 5 answers.",
3 | "demo_sep": "\n\n\n",
4 | "demo_prompt": "{INST}\n\nQuestion: {Q}\n\nAnswer: {A}",
5 | "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6 | "demos": [
7 | {
8 | "question": "Which books were written by Nevil Shute?",
9 | "answer": "Marazan, Stephen Morris, Beyond the Black Stump, Lonely Road, The Chequer Board, In the Wet, Trustee from the Toolroom, Round the Bend, No Highway, Ruined City, On the Beach.",
10 | "docs": [
11 | {
12 | "title": "Nevil Shute",
13 | "text": "early stages. My congratulations.\" His celebrity as a writer caused the Ministry of Information to send him to the Normandy Landings on 6 June 1944 and later to Burma as a correspondent. He finished the war with the rank of lieutenant commander in the Royal Navy Volunteer Reserves (RNVR). Shute's first novel, \"Stephen Morris\", was written in 1923, but not published until 1961. His first published novel was \"Marazan\", which came out in 1926. After that he averaged one novel every two years through the 1950s, with the exception of a six-year hiatus while he was establishing his own aircraft"
14 | },
15 | {
16 | "title": "Nevil Shute",
17 | "text": "theme is the bridging of social barriers such as class (\"Lonely Road\" and \"Landfall\"), race (\"The Chequer Board\"), or religion (\"Round the Bend\"). The Australian novels are individual hymns to that country, with subtle disparagement of the mores of the United States (\"Beyond the Black Stump\") and overt antipathy towards the post-World War II socialist government of Shute's native Britain (\"The Far Country\" and \"In the Wet\"). Shute's heroes tended to be like himself: middle class solicitors, doctors, accountants, bank managers, engineers, generally university graduates. However (as in \"Trustee from the Toolroom\"), Shute valued the honest artisans and their social"
18 | },
19 | {
20 | "title": "Nevil Shute",
21 | "text": "construction company, Airspeed Ltd. His popularity grew slowly with each novel, but he became much more famous after the publication of \"On the Beach\" in 1957. Shute's novels are written in a simple, highly readable style, with clearly delineated plot lines. Where there is a romantic element, sex is referred to only obliquely. Many of the stories are introduced by a narrator who is not a character in the story. The most common theme in Shute's novels is the dignity of work, spanning all classes, whether an Eastern European bar \"hostess\" (\"Ruined City\") or brilliant boffin (\"No Highway\"). Another recurrent"
22 | },
23 | {
24 | "title": "The Chequer Board",
25 | "text": "the Burmese people\", both of which are central to the book's story. Shute was concerned that sales of the book in the United States would be negatively impacted by the book's open-minded handling of racial issues; as it turned out, sales soared. Shute and his wife traveled the U.S. on Greyhound buses to \"\"get in touch with the man on the street,\"\" finding the experience refreshing. Afterwards he wrote \"\"Sincerity is the first attribute for making money in the business of writing novels.\"\" The Chequer Board The Chequer Board is a novel by Nevil Shute, first published in the United"
26 | },
27 | {
28 | "title": "In the Wet",
29 | "text": "had used the idea of multiple votes for merit in his short story \"The Curious Republic of Gondour\". In the Wet In The Wet is a novel by Nevil Shute that was first published in the United Kingdom in 1953. It contains many of the typical elements of a hearty and adventurous Shute yarn such as flying, the future, mystic states, and ordinary people doing extraordinary things. The story is opened by its initial narrator \u2013 an Anglican priest in the Bush Brotherhood named Roger Hargreaves \u2013 who describes his ordinary circumstances in a large parish of the Australian outback"
30 | }
31 | ]
32 | },
33 | {
34 | "question": "Which film has Gong Li as a member of its cast?",
35 | "answer": "The Story of Qiu Ju, Farewell My Concubine, Flirting Scholar, The Monkey King 2, Mulan, Saturday Fiction, Coming Home.",
36 | "docs": [
37 | {
38 | "title": "Gong Li",
39 | "text": "Gong Li Gong Li (born 31 December 1965) is a Chinese-born Singaporean film actress. She achieved international prominence through her close collaborations with Chinese director Zhang Yimou and won the Volpi Cup for Best Actress at Venice for her performance in his 1992 film \"The Story of Qiu Ju\". She has been credited with helping to bring Chinese cinema to prominence in Europe and the United States. In 2006, she was voted the most beautiful woman in China. Gong has won numerous accolades for her work as an actress; she won the New York Film Critics Circle Award for Best"
40 | },
41 | {
42 | "title": "Gong Li",
43 | "text": "making her realize that she has assisted the dark cynical system. In 1993, she received a New York Film Critics Circle award for her role in \"Farewell My Concubine\" (1993). Directed by Chen Kaige, the film was her first major role with a director other than Zhang Yimou. In the same year, she was awarded with the Berlinale Camera at the 43rd Berlin International Film Festival. \"Premiere\" magazine ranked her performance in \"Farewell My Concubine\" as the 89th greatest performance of all time. She also worked with renowned director Stephen Chow in comedy films \"\" (1991) and \"Flirting Scholar\" (1993)."
44 | },
45 | {
46 | "title": "Gong Li",
47 | "text": "International Film Festival. Later that same year, she reunited with Zhang Yimou for the film \"Coming Home\", which is set during the throes of the Cultural Revolution; this film was their first collaboration since 2006. In 2016, Gong took on her first action role in \"The Monkey King 2\", playing the White Bone Demon. In 2018, Gong was cast in Lou Ye's period drama \"Saturday Fiction\", where she plays an actress who is working undercover gathering intelligence for the Allies. That year, she was also cast in the live-action adaptation of the 1998 Disney animated film \"Mulan\", as an unspecified"
48 | },
49 | {
50 | "title": "Zhang Yimou",
51 | "text": "in Zhang's earlier films. \"Raise the Red Lantern\" was nominated in the Best Foreign Language Film category at the 1992 Academy Awards, becoming the second Chinese film to earn this distinction (after Zhang's \"Ju Dou\"). It eventually lost out to Gabriele Salvatores's \"Mediterraneo\". Zhang's next directorial work, \"The Story of Qiu Ju\", in 1992, once again starring Gong Li in the lead role. The film, which tells the tale of a peasant woman seeking justice for her husband after he was beaten by a village official, was a hit at film festivals and won the Golden Lion award at the"
52 | },
53 | {
54 | "title": "Gong Li",
55 | "text": "Gong Li Gong Li (born 31 December 1965) is a Chinese-born Singaporean film actress. She achieved international prominence through her close collaborations with Chinese director Zhang Yimou and won the Volpi Cup for Best Actress at Venice for her performance in his 1992 film \"The Story of Qiu Ju\". She has been credited with helping to bring Chinese cinema to prominence in Europe and the United States. In 2006, she was voted the most beautiful woman in China. Gong has won numerous accolades for her work as an actress; she won the New York Film Critics Circle Award for Best"
56 | }
57 | ]
58 | },
59 | {
60 | "question": "In which years did Patti LaBelle publish music?",
61 | "answer": "2006, 1977, 2004, 2005, 2000.",
62 | "docs": [
63 | {
64 | "title": "The Gospel According to Patti LaBelle",
65 | "text": "The Gospel According to Patti LaBelle The Gospel According to Patti LaBelle is the first gospel album released by singer Patti LaBelle, released in November 2006. This project began three years ago when Patti's late musical director and close friend Budd Ellison told a skeptical LaBelle that \"it's now or never, Patti.\" The album is dedicated to his memory as he succumbed to prostate cancer before the album saw a release. The album was released on November 21, 2006 through indie label Umbrella/Bungalow Records, also home to Carl Thomas, Rodney Jerkins, Dean \"DC\" Charles, and other artists. \"The Gospel According"
66 | },
67 | {
68 | "title": "Patti LaBelle (album)",
69 | "text": "scaled the high sixties on the \"Billboard\" R&B chart, it soon became one of her famous show-stoppers while performing the song. LaBelle performed the song at her first solo concert in London, getting a standing ovation, which helped to give LaBelle motivation to continue her career. The album, when released, performed successfully, reaching number 62 on the \"Billboard\" 200 and number 31 on the R&B albums chart, while critics hailed the album. Patti LaBelle (album) Patti LaBelle is the debut solo album by singer Patti LaBelle, released in 1977. The first album LaBelle recorded after sixteen years fronting the band"
70 | },
71 | {
72 | "title": "Patti LaBelle",
73 | "text": "win. In 2000, LaBelle released her final MCA album, \"When a Woman Loves\", before signing with Def Soul Classics to release the 2004 album, \"Timeless Journey\". Following the release of her 2005 covers album, \"Classic Moments\", LaBelle engaged in a rivalry with Antonio \"L.A.\" Reid over the direction of her career, leading to her leaving the label.In the same year, the World Music Awards recognized her years in the music business by awarding her the Legend Award. In 2006, she released her first gospel album, \"The Gospel According to Patti LaBelle\" on the Bungalo label, the album later peaking at"
74 | },
75 | {
76 | "title": "Patti LaBelle",
77 | "text": "Patti LaBelle Patti LaBelle (born Patricia Louise Holt; May 24, 1944) is an American singer, actress, and entrepreneur. LaBelle began her career in the early 1960s as lead singer and front woman of the vocal group, Patti LaBelle and the Bluebelles. Following the group's name change to Labelle in the early 1970s, they released the iconic disco song \"Lady Marmalade\" and the group later became the first African-American vocal group to land the cover of \"Rolling Stone\" magazine. After the group split in 1976, LaBelle began a successful solo career, starting with her critically acclaimed debut album, which included the"
78 | },
79 | {
80 | "title": "The Gospel According to Patti LaBelle",
81 | "text": "Billboard's Top Gospel Albums chart for 17 weeks. \"Where Love Begins,\" a duet with Yolanda Adams was played frequently on R&B and gospel radio stations and debuted at #68 on Billboard's Hot R&B/Hip-Hop tracks. The second single \"Anything\" featuring Kanye West, Mary Mary and Consequence hit #64 on Billboards Hot R&B/Hip-Hop tracks. In 2008, the album was nominated for a Dove Award for Contemporary Gospel Album of the Year at the 39th GMA Dove Awards. The Gospel According to Patti LaBelle The Gospel According to Patti LaBelle is the first gospel album released by singer Patti LaBelle, released in November"
82 | }
83 | ]
84 | },
85 | {
86 | "question": "Glenn Ford was a member of cast in which film?",
87 | "answer": "So Ends Our Night, Heaven with a Barbed Wire Fence, Happy Birthday to Me, The Greatest Gift, The Gift, The Brotherhood of the Bell.",
88 | "docs": [
89 | {
90 | "title": "Glenn Ford",
91 | "text": "name came from his father's hometown of Glenford, Alberta. His first major movie part was in the 1939 film, \"Heaven with a Barbed Wire Fence\". Top Hollywood director John Cromwell was impressed enough with his work to borrow him from Columbia for the independently produced drama, \"So Ends Our Night\" (1941), where Ford delivered a poignant portrayal of a 19-year-old German exile on the run in Nazi-occupied Europe. Working with Academy Award-winning Fredric March and wooing (onscreen) 30-year-old Margaret Sullavan, recently nominated for an Oscar, Ford's shy, ardent young refugee riveted attention even in such stellar company. \"Glenn Ford, a"
92 | },
93 | {
94 | "title": "Glenn Ford",
95 | "text": "were Westerns. He suggested doing a Western series, instead, which resulted in the \"modern-day Western\" series, \"Cade's County\". Ford played southwestern Sheriff Cade for one season (1971\u20131972) in a mix of police mystery and western drama. In \"The Family Holvak\" (1975\u20131976), Ford portrayed a Depression-era preacher in a family drama, reprising the same character he had played in the TV film, \"The Greatest Gift\". In 1978 Ford was host, presenter and narrator of the disaster documentary series 'When Havoc Struck'. In 1981, Ford co-starred with Melissa Sue Anderson in the slasher film \"Happy Birthday to Me\". In 1991, Ford agreed"
96 | },
97 | {
98 | "title": "CBS Thursday Night Movie",
99 | "text": "Night Movie\" opened its fall schedule with the premiere of a low-budget, made-for-TV movie, rather than a proven Hollywood blockbuster guaranteed to lure mass viewership, it became CBS's way of declaring its commitment to product that, although cheaply manufactured, was nevertheless new and topical. In this case, the movie was \"The Brotherhood of the Bell\", and the film's star was Glenn Ford, a movie actor who had never appeared in a television-film. In fact, before shooting on the project even began, Ford had been warned by friends in the industry that he would hate the experience. Instead, the actor reported"
100 | },
101 | {
102 | "title": "The Trouble with Girls (film) ",
103 | "text": "with Charlene, but when she refuses to give in, he deceives her and uses the local police force to be sure that she must leave on the train with the rest of the troupe. Cast notes In June 1959 it was announced that Don Mankiewicz would write a screenplay of an unpublished story by Mauri Grashin, Day Keene, and Dwight Babcock. By December 1960, with the project titled \"Chautauqua\", MGM was ready to make the film with Glenn Ford. Rumours circulating in Hollywood at the time stated that Presley would co-star with Ford, Hope Lange, and Arthur O'Connell, but nothing"
104 | },
105 | {
106 | "title": "Trouble in the Glen",
107 | "text": "Mel Ferrer. It was Orson Welles' fifth British movie in six months. Filming started 15 December 1953. The film received very poor reviews. Trouble in the Glen Trouble in the Glen is a 1954 British comedy film directed by Herbert Wilcox and starring Margaret Lockwood, Orson Welles, Forrest Tucker and Victor McLaglen. It is loosely based on Maurice Walsh's 1950 novel of the same name. It was filmed in Trucolor for Republic Pictures. After moving from South America to the Scottish Highlands, millionaire Sanin Cejador y Mengues (Welles) reassumes the title of laird of Glen Easan, which he inherited from"
108 | }
109 | ]
110 | }
111 | ]
112 | }
113 |
--------------------------------------------------------------------------------
/prompts/qampari_default.json:
--------------------------------------------------------------------------------
1 | {
2 | "instruction": "Instruction: Provide a list of accurate answers for the given question using only the provided search results (some of which might be irrelevant) and cite them properly. Always cite one and only one document for each answer. Separate answers by commas. For questions that have more than 5 answers, write at least 5 answers.",
3 | "demo_sep": "\n\n\n",
4 | "demo_prompt": "{INST}\n\nQuestion: {Q}\n\n{D}\nAnswer: {A}",
5 | "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6 | "demos": [
7 | {
8 | "question": "Which books were written by Nevil Shute?",
9 | "answer": "Marazan [1], Stephen Morris [1], Beyond the Black Stump [2], Lonely Road [2], The Chequer Board [2], In the Wet [2], Trustee from the Toolroom [2], Round the Bend [2], No Highway [3], Ruined City [3], On the Beach [3].",
10 | "docs": [
11 | {
12 | "title": "Nevil Shute",
13 | "text": "early stages. My congratulations.\" His celebrity as a writer caused the Ministry of Information to send him to the Normandy Landings on 6 June 1944 and later to Burma as a correspondent. He finished the war with the rank of lieutenant commander in the Royal Navy Volunteer Reserves (RNVR). Shute's first novel, \"Stephen Morris\", was written in 1923, but not published until 1961. His first published novel was \"Marazan\", which came out in 1926. After that he averaged one novel every two years through the 1950s, with the exception of a six-year hiatus while he was establishing his own aircraft"
14 | },
15 | {
16 | "title": "Nevil Shute",
17 | "text": "theme is the bridging of social barriers such as class (\"Lonely Road\" and \"Landfall\"), race (\"The Chequer Board\"), or religion (\"Round the Bend\"). The Australian novels are individual hymns to that country, with subtle disparagement of the mores of the United States (\"Beyond the Black Stump\") and overt antipathy towards the post-World War II socialist government of Shute's native Britain (\"The Far Country\" and \"In the Wet\"). Shute's heroes tended to be like himself: middle class solicitors, doctors, accountants, bank managers, engineers, generally university graduates. However (as in \"Trustee from the Toolroom\"), Shute valued the honest artisans and their social"
18 | },
19 | {
20 | "title": "Nevil Shute",
21 | "text": "construction company, Airspeed Ltd. His popularity grew slowly with each novel, but he became much more famous after the publication of \"On the Beach\" in 1957. Shute's novels are written in a simple, highly readable style, with clearly delineated plot lines. Where there is a romantic element, sex is referred to only obliquely. Many of the stories are introduced by a narrator who is not a character in the story. The most common theme in Shute's novels is the dignity of work, spanning all classes, whether an Eastern European bar \"hostess\" (\"Ruined City\") or brilliant boffin (\"No Highway\"). Another recurrent"
22 | },
23 | {
24 | "title": "The Chequer Board",
25 | "text": "the Burmese people\", both of which are central to the book's story. Shute was concerned that sales of the book in the United States would be negatively impacted by the book's open-minded handling of racial issues; as it turned out, sales soared. Shute and his wife traveled the U.S. on Greyhound buses to \"\"get in touch with the man on the street,\"\" finding the experience refreshing. Afterwards he wrote \"\"Sincerity is the first attribute for making money in the business of writing novels.\"\" The Chequer Board The Chequer Board is a novel by Nevil Shute, first published in the United"
26 | },
27 | {
28 | "title": "In the Wet",
29 | "text": "had used the idea of multiple votes for merit in his short story \"The Curious Republic of Gondour\". In the Wet In The Wet is a novel by Nevil Shute that was first published in the United Kingdom in 1953. It contains many of the typical elements of a hearty and adventurous Shute yarn such as flying, the future, mystic states, and ordinary people doing extraordinary things. The story is opened by its initial narrator \u2013 an Anglican priest in the Bush Brotherhood named Roger Hargreaves \u2013 who describes his ordinary circumstances in a large parish of the Australian outback"
30 | }
31 | ]
32 | },
33 | {
34 | "question": "Which film has Gong Li as a member of its cast?",
35 | "answer": "The Story of Qiu Ju [1], Farewell My Concubine [2], Flirting Scholar [2], The Monkey King 2 [3], Mulan [3], Saturday Fiction [3], Coming Home [3].",
36 | "docs": [
37 | {
38 | "title": "Gong Li",
39 | "text": "Gong Li Gong Li (born 31 December 1965) is a Chinese-born Singaporean film actress. She achieved international prominence through her close collaborations with Chinese director Zhang Yimou and won the Volpi Cup for Best Actress at Venice for her performance in his 1992 film \"The Story of Qiu Ju\". She has been credited with helping to bring Chinese cinema to prominence in Europe and the United States. In 2006, she was voted the most beautiful woman in China. Gong has won numerous accolades for her work as an actress; she won the New York Film Critics Circle Award for Best"
40 | },
41 | {
42 | "title": "Gong Li",
43 | "text": "making her realize that she has assisted the dark cynical system. In 1993, she received a New York Film Critics Circle award for her role in \"Farewell My Concubine\" (1993). Directed by Chen Kaige, the film was her first major role with a director other than Zhang Yimou. In the same year, she was awarded with the Berlinale Camera at the 43rd Berlin International Film Festival. \"Premiere\" magazine ranked her performance in \"Farewell My Concubine\" as the 89th greatest performance of all time. She also worked with renowned director Stephen Chow in comedy films \"\" (1991) and \"Flirting Scholar\" (1993)."
44 | },
45 | {
46 | "title": "Gong Li",
47 | "text": "International Film Festival. Later that same year, she reunited with Zhang Yimou for the film \"Coming Home\", which is set during the throes of the Cultural Revolution; this film was their first collaboration since 2006. In 2016, Gong took on her first action role in \"The Monkey King 2\", playing the White Bone Demon. In 2018, Gong was cast in Lou Ye's period drama \"Saturday Fiction\", where she plays an actress who is working undercover gathering intelligence for the Allies. That year, she was also cast in the live-action adaptation of the 1998 Disney animated film \"Mulan\", as an unspecified"
48 | },
49 | {
50 | "title": "Zhang Yimou",
51 | "text": "in Zhang's earlier films. \"Raise the Red Lantern\" was nominated in the Best Foreign Language Film category at the 1992 Academy Awards, becoming the second Chinese film to earn this distinction (after Zhang's \"Ju Dou\"). It eventually lost out to Gabriele Salvatores's \"Mediterraneo\". Zhang's next directorial work, \"The Story of Qiu Ju\", in 1992, once again starring Gong Li in the lead role. The film, which tells the tale of a peasant woman seeking justice for her husband after he was beaten by a village official, was a hit at film festivals and won the Golden Lion award at the"
52 | },
53 | {
54 | "title": "Gong Li",
55 | "text": "Gong Li Gong Li (born 31 December 1965) is a Chinese-born Singaporean film actress. She achieved international prominence through her close collaborations with Chinese director Zhang Yimou and won the Volpi Cup for Best Actress at Venice for her performance in his 1992 film \"The Story of Qiu Ju\". She has been credited with helping to bring Chinese cinema to prominence in Europe and the United States. In 2006, she was voted the most beautiful woman in China. Gong has won numerous accolades for her work as an actress; she won the New York Film Critics Circle Award for Best"
56 | }
57 | ]
58 | },
59 | {
60 | "question": "In which years did Patti LaBelle publish music?",
61 | "answer": "2006 [1], 1977 [2], 2004 [3], 2005 [3], 2000 [3], 2006 [3].",
62 | "docs": [
63 | {
64 | "title": "The Gospel According to Patti LaBelle",
65 | "text": "The Gospel According to Patti LaBelle The Gospel According to Patti LaBelle is the first gospel album released by singer Patti LaBelle, released in November 2006. This project began three years ago when Patti's late musical director and close friend Budd Ellison told a skeptical LaBelle that \"it's now or never, Patti.\" The album is dedicated to his memory as he succumbed to prostate cancer before the album saw a release. The album was released on November 21, 2006 through indie label Umbrella/Bungalow Records, also home to Carl Thomas, Rodney Jerkins, Dean \"DC\" Charles, and other artists. \"The Gospel According"
66 | },
67 | {
68 | "title": "Patti LaBelle (album)",
69 | "text": "scaled the high sixties on the \"Billboard\" R&B chart, it soon became one of her famous show-stoppers while performing the song. LaBelle performed the song at her first solo concert in London, getting a standing ovation, which helped to give LaBelle motivation to continue her career. The album, when released, performed successfully, reaching number 62 on the \"Billboard\" 200 and number 31 on the R&B albums chart, while critics hailed the album. Patti LaBelle (album) Patti LaBelle is the debut solo album by singer Patti LaBelle, released in 1977. The first album LaBelle recorded after sixteen years fronting the band"
70 | },
71 | {
72 | "title": "Patti LaBelle",
73 | "text": "win. In 2000, LaBelle released her final MCA album, \"When a Woman Loves\", before signing with Def Soul Classics to release the 2004 album, \"Timeless Journey\". Following the release of her 2005 covers album, \"Classic Moments\", LaBelle engaged in a rivalry with Antonio \"L.A.\" Reid over the direction of her career, leading to her leaving the label.In the same year, the World Music Awards recognized her years in the music business by awarding her the Legend Award. In 2006, she released her first gospel album, \"The Gospel According to Patti LaBelle\" on the Bungalo label, the album later peaking at"
74 | },
75 | {
76 | "title": "Patti LaBelle",
77 | "text": "Patti LaBelle Patti LaBelle (born Patricia Louise Holt; May 24, 1944) is an American singer, actress, and entrepreneur. LaBelle began her career in the early 1960s as lead singer and front woman of the vocal group, Patti LaBelle and the Bluebelles. Following the group's name change to Labelle in the early 1970s, they released the iconic disco song \"Lady Marmalade\" and the group later became the first African-American vocal group to land the cover of \"Rolling Stone\" magazine. After the group split in 1976, LaBelle began a successful solo career, starting with her critically acclaimed debut album, which included the"
78 | },
79 | {
80 | "title": "The Gospel According to Patti LaBelle",
81 | "text": "Billboard's Top Gospel Albums chart for 17 weeks. \"Where Love Begins,\" a duet with Yolanda Adams was played frequently on R&B and gospel radio stations and debuted at #68 on Billboard's Hot R&B/Hip-Hop tracks. The second single \"Anything\" featuring Kanye West, Mary Mary and Consequence hit #64 on Billboards Hot R&B/Hip-Hop tracks. In 2008, the album was nominated for a Dove Award for Contemporary Gospel Album of the Year at the 39th GMA Dove Awards. The Gospel According to Patti LaBelle The Gospel According to Patti LaBelle is the first gospel album released by singer Patti LaBelle, released in November"
82 | }
83 | ]
84 | },
85 | {
86 | "question": "Glenn Ford was a member of cast in which film?",
87 | "answer": "So Ends Our Night [1], Heaven with a Barbed Wire Fence [1], Happy Birthday to Me [2], The Greatest Gift [2], The Gift [2], The Brotherhood of the Bell [3].",
88 | "docs": [
89 | {
90 | "title": "Glenn Ford",
91 | "text": "name came from his father's hometown of Glenford, Alberta. His first major movie part was in the 1939 film, \"Heaven with a Barbed Wire Fence\". Top Hollywood director John Cromwell was impressed enough with his work to borrow him from Columbia for the independently produced drama, \"So Ends Our Night\" (1941), where Ford delivered a poignant portrayal of a 19-year-old German exile on the run in Nazi-occupied Europe. Working with Academy Award-winning Fredric March and wooing (onscreen) 30-year-old Margaret Sullavan, recently nominated for an Oscar, Ford's shy, ardent young refugee riveted attention even in such stellar company. \"Glenn Ford, a"
92 | },
93 | {
94 | "title": "Glenn Ford",
95 | "text": "were Westerns. He suggested doing a Western series, instead, which resulted in the \"modern-day Western\" series, \"Cade's County\". Ford played southwestern Sheriff Cade for one season (1971\u20131972) in a mix of police mystery and western drama. In \"The Family Holvak\" (1975\u20131976), Ford portrayed a Depression-era preacher in a family drama, reprising the same character he had played in the TV film, \"The Greatest Gift\". In 1978 Ford was host, presenter and narrator of the disaster documentary series 'When Havoc Struck'. In 1981, Ford co-starred with Melissa Sue Anderson in the slasher film \"Happy Birthday to Me\". In 1991, Ford agreed"
96 | },
97 | {
98 | "title": "CBS Thursday Night Movie",
99 | "text": "Night Movie\" opened its fall schedule with the premiere of a low-budget, made-for-TV movie, rather than a proven Hollywood blockbuster guaranteed to lure mass viewership, it became CBS's way of declaring its commitment to product that, although cheaply manufactured, was nevertheless new and topical. In this case, the movie was \"The Brotherhood of the Bell\", and the film's star was Glenn Ford, a movie actor who had never appeared in a television-film. In fact, before shooting on the project even began, Ford had been warned by friends in the industry that he would hate the experience. Instead, the actor reported"
100 | },
101 | {
102 | "title": "The Trouble with Girls (film) ",
103 | "text": "with Charlene, but when she refuses to give in, he deceives her and uses the local police force to be sure that she must leave on the train with the rest of the troupe. Cast notes In June 1959 it was announced that Don Mankiewicz would write a screenplay of an unpublished story by Mauri Grashin, Day Keene, and Dwight Babcock. By December 1960, with the project titled \"Chautauqua\", MGM was ready to make the film with Glenn Ford. Rumours circulating in Hollywood at the time stated that Presley would co-star with Ford, Hope Lange, and Arthur O'Connell, but nothing"
104 | },
105 | {
106 | "title": "Trouble in the Glen",
107 | "text": "Mel Ferrer. It was Orson Welles' fifth British movie in six months. Filming started 15 December 1953. The film received very poor reviews. Trouble in the Glen Trouble in the Glen is a 1954 British comedy film directed by Herbert Wilcox and starring Margaret Lockwood, Orson Welles, Forrest Tucker and Victor McLaglen. It is loosely based on Maurice Walsh's 1950 novel of the same name. It was filmed in Trucolor for Republic Pictures. After moving from South America to the Scottish Highlands, millionaire Sanin Cejador y Mengues (Welles) reassumes the title of laird of Glen Easan, which he inherited from"
108 | }
109 | ]
110 | }
111 | ]
112 | }
113 |
--------------------------------------------------------------------------------
/prompts/qampari_light_inst.json:
--------------------------------------------------------------------------------
1 | {
2 | "instruction": "Instruction: Provide a list of accurate answers for the given question using only the provided search results. Always cite one and only one document for each answer. Separate answers by commas. For questions that have more than 5 answers, write at least 5 answers.",
3 | "demo_sep": "\n\n\n",
4 | "demo_prompt": "{INST}\n\nQuestion: {Q}\n\n{D}\nAnswer: {A}",
5 | "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6 | "demos": [
7 | {
8 | "question": "Which books were written by Nevil Shute?",
9 | "answer": "Marazan [1], Stephen Morris [1], Beyond the Black Stump [2], Lonely Road [2], The Chequer Board [2], In the Wet [2], Trustee from the Toolroom [2], Round the Bend [2], No Highway [3], Ruined City [3], On the Beach [3].",
10 | "docs": [
11 | {
12 | "title": "Nevil Shute",
13 | "text": "early stages. My congratulations.\" His celebrity as a writer caused the Ministry of Information to send him to the Normandy Landings on 6 June 1944 and later to Burma as a correspondent. He finished the war with the rank of lieutenant commander in the Royal Navy Volunteer Reserves (RNVR). Shute's first novel, \"Stephen Morris\", was written in 1923, but not published until 1961. His first published novel was \"Marazan\", which came out in 1926. After that he averaged one novel every two years through the 1950s, with the exception of a six-year hiatus while he was establishing his own aircraft"
14 | },
15 | {
16 | "title": "Nevil Shute",
17 | "text": "theme is the bridging of social barriers such as class (\"Lonely Road\" and \"Landfall\"), race (\"The Chequer Board\"), or religion (\"Round the Bend\"). The Australian novels are individual hymns to that country, with subtle disparagement of the mores of the United States (\"Beyond the Black Stump\") and overt antipathy towards the post-World War II socialist government of Shute's native Britain (\"The Far Country\" and \"In the Wet\"). Shute's heroes tended to be like himself: middle class solicitors, doctors, accountants, bank managers, engineers, generally university graduates. However (as in \"Trustee from the Toolroom\"), Shute valued the honest artisans and their social"
18 | },
19 | {
20 | "title": "Nevil Shute",
21 | "text": "construction company, Airspeed Ltd. His popularity grew slowly with each novel, but he became much more famous after the publication of \"On the Beach\" in 1957. Shute's novels are written in a simple, highly readable style, with clearly delineated plot lines. Where there is a romantic element, sex is referred to only obliquely. Many of the stories are introduced by a narrator who is not a character in the story. The most common theme in Shute's novels is the dignity of work, spanning all classes, whether an Eastern European bar \"hostess\" (\"Ruined City\") or brilliant boffin (\"No Highway\"). Another recurrent"
22 | },
23 | {
24 | "title": "The Chequer Board",
25 | "text": "the Burmese people\", both of which are central to the book's story. Shute was concerned that sales of the book in the United States would be negatively impacted by the book's open-minded handling of racial issues; as it turned out, sales soared. Shute and his wife traveled the U.S. on Greyhound buses to \"\"get in touch with the man on the street,\"\" finding the experience refreshing. Afterwards he wrote \"\"Sincerity is the first attribute for making money in the business of writing novels.\"\" The Chequer Board The Chequer Board is a novel by Nevil Shute, first published in the United"
26 | },
27 | {
28 | "title": "In the Wet",
29 | "text": "had used the idea of multiple votes for merit in his short story \"The Curious Republic of Gondour\". In the Wet In The Wet is a novel by Nevil Shute that was first published in the United Kingdom in 1953. It contains many of the typical elements of a hearty and adventurous Shute yarn such as flying, the future, mystic states, and ordinary people doing extraordinary things. The story is opened by its initial narrator \u2013 an Anglican priest in the Bush Brotherhood named Roger Hargreaves \u2013 who describes his ordinary circumstances in a large parish of the Australian outback"
30 | }
31 | ]
32 | },
33 | {
34 | "question": "Which film has Gong Li as a member of its cast?",
35 | "answer": "The Story of Qiu Ju [1], Farewell My Concubine [2], Flirting Scholar [2], The Monkey King 2 [3], Mulan [3], Saturday Fiction [3], Coming Home [3].",
36 | "docs": [
37 | {
38 | "title": "Gong Li",
39 | "text": "Gong Li Gong Li (born 31 December 1965) is a Chinese-born Singaporean film actress. She achieved international prominence through her close collaborations with Chinese director Zhang Yimou and won the Volpi Cup for Best Actress at Venice for her performance in his 1992 film \"The Story of Qiu Ju\". She has been credited with helping to bring Chinese cinema to prominence in Europe and the United States. In 2006, she was voted the most beautiful woman in China. Gong has won numerous accolades for her work as an actress; she won the New York Film Critics Circle Award for Best"
40 | },
41 | {
42 | "title": "Gong Li",
43 | "text": "making her realize that she has assisted the dark cynical system. In 1993, she received a New York Film Critics Circle award for her role in \"Farewell My Concubine\" (1993). Directed by Chen Kaige, the film was her first major role with a director other than Zhang Yimou. In the same year, she was awarded with the Berlinale Camera at the 43rd Berlin International Film Festival. \"Premiere\" magazine ranked her performance in \"Farewell My Concubine\" as the 89th greatest performance of all time. She also worked with renowned director Stephen Chow in comedy films \"\" (1991) and \"Flirting Scholar\" (1993)."
44 | },
45 | {
46 | "title": "Gong Li",
47 | "text": "International Film Festival. Later that same year, she reunited with Zhang Yimou for the film \"Coming Home\", which is set during the throes of the Cultural Revolution; this film was their first collaboration since 2006. In 2016, Gong took on her first action role in \"The Monkey King 2\", playing the White Bone Demon. In 2018, Gong was cast in Lou Ye's period drama \"Saturday Fiction\", where she plays an actress who is working undercover gathering intelligence for the Allies. That year, she was also cast in the live-action adaptation of the 1998 Disney animated film \"Mulan\", as an unspecified"
48 | },
49 | {
50 | "title": "Zhang Yimou",
51 | "text": "in Zhang's earlier films. \"Raise the Red Lantern\" was nominated in the Best Foreign Language Film category at the 1992 Academy Awards, becoming the second Chinese film to earn this distinction (after Zhang's \"Ju Dou\"). It eventually lost out to Gabriele Salvatores's \"Mediterraneo\". Zhang's next directorial work, \"The Story of Qiu Ju\", in 1992, once again starring Gong Li in the lead role. The film, which tells the tale of a peasant woman seeking justice for her husband after he was beaten by a village official, was a hit at film festivals and won the Golden Lion award at the"
52 | },
53 | {
54 | "title": "Gong Li",
55 | "text": "Gong Li Gong Li (born 31 December 1965) is a Chinese-born Singaporean film actress. She achieved international prominence through her close collaborations with Chinese director Zhang Yimou and won the Volpi Cup for Best Actress at Venice for her performance in his 1992 film \"The Story of Qiu Ju\". She has been credited with helping to bring Chinese cinema to prominence in Europe and the United States. In 2006, she was voted the most beautiful woman in China. Gong has won numerous accolades for her work as an actress; she won the New York Film Critics Circle Award for Best"
56 | }
57 | ]
58 | },
59 | {
60 | "question": "In which years did Patti LaBelle publish music?",
61 | "answer": "2006 [1], 1977 [2], 2004 [3], 2005 [3], 2000 [3], 2006 [3].",
62 | "docs": [
63 | {
64 | "title": "The Gospel According to Patti LaBelle",
65 | "text": "The Gospel According to Patti LaBelle The Gospel According to Patti LaBelle is the first gospel album released by singer Patti LaBelle, released in November 2006. This project began three years ago when Patti's late musical director and close friend Budd Ellison told a skeptical LaBelle that \"it's now or never, Patti.\" The album is dedicated to his memory as he succumbed to prostate cancer before the album saw a release. The album was released on November 21, 2006 through indie label Umbrella/Bungalow Records, also home to Carl Thomas, Rodney Jerkins, Dean \"DC\" Charles, and other artists. \"The Gospel According"
66 | },
67 | {
68 | "title": "Patti LaBelle (album)",
69 | "text": "scaled the high sixties on the \"Billboard\" R&B chart, it soon became one of her famous show-stoppers while performing the song. LaBelle performed the song at her first solo concert in London, getting a standing ovation, which helped to give LaBelle motivation to continue her career. The album, when released, performed successfully, reaching number 62 on the \"Billboard\" 200 and number 31 on the R&B albums chart, while critics hailed the album. Patti LaBelle (album) Patti LaBelle is the debut solo album by singer Patti LaBelle, released in 1977. The first album LaBelle recorded after sixteen years fronting the band"
70 | },
71 | {
72 | "title": "Patti LaBelle",
73 | "text": "win. In 2000, LaBelle released her final MCA album, \"When a Woman Loves\", before signing with Def Soul Classics to release the 2004 album, \"Timeless Journey\". Following the release of her 2005 covers album, \"Classic Moments\", LaBelle engaged in a rivalry with Antonio \"L.A.\" Reid over the direction of her career, leading to her leaving the label.In the same year, the World Music Awards recognized her years in the music business by awarding her the Legend Award. In 2006, she released her first gospel album, \"The Gospel According to Patti LaBelle\" on the Bungalo label, the album later peaking at"
74 | },
75 | {
76 | "title": "Patti LaBelle",
77 | "text": "Patti LaBelle Patti LaBelle (born Patricia Louise Holt; May 24, 1944) is an American singer, actress, and entrepreneur. LaBelle began her career in the early 1960s as lead singer and front woman of the vocal group, Patti LaBelle and the Bluebelles. Following the group's name change to Labelle in the early 1970s, they released the iconic disco song \"Lady Marmalade\" and the group later became the first African-American vocal group to land the cover of \"Rolling Stone\" magazine. After the group split in 1976, LaBelle began a successful solo career, starting with her critically acclaimed debut album, which included the"
78 | },
79 | {
80 | "title": "The Gospel According to Patti LaBelle",
81 | "text": "Billboard's Top Gospel Albums chart for 17 weeks. \"Where Love Begins,\" a duet with Yolanda Adams was played frequently on R&B and gospel radio stations and debuted at #68 on Billboard's Hot R&B/Hip-Hop tracks. The second single \"Anything\" featuring Kanye West, Mary Mary and Consequence hit #64 on Billboards Hot R&B/Hip-Hop tracks. In 2008, the album was nominated for a Dove Award for Contemporary Gospel Album of the Year at the 39th GMA Dove Awards. The Gospel According to Patti LaBelle The Gospel According to Patti LaBelle is the first gospel album released by singer Patti LaBelle, released in November"
82 | }
83 | ]
84 | },
85 | {
86 | "question": "Glenn Ford was a member of cast in which film?",
87 | "answer": "So Ends Our Night [1], Heaven with a Barbed Wire Fence [1], Happy Birthday to Me [2], The Greatest Gift [2], The Gift [2], The Brotherhood of the Bell [3].",
88 | "docs": [
89 | {
90 | "title": "Glenn Ford",
91 | "text": "name came from his father's hometown of Glenford, Alberta. His first major movie part was in the 1939 film, \"Heaven with a Barbed Wire Fence\". Top Hollywood director John Cromwell was impressed enough with his work to borrow him from Columbia for the independently produced drama, \"So Ends Our Night\" (1941), where Ford delivered a poignant portrayal of a 19-year-old German exile on the run in Nazi-occupied Europe. Working with Academy Award-winning Fredric March and wooing (onscreen) 30-year-old Margaret Sullavan, recently nominated for an Oscar, Ford's shy, ardent young refugee riveted attention even in such stellar company. \"Glenn Ford, a"
92 | },
93 | {
94 | "title": "Glenn Ford",
95 | "text": "were Westerns. He suggested doing a Western series, instead, which resulted in the \"modern-day Western\" series, \"Cade's County\". Ford played southwestern Sheriff Cade for one season (1971\u20131972) in a mix of police mystery and western drama. In \"The Family Holvak\" (1975\u20131976), Ford portrayed a Depression-era preacher in a family drama, reprising the same character he had played in the TV film, \"The Greatest Gift\". In 1978 Ford was host, presenter and narrator of the disaster documentary series 'When Havoc Struck'. In 1981, Ford co-starred with Melissa Sue Anderson in the slasher film \"Happy Birthday to Me\". In 1991, Ford agreed"
96 | },
97 | {
98 | "title": "CBS Thursday Night Movie",
99 | "text": "Night Movie\" opened its fall schedule with the premiere of a low-budget, made-for-TV movie, rather than a proven Hollywood blockbuster guaranteed to lure mass viewership, it became CBS's way of declaring its commitment to product that, although cheaply manufactured, was nevertheless new and topical. In this case, the movie was \"The Brotherhood of the Bell\", and the film's star was Glenn Ford, a movie actor who had never appeared in a television-film. In fact, before shooting on the project even began, Ford had been warned by friends in the industry that he would hate the experience. Instead, the actor reported"
100 | },
101 | {
102 | "title": "The Trouble with Girls (film) ",
103 | "text": "with Charlene, but when she refuses to give in, he deceives her and uses the local police force to be sure that she must leave on the train with the rest of the troupe. Cast notes In June 1959 it was announced that Don Mankiewicz would write a screenplay of an unpublished story by Mauri Grashin, Day Keene, and Dwight Babcock. By December 1960, with the project titled \"Chautauqua\", MGM was ready to make the film with Glenn Ford. Rumours circulating in Hollywood at the time stated that Presley would co-star with Ford, Hope Lange, and Arthur O'Connell, but nothing"
104 | },
105 | {
106 | "title": "Trouble in the Glen",
107 | "text": "Mel Ferrer. It was Orson Welles' fifth British movie in six months. Filming started 15 December 1953. The film received very poor reviews. Trouble in the Glen Trouble in the Glen is a 1954 British comedy film directed by Herbert Wilcox and starring Margaret Lockwood, Orson Welles, Forrest Tucker and Victor McLaglen. It is loosely based on Maurice Walsh's 1950 novel of the same name. It was filmed in Trucolor for Republic Pictures. After moving from South America to the Scottish Highlands, millionaire Sanin Cejador y Mengues (Welles) reassumes the title of laird of Glen Easan, which he inherited from"
108 | }
109 | ]
110 | }
111 | ]
112 | }
113 |
--------------------------------------------------------------------------------
/retrieval.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import csv
3 | import json
4 | import os
5 | import time
6 | import pickle
7 |
8 | import numpy as np
9 | import torch
10 | from tqdm import tqdm
11 | from sentence_transformers import SentenceTransformer
12 |
13 | TOPK = 100
14 |
def bm25_sphere_retrieval(data):
    """Retrieve the top-TOPK sphere passages for each question using BM25.

    Mutates each item in `data` in place, adding a "docs" list of
    {"title", "text", "url"} dicts. Requires the BM25_SPHERE_PATH environment
    variable to point at the prebuilt Lucene index.
    """
    from pyserini.search import LuceneSearcher

    index_path = os.environ.get("BM25_SPHERE_PATH")
    # Fail fast with a clear message instead of a confusing Lucene error.
    if not index_path:
        raise ValueError("Please set the BM25_SPHERE_PATH environment variable to the sphere BM25 index path.")
    print("loading bm25 index, this may take a while...")
    searcher = LuceneSearcher(index_path)

    print("running bm25 retrieval...")
    for d in tqdm(data):
        query = d["question"]
        try:
            hits = searcher.search(query, TOPK)
        except Exception as e:
            #https://github.com/castorini/pyserini/blob/1bc0bc11da919c20b4738fccc020eee1704369eb/scripts/kilt/anserini_retriever.py#L100
            # Lucene caps the number of boolean clauses per query; shorten the
            # query and retry, mirroring the pyserini KILT script.
            if "maxClauseCount" in str(e):
                query = " ".join(query.split())[:950]
                hits = searcher.search(query, TOPK)
            else:
                raise e

        docs = []
        for hit in hits:
            # In the sphere index the docid field is itself a JSON record
            # holding the title/url; the passage text lives in hit.raw.
            h = json.loads(str(hit.docid).strip())
            docs.append({
                "title": h["title"],
                "text": hit.raw,
                "url": h["url"],
            })
        d["docs"] = docs
43 |
44 |
def gtr_build_index(encoder, docs):
    """Encode `docs` with the GTR encoder, downcast to fp16, and cache to $GTR_EMB.

    Returns the float16 embedding matrix that was written to disk.
    """
    with torch.inference_mode():
        embeddings = encoder.encode(docs, batch_size=4, show_progress_bar=True, normalize_embeddings=True)
    # Halve the on-disk / in-memory footprint; fp16 is plenty for dot-product search.
    embeddings = embeddings.astype("float16")

    cache_path = os.environ.get("GTR_EMB")
    with open(cache_path, "wb") as cache_file:
        pickle.dump(embeddings, cache_file)
    return embeddings
54 |
55 |
def gtr_wiki_retrieval(data):
    """Retrieve the top-TOPK DPR-wiki passages per question with GTR dense retrieval.

    Mutates each item in `data` in place, adding a "docs" list of
    {"id", "title", "text", "score"} dicts. Requires the DPR_WIKI_TSV and
    GTR_EMB environment variables; the embedding cache is built on first run.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("loading GTR encoder...")
    encoder = SentenceTransformer("sentence-transformers/gtr-t5-xxl", device = device)

    questions = [d["question"] for d in data]
    with torch.inference_mode():
        queries = encoder.encode(questions, batch_size=4, show_progress_bar=True, normalize_embeddings=True)
        # Keep queries on CPU; each one is moved to the device only while scored.
        queries = torch.tensor(queries, dtype=torch.float16, device="cpu")

    # the wikipedia split from DPR repo: https://github.com/facebookresearch/DPR
    DPR_WIKI_TSV = os.environ.get("DPR_WIKI_TSV")
    docs = []
    print("loading wikipedia file...")
    with open(DPR_WIKI_TSV) as f:
        reader = csv.reader(f, delimiter="\t")
        for i, row in enumerate(reader):
            if i == 0:
                continue  # skip the TSV header row
            # Store "title\ntext" so the pair can be split back apart below.
            docs.append(row[2] + "\n" + row[1])

    GTR_EMB = os.environ.get("GTR_EMB")
    if not os.path.exists(GTR_EMB):
        print("gtr embeddings not found, building...")
        embs = gtr_build_index(encoder, docs)
    else:
        print("gtr embeddings found, loading...")
        with open(GTR_EMB, "rb") as f:
            embs = pickle.load(f)

    del(encoder) # save gpu mem

    gtr_emb = torch.tensor(embs, dtype=torch.float16, device=device)

    print("running GTR retrieval...")
    for qi, q in enumerate(tqdm(queries)):
        q = q.to(device)
        scores = torch.matmul(gtr_emb, q)
        score, idx = torch.topk(scores, TOPK)
        ret = []
        for i in range(idx.size(0)):
            # Split only on the FIRST newline: the remainder is the passage
            # text even if it happens to contain a newline itself (the old
            # bare split("\n") would raise ValueError on such a passage).
            title, text = docs[idx[i].item()].split("\n", 1)
            ret.append({"id": str(idx[i].item()+1),"title": title, "text": text, "score": score[i].item()})
        data[qi]["docs"] = ret
        q = q.to("cpu")
101 |
102 |
if __name__ == "__main__":
    # CLI entry point: load questions, attach retrieved docs in place, write back.
    parser = argparse.ArgumentParser(description="Passage retrieval.")
    parser.add_argument("--retriever", type=str, default=None, help="options: bm25/gtr")
    parser.add_argument("--data_file", type=str, default=None, help="path to the data file")
    parser.add_argument("--output_file", type=str, default=None, help="same format as the data file but with the retrieved docs.")
    cli_args = parser.parse_args()

    with open(cli_args.data_file) as data_file:
        data = json.load(data_file)

    # Dispatch table instead of an if/elif chain.
    retrievers = {"bm25": bm25_sphere_retrieval, "gtr": gtr_wiki_retrieval}
    if cli_args.retriever not in retrievers:
        raise NotImplementedError
    retrievers[cli_args.retriever](data)

    with open(cli_args.output_file, "w") as out_file:
        json.dump(data, out_file, indent=4)
122 |
--------------------------------------------------------------------------------
/searcher.py:
--------------------------------------------------------------------------------
1 | import json
2 | from sklearn.feature_extraction.text import TfidfVectorizer
3 | from sklearn.metrics.pairwise import cosine_similarity
4 | import numpy as np
5 | import torch
6 | from torch.nn.functional import normalize
7 |
def doc_to_text_tfidf(doc):
    """Join a doc's title and body with a space, for TF-IDF indexing."""
    return f"{doc['title']} {doc['text']}"
10 |
def doc_to_text_dense(doc):
    """Join a doc's title and body with ". ", for dense (GTR) encoding."""
    return f"{doc['title']}. {doc['text']}"
13 |
14 |
class SearcherWithinDocs:
    """Rank a fixed set of documents against a query and return the best index.

    Supports a sparse TF-IDF retriever ("tfidf") or any dense retriever whose
    name contains "gtr" (backed by a sentence-transformers `model`).
    """

    def __init__(self, docs, retriever, model=None, device="cuda"):
        self.retriever = retriever
        self.docs = docs
        self.device = device
        if retriever == "tfidf":
            # Fit the vectorizer on the doc collection once, up front.
            self.tfidf = TfidfVectorizer()
            corpus = [doc_to_text_tfidf(d) for d in docs]
            self.tfidf_docs = self.tfidf.fit_transform(corpus)
        elif "gtr" in retriever:
            # Pre-encode the docs; queries are encoded lazily per search.
            self.model = model
            dense_corpus = [doc_to_text_dense(d) for d in docs]
            self.embeddings = self.model.encode(dense_corpus, device=self.device, convert_to_numpy=False, convert_to_tensor=True, normalize_embeddings=True)
        else:
            raise NotImplementedError

    def search(self, query):
        """Return the index of the top-1 document for `query`."""
        if self.retriever == "tfidf":
            query_vec = self.tfidf.transform([query])[0]
            similarities = []
            for doc_vec in self.tfidf_docs:
                similarities.append(cosine_similarity(doc_vec, query_vec))
            return np.argmax(similarities)
        if "gtr" in self.retriever:
            query_emb = self.model.encode([query], device=self.device, convert_to_numpy=False, convert_to_tensor=True, normalize_embeddings=True)
            # Embeddings are normalized, so the inner product is cosine similarity.
            scores = torch.matmul(self.embeddings, query_emb.t()).squeeze(1).detach().cpu().numpy()
            return np.argmax(scores)
        raise NotImplementedError
45 |
--------------------------------------------------------------------------------
/tools/gen_summary.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import openai
3 | import json
4 | from tqdm import tqdm
5 | from transformers import AutoTokenizer
6 | import numpy as np
7 |
8 | OPENAI_API_KEY = ""
9 | OPENAI_ORG_ID = ""
10 |
def _build_prompt(target, question, doc):
    """Build the chat messages asking for a summary of, or an extracted span from, `doc`."""
    if target == "summary":
        return [
            {'role': 'system', 'content': "You are a helpful assistant that summarizes the following documents with respect to questions of interest."},
            {'role': 'user', 'content': f"Summarize the following document within 50 words with the question of interest \"{question}\" Return \"irrelevant\" if the document is irrelevant to the question. Try to keep all the important dates, numbers, and names.\n\nTitle: {doc['title']}\nText: {doc['text']}\nSummary:"}
        ]
    if target == "extraction":
        return [
            {'role': 'system', 'content': "You are a helpful assistant that extracts answers to questions from given documents."},
            {'role': 'user', 'content': f"Given the follow passage and the question \"{question}\", extract a useful span from the passage that can answer the question. Resolve all the coreference issues to make the extracted span understandable standalone. If the passage is not helpful for answering the question, return \"irrelevant\".\n\nTitle: {doc['title']}\nText: {doc['text']}\nExtracted span:"}
        ]
    raise NotImplementedError


def _chat_with_retry(args, prompt, max_retries=5):
    """Call the chat completion API, retrying transient failures up to `max_retries` times.

    Raises RuntimeError (chained to the last API error) when all retries fail.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return openai.ChatCompletion.create(
                model=args.model,
                messages=prompt,
                temperature=args.temperature,
                max_tokens=args.max_tokens,
            )
        except Exception as error:
            if attempt < max_retries:
                print(f"Retry for {attempt} times ({error})")
            else:
                # Previously this dropped into pdb and then fell through with
                # `response` unbound; fail loudly instead so unattended runs
                # don't hang or crash with a confusing NameError.
                raise RuntimeError(f"OpenAI API call failed after {max_retries} retries") from error


def main():
    """Generate per-document summaries or extractions with an OpenAI chat model
    and write them back into the data file (with periodic checkpointing)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--f", type=str, help="Data file")
    parser.add_argument("--model", type=str, default="gpt-3.5-turbo-0301", help="What model to use")
    parser.add_argument("--temperature", type=float, default=0.7, help="Temperature for decoding")
    parser.add_argument("--target", type=str, default="summary", help="Summary or extraction? Options: `summary`, `extraction`")
    parser.add_argument("--max_tokens", type=int, default=100, help="Max tokens to generate")
    parser.add_argument("--ndoc", type=int, default=20, help="Generate summary for the top-ndoc documents")
    args = parser.parse_args()

    openai.organization = OPENAI_ORG_ID
    openai.api_key = OPENAI_API_KEY

    with open(args.f) as fin:
        data = json.load(fin)
    total_tokens = 0
    new_f_temp = args.f.replace(".json", f"_w_{args.target}_top{args.ndoc}_workinprogress.json")

    for item_id, item in enumerate(tqdm(data)):
        for doc_id, doc in enumerate(item['docs'][:args.ndoc]):
            if args.target in doc:
                # Already processed (e.g. resuming from a checkpoint).
                print("pass")
                continue
            prompt = _build_prompt(args.target, item['question'], doc)
            response = _chat_with_retry(args, prompt)

            content = response['choices'][0]['message']['content'].strip()
            total_tokens += response['usage']['total_tokens']
            data[item_id]['docs'][doc_id][args.target] = content

            print("--------------------")
            print(f"Question: {item['question']}")
            print(f"Document ({doc['title']}): {doc['text']}")
            print("---")
            print(f"{args.target}: {content}")

        # Save intermediate results in case the program crashes
        if item_id % 10 == 0:
            with open(new_f_temp, "w") as fout:
                json.dump(data, fout, indent=4)

    new_f = args.f.replace(".json", f"_w_{args.target}_top{args.ndoc}.json")
    with open(new_f, "w") as fout:
        json.dump(data, fout, indent=4)

    # gpt-3.5-turbo pricing: $0.002 per 1K tokens (at the time of writing).
    print("Cost: %.1f" % (total_tokens / 1000 * 0.002))

if __name__ == "__main__":
    main()
86 |
--------------------------------------------------------------------------------
/tools/rerank_outputs.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import re
4 | import logging
5 | logger = logging.getLogger(__name__)
6 | logger.setLevel(logging.INFO)
7 |
8 | from nltk import sent_tokenize
9 | from tqdm import tqdm
10 | import torch
11 | from transformers import (
12 | AutoModelForSeq2SeqLM,
13 | AutoTokenizer,
14 | )
15 |
16 |
17 | AUTOAIS_MODEL="google/t5_xxl_true_nli_mixture"
18 | global autoais_model, autoais_tokenizer
19 | autoais_model, autoais_tokenizer = None, None
20 |
21 |
def remove_citations(sent):
    """Drop inline citation brackets ("[k]" with optional leading space) and " |" separators."""
    no_spaced = re.sub(r" \[\d+", "", sent)
    no_bare = re.sub(r"\[\d+", "", no_spaced)
    return no_bare.replace(" |", "").replace("]", "")
24 |
25 |
def get_max_memory():
    """Return a per-GPU max-memory map for HF `device_map="auto"` model loading."""
    free_gb = int(torch.cuda.mem_get_info()[0] / 1024 ** 3)
    # Reserve 6GB per GPU as headroom for activations / generation.
    limit = f"{free_gb - 6}GB"
    return {idx: limit for idx in range(torch.cuda.device_count())}
33 |
34 |
def rerank_outputs(data, mode, at_most_citations=None, qampari=False):
    """For each item, keep the candidate output whose sentences are best
    entailed by the passages they cite, storing it back in item["output"].

    mode: if it contains "prob", the per-sentence score is the raw NLI
        entailment probability; otherwise it is thresholded at 0.5 to 0/1.
    at_most_citations: keep only the first k citations of each sentence.
    qampari: split comma-separated QAMPARI answers instead of sentences.
    """
    global autoais_model, autoais_tokenizer
    if autoais_model is None:
        logger.info("Loading AutoAIS model...")
        autoais_model = AutoModelForSeq2SeqLM.from_pretrained(AUTOAIS_MODEL, torch_dtype=torch.bfloat16, max_memory=get_max_memory(), device_map="auto")
        autoais_tokenizer = AutoTokenizer.from_pretrained(AUTOAIS_MODEL, use_fast=False)
    def _nli_prob(passage, claim):
        # TRUE-style NLI: score is the probability that the first generated
        # token is "1" (vocab id 209), i.e. the passage entails the claim.
        input_text =f"premise: {passage} hypothesis: {claim}"
        input_ids = autoais_tokenizer(input_text, return_tensors="pt").input_ids.to(autoais_model.device)
        with torch.inference_mode():
            outputs = autoais_model.generate(input_ids, output_scores=True, return_dict_in_generate=True)
        outputs = outputs.scores[0]
        one_input_id = 209
        prob = torch.nn.functional.softmax(outputs[0], -1)[one_input_id].item()
        return prob if "prob" in mode else 1 if prob >=0.5 else 0

    logger.info(f"Reranking outputs using mode {mode}...")

    for i, item in enumerate(tqdm(data)):
        outputs = item["output"]
        assert type(outputs) == list

        best_score = 0
        best_output = ""

        for output in outputs:
            if qampari:
                sents = [item["question"] + " " + x.strip() for x in output.rstrip(".").split(",")]
            else:
                sents = sent_tokenize(output)
            if len(sents) == 0:
                continue
            target_sents = [remove_citations(sent).strip() for sent in sents]

            entail = 0

            for sent_idx, sent in enumerate(sents):
                target_sent = target_sents[sent_idx]
                # Citations "[k]" are 1-based in the text; convert to 0-based doc ids.
                ref = [int(r[1:])-1 for r in re.findall(r"\[\d+", sent)]
                if len(ref) == 0:
                    # No citation: the sentence cannot be attributed.
                    flag = 0
                elif any([ref_id >= len(item["docs"]) for ref_id in ref]):
                    # Hallucinated citation index.
                    flag = 0
                else:
                    if at_most_citations is not None:
                        ref = ref[:at_most_citations]
                    # Bug fix: build the NLI premise from the docs the sentence
                    # actually cites. Previously ALL docs were concatenated,
                    # which made the citation parsing and the
                    # `at_most_citations` truncation above have no effect.
                    passage = "\n".join([f"Title: {item['docs'][psg_id]['title']}\n{item['docs'][psg_id]['text']}" for psg_id in ref])
                    flag = _nli_prob(passage, target_sent)
                entail += flag
            score = entail / len(sents)

            if score > best_score:
                best_score = score
                best_output = output

        item["output"] = best_output

    logger.info(f"Done with reranking outputs")
93 |
94 |
def main():
    """CLI: rerank candidate outputs in a result file and write `<file>.rerank`."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--f", type=str, required=True, help="Output file. Should have field `question`, `output`, (ROUGE) `answer`, \
        (accuracy) `qa_pairs`, (AIS) `docs`")
    parser.add_argument("--rerank_mode", type=str, default=None, help="How to rerank outputs: {prob, discrete}")
    parser.add_argument("--at_most_citations", type=int, default=3, help="Max num citations to consider.")
    args = parser.parse_args()

    with open(args.f) as fin:
        data_with_config = json.load(fin)
    records = data_with_config['data']
    is_qampari = "qampari" in args.f

    assert args.rerank_mode is not None

    rerank_outputs(records, mode=args.rerank_mode, at_most_citations=args.at_most_citations, qampari=is_qampari)

    with open(args.f + ".rerank", "w") as fout:
        # `records` aliases data_with_config['data'], so the reranked outputs
        # are serialized together with the original config.
        json.dump(data_with_config, fout, indent=4)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | logger = logging.getLogger(__name__)
3 | logger.setLevel(logging.INFO)
4 |
5 | import torch
6 | import json
7 | import re
8 | import os
9 | import string
10 | import time
11 |
def normalize_answer(s):
    """Normalize an answer string for lenient comparison: lowercase, strip
    punctuation, drop articles (a/an/the), and collapse whitespace."""
    text = s.lower()
    text = "".join(ch for ch in text if ch not in string.punctuation)
    text = re.sub(r"\b(a|an|the)\b", " ", text)
    return " ".join(text.split())
27 |
28 |
def remove_citations(sent):
    """Strip citation markers such as " [3]" / "[12]" and " |" separators from a sentence."""
    sent = re.sub(r" \[\d+", "", sent)
    sent = re.sub(r"\[\d+", "", sent)
    return sent.replace(" |", "").replace("]", "")
31 |
32 |
def get_max_memory():
    """Get the maximum memory available for the current GPU for loading models."""
    free_bytes, _total_bytes = torch.cuda.mem_get_info()
    free_in_gb = int(free_bytes / 1024 ** 3)
    # Leave 6GB of headroom per GPU for activations.
    budget = f"{free_in_gb - 6}GB"
    return {gpu_id: budget for gpu_id in range(torch.cuda.device_count())}
40 |
41 |
def make_doc_prompt(doc, doc_id, doc_prompt, use_shorter=None):
    """Fill a document prompt template.

    Placeholders: {ID} -> 1-based doc id, {T} -> title, {P} -> passage text
    (or the doc's `summary`/`extraction` field when `use_shorter` is set).
    """
    passage = doc["text"] if use_shorter is None else doc[use_shorter]
    filled = doc_prompt.replace("{T}", doc["title"])
    filled = filled.replace("{P}", passage)
    return filled.replace("{ID}", str(doc_id + 1))
53 |
54 |
def get_shorter_text(item, docs, ndoc, key):
    """Collect up to `ndoc` docs whose `key` field ("summary"/"extraction") is usable.

    Docs whose field contains "irrelevant"/"Irrelevant" are skipped. Iteration
    stops at the first doc missing `key`; if nothing was collected by then,
    that doc's full text is used as a fallback so the prompt gets at least
    one document.

    Note: `item` is unused; it is kept for interface compatibility with callers.
    """
    doc_list = []
    # Bug fix: the loop variable used to shadow the `item` parameter; renamed
    # to `doc` for clarity (behavior unchanged).
    for doc_id, doc in enumerate(docs):
        if key not in doc:
            if len(doc_list) == 0:
                # If there aren't any document, at least provide one (using full text)
                doc[key] = doc['text']
                doc_list.append(doc)
                # logger.warn is deprecated; use logger.warning.
                logger.warning(f"No {key} found in document. It could be this data do not contain {key} or previous documents are not relevant. This is document {doc_id}. This question will only have {len(doc_list)} documents.")
            break
        if "irrelevant" in doc[key] or "Irrelevant" in doc[key]:
            continue
        doc_list.append(doc)
        if len(doc_list) >= ndoc:
            break
    return doc_list
71 |
72 |
def make_demo(item, prompt, ndoc=None, doc_prompt=None, instruction=None, use_shorter=None, test=False):
    """Build one demonstration (or test) prompt for `item`.

    Template placeholders: {INST} instruction, {D} documents, {Q} question,
    {A} answer. `ndoc` limits documents in context; `use_shorter` selects the
    "summary"/"extraction" field instead of full text; `test=True` leaves the
    answer slot empty for generation.
    """
    filled = prompt.replace("{INST}", instruction).replace("{Q}", item['question'])
    if "{D}" in filled:
        if ndoc == 0:
            # No documents: drop the placeholder together with its newline.
            filled = filled.replace("{D}\n", "")
        else:
            if use_shorter is not None:
                doc_list = get_shorter_text(item, item["docs"], ndoc, use_shorter)
            else:
                doc_list = item["docs"][:ndoc]
            rendered = []
            for doc_id, doc in enumerate(doc_list):
                rendered.append(make_doc_prompt(doc, doc_id, doc_prompt, use_shorter=use_shorter))
            filled = filled.replace("{D}", "".join(rendered))

    if test:
        # Strip the answer slot (and trailing whitespace) for generation.
        return filled.replace("{A}", "").rstrip()

    if isinstance(item["answer"], list):
        answer = "\n" + "\n".join(item["answer"])
    else:
        answer = item["answer"]
    return filled.replace("{A}", "").rstrip() + answer
98 |
99 |
def load_model(model_name_or_path, dtype=torch.float16, int8=False, reserve_memory=10):
    """Load a HuggingFace causal LM (sharded across available GPUs) and its tokenizer.

    Args:
        model_name_or_path: HF hub id or local checkpoint path.
        dtype: torch.float16 or torch.bfloat16.
        int8: whether to use LLM.int8 quantization.
        reserve_memory: unused; kept for interface compatibility (GPU memory
            headroom is determined by get_max_memory()).

    Returns:
        (model, tokenizer)
    """
    # Import here so utils can be imported without transformers installed.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    logger.info(f"Loading {model_name_or_path} in {dtype}...")
    if int8:
        # logger.warn is deprecated; logger.warning is the supported spelling.
        logger.warning("Use LLM.int8")
    start_time = time.time()
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map='auto',
        torch_dtype=dtype,
        max_memory=get_max_memory(),
        load_in_8bit=int8,
    )
    logger.info("Finish loading in %.2f sec." % (time.time() - start_time))

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False)

    # Fix OPT bos token problem in HF
    if "opt" in model_name_or_path:
        tokenizer.bos_token = ""
    tokenizer.padding_side = "left"

    return model, tokenizer
130 |
--------------------------------------------------------------------------------