├── pictures
│   └── logo.jpg
├── .idea
│   ├── vcs.xml
│   ├── misc.xml
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── modules.xml
│   └── Cognitive_Mirage.iml
├── LICENSE
└── README.md

/pictures/logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hongbinye/Cognitive-Mirage-Hallucinations-in-LLMs/HEAD/pictures/logo.jpg
--------------------------------------------------------------------------------

/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Hongbin Ye

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
[//]: # (# Hallucination in Large Language Models: A Review)

[![Awesome](https://awesome.re/badge.svg)]()
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
![](https://img.shields.io/github/last-commit/hongbinye/Cognitive-Mirage-Hallucinations-in-LLMs?color=green)
![](https://img.shields.io/badge/PRs-Welcome-red)
Uncritical trust in LLMs can give rise to a phenomenon known as `Cognitive Mirage`, leading to misguided decision-making and a cascade of unintended consequences. To help control the risk of `hallucinations`, we summarize recent progress in hallucination theories and solutions, and organize the relevant work into a comprehensive survey.

**[:bell: News! :bell:]
We have released a new survey paper: "[Cognitive Mirage: A Review of Hallucinations in Large Language Models](https://arxiv.org/abs/2309.06794)", based on this repository, with a perspective on hallucinations in LLMs! We look forward to any comments or discussions on this topic :)**

## 🕵️ Introduction

As large language models continue to develop in the field of AI, text generation systems are susceptible to a worrisome phenomenon known as hallucination. In this study, we summarize recent compelling insights into hallucinations in LLMs. We present a novel taxonomy of hallucinations across various text generation tasks, and provide theoretical insights, detection methods, and improvement approaches. On this basis, future research directions are proposed. Our contributions are threefold: (1) we provide a detailed and complete taxonomy for hallucinations appearing in text generation tasks; (2) we provide theoretical analyses of hallucinations in LLMs and survey existing detection and improvement methods; (3) we propose several research directions that can be developed in the future. As hallucinations garner significant attention from the community, we will keep this repository updated with relevant research progress.

## 🏆 A Timeline of LLMs

| LLM Name | Title | Authors | Publication Date |
|----------|-------|---------|------------------|
| [T5](https://arxiv.org/abs/1910.10683) | Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. | Colin Raffel, Noam Shazeer, Adam Roberts | 2019.10 |
| [GPT-3](https://proceedings.neurips.cc/paper/2020/hash/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html) | Language Models are Few-Shot Learners. | Tom B. Brown, Benjamin Mann, Nick Ryder | 2020.12 |
| [mT5](https://doi.org/10.18653/v1/2021.naacl-main.41) | mT5: A Massively Multilingual Pre-trained Text-to-Text Transformer. | Linting Xue, Noah Constant, Adam Roberts | 2021.3 |
| [Codex](https://arxiv.org/abs/2107.03374) | Evaluating Large Language Models Trained on Code. | Mark Chen, Jerry Tworek, Heewoo Jun | 2021.7 |
| [FLAN](https://openreview.net/forum?id=gEZrGCozdqR) | Finetuned Language Models are Zero-Shot Learners. | Jason Wei, Maarten Bosma, Vincent Y. Zhao | 2021.9 |
| [WebGPT](https://arxiv.org/abs/2112.09332) | WebGPT: Browser-assisted question-answering with human feedback. | Reiichiro Nakano, Jacob Hilton, Suchir Balaji | 2021.12 |
| [InstructGPT](https://proceedings.neurips.cc/paper_files/paper/2022/hash/b1efde53be364a73914f58805a001731-Abstract-Conference.html) | Training language models to follow instructions with human feedback. | Long Ouyang, Jeffrey Wu, Xu Jiang | 2022.3 |
| [CodeGen](https://arxiv.org/abs/2203.13474) | CodeGen: An Open Large Language Model for Code with Multi-Turn Program Synthesis. | Erik Nijkamp, Bo Pang, Hiroaki Hayashi | 2022.3 |
| [Claude](https://arxiv.org/abs/2204.05862) | Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback. | Yuntao Bai, Andy Jones, Kamal Ndousse | 2022.4 |
| [PaLM](https://arxiv.org/abs/2204.02311) | PaLM: Scaling Language Modeling with Pathways. | Aakanksha Chowdhery, Sharan Narang, Jacob Devlin | 2022.4 |
| [OPT](https://arxiv.org/abs/2205.01068) | OPT: Open Pre-trained Transformer Language Models. | Susan Zhang, Stephen Roller, Naman Goyal | 2022.5 |
| [Super-NaturalInstructions](https://doi.org/10.18653/v1/2022.emnlp-main.340) | Super-NaturalInstructions: Generalization via Declarative Instructions on 1600+ NLP Tasks. | Yizhong Wang, Swaroop Mishra, Pegah Alipoormolabashi, Yeganeh Kordi | 2022.9 |
| [GLM](https://arxiv.org/abs/2210.02414) | GLM-130B: An Open Bilingual Pre-trained Model. | Aohan Zeng, Xiao Liu, Zhengxiao Du | 2022.10 |
| [BLOOM](https://doi.org/10.48550/arXiv.2211.05100) | BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. | Teven Le Scao, Angela Fan, Christopher Akiki | 2022.11 |
| [LLaMA](https://arxiv.org/abs/2302.13971) | LLaMA: Open and Efficient Foundation Language Models. | Hugo Touvron, Thibaut Lavril, Gautier Izacard | 2023.2 |
| [Alpaca](https://crfm.stanford.edu/2023/03/13/alpaca.html) | Alpaca: A Strong, Replicable Instruction-Following Model. | Rohan Taori, Ishaan Gulrajani, Tianyi Zhang | 2023.3 |
| [GPT-4](https://arxiv.org/abs/2303.08774v2) | GPT-4 Technical Report. | OpenAI | 2023.3 |
| [WizardLM](https://doi.org/10.48550/arXiv.2304.12244) | WizardLM: Empowering Large Language Models to Follow Complex Instructions. | Can Xu, Qingfeng Sun, Kai Zheng | 2023.4 |
| [Vicuna](https://lmsys.org/blog/2023-03-30-vicuna/) | Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90% ChatGPT Quality. | The Vicuna Team | 2023.3 |
| [ChatGLM](https://chatglm.cn/blog) | ChatGLM. | Zhipu AI | 2023.6 |
| [Llama2](https://arxiv.org/abs/2307.09288) | Llama 2: Open Foundation and Fine-Tuned Chat Models. | Hugo Touvron, Louis Martin, Kevin Stone | 2023.7 |

## 🏳‍🌈 Definition of Hallucination

- **"Truthful AI: Developing and governing AI that does not lie."**, 2021.10
  - Owain Evans, Owen Cotton-Barratt, Lukas Finnveden
  - [[Paper]](https://arxiv.org/abs/2110.06674)

- **"Survey of Hallucination in Natural Language Generation."**, 2022.2
  - Ziwei Ji, Nayeon Lee, Rita Frieske
  - [[Paper]](https://arxiv.org/abs/2202.03629)

- **"Context-faithful Prompting for Large Language Models."**, 2023.3
  - Wenxuan Zhou, Sheng Zhang, Hoifung Poon
  - [[Paper]](https://arxiv.org/abs/2303.11315)

- **"Do Language Models Know When They’re Hallucinating References?"**, 2023.5
  - Ayush Agrawal, Lester Mackey, Adam Tauman Kalai
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.18248)

## 🎉 Mechanism Analysis

### ✨Data Attribution

- **"Data Distributional Properties Drive Emergent In-Context Learning in Transformers."**, 2022.5
  - Stephanie C. Y. Chan, Adam Santoro, Andrew K. Lampinen
  - [[Paper]](http://papers.nips.cc/paper_files/paper/2022/hash/77c6ccacfd9962e2307fc64680fc5ace-Abstract-Conference.html)

- **"Towards Tracing Factual Knowledge in Language Models Back to the Training Data."**, 2022.5
  - Ekin Akyürek, Tolga Bolukbasi, Frederick Liu
  - [[Paper]](https://arxiv.org/abs/2205.11482)

- **"A Multitask, Multilingual, Multimodal Evaluation of ChatGPT on Reasoning, Hallucination, and Interactivity."**, 2023.2
  - Yejin Bang, Samuel Cahyawijaya, Nayeon Lee
  - [[Paper]](https://doi.org/10.48550/arXiv.2302.04023)

- **"Hallucinations in Large Multilingual Translation Models."**, 2023.3
  - Nuno Miguel Guerreiro, Duarte M.
Alves, Jonas Waldendorf
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.16104)

- **"Visual Instruction Tuning."**, 2023.4
  - Haotian Liu, Chunyuan Li, Qingyang Wu
  - [[Paper]](https://doi.org/10.48550/arXiv.2304.08485)

- **"Evaluating Object Hallucination in Large Vision-Language Models."**, 2023.5
  - Yifan Li, Yifan Du, Kun Zhou
  - [[Paper]](https://arxiv.org/abs/2305.10355)

- **"Sources of Hallucination by Large Language Models on Inference Tasks."**, 2023.5
  - Nick McKenna, Tianyi Li, Liang Cheng
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.14552)

- **"Automatic Evaluation of Attribution by Large Language Models."**, 2023.5
  - Xiang Yue, Boshi Wang, Kai Zhang
  - [[Paper]](https://arxiv.org/abs/2305.06311)

- **"Mitigating Hallucination in Large Multi-Modal Models via Robust Instruction Tuning."**, 2023.6
  - Fuxiao Liu, Kevin Lin, Linjie Li
  - [[Paper]](https://arxiv.org/abs/2306.14565)

### ✨Knowledge Gap

- **"A Survey of Knowledge-enhanced Text Generation."**, 2022.1
  - Wenhao Yu, Chenguang Zhu, Zaitang Li
  - [[Paper]](https://doi.org/10.1145/3512467)

- **"Attributed Text Generation via Post-hoc Research and Revision."**, 2022.10
  - Luyu Gao, Zhuyun Dai, Panupong Pasupat
  - [[Paper]](https://doi.org/10.48550/arXiv.2210.08726)

- **"Artificial Hallucinations in ChatGPT: Implications in Scientific Writing."**, 2023.2
  - Hussam Alkaissi, Samy I McFarlane
  - [[Paper]](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9939079/)

- **"SelfCheckGPT: Zero-Resource Black-Box Hallucination Detection for Generative Large Language Models."**, 2023.3
  - Potsawee Manakul, Adian Liusie, Mark J. F.
Gales
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.08896)

- **"Why Does ChatGPT Fall Short in Answering Questions Faithfully?"**, 2023.4
  - Shen Zheng, Jie Huang, Kevin Chen-Chuan Chang
  - [[Paper]](https://arxiv.org/abs/2304.10513)

- **"Zero-shot Faithful Factual Error Correction."**, 2023.5
  - Kung-Hsiang Huang, Hou Pong Chan, Heng Ji
  - [[Paper]](https://aclanthology.org/2023.acl-long.311)

- **"Mitigating Language Model Hallucination with Interactive Question-Knowledge Alignment."**, 2023.5
  - Shuo Zhang, Liangming Pan, Junzhou Zhao
  - [[Paper]](https://arxiv.org/abs/2305.13669)

- **"Adaptive Chameleon or Stubborn Sloth: Unraveling the Behavior of Large Language Models in Knowledge Clashes."**, 2023.5
  - Jian Xie, Kai Zhang, Jiangjie Chen
  - [[Paper]](https://arxiv.org/abs/2305.13300)

- **"Evaluating Generative Models for Graph-to-Text Generation."**, 2023.7
  - Shuzhou Yuan, Michael Färber
  - [[Paper]](https://doi.org/10.48550/arXiv.2307.14712)

- **"Overthinking the Truth: Understanding how Language Models Process False Demonstrations."**, 2023.7
  - Danny Halawi, Jean-Stanislas Denain, Jacob Steinhardt
  - [[Paper]](https://arxiv.org/abs/2307.09476)

### ✨Optimum Formulation

- **"Improved Natural Language Generation via Loss Truncation."**, 2020.5
  - Daniel Kang, Tatsunori Hashimoto
  - [[Paper]](https://doi.org/10.18653/v1/2020.acl-main.66)

- **"The Curious Case of Hallucinations in Neural Machine Translation."**, 2021.4
  - Vikas Raunak, Arul Menezes, Marcin Junczys-Dowmunt
  - [[Paper]](https://doi.org/10.18653/v1/2021.naacl-main.92)

- **"Optimal Transport for Unsupervised Hallucination Detection in Neural Machine Translation."**, 2022.12
  - Nuno Miguel Guerreiro, Pierre Colombo, Pablo Piantanida
  - [[Paper]](https://doi.org/10.48550/arXiv.2212.09631)

- **"Elastic Weight Removal for Faithful and Abstractive Dialogue Generation."**, 2023.3
  - Nico Daheim, Nouha Dziri, Mrinmaya Sachan
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.17574)

- **"HistAlign: Improving Context Dependency in Language Generation by Aligning with History."**, 2023.5
  - David Wan, Shiyue Zhang, Mohit Bansal
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.04782)

- **"How Language Model Hallucinations Can Snowball."**, 2023.5
  - Muru Zhang, Ofir Press, William Merrill
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.13534)

- **"Improving Language Models via Plug-and-Play Retrieval Feedback."**, 2023.5
  - Wenhao Yu, Zhihan Zhang, Zhenwen Liang
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.14002)

- **"Teaching Language Models to Hallucinate Less with Synthetic Tasks."**, 2023.10
  - Erik Jones, Hamid Palangi, Clarisse Simões
  - [[Paper]](https://arxiv.org/abs/2310.06827)

## 🪁 Taxonomy of LLM Hallucinations in NLP Tasks

### ✨Machine Translation

- **"Unsupervised Cross-lingual Representation Learning at Scale."**, 2020.7
  - Alexis Conneau, Kartikay Khandelwal, Naman Goyal
  - [[Paper]](https://doi.org/10.18653/v1/2020.acl-main.747)

- **"The Curious Case of Hallucinations in Neural Machine Translation."**, 2021.4
  - Vikas Raunak, Arul Menezes, Marcin Junczys-Dowmunt
  - [[Paper]](https://doi.org/10.18653/v1/2021.naacl-main.92)

- **"Overcoming Catastrophic Forgetting in Zero-Shot Cross-Lingual Generation."**, 2022.5
  - Tu Vu, Aditya Barua, Brian Lester
  - [[Paper]](https://doi.org/10.18653/v1/2022.emnlp-main.630)

- **"Looking for a Needle in a Haystack: A Comprehensive Study of Hallucinations in Neural Machine Translation."**, 2022.8
  - Nuno Miguel Guerreiro, Elena Voita, André F.
T. Martins
  - [[Paper]](https://aclanthology.org/2023.eacl-main.75)

- **"Prompting PaLM for Translation: Assessing Strategies and Performance."**, 2022.11
  - David Vilar, Markus Freitag, Colin Cherry
  - [[Paper]](https://arxiv.org/abs/2211.09102)

- **"The unreasonable effectiveness of few-shot learning for machine translation."**, 2023.2
  - Xavier Garcia, Yamini Bansal, Colin Cherry
  - [[Paper]](https://arxiv.org/abs/2302.01398)

- **"How Good Are GPT Models at Machine Translation? A Comprehensive Evaluation."**, 2023.2
  - Amr Hendy, Mohamed Abdelrehim, Amr Sharaf
  - [[Paper]](https://arxiv.org/abs/2302.09210)

- **"Hallucinations in Large Multilingual Translation Models."**, 2023.3
  - Nuno Miguel Guerreiro, Duarte M. Alves, Jonas Waldendorf
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.16104)

- **"Investigating the Translation Performance of a Large Multilingual Language Model: the Case of BLOOM."**, 2023.3
  - Rachel Bawden, François Yvon
  - [[Paper]](https://arxiv.org/abs/2303.01911)

- **"HalOmi: A Manually Annotated Benchmark for Multilingual Hallucination and Omission Detection in Machine Translation."**, 2023.5
  - David Dale, Elena Voita, Janice Lam, Prangthip Hansanti
  - [[Paper]](https://arxiv.org/abs/2305.11746)

- **"mmT5: Modular Multilingual Pre-Training Solves Source Language Hallucinations."**, 2023.5
  - Jonas Pfeiffer, Francesco Piccinno, Massimo Nicosia
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.14224)

### ✨Question and Answer

- **"Hurdles to Progress in Long-form Question Answering."**, 2021.6
  - Kalpesh Krishna, Aurko Roy, Mohit Iyyer
  - [[Paper]](https://aclanthology.org/2021.naacl-main.393/)

- **"Entity-Based Knowledge Conflicts in Question Answering."**, 2021.9
  - Shayne Longpre, Kartik Perisetla, Anthony Chen
  - [[Paper]](https://aclanthology.org/2021.emnlp-main.565/)

- **"TruthfulQA: Measuring How Models Mimic Human Falsehoods."**, 2022.5
  - Stephanie Lin, Jacob Hilton, Owain Evans
  - [[Paper]](https://aclanthology.org/2022.acl-long.229)

- **"Check Your Facts and Try Again: Improving Large Language Models with External Knowledge and Automated Feedback."**, 2023.2
  - Baolin Peng, Michel Galley, Pengcheng He
  - [[Paper]](https://arxiv.org/abs/2302.12813)

- **"Why Does ChatGPT Fall Short in Answering Questions Faithfully?"**, 2023.4
  - Shen Zheng, Jie Huang, Kevin Chen-Chuan Chang
  - [[Paper]](https://doi.org/10.48550/arXiv.2304.10513)

- **"Evaluating Correctness and Faithfulness of Instruction-Following Models for Question Answering."**, 2023.7
  - Vaibhav Adlakha, Parishad BehnamGhader, Xing Han Lu
  - [[Paper]](https://arxiv.org/abs/2307.16877)

- **"Med-HALT: Medical Domain Hallucination Test for Large Language Models."**, 2023.7
  - Logesh Kumar Umapathi, Ankit Pal, Malaikannan Sankarasubbu
  - [[Paper]](https://arxiv.org/abs/2307.15343)

### ✨Dialog System

- **"On the Origin of Hallucinations in Conversational Models: Is it the Datasets or the Models?"**, 2022.7
  - Nouha Dziri, Sivan Milton, Mo Yu
  - [[Paper]](https://aclanthology.org/2022.naacl-main.387/)

- **"Contrastive Learning Reduces Hallucination in Conversations."**, 2022.12
  - Weiwei Sun, Zhengliang Shi, Shen Gao
  - [[Paper]](https://ojs.aaai.org/index.php/AAAI/article/view/26596)

- **"Diving Deep into Modes of Fact Hallucinations in Dialogue Systems."**, 2022.12
  - Souvik Das, Sougata Saha, Rohini K.
Srihari
  - [[Paper]](https://doi.org/10.18653/v1/2022.findings-emnlp.48)

- **"Elastic Weight Removal for Faithful and Abstractive Dialogue Generation."**, 2023.3
  - Nico Daheim, Nouha Dziri, Mrinmaya Sachan
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.17574)

### ✨Summarization System

- **"Hallucinated but Factual! Inspecting the Factuality of Hallucinations in Abstractive Summarization."**, 2022.5
  - Meng Cao, Yue Dong, Jackie Chi Kit Cheung
  - [[Paper]](https://doi.org/10.18653/v1/2022.acl-long.236)

- **"Evaluating the Factual Consistency of Large Language Models Through Summarization."**, 2022.11
  - Derek Tam, Anisha Mascarenhas, Shiyue Zhang
  - [[Paper]](https://doi.org/10.18653/v1/2023.findings-acl.322)

- **"“Why is this misleading?”: Detecting News Headline Hallucinations with Explanations."**, 2023.2
  - Jiaming Shen, Jialu Liu, Dan Finnie
  - [[Paper]](https://doi.org/10.1145/3543507.3583375)

- **"Detecting and Mitigating Hallucinations in Multilingual Summarisation."**, 2023.5
  - Yifu Qiu, Yftah Ziser, Anna Korhonen
  - [[Paper]](https://arxiv.org/abs/2305.13632)

- **"LLMs as Factual Reasoners: Insights from Existing Benchmarks and Beyond."**, 2023.5
  - Philippe Laban, Wojciech Kryściński, Divyansh Agarwal
  - [[Paper]](https://arxiv.org/abs/2305.14540)

- **"Evaluating Factual Consistency of Texts with Semantic Role Labeling."**, 2023.5
  - Jing Fan, Dennis Aumiller, Michael Gertz
  - [[Paper]](https://arxiv.org/abs/2305.13309)

- **"Summarization is (Almost) Dead."**, 2023.9
  - Xiao Pu, Mingqi Gao, Xiaojun Wan
  - [[Paper]](https://arxiv.org/abs/2309.09558)

### ✨Knowledge Graphs with LLMs

- **"GPT-NER: Named Entity Recognition via Large Language Models."**, 2023.4
  - Shuhe Wang, Xiaofei Sun, Xiaoya Li
  - [[Paper]](https://arxiv.org/abs/2304.10428)

- **"LLMs for Knowledge Graph Construction and Reasoning: Recent Capabilities and Future Opportunities."**, 2023.5
  - Yuqi Zhu, Xiaohan Wang, Jing Chen
  - [[Paper]](https://arxiv.org/abs/2305.13168)

- **"KoLA: Carefully Benchmarking World Knowledge of Large Language Models."**, 2023.6
  - Jifan Yu, Xiaozhi Wang, Shangqing Tu
  - [[Paper]](https://arxiv.org/abs/2306.09296)

- **"Evaluating Generative Models for Graph-to-Text Generation."**, 2023.7
  - Shuzhou Yuan, Michael Färber
  - [[Paper]](https://doi.org/10.48550/arXiv.2307.14712)

- **"Text2KGBench: A Benchmark for Ontology-Driven Knowledge Graph Generation from Text."**, 2023.8
  - Nandana Mihindukulasooriya, Sanju Tiwari, Carlos F. Enguix
  - [[Paper]](https://arxiv.org/abs/2308.02357)

### ✨Cross-modal System

- **"Let there be a clock on the beach: Reducing Object Hallucination in Image Captioning."**, 2021.10
  - Ali Furkan Biten, Lluís Gómez, Dimosthenis Karatzas
  - [[Paper]](https://doi.org/10.1109/WACV51458.2022.00253)

- **"Simple Token-Level Confidence Improves Caption Correctness."**, 2023.5
  - Suzanne Petryk, Spencer Whitehead, Joseph E.
Gonzalez
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.07021)

- **"Evaluating Object Hallucination in Large Vision-Language Models."**, 2023.5
  - Yifan Li, Yifan Du, Kun Zhou, Jinpeng Wang
  - [[Paper]](https://arxiv.org/abs/2305.10355)

- **"Album Storytelling with Iterative Story-aware Captioning and Large Language Models."**, 2023.5
  - Munan Ning, Yujia Xie, Dongdong Chen
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.12943)

- **"Mitigating Hallucination in Large Multi-Modal Models via Robust Instruction Tuning."**, 2023.6
  - Fuxiao Liu, Kevin Lin, Linjie Li
  - [[Paper]](https://arxiv.org/abs/2306.14565)

- **"Fact-Checking of AI-Generated Reports."**, 2023.7
  - Razi Mahmood, Ge Wang, Mannudeep Kalra
  - [[Paper]](https://arxiv.org/abs/2307.14634)

### ✨Others

- **"The Scope of ChatGPT in Software Engineering: A Thorough Investigation."**, 2023.5
  - Wei Ma, Shangqing Liu, Wenhan Wang
  - [[Paper]](https://arxiv.org/abs/2305.12138)

- **"Generating Benchmarks for Factuality Evaluation of Language Models."**, 2023.7
  - Dor Muhlgay, Ori Ram, Inbal Magar
  - [[Paper]](https://arxiv.org/abs/2307.06908)

## 🔮 Hallucination Detection

### ✨Inference Classifier

- **"Evaluating the Factual Consistency of Large Language Models Through Summarization."**, 2022.11
  - Derek Tam, Anisha Mascarenhas, Shiyue Zhang
  - [[Paper]](https://doi.org/10.18653/v1/2023.findings-acl.322)

- **"“Why is this misleading?”: Detecting News Headline Hallucinations with Explanations."**, 2023.2
  - Jiaming Shen, Jialu Liu, Dan Finnie
  - [[Paper]](https://doi.org/10.1145/3543507.3583375)

- **"HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models."**, 2023.5
  - Junyi Li, Xiaoxue Cheng, Wayne Xin Zhao
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.11747)

- **"Mitigating Hallucination in Large Multi-Modal Models via Robust Instruction Tuning."**, 2023.6
  - Fuxiao Liu, Kevin Lin, Linjie Li
  - [[Paper]](https://arxiv.org/abs/2306.14565)

- **"Fact-Checking of AI-Generated Reports."**, 2023.7
  - Razi Mahmood, Ge Wang, Mannudeep K. Kalra
  - [[Paper]](https://doi.org/10.48550/arXiv.2307.14634)

- **"Chain of Natural Language Inference for Reducing Large Language Model Ungrounded Hallucinations."**, 2023.10
  - Deren Lei, Yaxi Li, Mengya Hu
  - [[Paper]](https://arxiv.org/abs/2310.03951)

### ✨Uncertainty Measure

- **"BARTScore: Evaluating Generated Text as Text Generation."**, 2021.6
  - Weizhe Yuan, Graham Neubig, Pengfei Liu
  - [[Paper]](https://proceedings.neurips.cc/paper/2021/hash/e4d2b6e6fdeca3e60e0f1a62fee3d9dd-Abstract.html)

- **"Contrastive Learning Reduces Hallucination in Conversations."**, 2022.12
  - Weiwei Sun, Zhengliang Shi, Shen Gao
  - [[Paper]](https://ojs.aaai.org/index.php/AAAI/article/view/26596)

- **"Knowledge of Knowledge: Exploring Known-Unknowns Uncertainty with Large Language Models."**, 2023.5
  - Alfonso Amayuelas, Liangming Pan, Wenhu Chen
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.13712)

- **"Methods for Measuring, Updating, and Visualizing Factual Beliefs in Language Models."**, 2023.5
  - Peter Hase, Mona Diab, Asli Celikyilmaz
  - [[Paper]](https://aclanthology.org/2023.eacl-main.199/)

- **"Measuring and Modifying Factual Knowledge in Large Language Models."**, 2023.6
  - Pouya Pezeshkpour
  - [[Paper]](https://arxiv.org/abs/2306.06264)

- **"LLM Calibration and Automatic Hallucination Detection via Pareto Optimal Self-supervision."**, 2023.6
  - Theodore Zhao, Mu Wei, J.
Samuel Preston
  - [[Paper]](https://arxiv.org/abs/2306.16564)

- **"A Stitch in Time Saves Nine: Detecting and Mitigating Hallucinations of LLMs by Validating Low-Confidence Generation."**, 2023.7
  - Neeraj Varshney, Wenlin Yao, Hongming Zhang
  - [[Paper]](https://arxiv.org/abs/2307.03987)

### ✨Self-Evaluation

- **"Language Models (Mostly) Know What They Know."**, 2022.7
  - Saurav Kadavath, Tom Conerly, Amanda Askell
  - [[Paper]](https://doi.org/10.48550/arXiv.2207.05221)

- **"SelfCheckGPT: Zero-Resource Black-Box Hallucination Detection for Generative Large Language Models."**, 2023.3
  - Potsawee Manakul, Adian Liusie, Mark J. F. Gales
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.08896)

- **"Do Language Models Know When They’re Hallucinating References?"**, 2023.5
  - Ayush Agrawal, Lester Mackey, Adam Tauman Kalai
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.18248)

- **"Evaluating Object Hallucination in Large Vision-Language Models."**, 2023.5
  - Yifan Li, Yifan Du, Kun Zhou, Jinpeng Wang
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.10355)

- **"Self-Checker: Plug-and-Play Modules for Fact-Checking with Large Language Models."**, 2023.5
  - Miaoran Li, Baolin Peng, Zhu Zhang
  - [[Paper]](https://arxiv.org/abs/2305.14623)

- **"LM vs LM: Detecting Factual Errors via Cross Examination."**, 2023.5
  - Roi Cohen, May Hamri, Mor Geva
  - [[Paper]](https://arxiv.org/abs/2305.13281)

- **"Self-contradictory Hallucinations of Large Language Models: Evaluation, Detection and Mitigation."**, 2023.5
  - Niels Mündler, Jingxuan He, Slobodan Jenko
  - [[Paper]](https://arxiv.org/abs/2305.15852)

- **"A New Benchmark and Reverse Validation Method for Passage-level Hallucination Detection."**, 2023.10
  - Shiping Yang, Renliang Sun, Xiaojun Wan
  - [[Paper]](https://arxiv.org/abs/2310.06498)

### ✨Evidence Retrieval

- **"FActScore: Fine-grained Atomic Evaluation of Factual Precision in Long Form Text Generation."**, 2023.5
  - Sewon Min, Kalpesh Krishna, Xinxi Lyu
  - [[Paper]](https://arxiv.org/abs/2305.14251)

- **"Complex Claim Verification with Evidence Retrieved in the Wild."**, 2023.5
  - Jifan Chen, Grace Kim, Aniruddh Sriram
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.11859)

- **"Retrieving Supporting Evidence for LLMs Generated Answers."**, 2023.6
  - Siqing Huo, Negar Arabzadeh, Charles L. A. Clarke
  - [[Paper]](https://doi.org/10.48550/arXiv.2306.13781)

- **"FacTool: Factuality Detection in Generative AI -- A Tool Augmented Framework for Multi-Task and Multi-Domain Scenarios."**, 2023.7
  - I-Chun Chern, Steffi Chern, Shiqi Chen
  - [[Paper]](https://arxiv.org/abs/2307.13528)

## 🪄 Hallucination Correction

### ✨Parameter Enhancement

- **"Factuality Enhanced Language Models for Open-Ended Text Generation."**, 2022.6
  - Nayeon Lee, Wei Ping, Peng Xu
  - [[Paper]](http://papers.nips.cc/paper_files/paper/2022/hash/df438caa36714f69277daa92d608dd63-Abstract-Conference.html)

- **"Contrastive Learning Reduces Hallucination in Conversations."**, 2022.12
  - Weiwei Sun, Zhengliang Shi, Shen Gao
  - [[Paper]](https://ojs.aaai.org/index.php/AAAI/article/view/26596)

- **"Editing Models with Task Arithmetic."**, 2023.2
  - Gabriel Ilharco, Marco Tulio Ribeiro, Mitchell Wortsman
  - [[Paper]](https://openreview.net/forum?id=6t0Kwf8-jrj)

- **"Elastic Weight Removal for Faithful and Abstractive Dialogue Generation."**, 2023.3
  - Nico Daheim, Nouha Dziri, Mrinmaya Sachan
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.17574)

- **"HistAlign: Improving Context Dependency in Language Generation by
Aligning with History."**, 2023.5
  - David Wan, Shiyue Zhang, Mohit Bansal
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.04782)

- **"mmT5: Modular Multilingual Pre-Training Solves Source Language Hallucinations."**, 2023.5
  - Jonas Pfeiffer, Francesco Piccinno, Massimo Nicosia
  - [[Paper]](https://arxiv.org/abs/2305.14224)

- **"Trusting Your Evidence: Hallucinate Less with Context-aware Decoding."**, 2023.5
  - Weijia Shi, Xiaochuang Han, Mike Lewis
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.14739)

- **"PURR: Efficiently Editing Language Model Hallucinations by Denoising Language Model Corruptions."**, 2023.5
  - Anthony Chen, Panupong Pasupat, Sameer Singh
  - [[Paper]](https://arxiv.org/abs/2305.14908)

- **"Augmented Large Language Models with Parametric Knowledge Guiding."**, 2023.5
  - Ziyang Luo, Can Xu, Pu Zhao, Xiubo Geng
  - [[Paper]](https://arxiv.org/abs/2305.04757)

- **"Inference-Time Intervention: Eliciting Truthful Answers from a Language Model."**, 2023.6
  - Kenneth Li, Oam Patel, Fernanda Viégas
  - [[Paper]](https://arxiv.org/abs/2306.03341)

- **"TRAC: Trustworthy Retrieval Augmented Chatbot."**, 2023.7
  - Shuo Li, Sangdon Park, Insup Lee
  - [[Paper]](https://doi.org/10.48550/arXiv.2307.04642)

- **"EasyEdit: An Easy-to-use Knowledge Editing Framework for Large Language Models."**, 2023.8
  - Peng Wang, Ningyu Zhang, Xin Xie
  - [[Paper]](https://arxiv.org/abs/2308.07269)

- **"DoLa: Decoding by Contrasting Layers Improves Factuality in Large Language Models."**, 2023.9
  - Yung-Sung Chuang, Yujia Xie, Hongyin Luo
  - [[Paper]](https://arxiv.org/abs/2309.03883)

### ✨Post-hoc Attribution and Edit Technology

- **"Neural Path Hunter: Reducing Hallucination in Dialogue Systems via Path Grounding."**, 2021.4
  - Nouha Dziri, Andrea Madotto, Osmar Zaïane
  - [[Paper]](https://doi.org/10.18653/v1/2021.emnlp-main.168)

- **"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models."**, 2022.1
  - Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma
  - [[Paper]](http://papers.nips.cc/paper_files/paper/2022/hash/9d5609613524ecf4f15af0f7b31abca4-Abstract-Conference.html)

- **"Teaching language models to support answers with verified quotes."**, 2022.3
  - Jacob Menick, Maja Trebacz, Vladimir Mikulik
  - [[Paper]](https://doi.org/10.48550/arXiv.2203.11147)

- **"ORCA: Interpreting Prompted Language Models via Locating Supporting Data Evidence in the Ocean of Pretraining Data."**, 2022.5
  - Xiaochuang Han, Yulia Tsvetkov
  - [[Paper]](https://doi.org/10.48550/arXiv.2205.12600)

- **"Large Language Models are Zero-Shot Reasoners."**, 2022.5
  - Takeshi Kojima, Shixiang Shane Gu, Machel Reid
  - [[Paper]](http://papers.nips.cc/paper_files/paper/2022/hash/8bb0d291acd4acf06ef112099c16f326-Abstract-Conference.html)

- **"Rethinking with Retrieval: Faithful Large Language Model Inference."**, 2023.1
  - Hangfeng He, Hongming Zhang, Dan Roth
  - [[Paper]](https://doi.org/10.48550/arXiv.2301.00303)

- **"TRAK: Attributing Model Behavior at Scale."**, 2023.3
  - Sung Min Park, Kristian Georgiev, Andrew Ilyas
  - [[Paper]](https://proceedings.mlr.press/v202/park23c.html)

- **"Data Portraits: Recording Foundation Model Training Data."**, 2023.3
  - Marc Marone, Benjamin Van Durme
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.03919)

- **"Self-Refine: Iterative Refinement with Self-Feedback."**, 2023.3
  - Aman Madaan, Niket Tandon, Prakhar Gupta
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.17651)

- **"Reflexion: an autonomous agent with dynamic memory and self-reflection."**, 2023.3
  - Noah Shinn, Beck Labash,
Ashwin Gopinath
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.11366)

- **"'According to ...': Prompting Language Models Improves Quoting from Pre-Training Data."**, 2023.5
  - Orion Weller, Marc Marone, Nathaniel Weir
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.13252)

- **"Verify-and-Edit: A Knowledge-Enhanced Chain-of-Thought Framework."**, 2023.5
  - Ruochen Zhao, Xingxuan Li, Shafiq Joty
  - [[Paper]](https://arxiv.org/abs/2305.03268)

- **"Chain-of-Verification Reduces Hallucination in Large Language Models."**, 2023.9
  - Shehzaad Dhuliawala, Mojtaba Komeili, Jing Xu
  - [[Paper]](https://arxiv.org/abs/2309.11495)

- **"Chain of Natural Language Inference for Reducing Large Language Model Ungrounded Hallucinations."**, 2023.10
  - Deren Lei, Yaxi Li, Mengya Hu
  - [[Paper]](https://arxiv.org/abs/2310.03951)

### ✨Utilizing Programming Languages

- **"PAL: Program-aided Language Models."**, 2022.11
  - Luyu Gao, Aman Madaan, Shuyan Zhou
  - [[Paper]](https://proceedings.mlr.press/v202/gao23f.html)

- **"Program of Thoughts Prompting: Disentangling Computation from Reasoning for Numerical Reasoning Tasks."**, 2022.11
  - Wenhu Chen, Xueguang Ma, Xinyi Wang
  - [[Paper]](https://doi.org/10.48550/arXiv.2211.12588)

- **"Teaching Algorithmic Reasoning via In-context Learning."**, 2022.11
  - Hattie Zhou, Azade Nova, Hugo Larochelle
  - [[Paper]](https://doi.org/10.48550/arXiv.2211.09066)

- **"Solving Challenging Math Word Problems Using GPT-4 Code Interpreter with Code-based Self-Verification."**, 2023.8
  - Aojun Zhou, Ke Wang, Zimu Lu
  - [[Paper]](https://arxiv.org/abs/2308.07921)

### ✨Leverage External Knowledge

- **"Improving Language Models by Retrieving from Trillions of Tokens."**, 2021.12
  - Sebastian Borgeaud, Arthur Mensch, Jordan Hoffmann
  - [[Paper]](https://proceedings.mlr.press/v162/borgeaud22a.html)

- **"Interleaving Retrieval with Chain-of-Thought Reasoning for Knowledge-Intensive Multi-Step Questions."**, 2022.12
  - Harsh Trivedi, Niranjan Balasubramanian, Tushar Khot
  - [[Paper]](https://doi.org/10.18653/v1/2023.acl-long.557)

- **"When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories."**, 2022.12
  - Alex Mallen, Akari Asai, Victor Zhong
  - [[Paper]](https://doi.org/10.18653/v1/2023.acl-long.546)

- **"Check Your Facts and Try Again: Improving Large Language Models with External Knowledge and Automated Feedback."**, 2023.2
  - Baolin Peng, Michel Galley, Pengcheng He
  - [[Paper]](https://arxiv.org/abs/2302.12813)

- **"In-Context Retrieval-Augmented Language Models."**, 2023.2
  - Ori Ram, Yoav Levine, Itay Dalmedigos
  - [[Paper]](https://doi.org/10.48550/arXiv.2302.00083)

- **"cTBL: Augmenting Large Language Models for Conversational Tables."**, 2023.3
  - Anirudh S. Sundar, Larry Heck
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.12024)

- **"GeneGPT: Augmenting Large Language Models with Domain Tools for Improved Access to Biomedical Information."**, 2023.4
  - Qiao Jin, Yifan Yang, Qingyu Chen
  - [[Paper]](https://arxiv.org/abs/2304.09667)

- **"Active Retrieval Augmented Generation."**, 2023.5
  - Zhengbao Jiang, Frank F. Xu, Luyu Gao
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.06983)

- **"Chain of Knowledge: A Framework for Grounding Large Language Models with Structured Knowledge Bases."**, 2023.5
  - Xingxuan Li, Ruochen Zhao, Yew Ken Chia
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.13269)

- **"Gorilla: Large Language Model Connected with Massive APIs."**, 2023.5
  - Shishir G.
Patil, Tianjun Zhang, Xin Wang
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.15334)

- **"RETA-LLM: A Retrieval-Augmented Large Language Model Toolkit."**, 2023.6
  - Jiongnan Liu, Jiajie Jin, Zihan Wang
  - [[Paper]](https://doi.org/10.48550/arXiv.2306.05212)

- **"User-Controlled Knowledge Fusion in Large Language Models: Balancing Creativity and Hallucination."**, 2023.7
  - Chen Zhang
  - [[Paper]](https://arxiv.org/abs/2307.16139)

- **"KnowledGPT: Enhancing Large Language Models with Retrieval and Storage Access on Knowledge Bases."**, 2023.8
  - Xintao Wang, Qianwen Yang, Yongting Qiu
  - [[Paper]](https://doi.org/10.48550/arXiv.2308.11761)

### ✨Assessment Feedback

- **"Learning to summarize with human feedback."**, 2020.12
  - Nisan Stiennon, Long Ouyang, Jeffrey Wu
  - [[Paper]](https://proceedings.neurips.cc/paper/2020/hash/1f89885d556929e98d3ef9b86448f951-Abstract.html)

- **"BRIO: Bringing Order to Abstractive Summarization."**, 2022.3
  - Yixin Liu, Pengfei Liu, Dragomir R. Radev
  - [[Paper]](https://doi.org/10.18653/v1/2022.acl-long.207)

- **"Language Models (Mostly) Know What They Know."**, 2022.7
  - Saurav Kadavath, Tom Conerly, Amanda Askell
  - [[Paper]](https://doi.org/10.48550/arXiv.2207.05221)

- **"Check Your Facts and Try Again: Improving Large Language Models with External Knowledge and Automated Feedback."**, 2023.2
  - Baolin Peng, Michel Galley, Pengcheng He
  - [[Paper]](https://doi.org/10.48550/arXiv.2302.12813)

- **"Chain of Hindsight Aligns Language Models with Feedback."**, 2023.2
  - Hao Liu, Carmelo Sferrazza, Pieter Abbeel
  - [[Paper]](https://doi.org/10.48550/arXiv.2302.02676)

- **"Zero-shot Faithful Factual Error Correction."**, 2023.5
  - Kung-Hsiang Huang, Hou Pong Chan, Heng Ji
  - [[Paper]](https://doi.org/10.18653/v1/2023.acl-long.311)

- **"CRITIC: Large Language Models Can Self-Correct with Tool-Interactive Critiquing."**, 2023.5
  - Zhibin Gou, Zhihong Shao, Yeyun Gong
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.11738)

- **"Album Storytelling with Iterative Story-aware Captioning and Large Language Models."**, 2023.5
  - Munan Ning, Yujia Xie, Dongdong Chen
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.12943)

- **"How Language Model Hallucinations Can Snowball."**, 2023.5
  - Muru Zhang, Ofir Press, William Merrill
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.13534)

- **"Mitigating Language Model Hallucination with Interactive Question-Knowledge Alignment."**, 2023.5
  - Shuo Zhang, Liangming Pan, Junzhou Zhao
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.13669)

- **"Improving Language Models via Plug-and-Play Retrieval Feedback."**, 2023.5
  - Wenhao Yu, Zhihan Zhang, Zhenwen Liang
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.14002)

- **"PaD: Program-aided Distillation Specializes
Large Models in Reasoning."**, 2023.5
  - Xuekai Zhu, Biqing Qi, Kaiyan Zhang
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.13888)

- **"Enabling Large Language Models to Generate Text with Citations."**, 2023.5
  - Tianyu Gao, Howard Yen, Jiatong Yu
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.14627)

- **"Do Language Models Know When They’re Hallucinating References?"**, 2023.5
  - Ayush Agrawal, Lester Mackey, Adam Tauman Kalai
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.18248)

- **"Improving Factuality of Abstractive Summarization via Contrastive Reward Learning."**, 2023.7
  - I-Chun Chern, Zhiruo Wang, Sanjan Das
  - [[Paper]](https://doi.org/10.48550/arXiv.2307.04507)

- **"Towards Mitigating Hallucination in Large Language Models via Self-Reflection."**, 2023.10
  - Ziwei Ji, Tiezheng Yu, Yan Xu
  - [[Paper]](https://arxiv.org/abs/2310.06271)

### ✨Mindset Society

- **"Hallucinations in Large Multilingual Translation Models."**, 2023.3
  - Nuno Miguel Guerreiro, Duarte M. Alves, Jonas Waldendorf
  - [[Paper]](https://doi.org/10.48550/arXiv.2303.16104)

- **"Improving Factuality and Reasoning in Language Models through Multiagent Debate."**, 2023.5
  - Yilun Du, Shuang Li, Antonio Torralba
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.14325)

- **"Encouraging Divergent Thinking in Large Language Models through Multi-Agent Debate."**, 2023.5
  - Tian Liang, Zhiwei He, Wenxiang Jiao
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.19118)

- **"Examining the Inter-Consistency of Large Language Models: An In-depth Analysis via Debate."**, 2023.5
  - Kai Xiong, Xiao Ding, Yixin Cao
  - [[Paper]](https://doi.org/10.48550/arXiv.2305.11595)

- **"LM vs LM: Detecting Factual Errors via Cross Examination."**, 2023.5
  - Roi Cohen, May Hamri, Mor Geva
  - [[Paper]](https://arxiv.org/abs/2305.13281)

- **"PRD: Peer Rank and Discussion Improve Large Language Model based Evaluations."**, 2023.7
  - Ruosen Li, Teerth Patel, Xinya Du
  - [[Paper]](https://doi.org/10.48550/arXiv.2307.02762)

- **"Unleashing Cognitive Synergy in Large Language Models: A Task-Solving Agent through Multi-Persona Self-Collaboration."**, 2023.7
  - Zhenhailong Wang, Shaoguang Mao, Wenshan Wu
  - [[Paper]](https://doi.org/10.48550/arXiv.2307.05300)

## 🌟 TIPS

If you find this repository useful for your research or work, we would really appreciate it if you starred it.