Despite the remarkable ability of large language models (LMs) to comprehend
and generate language, they have a tendency to hallucinate and create factually
inaccurate output. Augmenting LMs by retrieving information from external
knowledge resources is one promising solution. Most existing
retrieval-augmented LMs employ a retrieve-and-generate setup that only
retrieves information once based on the input. This is limiting, however, in
more general scenarios involving generation of long texts, where continually
gathering information throughout the generation process is essential. Past
efforts to retrieve information multiple times while generating outputs have
mostly retrieved documents at fixed intervals, using the previous context as
queries. In this work, we provide a generalized view of active retrieval
augmented generation: methods that actively decide when and what to retrieve
over the course of generation. We propose Forward-Looking Active REtrieval
augmented generation (FLARE), a generic retrieval-augmented generation method
that iteratively predicts the upcoming sentence to anticipate future content;
if the predicted sentence contains low-confidence tokens, it is used as a
query to retrieve relevant documents, and the sentence is then regenerated.
We comprehensively evaluate FLARE along with baselines on four long-form
knowledge-intensive generation tasks/datasets. FLARE achieves
superior or competitive performance on all tasks, demonstrating the
effectiveness of our method. Code and datasets are available at
https://github.com/jzbjyb/FLARE.
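
To make the procedure concrete, the following is a minimal Python sketch of
the forward-looking active retrieval loop described above. It is an
illustration under stated assumptions, not the paper's implementation: the
lm and retriever objects, their generate_sentence_with_confidences and
retrieve methods, and the 0.5 confidence threshold are all hypothetical
stand-ins.

def flare_generate(lm, retriever, prompt, max_sentences=20, conf_threshold=0.5):
    """Generate an answer sentence by sentence, retrieving when the LM is unsure."""
    answer = ""
    for _ in range(max_sentences):
        # Tentatively predict the next sentence along with per-token
        # confidences. `generate_sentence_with_confidences` is a
        # hypothetical helper, not a real API.
        sentence, token_confs = lm.generate_sentence_with_confidences(prompt + answer)
        if not sentence:
            break  # the LM has finished generating
        # If any token falls below the confidence threshold, use the
        # predicted sentence as a search query, fetch supporting documents,
        # and regenerate the sentence with them prepended to the context.
        if token_confs and min(token_confs) < conf_threshold:
            docs = retriever.retrieve(query=sentence)
            context = "\n".join(docs) + "\n\n" + prompt + answer
            sentence, _ = lm.generate_sentence_with_confidences(context)
        answer += sentence + " "
    return answer.strip()

This sketch follows the variant in which the tentative sentence itself serves
as the retrieval query; sentences generated with high confidence are kept
as-is, so retrieval is triggered only when the model signals uncertainty.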
@misc{jiang2023active,
author = {Jiang, Zhengbao and Xu, Frank F. and Gao, Luyu and Sun, Zhiqing and Liu, Qian and Dwivedi-Yu, Jane and Yang, Yiming and Callan, Jamie and Neubig, Graham},
keywords = {llm retrieval},
note = {arXiv:2305.06983},
title = {Active Retrieval Augmented Generation},
url = {http://arxiv.org/abs/2305.06983},
year = 2023
}