We explore how generating a chain of thought -- a series of intermediate
reasoning steps -- significantly improves the ability of large language models
to perform complex reasoning. In particular, we show how such reasoning
abilities emerge naturally in sufficiently large language models via a simple
method called chain of thought prompting, where a few chain of thought
demonstrations are provided as exemplars in prompting. Experiments on three
large language models show that chain of thought prompting improves performance
on a range of arithmetic, commonsense, and symbolic reasoning tasks. The
empirical gains can be striking. For instance, prompting a 540B-parameter
language model with just eight chain of thought exemplars achieves
state-of-the-art accuracy on the GSM8K benchmark of math word problems, surpassing even
finetuned GPT-3 with a verifier.
Description
[2201.11903] Chain-of-Thought Prompting Elicits Reasoning in Large Language Models
%0 Journal Article
%1 wei2022chainofthought
%A Wei, Jason
%A Wang, Xuezhi
%A Schuurmans, Dale
%A Bosma, Maarten
%A Ichter, Brian
%A Xia, Fei
%A Chi, Ed
%A Le, Quoc
%A Zhou, Denny
%D 2022
%K AI ML Reasoning education
%T Chain-of-Thought Prompting Elicits Reasoning in Large Language Models
%U http://arxiv.org/abs/2201.11903
%X We explore how generating a chain of thought -- a series of intermediate
reasoning steps -- significantly improves the ability of large language models
to perform complex reasoning. In particular, we show how such reasoning
abilities emerge naturally in sufficiently large language models via a simple
method called chain of thought prompting, where a few chain of thought
demonstrations are provided as exemplars in prompting. Experiments on three
large language models show that chain of thought prompting improves performance
on a range of arithmetic, commonsense, and symbolic reasoning tasks. The
empirical gains can be striking. For instance, prompting a 540B-parameter
language model with just eight chain of thought exemplars achieves
state-of-the-art accuracy on the GSM8K benchmark of math word problems, surpassing even
finetuned GPT-3 with a verifier.
@article{wei2022chainofthought,
  abstract      = {We explore how generating a chain of thought -- a series of intermediate
reasoning steps -- significantly improves the ability of large language models
to perform complex reasoning. In particular, we show how such reasoning
abilities emerge naturally in sufficiently large language models via a simple
method called chain of thought prompting, where a few chain of thought
demonstrations are provided as exemplars in prompting. Experiments on three
large language models show that chain of thought prompting improves performance
on a range of arithmetic, commonsense, and symbolic reasoning tasks. The
empirical gains can be striking. For instance, prompting a 540B-parameter
language model with just eight chain of thought exemplars achieves
state-of-the-art accuracy on the GSM8K benchmark of math word problems, surpassing even
finetuned GPT-3 with a verifier.},
  added-at      = {2023-10-19T15:21:28.000+0200},
  archiveprefix = {arXiv},
  author        = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Ichter, Brian and Xia, Fei and Chi, Ed and Le, Quoc and Zhou, Denny},
  biburl        = {https://www.bibsonomy.org/bibtex/2a3609e7e68b8dc67fd84c4714c51df37/alirezashaar},
  description   = {[2201.11903] Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
  doi           = {10.48550/arXiv.2201.11903},
  eprint        = {2201.11903},
  interhash     = {e2607a600b1065b71d742ed98846ca4e},
  intrahash     = {a3609e7e68b8dc67fd84c4714c51df37},
  keywords      = {AI ML Reasoning education},
  primaryclass  = {cs.CL},
  timestamp     = {2023-10-19T15:21:28.000+0200},
  title         = {Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
  url           = {https://arxiv.org/abs/2201.11903},
  year          = {2022}
}