This analogy to lossy compression is not just a way to understand ChatGPT’s facility at repackaging information found on the Web by using different words. It’s also a way to understand the “hallucinations,” or nonsensical answers to factual questions, to which large language models such as ChatGPT are all too prone. These hallucinations are compression artifacts, but—like the incorrect labels generated by the Xerox photocopier—they are plausible enough that identifying them requires comparing them against the originals, which in this case means either the Web or our own knowledge of the world.
%0 Journal Article
%1 chiang2023chatgpt
%A Chiang, Ted
%D 2023
%J The New Yorker
%K ChatGPT artificial_intelligence large_language_models lossy_compression natural_language_processing
%T ChatGPT Is a Blurry JPEG of the Web: OpenAI’s chatbot offers paraphrases, whereas Google offers quotes. Which do we prefer?
%U https://www.newyorker.com/tech/annals-of-technology/chatgpt-is-a-blurry-jpeg-of-the-web
%X This analogy to lossy compression is not just a way to understand ChatGPT’s facility at repackaging information found on the Web by using different words. It’s also a way to understand the “hallucinations,” or nonsensical answers to factual questions, to which large language models such as ChatGPT are all too prone. These hallucinations are compression artifacts, but—like the incorrect labels generated by the Xerox photocopier—they are plausible enough that identifying them requires comparing them against the originals, which in this case means either the Web or our own knowledge of the world.
@comment{
  Magazine article by Ted Chiang in The New Yorker (9 Feb 2023).
  Case-sensitive words in the title (acronyms, product and company names,
  and the capital that opens the second sentence) are wrapped in braces so
  that bibliography styles which apply sentence casing keep them as written.
  Only whole words are braced; the rest of the title stays style-controlled.
}
@article{chiang2023chatgpt,
  abstract  = {This analogy to lossy compression is not just a way to understand ChatGPT’s facility at repackaging information found on the Web by using different words. It’s also a way to understand the “hallucinations,” or nonsensical answers to factual questions, to which large language models such as ChatGPT are all too prone. These hallucinations are compression artifacts, but—like the incorrect labels generated by the Xerox photocopier—they are plausible enough that identifying them requires comparing them against the originals, which in this case means either the Web or our own knowledge of the world.},
  added-at  = {2023-02-14T10:17:16.000+0100},
  author    = {Chiang, Ted},
  biburl    = {https://www.bibsonomy.org/bibtex/27875e64d7d53d240f2ba7d01b5499cc3/meneteqel},
  day       = 9,
  interhash = {53988f52eb52ebe52f820119c582c115},
  intrahash = {7875e64d7d53d240f2ba7d01b5499cc3},
  journal   = {The New Yorker},
  keywords  = {ChatGPT artificial_intelligence large_language_models lossy_compression natural_language_processing},
  language  = {en-US},
  month     = feb,
  timestamp = {2023-02-14T10:17:16.000+0100},
  title     = {{ChatGPT} Is a Blurry {JPEG} of the {Web}: {OpenAI’s} chatbot offers paraphrases, whereas {Google} offers quotes. {Which} do we prefer?},
  url       = {https://www.newyorker.com/tech/annals-of-technology/chatgpt-is-a-blurry-jpeg-of-the-web},
  year      = 2023,
}