In this paper, we show that Multilingual BERT (M-BERT), released by Devlin et
al. (2018) as a single language model pre-trained from monolingual corpora in
104 languages, is surprisingly good at zero-shot cross-lingual model transfer,
in which task-specific annotations in one language are used to fine-tune the
model for evaluation in another language. To understand why, we present a large
number of probing experiments, showing that transfer is possible even to
languages in different scripts, that transfer works best between typologically
similar languages, that monolingual corpora can train models for
code-switching, and that the model can find translation pairs. From these
results, we can conclude that M-BERT does create multilingual representations,
but that these representations exhibit systematic deficiencies affecting
certain language pairs.
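The zero-shot transfer setup the abstract describes is simple to reproduce in outline: fine-tune M-BERT on task-specific annotations in one language, then evaluate it directly on another with no target-language labels. Below is a minimal sketch of that recipe, assuming the HuggingFace transformers and PyTorch packages. The checkpoint name bert-base-multilingual-cased is the public M-BERT release; the toy English training sentences and German test sentence are illustrative stand-ins, not data from the paper.

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# The public Multilingual BERT checkpoint released by Devlin et al. (2018).
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-multilingual-cased", num_labels=2)

# Fine-tune on task-specific annotations in one language (toy English
# sentiment examples; a real run would use a full labeled dataset).
texts = ["I loved this film.", "This movie was terrible."]
labels = torch.tensor([1, 0])
batch = tokenizer(texts, padding=True, return_tensors="pt")

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
model.train()
for _ in range(3):  # a handful of steps, just to illustrate the loop
    loss = model(**batch, labels=labels).loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

# Zero-shot evaluation in another language: no German labels were seen.
model.eval()
test = tokenizer(["Der Film war großartig."], return_tensors="pt")
with torch.no_grad():
    prediction = model(**test).logits.argmax(dim=-1).item()
print(prediction)  # class index predicted for the German sentence

The paper's own probing experiments use named entity recognition and part-of-speech tagging rather than sentiment classification, but the transfer pattern is the same: train on language A, test on language B, with no labels in the evaluation language.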
@misc{pires2019multilingual,
  author = {Pires, Telmo and Schlinger, Eva and Garrette, Dan},
  title = {How multilingual is Multilingual {BERT}?},
  year = {2019},
  url = {http://arxiv.org/abs/1906.01502},
  note = {arXiv:1906.01502}
}