This paper is devoted to verifying the empirical Zipf and Heaps laws in natural languages using Google Books Ngram corpus data. The connection between the Zipf law and the Heaps law, which predicts the power dependence of the vocabulary size on the text size, is discussed. In fact, the Heaps exponent in this dependence varies as the text corpus grows. To explain this, the obtained results are compared with the probability model of text generation. Quasi-periodic variations with characteristic time periods of 60-100 years were also found.
Description
Deviations in the Zipf and Heaps laws in natural languages - IOPscience
%0 Journal Article
%1 bochkarev2014
%A Bochkarev, Vladimir V
%A Lerner, Eduard Yu
%A Shevlyakova, Anna V
%D 2014
%J Journal of Physics: Conference Series
%K mybook texts zipf
%N 1
%P 012009
%T Deviations in the Zipf and Heaps laws in natural languages
%U http://stacks.iop.org/1742-6596/490/i=1/a=012009
%V 490
%X This paper is devoted to verifying the empirical Zipf and Heaps laws in natural languages using Google Books Ngram corpus data. The connection between the Zipf law and the Heaps law, which predicts the power dependence of the vocabulary size on the text size, is discussed. In fact, the Heaps exponent in this dependence varies as the text corpus grows. To explain this, the obtained results are compared with the probability model of text generation. Quasi-periodic variations with characteristic time periods of 60-100 years were also found.
% Journal article: J. Phys.: Conf. Ser. 490 (2014), article number 012009.
% The pages field holds the article number; it is braced so the leading zero
% survives tools that would otherwise read it as an integer.
% added-at, timestamp, biburl, interhash, intrahash, keywords and description
% are BibSonomy export metadata; unknown fields are ignored by standard styles.
@article{bochkarev2014,
  author      = {Bochkarev, Vladimir V and Lerner, Eduard Yu and Shevlyakova, Anna V},
  title       = {Deviations in the {Zipf} and {Heaps} laws in natural languages},
  journal     = {Journal of Physics: Conference Series},
  volume      = {490},
  number      = {1},
  pages       = {012009},
  year        = {2014},
  issn        = {1742-6596},
  doi         = {10.1088/1742-6596/490/1/012009},
  url         = {http://stacks.iop.org/1742-6596/490/i=1/a=012009},
  abstract    = {This paper is devoted to verifying the empirical Zipf and Heaps laws in natural languages using Google Books Ngram corpus data. The connection between the Zipf law and the Heaps law, which predicts the power dependence of the vocabulary size on the text size, is discussed. In fact, the Heaps exponent in this dependence varies as the text corpus grows. To explain this, the obtained results are compared with the probability model of text generation. Quasi-periodic variations with characteristic time periods of 60-100 years were also found.},
  keywords    = {mybook texts zipf},
  description = {Deviations in the Zipf and Heaps laws in natural languages - IOPscience},
  added-at    = {2017-12-04T16:58:40.000+0100},
  timestamp   = {2017-12-04T17:14:01.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/2879f2b25f839a380e6e909977be3f75f/vitelot},
  interhash   = {20a63b6357927e10926f76cb2a24f905},
  intrahash   = {879f2b25f839a380e6e909977be3f75f},
}