We compare samples of tweets from the Twitter Streaming API constructed from different connections that tracked the same popular keywords at the same time. We find that on average, over 96% of the tweets seen in one sample are seen in all others. Those tweets found only in a subset of samples do not significantly differ from tweets found in all samples in terms of user popularity or tweet structure. We conclude they are likely the result of a technical artifact rather than any systematic bias.
Description
Two 1%s Don’t Make a Whole: Comparing Simultaneous Samples from Twitter’s Streaming API | SpringerLink
%0 Conference Paper
%1 joseph2014whole
%A Joseph, Kenneth
%A Landwehr, Peter M.
%A Carley, Kathleen M.
%B Social Computing, Behavioral-Cultural Modeling and Prediction
%C Cham
%D 2014
%E Kennedy, William G.
%E Agarwal, Nitin
%E Yang, Shanchieh Jay
%I Springer International Publishing
%K api data quality sample sampling stream streaming twitter
%P 75-83
%R 10.1007/978-3-319-05579-4_10
%T Two 1%s Don't Make a Whole: Comparing Simultaneous Samples from Twitter's Streaming API
%U https://link.springer.com/chapter/10.1007/978-3-319-05579-4_10
%X We compare samples of tweets from the Twitter Streaming API constructed from different connections that tracked the same popular keywords at the same time. We find that on average, over 96% of the tweets seen in one sample are seen in all others. Those tweets found only in a subset of samples do not significantly differ from tweets found in all samples in terms of user popularity or tweet structure. We conclude they are likely the result of a technical artifact rather than any systematic bias.
%@ 978-3-319-05579-4
% BibTeX record joseph2014whole. The percent sign in the title is escaped (\%)
% so LaTeX does not treat the remainder of the title as a comment when the
% bibliography is typeset; braces in the title protect proper nouns from
% style-driven lowercasing. The description field and the BibSonomy
% bookkeeping fields are kept verbatim from the export.
@inproceedings{joseph2014whole,
  author      = {Joseph, Kenneth and Landwehr, Peter M. and Carley, Kathleen M.},
  title       = {Two 1\%s Don't Make a Whole: Comparing Simultaneous Samples from {Twitter's} {Streaming API}},
  booktitle   = {Social Computing, Behavioral-Cultural Modeling and Prediction},
  editor      = {Kennedy, William G. and Agarwal, Nitin and Yang, Shanchieh Jay},
  publisher   = {Springer International Publishing},
  address     = {Cham},
  pages       = {75--83},
  year        = {2014},
  doi         = {10.1007/978-3-319-05579-4_10},
  isbn        = {978-3-319-05579-4},
  url         = {https://link.springer.com/chapter/10.1007/978-3-319-05579-4_10},
  abstract    = {We compare samples of tweets from the Twitter Streaming API constructed from different connections that tracked the same popular keywords at the same time. We find that on average, over 96{\%} of the tweets seen in one sample are seen in all others. Those tweets found only in a subset of samples do not significantly differ from tweets found in all samples in terms of user popularity or tweet structure. We conclude they are likely the result of a technical artifact rather than any systematic bias.},
  keywords    = {api data quality sample sampling stream streaming twitter},
  description = {Two 1%s Don’t Make a Whole: Comparing Simultaneous Samples from Twitter’s Streaming API | SpringerLink},
  added-at    = {2021-05-12T11:21:16.000+0200},
  timestamp   = {2021-05-12T11:21:16.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/22a2e0e399e4a808c3da5625c5a272a62/jaeschke},
  interhash   = {ec9d88f770f9b56f3b1a8a98227542a7},
  intrahash   = {2a2e0e399e4a808c3da5625c5a272a62},
}