Though statistical analyses are centered on research questions and
hypotheses, current statistical analysis tools are not. Users must first
translate their hypotheses into specific statistical tests and then perform API
calls with functions and parameters. To do so accurately requires that users
have statistical expertise. To lower this barrier to valid, replicable
statistical analysis, we introduce Tea, a high-level declarative language and
runtime system. In Tea, users express their study design, any parametric
assumptions, and their hypotheses. Tea compiles these high-level specifications
into a constraint satisfaction problem that determines the set of valid
statistical tests, and then executes them to test the hypothesis. We evaluate
Tea using a suite of statistical analyses drawn from popular tutorials. We show
that Tea generally matches the choices of experts while automatically switching
to non-parametric tests when parametric assumptions are not met. We simulate
the effect of mistakes made by non-expert users and show that Tea automatically
avoids both false negatives and false positives that could be produced by the
application of incorrect statistical tests.
Description
Tea: A High-level Language and Runtime System for Automating Statistical Analysis
%0 Journal Article
%1 jun2019highlevel
%A Jun, Eunice
%A Daum, Maureen
%A Roesch, Jared
%A Chasins, Sarah E.
%A Berger, Emery D.
%A Just, Rene
%A Reinecke, Katharina
%D 2019
%K Statistics software
%T Tea: A High-level Language and Runtime System for Automating Statistical
Analysis
%U http://arxiv.org/abs/1904.05387
%X Though statistical analyses are centered on research questions and
hypotheses, current statistical analysis tools are not. Users must first
translate their hypotheses into specific statistical tests and then perform API
calls with functions and parameters. To do so accurately requires that users
have statistical expertise. To lower this barrier to valid, replicable
statistical analysis, we introduce Tea, a high-level declarative language and
runtime system. In Tea, users express their study design, any parametric
assumptions, and their hypotheses. Tea compiles these high-level specifications
into a constraint satisfaction problem that determines the set of valid
statistical tests, and then executes them to test the hypothesis. We evaluate
Tea using a suite of statistical analyses drawn from popular tutorials. We show
that Tea generally matches the choices of experts while automatically switching
to non-parametric tests when parametric assumptions are not met. We simulate
the effect of mistakes made by non-expert users and show that Tea automatically
avoids both false negatives and false positives that could be produced by the
application of incorrect statistical tests.
@comment{jun2019highlevel is an arXiv preprint (1904.05387). The identifier
lives in eprint/archiveprefix and the entry is typed misc because no journal
is recorded. The added-at, biburl, description, interhash, intrahash and
timestamp fields are BibSonomy export metadata that standard styles ignore.}
@misc{jun2019highlevel,
  author        = {Jun, Eunice and Daum, Maureen and Roesch, Jared and Chasins, Sarah E. and Berger, Emery D. and Just, Rene and Reinecke, Katharina},
  title         = {{Tea}: A High-level Language and Runtime System for Automating Statistical Analysis},
  year          = {2019},
  eprint        = {1904.05387},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1904.05387},
  note          = {11 pages},
  keywords      = {Statistics software},
  abstract      = {Though statistical analyses are centered on research questions and
hypotheses, current statistical analysis tools are not. Users must first
translate their hypotheses into specific statistical tests and then perform API
calls with functions and parameters. To do so accurately requires that users
have statistical expertise. To lower this barrier to valid, replicable
statistical analysis, we introduce Tea, a high-level declarative language and
runtime system. In Tea, users express their study design, any parametric
assumptions, and their hypotheses. Tea compiles these high-level specifications
into a constraint satisfaction problem that determines the set of valid
statistical tests, and then executes them to test the hypothesis. We evaluate
Tea using a suite of statistical analyses drawn from popular tutorials. We show
that Tea generally matches the choices of experts while automatically switching
to non-parametric tests when parametric assumptions are not met. We simulate
the effect of mistakes made by non-expert users and show that Tea automatically
avoids both false negatives and false positives that could be produced by the
application of incorrect statistical tests.},
  description   = {Tea: A High-level Language and Runtime System for Automating Statistical Analysis},
  added-at      = {2019-04-19T20:33:03.000+0200},
  timestamp     = {2019-04-19T20:34:32.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/233e896ed478e15461976dbe272e72fa1/sigoldberg1},
  interhash     = {5c3274222b372b6b921a4605d350bef4},
  intrahash     = {33e896ed478e15461976dbe272e72fa1},
}