While the fast-paced inception of novel tasks and new datasets helps foster
active research in a community towards interesting directions, keeping track of
the abundance of research activity in different areas on different datasets is
likely to become increasingly difficult. The community could greatly benefit
from an automatic system able to summarize scientific results, e.g., in the
form of a leaderboard. In this paper we build two datasets and develop a
framework (TDMS-IE) aimed at automatically extracting task, dataset, metric and
score from NLP papers, towards the automatic construction of leaderboards.
Experiments show that our model outperforms several baselines by a large
margin. Our model is a first step towards automatic leaderboard construction,
e.g., in the NLP domain.
Description
[1906.09317] Identification of Tasks, Datasets, Evaluation Metrics, and Numeric Scores for Scientific Leaderboards Construction
%0 Generic
%1 hou2019identification
%A Hou, Yufang
%A Jochim, Charles
%A Gleize, Martin
%A Bonin, Francesca
%A Ganguly, Debasis
%D 2019
%K ai benchmark dataset evaluation leaderboard learning machine ml
%T Identification of Tasks, Datasets, Evaluation Metrics, and Numeric
Scores for Scientific Leaderboards Construction
%U http://arxiv.org/abs/1906.09317
%X While the fast-paced inception of novel tasks and new datasets helps foster
active research in a community towards interesting directions, keeping track of
the abundance of research activity in different areas on different datasets is
likely to become increasingly difficult. The community could greatly benefit
from an automatic system able to summarize scientific results, e.g., in the
form of a leaderboard. In this paper we build two datasets and develop a
framework (TDMS-IE) aimed at automatically extracting task, dataset, metric and
score from NLP papers, towards the automatic construction of leaderboards.
Experiments show that our model outperforms several baselines by a large
margin. Our model is a first step towards automatic leaderboard construction,
e.g., in the NLP domain.
@misc{hou2019identification,
  abstract      = {While the fast-paced inception of novel tasks and new datasets helps foster
active research in a community towards interesting directions, keeping track of
the abundance of research activity in different areas on different datasets is
likely to become increasingly difficult. The community could greatly benefit
from an automatic system able to summarize scientific results, e.g., in the
form of a leaderboard. In this paper we build two datasets and develop a
framework (TDMS-IE) aimed at automatically extracting task, dataset, metric and
score from {NLP} papers, towards the automatic construction of leaderboards.
Experiments show that our model outperforms several baselines by a large
margin. Our model is a first step towards automatic leaderboard construction,
e.g., in the {NLP} domain.},
  added-at      = {2021-05-20T09:51:27.000+0200},
  author        = {Hou, Yufang and Jochim, Charles and Gleize, Martin and Bonin, Francesca and Ganguly, Debasis},
  biburl        = {https://www.bibsonomy.org/bibtex/28725725958d09d8cb57955a20f02172b/jaeschke},
  description   = {[1906.09317] Identification of Tasks, Datasets, Evaluation Metrics, and Numeric Scores for Scientific Leaderboards Construction},
  archiveprefix = {arXiv},
  eprint        = {1906.09317},
  interhash     = {2ea76a5b6cbfc76c784cbffd47159b8d},
  intrahash     = {8725725958d09d8cb57955a20f02172b},
  keywords      = {ai benchmark dataset evaluation leaderboard learning machine ml},
  note          = {Accepted at ACL 2019; arXiv:1906.09317},
  timestamp     = {2021-05-20T09:51:27.000+0200},
  title         = {Identification of Tasks, Datasets, Evaluation Metrics, and Numeric Scores for Scientific Leaderboards Construction},
  url           = {http://arxiv.org/abs/1906.09317},
  year          = {2019},
}