We examine the novel task of domain-independent scientific concept extraction from abstracts of scholarly articles and present two contributions. First, we suggest a set of generic scientific concepts that have been identified in a systematic annotation process. This set of concepts is utilised to annotate a corpus of scientific abstracts from 10 domains of Science, Technology and Medicine at the phrasal level in a joint effort with domain experts. The resulting dataset is used in a set of benchmark experiments to (a) provide baseline performance for this task, (b) examine the transferability of concepts between domains. Second, we present a state-of-the-art deep learning baseline. Further, we propose the active learning strategy for an optimal selection of instances from among the various domains in our data. The experimental results show that (1) a substantial agreement is achievable by non-experts after consultation with domain experts, (2) the baseline system achieves a fairly high F1 score, (3) active learning enables us to nearly halve the amount of required training data.
Beschreibung
Domain-Independent Extraction of Scientific Concepts from Research Articles | SpringerLink
%0 Conference Paper
%1 10.1007/978-3-030-45439-5_17
%A Brack, Arthur
%A D'Souza, Jennifer
%A Hoppe, Anett
%A Auer, Sören
%A Ewerth, Ralph
%B Advances in Information Retrieval
%C Cham
%D 2020
%E Jose, Joemon M.
%E Yilmaz, Emine
%E Magalhães, João
%E Castells, Pablo
%E Ferro, Nicola
%E Silva, Mário J.
%E Martins, Flávio
%I Springer International Publishing
%K articles concepts extraction research
%P 251-266
%T Domain-Independent Extraction of Scientific Concepts from Research Articles
%X We examine the novel task of domain-independent scientific concept extraction from abstracts of scholarly articles and present two contributions. First, we suggest a set of generic scientific concepts that have been identified in a systematic annotation process. This set of concepts is utilised to annotate a corpus of scientific abstracts from 10 domains of Science, Technology and Medicine at the phrasal level in a joint effort with domain experts. The resulting dataset is used in a set of benchmark experiments to (a) provide baseline performance for this task, (b) examine the transferability of concepts between domains. Second, we present a state-of-the-art deep learning baseline. Further, we propose the active learning strategy for an optimal selection of instances from among the various domains in our data. The experimental results show that (1) a substantial agreement is achievable by non-experts after consultation with domain experts, (2) the baseline system achieves a fairly high F1 score, (3) active learning enables us to nearly halve the amount of required training data.
%@ 978-3-030-45439-5
@comment{Conference paper exported from BibSonomy. The citation key is the paper's DOI and is kept unchanged so existing citations still resolve; the same DOI is also given in the doi field. The fields from added-at onward are BibSonomy bookkeeping that standard styles ignore.}
@inproceedings{10.1007/978-3-030-45439-5_17,
  author      = {Brack, Arthur and D'Souza, Jennifer and Hoppe, Anett and Auer, S{\"o}ren and Ewerth, Ralph},
  title       = {Domain-Independent Extraction of Scientific Concepts from Research Articles},
  booktitle   = {Advances in Information Retrieval},
  editor      = {Jose, Joemon M. and Yilmaz, Emine and Magalh{\~a}es, Jo{\~a}o and Castells, Pablo and Ferro, Nicola and Silva, M{\'a}rio J. and Martins, Fl{\'a}vio},
  publisher   = {Springer International Publishing},
  address     = {Cham},
  year        = {2020},
  pages       = {251--266},
  isbn        = {978-3-030-45439-5},
  doi         = {10.1007/978-3-030-45439-5_17},
  keywords    = {articles concepts extraction research},
  abstract    = {We examine the novel task of domain-independent scientific concept extraction from abstracts of scholarly articles and present two contributions. First, we suggest a set of generic scientific concepts that have been identified in a systematic annotation process. This set of concepts is utilised to annotate a corpus of scientific abstracts from 10 domains of Science, Technology and Medicine at the phrasal level in a joint effort with domain experts. The resulting dataset is used in a set of benchmark experiments to (a) provide baseline performance for this task, (b) examine the transferability of concepts between domains. Second, we present a state-of-the-art deep learning baseline. Further, we propose the active learning strategy for an optimal selection of instances from among the various domains in our data. The experimental results show that (1) a substantial agreement is achievable by non-experts after consultation with domain experts, (2) the baseline system achieves a fairly high F1 score, (3) active learning enables us to nearly halve the amount of required training data.},
  description = {Domain-Independent Extraction of Scientific Concepts from Research Articles | SpringerLink},
  added-at    = {2021-02-11T12:33:55.000+0100},
  timestamp   = {2021-02-11T12:33:55.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/2b504b8be49292a6fe113a4d1e2c3cfc6/parismic},
  interhash   = {e7cf9636dfe2b79ad9ef93a75193e158},
  intrahash   = {b504b8be49292a6fe113a4d1e2c3cfc6},
}