RNA sequencing (RNA-seq) is gaining popularity as a complementary assay to genome sequencing for precisely identifying the molecular causes of rare disorders. A powerful approach is to identify aberrant gene expression levels as potential pathogenic events. However, existing methods for detecting aberrant read counts in RNA-seq data either lack assessments of statistical significance, so that establishing cutoffs is arbitrary, or rely on subjective manual corrections for confounders. Here, we describe OUTRIDER (Outlier in RNA-Seq Finder), an algorithm developed to address these issues. The algorithm uses an autoencoder to model read-count expectations according to the gene covariation resulting from technical, environmental, or common genetic variations. Given these expectations, the RNA-seq read counts are assumed to follow a negative binomial distribution with a gene-specific dispersion. Outliers are then identified as read counts that significantly deviate from this distribution. The model is automatically fitted to achieve the best recall of artificially corrupted data. Precision-recall analyses using simulated outlier read counts demonstrated the importance of controlling for covariation and significance-based thresholds. OUTRIDER is open source and includes functions for filtering out genes not expressed in a dataset, for identifying outlier samples with too many aberrantly expressed genes, and for detecting aberrant gene expression on the basis of false-discovery-rate-adjusted p values. Overall, OUTRIDER provides an end-to-end solution for identifying aberrantly expressed genes and is suitable for use by rare-disease diagnostic platforms.
Description
OUTRIDER: A Statistical Method for Detecting Aberrantly Expressed Genes in RNA Sequencing Data. - PubMed - NCBI
%0 Journal Article
%1 Brechtmann:2018:Am-J-Hum-Genet:30503520
%A Brechtmann, F
%A Mertes, C
%A Matusevičiūtė, A
%A Yépez, V A
%A Avsec, Ž
%A Herzog, M
%A Bader, D M
%A Prokisch, H
%A Gagneur, J
%D 2018
%J Am J Hum Genet
%K MUSTREAD deg detection differential-expression open-source outlier rna-seq software
%N 6
%P 907-917
%R 10.1016/j.ajhg.2018.10.025
%T OUTRIDER: A Statistical Method for Detecting Aberrantly Expressed Genes in RNA Sequencing Data
%U https://www.ncbi.nlm.nih.gov/pubmed/30503520
%V 103
%X RNA sequencing (RNA-seq) is gaining popularity as a complementary assay to genome sequencing for precisely identifying the molecular causes of rare disorders. A powerful approach is to identify aberrant gene expression levels as potential pathogenic events. However, existing methods for detecting aberrant read counts in RNA-seq data either lack assessments of statistical significance, so that establishing cutoffs is arbitrary, or rely on subjective manual corrections for confounders. Here, we describe OUTRIDER (Outlier in RNA-Seq Finder), an algorithm developed to address these issues. The algorithm uses an autoencoder to model read-count expectations according to the gene covariation resulting from technical, environmental, or common genetic variations. Given these expectations, the RNA-seq read counts are assumed to follow a negative binomial distribution with a gene-specific dispersion. Outliers are then identified as read counts that significantly deviate from this distribution. The model is automatically fitted to achieve the best recall of artificially corrupted data. Precision-recall analyses using simulated outlier read counts demonstrated the importance of controlling for covariation and significance-based thresholds. OUTRIDER is open source and includes functions for filtering out genes not expressed in a dataset, for identifying outlier samples with too many aberrantly expressed genes, and for detecting aberrant gene expression on the basis of false-discovery-rate-adjusted p values. Overall, OUTRIDER provides an end-to-end solution for identifying aberrantly expressed genes and is suitable for use by rare-disease diagnostic platforms.
@comment{OUTRIDER journal article. Non-ASCII letters in author names are written as brace-wrapped LaTeX accent escapes so the entry also works with 8-bit BibTeX; acronyms in the title are brace-protected against style recasing.}
@article{Brechtmann:2018:Am-J-Hum-Genet:30503520,
  abstract    = {RNA sequencing (RNA-seq) is gaining popularity as a complementary assay to genome sequencing for precisely identifying the molecular causes of rare disorders. A powerful approach is to identify aberrant gene expression levels as potential pathogenic events. However, existing methods for detecting aberrant read counts in RNA-seq data either lack assessments of statistical significance, so that establishing cutoffs is arbitrary, or rely on subjective manual corrections for confounders. Here, we describe OUTRIDER (Outlier in RNA-Seq Finder), an algorithm developed to address these issues. The algorithm uses an autoencoder to model read-count expectations according to the gene covariation resulting from technical, environmental, or common genetic variations. Given these expectations, the RNA-seq read counts are assumed to follow a negative binomial distribution with a gene-specific dispersion. Outliers are then identified as read counts that significantly deviate from this distribution. The model is automatically fitted to achieve the best recall of artificially corrupted data. Precision-recall analyses using simulated outlier read counts demonstrated the importance of controlling for covariation and significance-based thresholds. OUTRIDER is open source and includes functions for filtering out genes not expressed in a dataset, for identifying outlier samples with too many aberrantly expressed genes, and for detecting aberrant gene expression on the basis of false-discovery-rate-adjusted p values. Overall, OUTRIDER provides an end-to-end solution for identifying aberrantly expressed genes and is suitable for use by rare-disease diagnostic platforms.},
  added-at    = {2019-03-30T10:35:19.000+0100},
  author      = {Brechtmann, F and Mertes, C and Matusevi{\v{c}}i{\={u}}t{\.{e}}, A and Y{\'e}pez, V A and Avsec, {\v{Z}} and Herzog, M and Bader, D M and Prokisch, H and Gagneur, J},
  biburl      = {https://www.bibsonomy.org/bibtex/23034926faa27fe74b98529f0d0dee988/marcsaric},
  description = {OUTRIDER: A Statistical Method for Detecting Aberrantly Expressed Genes in RNA Sequencing Data. - PubMed - NCBI},
  doi         = {10.1016/j.ajhg.2018.10.025},
  interhash   = {7c353375ca5a87fb4cdeb6fbf24128a3},
  intrahash   = {3034926faa27fe74b98529f0d0dee988},
  journal     = {Am J Hum Genet},
  keywords    = {MUSTREAD deg detection differential-expression open-source outlier rna-seq software},
  month       = dec,
  number      = 6,
  pages       = {907--917},
  pmid        = {30503520},
  timestamp   = {2019-03-30T10:35:19.000+0100},
  title       = {{OUTRIDER}: A Statistical Method for Detecting Aberrantly Expressed Genes in {RNA} Sequencing Data},
  url         = {https://www.ncbi.nlm.nih.gov/pubmed/30503520},
  volume      = 103,
  year        = 2018,
}