The functions of proteins and RNAs are defined by the collective interactions of many residues, and yet most statistical models of biological sequences consider sites nearly independently. Recent approaches have demonstrated benefits of including interactions to capture pairwise covariation, but leave higher-order dependencies out of reach. Here we show how it is possible to capture higher-order, context-dependent constraints in biological sequences via latent variable models with nonlinear dependencies. We found that DeepSequence (https://github.com/debbiemarkslab/DeepSequence), a probabilistic model for sequence families, predicted the effects of mutations across a variety of deep mutational scanning experiments substantially better than existing methods based on the same evolutionary data. The model, learned in an unsupervised manner solely on the basis of sequence information, is grounded with biologically motivated priors, reveals the latent organization of sequence families, and can be used to explore new parts of sequence space.
Description
Deep generative models of genetic variation capture the effects of mutations | Nature Methods
%0 Journal Article
%1 riesselman2018generative
%A Riesselman, Adam J.
%A Ingraham, John B.
%A Marks, Debora S.
%D 2018
%J Nature Methods
%K afcs background cvae deep gan generative genetics information knowledge models mutations vae
%N 10
%P 816--822
%R 10.1038/s41592-018-0138-4
%T Deep generative models of genetic variation capture the effects of mutations
%U https://doi.org/10.1038/s41592-018-0138-4
%V 15
%X The functions of proteins and RNAs are defined by the collective interactions of many residues, and yet most statistical models of biological sequences consider sites nearly independently. Recent approaches have demonstrated benefits of including interactions to capture pairwise covariation, but leave higher-order dependencies out of reach. Here we show how it is possible to capture higher-order, context-dependent constraints in biological sequences via latent variable models with nonlinear dependencies. We found that DeepSequence (https://github.com/debbiemarkslab/DeepSequence), a probabilistic model for sequence families, predicted the effects of mutations across a variety of deep mutational scanning experiments substantially better than existing methods based on the same evolutionary data. The model, learned in an unsupervised manner solely on the basis of sequence information, is grounded with biologically motivated priors, reveals the latent organization of sequence families, and can be used to explore new parts of sequence space.
% Riesselman, Ingraham and Marks (2018), Nature Methods 15(10), 816--822.
% Core bibliographic fields come first. The trailing group (added-at, biburl,
% description, interhash, intrahash, refid, timestamp) is export metadata that
% standard styles ignore -- presumably written by BibSonomy, given the biburl
% host; confirm before relying on it.
@article{riesselman2018generative,
  author      = {Riesselman, Adam J. and Ingraham, John B. and Marks, Debora S.},
  title       = {Deep generative models of genetic variation capture the effects of mutations},
  journal     = {Nature Methods},
  year        = {2018},
  volume      = {15},
  number      = {10},
  pages       = {816--822},
  doi         = {10.1038/s41592-018-0138-4},
  url         = {https://doi.org/10.1038/s41592-018-0138-4},
  issn        = {1548-7105},
  abstract    = {The functions of proteins and RNAs are defined by the collective interactions of many residues, and yet most statistical models of biological sequences consider sites nearly independently. Recent approaches have demonstrated benefits of including interactions to capture pairwise covariation, but leave higher-order dependencies out of reach. Here we show how it is possible to capture higher-order, context-dependent constraints in biological sequences via latent variable models with nonlinear dependencies. We found that DeepSequence (https://github.com/debbiemarkslab/DeepSequence), a probabilistic model for sequence families, predicted the effects of mutations across a variety of deep mutational scanning experiments substantially better than existing methods based on the same evolutionary data. The model, learned in an unsupervised manner solely on the basis of sequence information, is grounded with biologically motivated priors, reveals the latent organization of sequence families, and can be used to explore new parts of sequence space.},
  keywords    = {afcs background cvae deep gan generative genetics information knowledge models mutations vae},
  added-at    = {2019-06-19T00:33:39.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/2cf613b2c8713c275d3bdf3a536f5ea10/becker},
  description = {Deep generative models of genetic variation capture the effects of mutations | Nature Methods},
  interhash   = {b2bbe45825b8ccd7dd127807e86a58e7},
  intrahash   = {cf613b2c8713c275d3bdf3a536f5ea10},
  refid       = {Riesselman2018},
  timestamp   = {2019-06-19T00:34:04.000+0200},
}