Theory of Deep Convolutional Neural Networks III: Approximating Radial Functions

We consider a family of deep neural networks consisting of two groups of
convolutional layers, a downsampling operator, and a fully connected layer. The
network structure depends on two structural parameters, which determine the
numbers of convolutional layers and the width of the fully connected layer. We
establish an approximation theory with explicit approximation rates when the
function to be approximated takes the composite form $f\circ Q$ with a feature
polynomial $Q$ and a univariate function $f$. In particular, we prove that such
a network can outperform fully connected shallow networks in approximating
radial functions with $Q(x) = |x|^2$ when the dimension $d$ of data from
$\mathbb{R}^d$ is large. This gives the first rigorous proof of the superiority
of deep convolutional neural networks in approximating functions with special
structures. We then carry out a generalization analysis for empirical risk
minimization with such a deep network in a regression framework where the
regression function has the form $f\circ Q$. Our network structure, which uses
no information about the composite structure or the functions $Q$ and $f$, can
automatically extract features and exploit the composite nature of the
regression function by tuning the structural parameters. Our analysis yields an
error bound that decreases with the network depth to a minimum and then
increases, theoretically verifying a trade-off in network depth observed in
many practical applications.
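To make the architecture concrete, the following is a minimal sketch, assuming
a PyTorch-style realization of the network family described above: two groups
of one-dimensional convolutional layers, a downsampling operator, and one fully
connected layer. The two structural parameters from the abstract appear as J
(which sets the number of convolutional layers; giving each group J layers is
an illustrative choice) and m (the width of the fully connected layer). The
kernel size, the ReLU activation, max pooling as the downsampling operator, and
the name TwoGroupCNN are likewise assumptions, not the authors' exact
construction.

import torch
import torch.nn as nn


def conv_block(kernel_size: int) -> nn.Module:
    # Zero padding of kernel_size - 1 lets the sequence length grow with
    # depth, in the spirit of the expansive convolutions used in this series.
    return nn.Sequential(
        nn.Conv1d(1, 1, kernel_size, padding=kernel_size - 1),
        nn.ReLU(),
    )


class TwoGroupCNN(nn.Module):
    def __init__(self, J: int, m: int, kernel_size: int = 3):
        super().__init__()
        # Group 1: J convolutional layers.
        self.group1 = nn.Sequential(*(conv_block(kernel_size) for _ in range(J)))
        # Downsampling operator between the groups (assumed: max pooling).
        self.down = nn.MaxPool1d(2)
        # Group 2: J more convolutional layers.
        self.group2 = nn.Sequential(*(conv_block(kernel_size) for _ in range(J)))
        # Fully connected layer of width m, then a scalar regression output.
        # LazyLinear infers its input size on the first forward pass.
        self.fc = nn.Sequential(nn.Flatten(), nn.LazyLinear(m), nn.ReLU(), nn.Linear(m, 1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        z = x.unsqueeze(1)  # (batch, d) -> (batch, 1, d) for Conv1d
        z = self.group2(self.down(self.group1(z)))
        return self.fc(z).squeeze(-1)


# A radial target of the composite form f(Q(x)) with Q(x) = |x|^2 and f = sin.
d = 16
x = torch.randn(32, d)
y = torch.sin(x.pow(2).sum(dim=1))
net = TwoGroupCNN(J=2, m=64)
print(net(x).shape)  # torch.Size([32])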
@misc{mao2021theory,
  author   = {Mao, Tong and Shi, Zhongjie and Zhou, Ding-Xuan},
  title    = {Theory of Deep Convolutional Neural Networks III: Approximating Radial Functions},
  year     = {2021},
  note     = {arXiv:2107.00896},
  url      = {http://arxiv.org/abs/2107.00896},
  keywords = {cnn theory}
}