%% R.E. Showalter: Chapter 7
\chapter{Optimization and Approximation Topics}
\markboth{CHAPTER VII. OPTIMIZATION AND APPROXIMATION}{}
\section{Dirichlet's Principle}
\setcounter{equation}{0}
\setcounter{theorem}{0}
When we considered elliptic boundary value problems in Chapter III we
found it useful to pose them in a weak form.
For example, the Dirichlet problem
\begin{equation}\label{eq711}
\left. \begin{array}{rcll}
-\Delta_n u(x)&=&F(x)\ ,&\qquad x\in G\ ,\\
\noalign{\vskip6pt}
u(s)&=&0\ ,&\qquad s\in \partial G\end{array} \right\}
\end{equation}
on a bounded open set $G$ in $\RR^n$ was posed (and solved) in the form
\begin{equation}\label{eq712}
u\in H_0^1 (G) \ ;\quad \int_G \nabla u\cdot\nabla v\,dx =
\int_G F(x)v(x)\,dx\ ,\qquad v\in H_0^1 (G)\ .
\end{equation}
In the process of formulating certain problems of mathematical physics as
boundary value problems of the type \eqn{711}, integrals of the form
appearing in \eqn{712} arise naturally.
Specifically, in describing the displacement $u(x)$ at a point $x\in G$
of a stretched string $(n=1)$ or membrane $(n=2)$ resulting from a unit
tension and distributed external force $F(x)$, we find the
{\it potential energy\/} is given by
\begin{equation}\label{eq713}
E(u) = \left(\tfrac12\right) \int_G |\nabla u(x)|^2\,dx -
\int_G F(x)u(x)\,dx\ .
\end{equation}
Dirichlet's principle is the statement that the solution $u$ of \eqn{712}
is that function in $H_0^1(G)$ at which the functional $E(\cdot)$ attains
its minimum.
That is, $u$ is the solution of
\begin{equation}\label{eq714}
u\in H_0^1 (G) :E(u) \le E(v)\ ,\qquad v\in H_0^1 (G)\ .
\end{equation}
To prove that \eqn{714} characterizes $u$, we need only to note that for
each $v\in H_0^1(G)$
$$E(u+v) - E(u) = \int_G (\nabla u\cdot\nabla v- Fv)\,dx
+ \left(\tfrac12\right) \int_G |\nabla v|^2 \,dx $$
and the first term vanishes because of \eqn{712}.
Thus $E(u+v) \ge E(u)$ and equality holds only if $v\equiv 0$.
The preceding remarks suggest an alternate proof of the existence of a
solution of \eqn{712}, hence, of \eqn{711}.
Namely, we seek the element $u$ of $H_0^1(G)$ at which the energy function
$E(\cdot)$ attains its minimum, then show that $u$ is the solution of
\eqn{712}.
This program is carried out in Section~2 where we minimize functions more
general than \eqn{713} over closed convex subsets of Hilbert space.
These more general functions permit us to solve some nonlinear elliptic
boundary value problems.
By considering convex sets instead of subspaces we obtain some elementary
results on unilateral boundary value problems.
These arise in applications where the solution is subjected to a one-sided
constraint, e.g., $u(x)\ge0$, and their solutions are characterized by
variational inequalities.
These topics are presented in Section~3, and in Section~4 we give a brief
discussion of some optimal control problems for elliptic boundary value
problems.
Finally, Dirichlet's principle provides a means of numerically approximating
the solution of \eqn{712}.
We pick a convenient finite-dimensional subspace of $H_0^1(G)$ and minimize
$E(\cdot)$ over this subspace.
This is the Rayleigh-Ritz method and leads to an approximate algebraic
problem for \eqn{712}.
This method is described in Section~5, and in Section~6 we shall obtain
related approximation procedures for evolution equations of first or
second order.
\section{Minimization of Convex Functions} % 2
\setcounter{equation}{0}
Suppose $F$ is a real-valued function defined on a closed interval $K$
(possibly infinite). If $F$ is continuous and if either $K$ is bounded
or $F(x)\to+\infty$ as $|x|\to +\infty$, then $F$ attains its minimum
value at some point of $K$.
This result will be extended to certain real-valued functions on Hilbert
space and the notions developed will be extremely useful in the remainder
of this chapter.
An essential point is to characterize the minimum by the derivative of $F$.
Throughout this section $V$ is a real separable Hilbert space, $K$ is a
non-empty subset of $V$ and $F:K\to \RR$ is a function.
\subsection{} % 2.1
We recall from Section I.6 that the closed, bounded subsets of the space $V$
are weakly (sequentially) compact. It is worthwhile to consider subsets of $V$
which inherit this property.
Thus, $K$ is called {\it weakly\/} ({\it sequentially\/}) {\it closed\/}
if the limit of every weakly convergent sequence from $K$ is contained in $K$.
Since convergence (in norm) implies weak convergence, a weakly closed
set is necessarily closed.
\begin{lemma}\label{lem7-2-1}
If $K$ is closed and convex (cf.\ Section I.4.2), then it is weakly closed.
\end{lemma}
\proof
Let $x$ be a vector not in $K$.
From Theorem I.\ref{thm1-4C} there is an $x_0\in K$ which is closest to $x$.
By translation, if necessary, we may suppose $(x_0+x)/2=\theta$, i.e.,
$x=-x_0$. Clearly $(x,x_0)<0$ so we need to show that $(z,x_0)\ge0$
for all $z\in K$; from this the desired result follows easily.
Since $K$ is convex, the function $\varphi :[0,1]\to\RR$ given by
$$\varphi (t) = \|(1-t)x_0 + tz -x\|_V^2\ ,\qquad 0\le t\le 1\ ,$$
has its minimum at $t=0$.
Hence, the right-derivative $\varphi^+ (0)$ is non-negative, i.e.,
$$(x_0-x,z-x_0) \ge 0\ .$$
Since $x=-x_0$, this gives $(x_0,z) \ge \|x_0\|_V^2>0$.
\qed
The preceding result and Theorem I.\ref{thm1-6B} show that each closed,
convex and bounded subset of $V$ is weakly sequentially compact.
We shall need to consider situations in which $K$ is not bounded
(e.g., $K=V$); the following is then appropriate.
\definition
The function $F$ has the {\it growth property\/} at $x\in K$ if,
for some $R>0$, $y\in K$ and $\|y-x\|\ge R$ implies $F(y)>F(x)$.
\qed
The continuity requirement that is adequate for our purposes is the
following.
\definition
The function $F:K\to \RR$ is {\it weakly lower-semi-continuous\/} at
$x\in K$ if for every sequence $\{x_n\}$ in $K$ which weakly converges
to $x\in K$ we have $F(x) \le \liminf F(x_n)$.
[Recall that for any sequence $\{a_n\}$ in $\RR$, $\liminf (a_n)\equiv
\sup_{k\ge0} (\inf_{n\ge k} (a_n))$.]
\qed
\begin{theorem}\label{thm7-2A}
Let $K$ be closed and convex and $F:K\to\RR$ be weakly lower-semi-continuous
at every point of $K$.
If
{\rm (a)}~$K$ is bounded or if
{\rm (b)}~$F$ has the growth property at some point in $K$,
then there exists an $x_0\in K$ such that $F(x_0) \le F(x)$ for all $x\in K$.
That is, $F$ attains its minimum on $K$.
\end{theorem}
\proof
Let $m=\inf \{F(x):x\in K\}$ and $\{x_n\}$ a sequence in $K$ for which
$m=\lim F(x_n)$.
If (a) holds, then by weak sequential compactness there is a subsequence
of $\{x_n\}$ denoted by $\{x_{n'}\}$ which converges weakly to $x_0\in V$;
Lemma \ref{lem7-2-1} shows $x_0\in K$.
The weak lower-semi-continuity of $F$ shows $F(x_0)\le \liminf F(x_{n'})=m$,
hence, $F(x_0)=m$ and the result follows.
For the case of (b),
let $F$ have the growth property at $z\in K$ and let $R>0$ be such that
$F(x)>F(z)$ whenever $\|z-x\|\ge R$ and $x\in K$.
Then set $B\equiv \{x\in V: \|x-z\|\le R\}$ and apply (a) to the closed,
convex and bounded set $B\cap K$.
The result follows from the observation $\inf\{F(x) :{x\in K}\} = \inf
\{F(x):x\in B\cap K\}$.
\qed
We note that if $K$ is bounded then $F$ has the growth property at every
point of $K$; thus the case (b) of Theorem \ref{thm7-2A} includes (a)
as a special case.
Nevertheless, we prefer to leave Theorem \ref{thm7-2A} in its
(possibly) more instructive form as given.
\subsection{} % 2.2
The condition that a function be weakly lower-semi-continuous is in general
difficult to verify.
However for those functions which are convex (see below), the
lower-semi-continuity is the same for the weak and strong notions; this can
be proved directly from Lemma \ref{lem7-2-1}.
We shall consider a class of functions for which convexity and lower
semicontinuity are easy to check and, furthermore, this class contains
all examples of interest to us here.
\definition
The function $F:K\to \RR$ is {\it convex\/} if its domain $K$ is convex
and for all $x,y\in K$ and $t\in [0,1]$ we have
\begin{equation}\label{eq721}
F(tx+(1-t)y) \le tF(x) + (1-t)F(y)\ .
\end{equation}
\qed
\definition
The function $F:K\to \RR$ is {\it $G$-differentiable\/} at $x\in K$ if
$K$ is convex and if there is an $F'(x)\in V'$ such that
$$\lim_{t\to0^+} {1\over t} \Bigl[ F(x+t(y-x)) - F(x)\Bigr]
= F'(x)(y-x)$$
for all $y\in K$. $F'(x)$ is called the {\it $G$-differential\/} of $F$ at $x$.
If $F$ is $G$-differentiable at every point in $K$, then $F':K\to V'$ is
the {\it gradient\/} of $F$ on $K$ and $F$ is the {\it potential\/} of the
function $F'$.
\qed
The $G$-differential $F'(x)$ is precisely the directional derivative of $F$
at the point $x$ in the direction toward $y$.
The following shows how it characterizes convexity of $F$.
\begin{theorem}\label{thm7-2B}
Let $F:K\to \RR$ be $G$-differentiable on the convex set $K$.
The following are equivalent:
{\rm (a)}~$F$ is convex,
{\rm (b)}~For each pair $x,y\in K$ we have
\begin{equation}\label{eq722}
F'(x) (y-x) \le F(y) - F(x)\ .
\end{equation}
{\rm (c)} For each pair $x,y\in K$ we have
\begin{equation}\label{eq723}
(F'(x)-F'(y)) (x-y) \ge 0\ .
\end{equation}
\end{theorem}
\proof
If $F$ is convex, then $F(x+t(y-x)) \le F(x) +t(F(y)-F(x))$ for $x,y\in K$
and $t\in [0,1]$, so \eqn{722} follows.
Thus (a) implies (b).
If (b) holds, we obtain $F'(y) (x-y)\le F(x)-F(y)$ and
$F(x)-F(y)\le F'(x)(x-y)$, so (c) follows.
Finally, we show (c) implies (a).
Let $x,y\in K$ and define $\varphi :[0,1]\to\RR$ by
$$\varphi (t) = F(tx+(1-t)y) = F(y+t(x-y))\ ,\qquad t\in [0,1]\ .$$
Then $\varphi' (t)=F'(y+t(x-y))(x-y)$ and we have for $0\le s<t\le 1$
% NOTE(review): the source appears to be missing a substantial span here
% (the remainder of this proof through the obstacle-problem discussion of
% Section 3.4); verify against the original. The text resumes mid-sentence
% with the set
$$G_+ = \{x\in G: u(x)>g(x)\}$$
and $-\Delta_n u=F$ in $G_+$.
That is, in $G_0$ $(G_+)$ the first (respectively, second) inequality
in \eqn{736} is replaced by the corresponding equation.
There is a {\it free boundary\/} at the interface between $G_0$ and $G_+$;
locating this free boundary is equivalent to reducing \eqn{736} to a
Dirichlet problem.
\subsection{Unilateral Boundary Condition} % 3.5
Choose $V= H^1(G)$ and $K= \{ v\in V: v\ge g_1$ on $\partial G\}$,
where $g_1 \in H^1(G)$ is given.
Let $F(\cdot) \in L^2(G)$, $g_2 \in L^2(\partial G)$ and define $f\in V'$ by
$$f(v)= \int_G Fv\,dx + \int_{\partial G} g_2 v\,ds\ ,\qquad v\in V$$
where we suppress the trace operator in the above and hereafter.
Set $a(u,v) = (u,v)_{H^1(G)}$.
Theorem \ref{thm7-3A} shows there exists a unique solution $u\in K$
of \eqn{734}.
This solution is characterized by the following:
\begin{equation}\label{eq737}
\left. \begin{array}{l}
-\Delta_n u+u=F\hbox{ in } G\ ,\\
\noalign{\vskip6pt}
u\ge g_1\hbox{ on } \partial G\ ,\\
\noalign{\vskip6pt}
\ds {\partial u\over\partial\nu}\ge g_2\hbox{ on } \partial G\ ,\ \hbox{ and}\\
\noalign{\vskip6pt}
\ds\left({\partial u\over\partial\nu} -g_2\right) (u-g_1)=0\hbox{ on }
\partial G\ .\end{array}\right\}
\end{equation}
We shall show that the solution of \eqn{734} satisfies \eqn{737};
the converse is left to an exercise.
The first inequality in \eqn{737} follows from $u\in K$.
If $\varphi\in C_0^\infty (G)$, then setting first $v=u+\varphi$ and then
$v=u-\varphi$ in \eqn{734} we obtain the partial differential equation
in \eqn{737}.
Inserting this equation in \eqn{734} and using the abstract Green's
formula (Theorem III.\ref{thm3-2C}), we obtain
\begin{equation}\label{eq738}
\int_{\partial G} {\partial u\over\partial\nu} (v-u)\,ds
\ge \int_{\partial G} g_2(v-u)\,ds\ ,\qquad v\in K\ .
\end{equation}
If $w\in H^1(G)$ satisfies $w\ge 0$ on $\partial G$, we may set $v=u+w$ in
\eqn{738}; this gives the second inequality in \eqn{737}.
Setting $v=g_1$ in \eqn{738} yields the last equation in \eqn{737}.
Note that there is a region $\Gamma_0$ in $\partial G$ on which $u=g_1$,
and $\partial u/\partial\nu = g_2$ on $\partial G\sim \Gamma_0$.
Thus, finding $u$ is equivalent to finding $\Gamma_0$, so we may think
of \eqn{737} as another free boundary problem.
\section{Optimal Control of Boundary Value Problems} % 4.
\setcounter{equation}{0}
\subsection{} % 4.1
Various optimal control problems are naturally formulated as minimization
problems like those of Section~2.
We illustrate the situation with a model problem which we discuss in this
section.
\example
Let $G$ be a bounded open set in $\RR^n$ whose boundary $\partial G$
is a $C^1$-manifold with $G$ on one side.
Let $F\in L^2(G)$ and $g\in L^2(\partial G)$ be given.
Then for each {\it control\/} $v\in L^2(\partial G)$ there is a
corresponding {\it state\/} $y\in H^1(G)$ obtained as the unique
solution of the {\it system\/}
\begin{equation}\label{eq741}
\left. \begin{array}{ll}
-\Delta_n y +y=F&\qquad \hbox{in } G\\
\ds {\partial y\over\partial\nu} = g+v&\qquad \hbox{on }\partial G
\end{array}\right\}
\end{equation}
and we denote the dependence of $y$ on $v$ by $y=y(v)$.
Assume that we may observe the state $y$ only on $\partial G$ and that our
objective is to choose $v$ so as to place the {\it observation\/}
$y(v)|_{\partial G}$ closest to a given desired observation
$w\in L^2(\partial G)$.
Each control $v$ is exerted at some {\it cost\/}, so the optimal control
problem is to minimize the ``error plus cost''
\begin{equation}\label{eq742}
J(v) = \int_{\partial G} |y(v)-w|^2\,ds + c\int_{\partial G} |v|^2\,ds
\end{equation}
over some given set of {\it admissible controls\/} in $L^2(\partial G)$.
An admissible control $u$ at which $J$ attains its minimum is called
an {\it optimal control\/}.
We shall briefly consider problems of existence or uniqueness of
optimal controls and alternate characterizations of them, and then apply
these general results to our model problem.
We shall formulate the model problem \eqn{741}, \eqn{742} in an abstract
setting suggested by Chapter~III.
Thus, let $V$ and $H$ be real Hilbert spaces with $V$ dense and
continuously imbedded in $H$; identify the pivot space $H$ with its dual
and thereby obtain the inclusions $V\hookrightarrow H\hookrightarrow V'$.
Let $a(\cdot,\cdot)$ be a continuous, bilinear and coercive form on $V$
for which the corresponding operator $\A:V\to V'$ given by
$$a(u,v) = \A u(v)\ ,\qquad u,v\in V$$
is necessarily a continuous bijection with continuous inverse.
Finally, let $f\in V'$ be given.
(The system \eqn{741} with $v\equiv 0$ can be obtained as the operator
equation $\A y=f$ for appropriate choices of the preceding data;
cf.\ Section III.4.2 and below.)
To obtain a control problem we specify in addition to the state space $V$
and data space $V'$ a Hilbert space $U$ of controls and an operator
$\B \in \L(U,V')$.
Then for each control $v\in U$, the corresponding state $y=y(v)$ is the
solution of the system (cf. \eqn{741})
\begin{equation}\label{eq743}
\A y = f + \B v\ ,\qquad y= y(v)\ .
\end{equation}
We are given a Hilbert space $W$ of observations and an operator
$\C \in \L(V,W)$.
For each state $y\in V$ there is a corresponding observation $\C y\in W$
which we want to force close to a given desired observation $w\in W$.
The cost of applying the control $v\in U$ is given by $Nv(v)$ where
$N\in \L(U,U')$ is symmetric and monotone.
Thus, to each control $v\in U$ there is the ``error plus cost'' given by
\begin{equation}\label{eq744}
J(v) \equiv \|\C y (v)-w\|_W^2 + Nv(v)\ .
\end{equation}
The {\it optimal control problem\/} is to minimize \eqn{744} over a given
non-empty closed convex subset $U_{\ad}$ of {\it admissible
controls\/} in $U$. An {\it optimal control\/} is a solution of
\begin{equation}\label{eq745}
u\in U_{\ad} :J(u)\le J(v)\ \hbox{ for all }\ v\in U_{\ad}\ .
\end{equation}
\subsection{} % 4.2
Our objectives are to give sufficient conditions for the existence (and
possible uniqueness) of optimal controls and to characterize them in a form
which gives more information.
We shall use Theorem \ref{thm7-2E} to attain these objectives.
In order to compute the $G$-differential of $J$ we first obtain from
\eqn{743} the identity
$$\C y (v) -w = \C \A^{-1}\B v + \C\A^{-1} f-w$$
which we use to write \eqn{744} in the form
$$J(v) = \|\C \A^{-1}\B v\|_W^2 + Nv(v) + 2(\C\A^{-1}\B v,\C\A^{-1} f-w)_W
+ \|\C\A^{-1} f-w\|_W^2\ .$$
Having expressed $J$ as the sum of quadratic, linear and constant terms,
we easily obtain the $G$-differential
\begin{eqnarray}\label{eq746}
J'(v)(\varphi) & = & 2\Bigl\{ (\C\A^{-1} \B v,\C\A^{-1}\B\varphi)_W\\
&&\qquad+ Nv(\varphi)+ (\C\A^{-1} \B\varphi,\C\A^{-1} f-w)_W\Bigr\}\nonumber\\
&=& 2\left\{ (\C y(v)-w,\C\A^{-1}\B\varphi)_W +Nv(\varphi)\right\}\ .
\nonumber\end{eqnarray}
Thus, we find that the gradient $J'$ is monotone and
$$\left(\tfrac12\right) J'(v)(v) \ge Nv(v) - (\const)\|v\|_U\ ,$$
so $J'$ is coercive if $N$ is coercive, i.e., if
\begin{equation}\label{eq747}
Nv (v) \ge c\|v\|_U^2\ ,\qquad v\in U_{\ad}\ ,
\end{equation}
for some $c>0$.
Thus, we obtain from Theorem \ref{thm7-2E} the following.
\begin{theorem}\label{thm7-4A}
Let the optimal control problem be given as in Section 4.1.
That is, we are to minimize \eqn{744} subject to \eqn{743} over the non-empty
closed convex set $U_{\ad}$.
If either
{\rm (a)}~$U_{\ad}$ is bounded or
{\rm (b)}~$N$ is coercive over $U_{\ad}$,
then the set of optimal controls is non-empty, closed and convex.
\end{theorem}
\begin{corollary}\label{cor7-4B}
In case {\rm (b)} there is a unique optimal control.
\end{corollary}
\proof
This follows from Theorem \ref{thm7-2G} since \eqn{747} implies
$J'$ is strictly monotone.
\qed
\subsection{} % 4.3.
We shall characterize the optimal controls by variational inequalities.
Thus, $u$ is an optimal control if and only if
\begin{equation}\label{eq748}
u\in U_{\ad} :J'(u)(v-u) \ge 0\ ,\qquad v\in U_{\ad}\ ;
\end{equation}
this is just \eqn{725}.
This variational inequality is given by \eqn{746}, of course, but the
resulting form is difficult to interpret.
The difficulty is that it compares elements of the observation space $W$
with those of the control space $U$; we shall obtain an equivalent
characterization which contains a variational inequality only in the control
space $U$.
In order to convert the first term on the right side of \eqn{746} into a
more convenient form, we shall use the Riesz map $R_W$ of $W$ onto $W'$
given by (cf. Section I.4.3)
$$R_W (x)(y) = (x,y)_W\ ,\qquad x,y\in W$$
and the dual $\C' \in \L(W',V')$ of $\C$ given by (cf.\ Section I.5.1)
$$\C' (f)(x) = f(\C (x))\ ,\qquad f\in W'\ ,\ x\in V\ .$$
Then from \eqn{746} we obtain
\begin{eqnarray*}
\left(\tfrac12\right) J'(u)(v)
&=& (\C y(u)-w,\C\A^{-1}\B v)_W + Nu(v)\\
&=& R_W (\C y(u)-w)(\C\A^{-1}\B v) + Nu(v)\\
&=& \C'R_W (\C y(u)-w)(\A^{-1}\B v) + Nu(v)\ ,\qquad u,v\in U\ .
\end{eqnarray*}
To continue we shall need the dual operator $\A'\in \L(V,V')$ given by
$$\A' x(y) = \A y(x)\ ,\qquad x,y\in V\ ,$$
where $V''$ is naturally identified with $V$.
Since $\A'$ arises from the bilinear form adjoint to $a(\cdot,\cdot)$, $\A'$
is an isomorphism.
Thus, for each control $v\in U$ we can define the corresponding
{\it adjoint state\/} $p=p(v)$ as the unique solution of the system
\begin{equation}\label{eq749}
\A' p = \C' R_W(\C y(v)-w)\ ,\qquad p=p(v)\ .
\end{equation}
From above we then have
\begin{eqnarray*}
\left(\tfrac12\right) J'(u)(v)
&=& \A'p(u) (\A^{-1}\B v) + Nu(v)\\
&=& \B v(p(u)) + Nu(v)\\
&=& \B' p(u)(v) + Nu(v)
\end{eqnarray*}
where $\B' \in\L(V,U')$ is the indicated dual operator.
These computations lead to a formulation of \eqn{748} which we summarize
as follows.
\begin{theorem}\label{thm7-4C}
Let the optimal control problem be given as in \eqn{741}.
Then a necessary and sufficient condition for $u$ to be an optimal
control is that it satisfy the following system:
\begin{equation}\label{eq7410}
\left.\begin{array}{l}
u \in U_{\ad}\ ,\quad \A y(u) = f+\B u\ ,\\
\noalign{\vskip6pt}
\A' p(u) = \C' R_W(\C y(u)-w)\ ,\\
\noalign{\vskip6pt}
(\B' p(u) + Nu) (v-u)\ge 0\ ,\ \hbox{ all }\ v\in U_{\ad}\ .
\end{array}\right\}
\end{equation}
\end{theorem}
The system \eqn{7410} is called the {\it optimality system\/} for the
control problem.
We leave it as an exercise to show that a solution of the optimality system
satisfies \eqn{748}.
\subsection{} % 4.4
We shall recover the Example of Section 4.1 from the abstract situation above.
Thus, we choose $V=H^1(G)$, $a(u,v)=(u,v)_{H^1(G)}$, $U=L^2(\partial G)$
and define
$$\begin{array}{rcll}
f(v)&=&\ds \int_G F(x)v(x)\,dx
+\int_{\partial G} g(s)v(s)\,ds\ ,&\qquad v\in V\ ,\\
\noalign{\vskip6pt}
\B u (v)&=&\ds \int_{\partial G} u(s)v(s)\,ds \ ,&\qquad u\in U\ ,\ v\in V\ .
\end{array}
$$
The state $y(u)$ of the system determined by the control $u$ is
given by \eqn{743}, i.e.,
\begin{equation}\label{eq7411}
\begin{array}{l}
-\Delta_n y+y = F\ \hbox{ in }\ G\ ,\\
\noalign{\vskip6pt}
\ds {\partial y\over\partial \nu} = g+u\ \hbox{ on }\ \partial G\ .
\end{array}
\end{equation}
Choose $W= L^2(\partial G)$, $w\in W$, and define
$$\begin{array}{rcll}
Nu(v) &=&\ds c\int_{\partial G} u(s)v(s)\,ds\ ,&\qquad u,v\in W\ ,\ (c\ge 0)\\
\noalign{\vskip6pt}
\C u(v)&\equiv&\ds\int_{\partial G} u(s)v(s)\,ds\ ,&\qquad u\in V\ ,\ v\in W\ .
\end{array}$$
The adjoint state equation \eqn{749} becomes
\begin{equation}\label{eq7412}
\begin{array}{l}
-\Delta_n p+p = 0\ \hbox{ in }\ G\\
\noalign{\vskip6pt}
\ds {\partial p\over\partial\nu} = y-w\ \hbox{ on }\ \partial G
\end{array}
\end{equation}
and the variational inequality is given by
\begin{equation}\label{eq7413}
u\in U_{\ad} : \int_{\partial G} (p+cu) (v-u)\,ds \ge0\ ,\qquad v\in U_{\ad}\ .
\end{equation}
From Theorem \ref{thm7-4A} we obtain the existence of an optimal control
if $U_{\ad}$ is bounded or if $c>0$. Note that
\begin{equation}\label{eq7414}
J(v) = \int_{\partial G} |y(v)-w|^2 \,ds + c\int_{\partial G} |v|^2\,ds
\end{equation}
is strictly convex in either case, so uniqueness follows in both situations.
Theorem \ref{thm7-4C} shows the unique optimal control $u$ is characterized
by the optimality system \eqn{7411}, \eqn{7412}, \eqn{7413}.
We illustrate the use of this system in two cases.
\subsection{$U_{\ad} = L^2(\partial G)$} % 4.5
This is the case of {\it no constraints\/} on the control.
Existence of an optimal control follows if $c>0$.
Then \eqn{7413} is equivalent to $p+cu=0$.
The optimality system is equivalent to
$$\begin{array}{ll}
-\Delta_n y+y = F\ ,&\qquad -\Delta_n p+p=0\ \hbox{ in } G\\
\noalign{\vskip6pt}
\ds {\partial y\over\partial \nu} = g-\left({1\over c}\right) p\ ,
&\qquad \ds {\partial p\over\partial\nu} = y-w\ \hbox{ on }\ \partial G
\end{array}$$
and the optimal control is given by $u=-(1/c)p$.
Consider the preceding case with $c=0$.
We show that an optimal control might not exist.
First show $\inf \{J(v) : v\in U\}=0$.
Pick a sequence $\{w_m\}$ of very smooth functions on $\partial G$
such that $w_m\to w$ in $L^2(\partial G)$.
Define $y_m$ by
$$\begin{array}{rcl}
-\Delta_n y_m + y_m &=& F\ \hbox{ in }\ G\\
\noalign{\vskip6pt}
y_m&=& w_m\ \hbox{ on }\ \partial G\end{array}$$
and set $v_m = (\partial y_m/\partial \nu) -g$, $m\ge1$.
Then $v_m\in L^2(\partial G)$ and $J(v_m) = \|w_m-w\|_{L^2(\partial G)}^2\to0$.
Second, note that if $u$ is an optimal control, then $J(u)=0$ and the
corresponding state $y$ satisfies
$$\begin{array}{rcl}
-\Delta_n y+y &=&F\ \hbox{ in }\ G\\
\noalign{\vskip6pt}
y&=&w \ \hbox{ on }\ \partial G\ .\end{array}$$
Then we have (formally) $u= (\partial y/\partial \nu)-g$.
However, if $w\in L^2(\partial G)$ one does not in general have
$(\partial y/\partial\nu) \in L^2(\partial G)$.
Thus $u$ might not be in $L^2(\partial G)$ in which case there is no
optimal control (in $L^2(\partial G)$).
\subsection{} % 4.6
$U_{\ad} = \{v\in L^2 (\partial G) :0\le v(s) \le M$ a.e.$\}$.
Since the set of admissible controls is bounded, there exists a unique
optimal control $u$ characterized by the optimality system \eqn{7410}.
Thus, $u$ is characterized by \eqn{7411}, \eqn{7412} and
\begin{eqnarray}\label{eq7415}
&&\hbox{if }\ 0<u(s)<M\ ,\ \hbox{ then }\ p(s)+cu(s)=0\ .
\end{eqnarray}
% NOTE(review): the source appears garbled/incomplete here -- the remaining
% cases of \eqn{7415} and the intervening discussion (including \eqn{7416}
% and the definitions of $\Gamma$ and $\Gamma_0$) seem to be missing; verify
% against the original. The text resumes with
$\Gamma_+ =\{s\in\Gamma :y(s)> w(s)\}$ and
$\Gamma_- =\{s\in\Gamma :y(s)< w(s)\}$.
On any interval in $\Gamma_0$ we have $p=0$ (by definition of $\Gamma$)
and ${\partial p\over \partial\nu} = 0$ from \eqn{7412}.
From the uniqueness of the Cauchy problem for the elliptic equation
in \eqn{7412}, we obtain $p=0$ in $G$, hence, $y=w$ on $\partial G$.
But this implies $y=Y$, hence \eqn{7416} holds.
This contradiction shows $\Gamma_0$ is empty.
On any interval in $\Gamma_+$ we have $p=0$ and
${\partial p\over\partial \nu} >0$.
Thus, $p<0$ in some neighborhood (in $\bar G$) of that interval.
But $\Delta p <0$ in the neighborhood follows from \eqn{7412},
so a maximum principle implies ${\partial p\over\partial\nu}\le0$
on that interval.
This contradiction shows $\Gamma_+$ is empty.
A similar argument holds for $\Gamma_-$ and the desired result follows.
\section{Approximation of Elliptic Problems} % 5.
\setcounter{equation}{0}
We shall discuss the {\it Rayleigh-Ritz-Galerkin\/}
procedure for approximating the
solution of an elliptic boundary value problem.
This procedure can be motivated by the situation of Section 3.1 where the
abstract boundary value problem \eqn{735} is known to be equivalent to
minimizing a quadratic function \eqn{731} over the Hilbert space $V$.
The procedure is to pick a closed subspace $S$ of $V$ and minimize the
quadratic function over $S$.
This is the Rayleigh-Ritz method.
The resulting solution is close to the original solution if $S$ closely
approximates $V$.
The approximate solution is characterized by the abstract boundary value problem
obtained by replacing $V$ with $S$; this gives the (equivalent) Galerkin method
of obtaining an approximate solution.
The very important {\it finite-element method\/}
consists of the above procedure
applied with a space $S$ of piecewise polynomial functions which approximates
the whole space $V$.
The resulting finite-dimensional problem can be solved efficiently by computers.
Our objectives are to describe the Rayleigh-Ritz-Galerkin procedure, obtain
estimates on the error that results from the approximation, and
then to give some
typical convergence rates that result from standard finite-element or
{\it spline\/} approximations of the space.
We shall also construct some of these approximating subspaces and prove
the error
estimates as an application of the minimization theory of Section~2.
\subsection{} % 5.1
Suppose we are given an abstract boundary value problem:
$V$ is a Hilbert space, $a(\cdot,\cdot):V\times V\to \KK$ is continuous and
sesquilinear, and $f\in V'$.
The problem is to find $u$ satisfying
\begin{equation}\label{eq751}
u\in V: a(u,v) = f(v)\ ,\qquad v\in V\ .
\end{equation}
Let $S$ be a subspace of $V$. Then we may consider the related problem of
determining $u_s$ satisfying
\begin{equation}\label{eq752}
u_s \in S: a(u_s,v) = f(v)\ ,\qquad v\in S\ .
\end{equation}
We shall show that the error $u_s-u$ is small if $S$ approximates $V$
sufficiently well.
\begin{theorem}\label{thm7-5A}
Let $a(\cdot,\cdot)$ be a $V$-coercive continuous sesquilinear form and $f\in V'$.
Let $S$ be a closed subspace of $V$.
Then \eqn{751} has a unique solution $u$ and \eqn{752} has a unique solution $u_s$.
Furthermore we have the estimate
\begin{equation}\label{eq753}
\|u_s-u\| \le (K/c) \inf \{ \|u-v\| :v\in S\}\ ,
\end{equation}
where $K$ is the bound on $a(\cdot,\cdot)$ (cf.\ the inequality {\rm I.(5.2)\/})
and $c$ is the coercivity constant (cf.\ the inequality {\rm III.(2.3)\/}).
\end{theorem}
\proof
The existence and uniqueness of the solutions $u$ and $u_s$ of \eqn{751} and
\eqn{752} follow immediately from Theorem III.\ref{thm3-2A}
or Theorem \ref{thm7-3A}, so we need only to
verify the estimate \eqn{753}.
By subtracting \eqn{751} from \eqn{752} we obtain
\begin{equation}\label{eq754}
a(u_s-u,v)=0\ ,\qquad v\in S\ .
\end{equation}
Thus for any $w\in S$ we have
$$a(u_s-u,u_s-u) = a(u_s-u,w-u) + a(u_s-u,u_s-w)\ .$$
Since $u_s-w\equiv v\in S$ it follows that the last term is zero, so we obtain
$$c\|u_s-u\|^2 \le K\|u_s - u\|\, \| w-u\|\ ,\qquad w\in S\ .$$
This gives the desired result.
\qed
Consider for the moment the case of $V$ being separable.
Thus, there is a sequence $\{v_1,v_2,v_3,\ldots\}$ in $V$ which is a basis for $V$.
For each integer $m\ge1$, the set $\{v_1,v_2,\ldots,v_m\}$ is linearly independent
and its linear span will be denoted by $V_m$.
If $P_m$ is the projection of $V$ into $V_m$, then $\lim_{m\to\infty} P_mv=v$
for all $v\in V$.
The problem \eqn{752} with $S=V_m$ is equivalent to
$$u_m \in V_m : a(u_m ,v_j) = f(v_j)\ ,\qquad 1\le j\le m\ .$$
There is exactly one such $u_m$ for each integer $m\ge1$ and we have the
estimates $\|u_m-u\| \le (K/c)\|u-P_m u\|$.
Hence, $\lim_{m\to\infty} u_m=u$ in $V$ and the rate of convergence is determined
by that of $\{P_mu\}$ to the solution $u$ of \eqn{751}.
Thus we are led to consider an approximating finite-dimensional problem.
Specifically $u_m$ is determined by the point $x= (x_1,x_2,\ldots,x_m)\in \KK^m$
through the identity $u_m = \sum_{i=1}^m x_i v_i$, and \eqn{752} is equivalent
to the $m\times m$ system of linear equations
\begin{equation}\label{eq755}
\sum_{i=1}^m a(v_i,v_j) x_i = f(v_j)\ ,\qquad 1\le j\le m\ .
\end{equation}
Since $a(\cdot,\cdot)$ is $V$-coercive, the $m\times m$ coefficient matrix
$(a(v_i,v_j))$ is invertible and the linear system \eqn{755} can be solved for $x$.
The dimension of the system is frequently of the order $m=10^2$ or $10^3$, so the
actual computation of the solution may be a non-trivial consideration.
It is helpful to choose the basis functions so that most of the coefficients
are zero.
Thus, the matrix is {\it sparse\/} and various special techniques are available
for efficiently solving the large linear system.
This sparseness of the coefficient matrix is one of the computational advantages
of using finite-element spaces.
A very special example will be given in Section 5.4 below.
\subsection{} % 5.2
The fundamental estimate \eqn{753} is a bound on the error in the norm of
the Hilbert space $V$.
In applications to elliptic boundary value problems this corresponds to
an {\it energy estimate\/}.
We shall estimate the error in the norm of a pivot space $H$.
Since this norm is weaker we expect an improvement on the rate of
convergence with respect to the approximation of $V$ by $S$.
\begin{theorem}\label{thm7-5B}
Let $a(\cdot,\cdot)$ be a continuous, sesquilinear and coercive form on the
Hilbert space $V$, and let $H$ be a Hilbert space identified with its dual
and in which $V$ is dense, and continuously imbedded.
Thus, $V\hookrightarrow H\hookrightarrow V'$.
Let $A^* : D^*\to H$ be the operator on $H$ which is determined by the
adjoint sesquilinear form, i.e.,
$$\overline{a(v,w)} = (A^*w,v)_H\ ,\qquad w\in D^*\ ,\ v\in V$$
(cf. Section III.7.5).
Let $S$ be a closed subspace of $V$ and $e^*(S)$ a corresponding constant
for which we have
\begin{equation}\label{eq756}
\inf \{\|w-v\| :v\in S\} \le e^* (S)|A^* w|_H\ ,\qquad w\in D^*\ .
\end{equation}
Then the solutions $u$ of \eqn{751} and $u_s$ of \eqn{752} satisfy
the estimate
\begin{equation}\label{eq757}
|u-u_s|_H \le (K^2/c)\inf \{\|u-v\| :v\in S\} e^* (S)\ .
\end{equation}
\end{theorem}
\proof
We may assume $u\ne u_s$; define $g= (u-u_s)/|u-u_s|_H$ and choose $w\in D^*$
so that $A^*w=g$. That is,
$$a(v,w) = (v,g)_H\ ,\qquad v\in V\ ,$$
and this implies that
$$a(u-u_s,w) = (u-u_s,g)_H = |u-u_s|_H\ .$$
From this identity and \eqn{754} we obtain for any $v\in S$
$$|u-u_s|_H = a(u-u_s,w-v) \le K\|u-u_s\|\, \|w-v\|
\le K\|u-u_s\| e^* (S) |A^* w|_H\ .$$
Since $|A^* w|_H = |g|_H =1$, the estimate \eqn{757} follows from \eqn{753}.
\qed
\begin{corollary}\label{cor7-5C}
Let $A:D\to H$ be the operator on $H$ determined by $a(\cdot,\cdot)$, $V$,
$H$, i.e.,
$$a(w,v) = (Aw,v)_H\ ,\qquad w\in D\ ,\ v\in V\ .$$
Let $e(S)$ be a constant for which
$$\inf \{\|w-v\| :v\in S\} \le e(S)|Aw|_H\ ,\qquad w\in D\ .$$
Then the solutions of \eqn{751} and \eqn{752} satisfy the estimate
\begin{equation}\label{eq758}
|u-u_s|_H \le (K^2/c)e(S) e^* (S) |Au|_H\ .
\end{equation}
\end{corollary}
The estimate \eqn{757} will provide the rate of convergence of the error
that is faster than that of \eqn{753}.
The added factor $e^*(S)$ arising in \eqn{756} will depend on how well
$S$ approximates the subspace $D^*$ of ``smoother'' or ``more regular''
elements of $V$.
\subsection{} % 5.3
We shall combine the estimates \eqn{753} and \eqn{757} with approximation
results that are typical of finite-element or spline function subspaces of
$H^1(G)$. This will result in rate of convergence estimates in terms of a
parameter $h>0$ related to mesh size in the approximation scheme.
The {\it approximation assumption\/} that we make is as follows:
Suppose $\H$ is a set of positive numbers, $M$ and $k\ge0$ are integers,
and $\S\equiv \{S_h :h\in\H\}$ is a collection of closed subspaces of
$V\subset H^1(G)$ such that
\begin{equation}\label{eq759}
\inf \{\|w-v\|_{H^1(G)} :v\in S_h\} \le Mh^{j-1} \|w\|_{H^j(G)}
\end{equation}
for all $h\in\H$, $1\le j\le k+2$, and $w\in H^j(G)\cap V$.
The integer $k+1$ is called the {\it degree\/} of $\S$.
\begin{theorem}\label{thm7-5D}
Let $V$ be a closed subspace of $H^1(G)$ with $H_0^1(G)\subset V$ and
let $a(\cdot,\cdot) :V\times V\to\KK$ be continuous, sesquilinear and
$V$-coercive. Let $\S$ be a collection of closed subspaces of $V$ satisfying
\eqn{759} for some $k\ge0$, and assume $a(\cdot,\cdot)$ is $k$-regular on $V$.
Let $F\in H^k(G)$ and define $f\in V'$ by $f(v) = (F,v)_H$, $v\in V$, where
$H\equiv L^2 (G)$.
Let $u$ be the solution of \eqn{751} and, for each $h\in\H$, $u_h$ be the
solution of \eqn{752} with $S=S_h$.
Then for some constant $c_1$ we have
\begin{equation}\label{eq7510}
\|u-u_h\|_{H^1(G)} \le c_1 h^{k+1}\ ,\qquad h\in \H\ .
\end{equation}
If in addition the sesquilinear form adjoint to $a(\cdot,\cdot)$ is
$0$-regular, then for some constant $c_2$ we have
\begin{equation}\label{eq7511}
\|u-u_h\|_{L^2(G)} \le c_2 h^{k+2}\ ,\qquad h\in \H\ .
\end{equation}
\end{theorem}
\proof
Since $F\in H^k(G)$ and $a(\cdot,\cdot)$ is $k$-regular it follows that
$u\in H^{k+2}(G)$.
Hence we combine \eqn{753} with \eqn{759} to obtain \eqn{7510}.
If the adjoint form is $0$-regular, then in Theorem \ref{thm7-5B} we
have $D^* \subset H^2(G)$ and $\|w\|_{H^2(G)} \le (\const)\|A^* w\|_{L^2(G)}$.
Hence \eqn{759} with $j=2$ gives \eqn{756} with $e^*(S_h) = (\const)h$.
Thus \eqn{7511} follows from \eqn{757}.
Sufficient conditions for $a(\cdot,\cdot)$ to be $k$-regular were given
in Section III.6.
Note that this permits all the hypotheses in Theorem \ref{thm7-5D} to be
placed on the {\it data\/} in the problem \eqn{751} being solved.
For problems for which appropriate regularity results are not available,
one may of course assume the appropriate smoothness of the solution.
\subsection{} % 5.4
Let $G$ be the interval $(0,1)$ and $V$ a closed subspace of $H^1(G)$.
Any function $f\in V$ can be approximated by a piecewise-linear $f_0\in V$;
we need only to choose $f_0$ so that it agrees with $f$ at the endpoints
of the intervals on which $f_0$ is affine.
This is a simple {\it Lagrange interpolation\/} of $f$ by the linear
finite-element function $f_0$, and it leads to a family of approximating
subspaces of degree~1.
We shall describe the spaces and prove the estimates \eqn{759} for this
example.
\begin{equation}\label{eq761}
\M u'(t) + \L u(t) = f(t)\ ,\qquad t>0\ ,
\end{equation}
with $u(0)=u_0$.
Since $\M$ is symmetric, such a solution satisfies
\begin{equation}\label{eq762}
D_t(u(t),u(t))_m + 2\ell (u(t),u(t)) = 2f(t)(u(t))\ ,\qquad t>0\ ,
\end{equation}
where $\ell (\cdot,\cdot) :V\times V\to\RR$ is the bilinear form
associated with $\L$.
This gives the identity
\begin{equation}\label{eq763}
\|u(t)\|_m^2 +2\int_0^t \ell(u(s),u(s))\,ds
= \|u_0\|_m^2 +2\int_0^t f(s)(u(s))\,ds\ ,\qquad t>0\ ,
\end{equation}
involving the $V_m$ norm $\|\cdot\|_m$ of the solution.
Since the right side of \eqn{762} is bounded by $T\|f\|_{V'_m}^2 + T^{-1}
\|u\|_m^2$ for any given $T>0$, we obtain from \eqn{762}
$$D_t (e^{-t/T} \|u(t)\|_m^2) + e^{-t/T} 2\ell(u(t),u(t))
\le Te^{-t/T} \|f(t)\|_{V'_m}^2$$
and from this follows the a-priori estimate
\begin{equation}\label{eq764}
\|u(t)\|_m^2 + 2\int_0^t \ell (u(s),u(s))\,ds
\le e\|u_0\|_m^2 + Te \int_0^t \|f(s)\|_{V'_m}^2\,ds\ ,\qquad
0\le t\le T\ .
\end{equation}
In the situations we consider below, $\L$ is monotone, hence, \eqn{764}
gives an upper bound on the $V_m$ norm of the solution.
In order to motivate the Faedo-Galerkin approximation, we note that a
solution $u$ of \eqn{761} satisfies
\begin{equation}\label{eq765}
(u'(t),v)_m + \ell (u(t),v) = f(t)(v)\ ,\qquad v\in V\ ,\ t>0\ .
\end{equation}
Since $V$ is dense in $V_m$, \eqn{765} is actually equivalent to \eqn{761}.
Let $S$ be a subspace of $V$.
Then we consider the related problem of determining $u_s\in C([0,\infty),
S)\cap C^1((0,\infty),S)$ which satisfies
\begin{equation}\label{eq766}
(u'_s(t),v)_m + \ell (u_s(t),v) = f(t)(v)\ ,\qquad v\in S\ ,\ t>0
\end{equation}
and an initial condition to be specified.
Consider the case of $S$ being a finite-dimensional subspace of $V$;
let $\{v_1,v_2,\ldots,v_m\}$ be a basis for $S$.
Then the solution of \eqn{766} is of the form
$$u_s(t) = \sum_{i=1}^m x_i (t) v_i$$
where $x(t)\equiv (x_1(t),x_2(t),\ldots,x_m(t))$ is in
$C([0,\infty),\RR^m) \cap C^1 ((0,\infty),\RR^m)$, and \eqn{766} is
equivalent to the system of ordinary differential equations
\begin{equation}\label{eq767}
\sum_{i=1}^m (v_i,v_j)_m x'_i(t)
+ \sum_{i=1}^m \ell (v_i,v_j)x_i (t)
= f(t)(v_j)\ ,\qquad 1\le j\le m\ .
\end{equation}
The linear system \eqn{767} has a unique solution $x(t)$ with the initial
condition $x(0)$ determined by $u_s (0)=\sum_{i=1}^m x_i (0)v_i$.
(Note that the matrix coefficient of $x'(t)$ in \eqn{767} is symmetric
and positive-definite, hence, nonsingular.)
As in the preceding section, it is helpful to choose the basis functions so
that most of the coefficients in \eqn{767} are zero.
Special efficient computational techniques are then available for the
resulting sparse system.
\subsection{} % 6.2
We now develop estimates on the error, $u(t)- u_s(t)$, in the situation of
Theorem~V.\ref{thm5-2B}.
This covers the case of parabolic and pseudoparabolic equations.
It will be shown that the error in the Faedo-Galerkin procedure for \eqn{761}
is bounded by the error in the corresponding Rayleigh-Ritz-Galerkin
procedure for the elliptic problem determined by the operator $\L$.
Thus, we consider for each $t>0$ the {\it $\L$-projection\/}
of $u(t)$ defined by
\begin{equation}\label{eq768}
u_\ell (t)\in S :\quad \ell (u_\ell (t),v) = \ell(u(t),v)\ ,\qquad v\in S\ .
\end{equation}
\begin{theorem}\label{thm7-6A}
Let the real Hilbert spaces $V$ and $V_m$, operators $\M$ and $\L$, and
data $u_0$ and $f$ be given as in Theorem V.\ref{thm5-2B}, and let $S$ be
a closed subspace of $V$.
Then there exists a unique solution $u$ of \eqn{761} with $u(0)=u_0$ and
there exists a unique solution $u_s$ of \eqn{766} for any prescribed initial
value $u_s(0)\in S$.
Assume $u\in C([0,\infty),V)$ and choose $u_s(0)= u_\ell(0)$, the
$\L$-projection \eqn{768} of $u(0)$.
Then we have the error estimate
\begin{equation}\label{eq769}
\|u(t)-u_s(t)\|_m \le \|u(t) - u_\ell(t)\|_m
+ \int_0^t \|u' (s)-u'_\ell (s)\|_m \,ds\ ,\quad t\ge0\ .
\end{equation}
\end{theorem}
\proof
The existence-uniqueness results are immediate from Theorem V.\ref{thm5-2B},
so we need only to verify \eqn{769}.
Note that $u(0)=u_0$ necessarily belongs to $V$, so \eqn{768} defines
$u_\ell (0)=u_s(0)$.
For any $v\in S$ we obtain from \eqn{765} and \eqn{766}
$$(u'(t) - u'_s (t),v)_m +\ell(u(t)-u_s(t),v)=0\ ,$$
so \eqn{768} gives the identity
$$(u'(t)- u'_\ell(t),v)_m = (u'_s (t)-u'_\ell(t),v)_m
+\ell(u_s(t)-u_\ell(t),v)\ .$$
Setting $v=u_s (t)-u_\ell(t)$ and noting that $\L$ is monotone, we obtain
$$D_t \|u_s (t) -u_\ell(t)\|_m^2 \le 2\|u'(t)-u'_\ell (t)\|_m
\|u_s (t) - u_\ell(t)\|_m\ .$$
The function $t\mapsto \|u_s (t) - u_\ell(t)\|_m$ is absolutely continuous,
hence differentiable almost everywhere, and satisfies
$$D_t \|u_s (t) - u_\ell(t)\|_m^2 = 2\|u_s(t)-u_\ell(t)\|_m
D_t \|u_s (t) - u_\ell (t)\|_m\ .$$
Let $Z=\{t>0: \|u_s (t)-u_\ell(t)\|_m =0\}$.
Clearly, for any $t\notin Z$ we have from above
\begin{equation}\label{eq7610}
D_t \|u_s (t) - u_\ell (t)\|_m \le \|u'(t)-u'_\ell(t)\|_m\ .
\end{equation}
At an accumulation point of $Z$, the estimate \eqn{7610} holds, since the
left side is zero at such a point.
Since $Z$ has at most a countable number of isolated points, this shows that
\eqn{7610} holds at almost every $t>0$.
Integrating \eqn{7610} gives the estimate
$$\|u_s(t) - u_\ell (t)\|_m \le \int_0^t \|u'(s)-u'_\ell (s)\|_m \,ds\ ,
\qquad t\ge0\ ,$$
from which \eqn{769} follows by the triangle inequality.
The fundamental estimate \eqn{769} shows that the error in the approximation
procedure is determined by the error in the $\L$-projection \eqn{768} which
is just the Rayleigh-Ritz-Galerkin procedure of Section~5.
Specifically, when $u\in C^1((0,\infty),V)$ we differentiate \eqn{768}
with respect to $t$ and deduce that $u'_\ell (t)\in S$ is the $\L$-projection
of $u'(t)$.
This regularity of the solution $u$ holds in both parabolic and
pseudoparabolic cases.
We shall illustrate the use of the estimate \eqn{769} by applying it to
a second order parabolic equation which is approximated by using a set of
finite-element subspaces of degree one.
Thus, suppose $\S \equiv \{S_h :h\in\H\}$ is a collection of closed
subspaces of the closed subspace $V$ of $H^1(G)$ and $\S$ is of degree~1;
cf.\ Section~5.3.
Let the continuous bilinear form $a(\cdot,\cdot)$ be $V$-elliptic and
$0$-regular; cf.\ Section III.6.4.
Set $H=L^2 (G)=H'$, so $\M$ is the identity, let $f\equiv 0$, and let
$\ell(\cdot,\cdot)=a(\cdot,\cdot)$.
If $u$ is the solution of \eqn{761} and $u_h$ is the solution of \eqn{766}
with $S=S_h$, then the differentiability in $t>0$ of these functions is
given by Corollary IV.\ref{cor4-6D} and their convergence at $t=0^+$ is
given by Exercise IV.7.8.
We assume the form adjoint to $a(\cdot,\cdot)$ is $0$-regular and obtain
from \eqn{7511} the estimates
\begin{equation}\label{eq7611}
\left.\begin{array}{rcl}
\|u(t)-u_\ell (t)\|_{L^2(G)}&\le&c_2 h^2 \|Au(t)\|_{L^2(G)}\ ,\\
\noalign{\vskip6pt}
\|u'(t) - u'_\ell(t)\|_{L^2(G)}&\le&c_2 h^2 \|A^2u(t)\|_{L^2(G)}\ ,\qquad
t>0\ .\end{array}\right\}
\end{equation}
The a-priori estimate obtained from \eqn{763} shows that $|u(t)|_H$ is
non-increasing and it follows similarly that $|Au(t)|_H$ is
non-increasing for $t>0$.
Thus, if $u_0\in D(A^2)$, we obtain from \eqn{769} and \eqn{7611} the error
estimate
\begin{equation}\label{eq7612}
\|u(t)-u_h(t)\|_{L^2(G)} \le c_2 h^2 \{\|Au_0\|_{L^2(G)}
+ t\|A^2 u_0\|_{L^2(G)}\}\ .
\end{equation}
Although \eqn{7612} gives the correct rate of convergence, it is far from
optimal in the hypotheses assumed.
For example, one can use estimates from Theorem IV.\ref{thm4-6B} to play
off the factors $t$ and $\|Au'(t)\|_H$ in the second term of \eqn{7612} and
thereby relax the assumption $u_0\in D(A^2)$.
Also, corresponding estimates can be obtained for the non-homogeneous
equation and faster convergence rates can be obtained if approximating
subspaces of higher degree are used.
\subsection{} % 6.3
We turn now to consider the approximation of second-order evolution equations
of the type discussed in Section VI.2.
Thus, we let $\A$ and $\C$ be the respective Riesz maps of the Hilbert spaces
$V$ and $W$, where $V$ is dense and continuously embedded in $W$, hence,
$W'$ is identified with a subspace of $V'$.
Let $\B \in \L(V,V')$, $u_0\in V$, $u_1\in W$ and $f\in C((0,\infty),W')$.
We shall approximate the solution $u\in C([0,\infty),V)\cap C^1((0,\infty),V)
\cap C^1 ([0,\infty),W)\cap C^2 ((0,\infty),W)$ of
\begin{equation}\label{eq7613}
\C u'' (t) + \B u'(t) + \A u(t) = f(t)\ ,\qquad t>0\ ,
\end{equation}
with the initial conditions $u(0)=u_0$, $u'(0)=u_1$.
Equations of this form were solved in Section VI.2 by reduction to an
equivalent first-order system of the form \eqn{761} on appropriate
product spaces.
We recall here the construction, since it will be used for the approximation
procedure.
Define $V_m\equiv V\times W$ with the scalar product
$$([x_1,x_2],[y_1,y_2]) = (x_1,y_1)_V + (x_2,y_2)_W\ ,\qquad
[x_1,x_2],[y_1,y_2] \in V\times W\ ,$$
so $V'_m = V'\times W'$; the Riesz map $\M$ of $V_m$ onto $V'_m$ is given by
$$\M ([x_1,x_2]) = [\A x_1,\C x_2]\ ,\qquad [x_1,x_2]\in V_m\ .$$
Define $V_\ell = V\times V$ and $\L\in \L(V_\ell,V'_\ell)$ by
$$\L ([x_1,x_2]) = [-\A x_2,\A x_1 + \B x_2]\ ,\qquad [x_1,x_2]\in V_\ell\ .$$
Then Theorem VI.\ref{thm6-2A} applies if $\B$ is monotone to give
existence and uniqueness of a solution $w\in C^1([0,\infty),V_m)$ of
\begin{equation}\label{eq7614}
\M w'(t) +\L w(t) = [0,f(t)]\ ,\qquad t>0
\end{equation}
with $w(0)= [u_0,u_1]$ and $f\in C^1([0,\infty),W')$ given so that
$u_0,u_1\in V$ with $\A u_0 + \B u_1 \in W'$.
The solution is given by $w(t) = [u(t),u'(t)]$, $t\ge0$; from the inclusion
$[u,u']\in C^1 ([0,\infty),V\times W)$ and \eqn{7614} we obtain
$[u,u']\in C^1([0,\infty),V\times V)$.
From \eqn{764} follows the a-priori estimate
$$\begin{array}{l}
\ds \| u(t)\|_V^2 + \|u'(t)\|_W^2 + 2\int_0^t
\B u'(s)(u'(s))\,ds \\
\noalign{\vskip6pt}
\qquad \ds\le e(\|u_0\|_V^2 + \|u_1\|_W^2)
+ Te \int_0^t \|f(s)\|_{W'}^2 \,ds\ ,\qquad 0\le t\le T\ ,
\end{array}$$
on a solution $w(t) = [u(t),u'(t)]$ of \eqn{7614}.
The Faedo-Galerkin approximation procedure for the second-order equation is
just the corresponding procedure for \eqn{7614} as given in Section~6.1.
Thus, if $S$ is a finite-dimensional subspace of $V$, then we let $w_s$ be
the solution in $C^1([0,\infty),S\times S)$ of the equation
\begin{equation}\label{eq7615}
(w'_s(t),v)_m + \ell(w_s(t),v) = [0,f(t)](v)\ ,\qquad v\in S\times S\ ,\ t>0\ ,
\end{equation}
with an initial value $w_s(0)\in S\times S$ to be prescribed below.
If we look at the components of $w_s(t)$ we find from \eqn{7615} that
$w_s(t)=[u_s(t),u'_s(t)]$ for $t>0$ where $u_s\in C^2([0,\infty),S)$
is the solution of
\begin{equation}\label{eq7616}
(u''_s(t),v)_W + b(u'_s(t),v) + (u_s(t),v)_V
= f(t)(v)\ ,\quad v\in S\ ,\ t>0\ .
\end{equation}
Here $b(\cdot,\cdot)$ denotes the bilinear form on $V$ corresponding to $\B$.
As in Section~6.1, we can choose a basis for $S$ and use it to write
\eqn{7616} as a system of $m$ ordinary differential equations of second order.
Of course this system is equivalent to a system of $2m$ equations of first
order as given by \eqn{7615}, and this latter system may be the easier one
in which to do the computation.
\subsection{} % 6.4
Error estimates for the approximation of \eqn{7613} by the related \eqn{7616}
will be obtained in a special case by applying Theorem \ref{thm7-6A}
directly to the situation described in Section~6.3.
Note that in the derivation of \eqn{769} we needed only that $\L$ is
monotone.
Since $\B$ is monotone, the estimate \eqn{769} holds in the present situation.
This gives an error bound in terms of the $\L$-projection $w_\ell(t)\in
S\times S$ of the solution $w(t)$ of \eqn{7614} as defined by
\begin{equation}\label{eq7617}
\ell (w_\ell (t),v) = \ell (w(t),v)\ ,\qquad v\in S\times S\ .
\end{equation}
The bilinear form $\ell (\cdot,\cdot)$ is not coercive over $V_\ell$ so
we might not expect $w_\ell (t)-w(t)$ to be small.
However, in the special case of $\B = \varep \A$ for some $\varep\ge0$
we find that \eqn{7617} is equivalent to a pair of similar identities
in the component spaces.
That is, if $e(t)\equiv w(t)-w_\ell (t)$ denotes the error in the
$\L$-projection, and if $e(t)=[e_1(t),e_2(t)]$, then \eqn{7617} is
equivalent to
\begin{equation}\label{eq7618}
(e_j(t),v)_V = 0\ ,\qquad v\in S\ ,\ j=1,2\ .
\end{equation}
Thus, if we write $w_\ell (t)=[u_\ell(t),v_\ell(t)]$, we see that $u_\ell(t)$
is the $V$-projection of $u(t)$ on $S$ and $v_\ell(t)=u'_\ell(t)$ is the
projection of $u'(t)$ on $S$.
It follows from these remarks that we have
\begin{equation}\label{eq7619}
\|u(t) -u_\ell(t)\|_V \le \inf \{ \|u(t)-v\|_V :v\in S\}
\end{equation}
and corresponding estimates on $u'(t)-u'_\ell(t)$ and $u''(t)-u''_\ell(t)$.
Our approximation results for \eqn{7613} can be summarized as follows.
\begin{theorem}\label{thm7-6B}
Let the Hilbert spaces $V$ and $W$, operators $\A$ and $\C$, and data
$u_0$, $u_1$ and $f$ be given as in Theorem VI.\ref{thm6-2A}.
Suppose furthermore that $\B=\varep \A$ for some $\varep\ge0$ and that $S$
is a finite-dimensional subspace of $V$.
Then there exists a unique solution $u\in C^1([0,\infty),V)\cap C^2([0,\infty),
W)$ of \eqn{7613} with $u(0)=u_0$ and $u'(0)=u_1$; and there exists a
unique solution $u_s\in C^2([0,\infty),S)$ of \eqn{7616} with initial
data determined by
$$(u_s(0)-u_0,v)_V = (u'_s(0) - u_1,v)_V=0\ ,\qquad v\in S\ .$$
We have the error estimate
\begin{eqnarray}\label{eq7620}
&&(\|u(t)-u_s(t)\|_V^2 + \|u'(t)-u'_s(t)\|_W^2)^{1/2} \nonumber\\
\noalign{\vskip6pt}
&&\qquad
\le (\|u(t)-u_\ell(t)\|_V^2 + \|u'(t)-u'_\ell(t)\|_W^2)^{1/2} \\
&&\qquad\qquad +\ds \int_0^t (\|u'(s)-u'_\ell(s)\|_V^2
+ \|u''(s)- u''_\ell(s)\|_W^2)^{1/2} \,ds\ ,\qquad t\ge0 \nonumber
\end{eqnarray}
where $u_\ell(t)\in S$ is the $V$-projection of $u(t)$ defined by
$$(u_\ell(t),v)_V = (u(t),v)_V\ ,\qquad v\in S\ .$$
Thus \eqn{7619} holds and provides a bound on \eqn{7620}.
\end{theorem}
Finally we indicate how the estimate \eqn{7620} is applied with
finite-element or spline function spaces.
Suppose $\S = \{S_h :h\in\H\}$ is a collection of finite-dimensional
subspaces of the closed subspace $V$ of $H^1(G)$.
Let $k+1$ be the degree of $\S$ which satisfies the approximation
assumption \eqn{759}.
The scalar-product on $V$ is equivalent to the $H^1(G)$ scalar-product
and we assume it is $k$-regular on $V$.
For each $h\in \H$ let $u_h$ be the solution of \eqn{7616} described
above with $S=S_h$, and suppose that the solution $u$ satisfies the
regularity assumptions $u,u'\in L^\infty ([0,T],H^{k+2}(G))$ and
$u''\in L^1([0,T],H^{k+2}(G))$.
Then there is a constant $c_0$ such that
\begin{eqnarray}\label{eq7621}
&&(\|u(t)-u_h(t)\|_V^2 + \|u'(t)-u'_h(t)\|_W^2 )^{1/2}\nonumber\\
\noalign{\vskip6pt}
&&\qquad \le c_0 h^{k+1}\ ,\qquad h\in \H\ ,\ 0\le t\le T\ .
\end{eqnarray}
The preceding results apply to wave equations (cf. Section VI.2.1),
viscoelasticity equations such as VI.(2.9), and
Sobolev equations (cf. Section VI.3).
\exercises
\begin{description}
\item[1.1.]
Show that a solution of the Neumann problem $-\Delta_nu=F$ in $G$,
$\partial u/\partial v=0$ on $\partial G$ is a $u\in H^1(G)$ at which
the functional \eqn{713} attains its minimum value.
\medskip
\item[2.1.]
Show that $F:K\to\RR$ is weakly lower-semi-continuous at each $x\in K$
if and only if $\{x\in V:F(x)\le a\}$ is weakly closed for every $a\in \RR$.
\item[2.2.]
In the proof of Theorem \ref{thm7-2B}, show that $\varphi'(t)=F'(y+t(x-y))
(x-y)$.
\item[2.3.]
In the proof of Theorem \ref{thm7-2E}, verify that $M$ is closed and convex.
\item[2.4.]
Prove Theorem \ref{thm7-2G}.
\item[2.5.]
Let $F$ be $G$-differentiable on $K$. If $F'$ is strictly monotone, prove
directly that \eqn{725} has at most one solution.
\item[2.6.]
Let $G$ be bounded and open in $\RR^n$ and let $F:G\times\RR\to\RR$ satisfy
the following: $F(\cdot,u)$ is measurable for each $u\in\RR$, $F(x,\cdot)$
is absolutely continuous for almost every $x\in G$, and the estimates
$$|F(x,u)| \le a(x) + b|u|^2\quad ,\quad |\partial_u F(x,u)| \le c(x) + b|u|$$
hold for all $u\in \RR$ and a.e.\ $x\in G$, where $a(\cdot)\in L^1(G)$ and
$c(\cdot) \in L^2(G)$.
\begin{description}
\item[(a)] Define $E(u) = \int_G F(x,u(x))\,dx$, $u\in L^2(G)$, and show
$$E'(u)(v) = \int_G \partial_u F(x,u(x))v(x)\,dx\ ,\qquad u,v\in L^2(G)\ .$$
\item[(b)] Show $E'$ is monotone if $\partial_u F(x,\cdot)$ is
non-decreasing for a.e.\ $x\in G$.
\item[(c)] Show $E'$ is coercive if for some $k>0$ and $c_0(\cdot) \in L^2(G)$
we have
$$\partial_u F(x,u)\cdot u\ge k|u|^2 - c_0(x)|u|\ ,$$
for $u\in \RR$ and a.e.\ $x\in G$.
\item[(d)] State and prove some existence theorems and uniqueness theorems
for boundary value problems containing the semi-linear equation
$$-\Delta_n u +f(x,u(x)) =0 \ .$$
\end{description}
\item[2.7.]
Let $G$ be bounded and open in $\RR^n$. Suppose the function $F:G\times
\RR^{n+1}\to \RR$ satisfies the following:
$F(\cdot,\hat u)$ is measurable for $\hat u\in \RR^{n+1}$,
$F(x,\cdot):\RR^{n+1}\to \RR$ is (continuously) differentiable for a.e.
$x\in G$, and the estimates
$$|F(x,\hat u)| \le a(x) + b\sum_{j=0}^n |u_j|^2\quad ,\quad
|\partial_k F(x,\hat u)| \le c(x) + b\sum_{j=0}^n |u_j|$$
as above for every $k$, $0\le k\le n$, where $\partial_k = {\partial\over
\partial u_k}$.
\begin{description}
\item[(a)] Define $E(u) = \int_G F(x,u(x),\nabla u(x))\,dx$,
$u\in H^1(G)$, and show
$$E'(u)(v) = \int_G \sum_{j=0}^n \partial_j F(x,u,\nabla u)\partial_j
v(x)\,dx\ ,\qquad u,v\in H^1(G)\ .$$
\item[(b)] Show $E'$ is monotone if
$$\sum_{j=0}^n (\partial_j F(x,u_0,u_1,\ldots,u_n)- \partial_j F(x,v_0,v_1,
\ldots,v_n)) (u_j-v_j) \ge 0$$
for all $\hat u,\hat v\in \RR^{n+1}$ and a.e. $x\in G$.
\item[(c)] Show $E'$ is coercive if for some $k>0$ and $c_0(\cdot)\in
L^2(G)$
$$\sum_{j=0}^n \partial_j F(x,\hat u)u_j \ge k\sum_{j=0}^n |u_j|^2
- c_0 (x) \sum_{j=0}^n |u_j|$$
for $\hat u\in \RR^{n+1}$ and a.e. $x\in\RR^n$.
\item[(d)] State and prove an existence theorem and a uniqueness theorem
for a boundary value problem containing the nonlinear equation
$$\sum_{j=0}^n \partial_j F_j(x,u,\nabla u) = f(x)\ .$$
\end{description}
\medskip
\item[3.1.]
Prove directly that \eqn{734} has at most one solution when $a(\cdot,\cdot)$
is (strictly) positive.
\item[3.2.]
Give an example of a stretched membrane (or string) problem described in the
form \eqn{736}. Specifically, what does $g$ represent in this application?
\medskip
\item[4.1.]
Show the following optimal control problem is described by the abstract
setting of Section 4.1: find an admissible control $u\in U_{\ad} \subset
L^2(G)$ which minimizes the function
$$J(u) = \int_G |y(u)-w|^2 \,dx + c\int_G |u|^2\,dx$$
subject to the state equations
$$\cases{-\Delta_n y=F+u&in $G$\ ,\cr \noalign{\vskip6pt}
y=0&on $\partial G$\ .\cr}$$
Specifically, identify all the spaces and operators in the abstract
formulation.
\item[4.2.]
Give sufficient conditions on the data above for existence of an optimal
control. Write out the optimality system \eqn{7410} for cases analogous to
Sections 4.5 and 4.6.
\medskip
\item[5.1.]
Write out the special cases of Theorems \ref{thm7-5A} and \ref{thm7-5B} as
they apply to the boundary value problem
$$\cases{-\partial (p(x)\partial u(x)) + q(x) u(x)=f(x)\ ,\qquad 0__