\documentclass[reqno]{amsart} 

\AtBeginDocument{{\noindent\small 
{\em Electronic Journal of Differential Equations},
Vol. 2001(2001), No.~07, pp. 1--12.\newline
ISSN: 1072-6691. URL: http://ejde.math.swt.edu or http://ejde.math.unt.edu
\newline ftp ejde.math.swt.edu \quad ejde.math.unt.edu (login: ftp)}
\thanks{\copyright 2001 Southwest Texas State University.} 
\vspace{1cm}}

\begin{document} 

\title[\hfilneg EJDE--2001/07\hfil A penalty method for approximations]
{ A penalty method for approximations of the
 stationary power-law Stokes problem } 

\author[ Lew Lefton \& Dongming Wei \hfil EJDE--2001/07\hfilneg]
{Lew Lefton \& Dongming Wei }

\address{Lew Lefton \hfill\break\indent
School of Mathematics\\ 
Georgia Institute of Technology \hfill\break\indent
Atlanta, Georgia 30332, USA} 
\email{llefton@math.gatech.edu}

\address{Dongming Wei \hfill\break\indent
Department of Mathematics\\
University of New Orleans \hfill\break\indent
New Orleans, Louisiana 70148, USA}
\email{dwei@math.uno.edu}

\date{}
\thanks{Submitted October 31, 2000. Published January 10, 2001.}
\subjclass{65N30, 65N12, 65N15, 35J70 }
\keywords{Power-law flow, penalty method, stationary Stokes problem,
\hfill\break\indent
non-Newtonian flows, convergence and error estimates, LBB condition}

\begin{abstract}
 We study approximations of the
 steady state Stokes problem governed by the power-law model for
 viscous incompressible non-Newtonian flow using the penalty
 formulation.  We establish convergence and find error estimates. 
\end{abstract}
\maketitle

\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\theoremstyle{remark}\newtheorem{remark}[theorem]{Remark}
\numberwithin{equation}{section}

\newcommand{\abs}[1]{\lvert#1\rvert}
\newcommand{\bu}{\ensuremath{\mathbf{u}}}
\newcommand{\bv}{\ensuremath{\mathbf{v}}}
\newcommand{\bx}{\ensuremath{\mathbf{x}}}
\newcommand{\by}{\ensuremath{\mathbf{y}}}
\newcommand{\bX}{\ensuremath{\mathbf{X}}}
\newcommand{\bD}{\ensuremath{\mathbf{D}}}
\newcommand{\bsigma}{\mbox{\boldmath\ensuremath{\sigma}}}
\newcommand{\bphi}{\mbox{\boldmath\ensuremath{\phi}}}
\newcommand{\bW}{\ensuremath{\mathbf{W}}}
\newcommand{\sn}[3]{\ensuremath{\|#3\|_{#1,#2}}} % Sobolev norm


\section{Introduction}
Let $\Omega $ be a convex bounded domain in
$\mathbb{R}^d$, $d\ge 2$.  We consider the
steady state flow of a fluid in $\Omega$, where 
$\bu(\bx)=(u_1(\bx),\dots ,u_d(\bx))$ denotes the velocity of a fluid
particle at $\bx=(x_1,\dots ,x_d)\in \Omega$.  Let $\bsigma\in
\mathbb{R}^d\times \mathbb{R}^d$ denote the stress tensor for the fluid.  
The momentum equations for an isothermal steady state flow are
\begin{equation}\label{eq:int1} 
\rho(\bu\cdot \nabla ) \bu =
\nabla \cdot \mbox {\bsigma} + \mathbf f \quad \mathrm{in}~\Omega, 
\end{equation} 
where $\rho$ is the density of the fluid,  $\mathbf f =(f_1,\dots ,f_d)$ the
body force, and $\nabla = (\frac{\partial} {\partial x_1},\ldots, 
\frac{\partial} {\partial x_d})$.
The $j^{th}$ component of $(\bu \cdot \nabla) \bu$ is $
\sum_{i=1}^d {u_i}\frac{\partial u_j} {\partial x_i}$ and
$\nabla \cdot \mbox {\bsigma}$ is obtained by applying the divergence
operator, defined by $\nabla \cdot \bu = \sum_{i=1}^d
\frac{\partial u_i}{\partial x_i}$,
to each row of $\mbox {\bsigma}$. We further assume that the
fluid is incompressible so it satisfies the continuity equation
\begin{equation}\label{eq:int2}
\nabla \cdot \bu = 0 \quad \mathrm{in}~ \Omega.
\end{equation}
The rate of deformation tensor $\bD(\bu)\in \mathbb{R}^d\times\mathbb{R}^d$ is the symmetric gradient of
$\bu$ with components $D_{ij}(\bu) = \frac 1 2 \left(\frac{\partial
u_i}{\partial x_j}+\frac{\partial u_j}{\partial x_i}\right)$.  For
incompressible fluids, the second invariant of $\bD(\bu)$ denoted
$\Pi_{\bD}(\bu)$ satisfies \[ -2 \Pi_{\bD}(\bu)= \bD(\bu):\bD(\bu) =
\sum_{i,j=1}^d {D_{ij}(\bu)^2}= \abs{\bD(\bu)}^2,  \]
where $|\cdot|$ denotes the Euclidean matrix norm; that is for ${\bf K}$, a 
$ d \times d$ real matrix, $|{\bf K}|=[\sum_{i,j=1}^d (k_{ij})^2]^{\frac {1}{2}}.$
In the power-law model for non-Newtonian fluid flows, it is assumed
that viscosity $\eta$ varies as a power of the shear-strain rate, giving a
stress tensor $\mbox {\bsigma}$ of the form
\begin{equation}\label{eq:int3}
 \bsigma =-p {\mathbf I} +
\eta(\Pi_{\bD}(\bu)) \bD(\bu), \end{equation} 
where $p$ is the scalar pressure,
 $\mathbf I\in \mathbb{R}^d\times\mathbb{R}^d$ is the $d$ dimensional identity matrix, and
\[
\eta(z)=\eta_0 |z|^{(r-2)/2}, z \in \mathbb{R}.
\]
Here we assume $1< r < +\infty$, and
$\eta_0 > 0$.  Substituting (\ref{eq:int3}) into (\ref{eq:int1}), we
obtain the steady state power-law Navier-Stokes equation
\begin{equation}\label{eq:int4} 
- k \nabla \cdot (\abs{\bD(\bu)}^{r-2}
\bD(\bu) )+\rho(\bu\cdot \nabla) \bu+ \nabla p = \mathbf f, \end{equation}
where $k=\eta_0/2^{\frac{r-2}{2}}$.

The \textit{power-law Stokes equation} is obtained by neglecting the
inertial term $(\bu\cdot \nabla )\bu$ in (\ref{eq:int4}).  This model
of non-Newtonian flow is very popular in chemical engineering  \cite{BCH} as well
as in geophysics \cite{SE}.  It has also been used
in applications for the design of extrusion dies \cite{BBM},
\cite{LWT}, and for the study of the lithosphere \cite{EJ}, \cite{EM1},
\cite{EM2}.  To make the Stokes problem well posed, we assume that the solution satisfies the continuity
equation (\ref{eq:int2}) and, for simplicity, a homogeneous boundary condition 
of Dirichlet type.  The resulting problem is
\begin{equation}\label{eq:int5} 
\gathered 
- k \nabla \cdot(\abs{\bD(\bu)}^{r-2} \bD(\bu) ) +\nabla p = \mathbf f
 \quad \mbox{in}~ \Omega,\\ 
\nabla\cdot \bu = 0 \quad \mbox{in}~ \Omega,\\ 
\bu = \mathbf 0 \quad \mbox{on}~ \partial \Omega,
\endgathered
\end{equation}
where $\partial \Omega$ denotes the boundary of $\Omega$.

\begin{remark}
If $d \ge 2$ and the flow is unidirectional then $\bu(\bx)=
(u(\bx),0,\dots ,0)$, where $u(\bx)$ is a scalar valued function and ${\mathbf f}=(f_1(\bx), 0, \ldots,0)$. For
$\bu$ to satisfy the continuity equation (\ref{eq:int2}), we have
$\frac{\partial u}{\partial x_1}=0$ which implies that $u(\bx)=
u(x_2,\dots ,x_d) $. Substituting $\bu(\bx)$ into equation (\ref{eq:int5}) and writing it as a system shows that  $(\frac{\partial p}{\partial x_2}, \ldots, \frac{\partial p}{\partial x_n}) ={\mathbf 0}$ and $\tilde f \equiv f_1(\bx) - \frac{\partial p}{\partial x_1}$ is independent of $x_1$.
 Thus, we are left with 
a scalar quasilinear elliptic Dirichlet problem in the $\mathbb{R}^{d-1}$ domain $\tilde \Omega = \Omega \cap \{x_1=0\}$
\[ 
-{\tilde k} \Delta_r u = {\tilde f} \quad
\mbox{in}~ {\tilde\Omega},\quad \quad u = 0\quad \mbox{on}~ \partial {\tilde\Omega} 
\] 
where $\Delta_r u = \nabla \cdot(\abs{\nabla u}^{r-2}\nabla u)$ is
the quasilinear generalization of the Laplacian known as the $r$-Laplacian, and
$\nabla u = (\frac{\partial u}{\partial x_2},\dots , \frac{\partial u}{\partial x_d})$.
(We note that $\Delta_p$ is frequently called the $p$-Laplacian in the
literature, but we are using $p$ to denote fluid pressure here.)  There has been
a great deal of analytical (e.g.,~\cite{Bh}, \cite{D}, \cite{MNZ}) and
numerical work (e.g.,~\cite{BL3}, \cite {GM}, \cite{LW2}, \cite{W}) devoted to problems
involving $\Delta_r u$. 
\end{remark}

\begin{remark} In our notation, the power-law index is the value
$n=r-1$.  We recall that a fluid is considered to be Newtonian if it
has power-law index $n = 1$, and non-Newtonian if $n \ne 1$.  The
power-law equation (\ref{eq:int3}) is also called the Ostwald-deWaele
equation.  When $ 0 < n < 1$, which corresponds to our parameter $ 1 <
r < 2$, power-law fluids exhibit a decrease in viscosity with
increasing shear stress and they are known as pseudoplastic or
shear-thinning fluids. When $ 1 < n < \infty $, corresponding to $ 2 <
r < \infty$, power-law fluids exhibit an increase in viscosity with
increasing shear stress and they are known as dilatant or
shear-thickening fluids \cite{Wi}. For a specific example, we cite  \cite{J} where the value of $n$ for a certain tomato paste is given as  $n=0.257$.
\end{remark}
One way of studying a stationary power-law Stokes flow is to 
consider the velocity field $\bu$ as the minimizer of an appropriate 
energy functional.  In order to enforce the constraint of a divergence 
free flow, we seek a minimizer $\bu$ in the space of divergence free 
vector fields, that is, we solve the problem:
\begin{equation}\label{eq:int6}
\gathered
\text{Find the minimum of}~J(\bu)
 = \frac{k}{r}\int_\Omega  |\bD(\bu )|^ {r}\;  d\bx- \int_\Omega
\mathbf f \cdot \bu \;d\bx, \\
 \text{where }~\bu\in\bX= \left\{ \bu \in \bW_0^{1,r}(\Omega): 
\nabla \cdot \bu =0\right\}.
\endgathered\end{equation}
 This is a
minimization problem with a constraint and we refer to it as the
\textit{variational formulation} of power-law flow. The Euler equation
corresponding to 
this minimization problem describes a solution for the velocity field.
Another common way of studying power-law flow
is to simultaneously find a pair $(\bu,p)$ that satisfies (\ref{eq:int5}).  This is called the \textit{mixed weak 
formulation} and it is written down precisely in (\ref{eq:mwf1}) and  (\ref{eq:mwf2}). The connection bewteen these two formulations comes from a technical
inf-sup condition (see Theorem \ref{thm:lbb1}-\ref{thm:lbb2} below) which is frequently called the \textit{LBB condition} named after Ladyzhenskaya, Brezzi, and Bab\v uska. This condition can be stated as the following:  $\exists \, \beta > 0$ such that
\begin{equation}\label{eq:int7}
\inf_{ q \in
L^{r'}_0(\Omega)}\sup_{ \bv \in \bW_0^{1,r}(\Omega)} \frac{\langle \nabla \cdot
\bv , q\rangle}{\|q\|_{0,r'}\|\bv \|_{0,r}}\ge \beta. 
\end{equation}
When this condition holds, the variational formulation and the mixed weak
formulation are equivalent in the sense that $\bu$ is a solution of
the variational formulation if and only if $(\bu ,p)$ is a solution of
the mixed weak formulation with $p$ being solved in terms of $\bf u$
using the inverse of the gradient  operator. The pressure can then be computed after the velocity is known provided that the \textit{LBB} condition holds.

The \textit{LBB} condition is well known to hold for the linear problem ($r=2$) on
Lipschitz domains in any dimension \cite{Ba}, \cite{Br},
\cite{La}.  Since any bounded convex 
domain has a Lipschitz continuous boundary \cite[Corollary
1.2.-2.3]{G} we conclude that the \textit{LBB} condition holds for our $\Omega$ when $r=2$.  For the nonlinear problem $1 < r \le 2$, the \textit{LBB} has been
shown (\cite{AG1}, \cite{BN}) for smooth domains in dimension $d=2$.
This is generalized in \cite{AG2} where it is shown that the \textit{LBB} condition holds for all
dimensions 
$d>1$ and for the full range $ 1< r <\infty$ in Lipschitz domains.   Thus, the mixed weak formulation makes sense and it is 
equivalent to the variational formulation in our setting in this work.

We note that, in the variational formulation, $\bu$ is defined as the
minimum of a convex functional on a separable Banach space, thus (\ref{eq:int6}) always has a unique
solution $\bu$ for any $ 1 < r < \infty$, whether or not the \textit{LBB}
condition holds.  However, the pressure $p$ is not necessarily well
defined, so that results involving the pressure function $p$ typically
require the additional assumption of an \textit{LBB} condition and it is not known if this condition holds in a nonconvex domain.

The two formulations discussed above for the power-law Stokes problem
are both useful for the numerical analysis of the problem.  A finite
element analysis of the power-law Stokes problem using the mixed weak
formulation has been studied by several authors, for example, in  \cite{BN}, \cite{BGN}, \cite{BL1}, \cite{BL2}. 
Finite element analysis using the direct variational formulation
(\ref{eq:int6}) requires one to solve a constrained minimization problem
and construct finite element spaces with approximately divergence free interpolation
functions.  Thus, the variational formulation and its associated
constrained minimization problem is more difficult for both analysis
and numerical approximation.  A natural way to overcome this
difficulty is to introduce a penalty functional that eliminates the
constraint.  The first use of the penalty function method in
conjuction with the finite element method is due to Bab\v uska
\cite{Ba}. The method was quickly adopted as a standard tool for the
finite element analysis of viscous, incompressible fluid flows
\cite  {Zi}. Extensive studies of the penalty method applied to fluid flow 
problems, both experimentally and mathematically, have appeared from
the late seventies to the present day. Here we cite only several
important articles among them \cite{F1}, \cite{F2}, \cite {FKg},
\cite{H}, \cite{HLB}, \cite{OKS}, \cite{Od1}, \cite{Od2}, \cite{R},
\cite{R2}, and \cite{Zi}.  A very general mathematical analysis of the penalty method applied to nonlinear
problems including a class of non-Newtonian fluid flow problems was
presented by Oden \cite{Od2}. His work provides some important convergence
results. It appears that when the penalty method is applied to the Newtonian Stokesian flow problems, the resultant matrices are ill-conditioned. However, this 
deficiency can be overcomed by the use of reduced integration techniques.

For a given penalty parameter $\epsilon>0$, the penalty formulation
requires the unconstrained minimization of the nonlinear convex
functional 
\[ J_{\epsilon}(\bu)=J(\bu)+\frac{1}{r\epsilon}\int_{\Omega}|\nabla\cdot \bu|^r d\bx \] 
over the Sobolev space
${\bW}_0^{1,r}(\Omega)$.
The corresponding pressure $p^\epsilon$ is defined in terms of the minimizer $\bu^\epsilon$.  We prove that the penalty
approximation $\bu^\epsilon$ of the unconstrained minimization problem
min$\{J_\epsilon(\bu): \bu \in \bW_0^{1,r}(\Omega)\}$ converges to the
true solution $\bu$ of min$\{J(\bu): \bu \in X \}$ as $\epsilon
\to 0$ for any $1 < r < \infty$ without assuming that the domain $\Omega$ is convex. This convergence result is only 
for the velocity field since the pressure may be undefined.  However,
because of the more general variational setting, this result
establishes the validity of a penalty approximation even when the \textit{LBB} condition fails to hold.  This is a
convergence result, not an error estimate, but it doesn't require the
\textit{LBB} condition (\ref{eq:int7}).
Here we are writing $(\bu, p)$ for the unique solution of the mixed
formulation (\ref{eq:mwf2}), and $\bu^\epsilon$ the penalty solution. 
When the \textit{LBB} condition holds, we obtain error
estimates for the velocity field $\sn{1}{r}{\bu - \bu^\epsilon} = O(\epsilon^{g_1(r)})$, where
$g_1(r) = \frac{1}{(r-1)(3-r)}$ for $1 < r \le 2$, and $g_1(r) =
\frac{1}{(r-1)^2}$ for $2 \le r < \infty $. Let $\phi(z) = |z|^{r-2}z, z \in \mathbb{R}$ and let $p^{\epsilon}
= c-\phi(\nabla\cdot\bu^{\epsilon})$, where $ c =
\int_{\Omega}\phi(\nabla\cdot\bu^{\epsilon})d\bx$.  We also show
error estimates for the pressure $ \|p - p^{\epsilon}\|_{0,r'} =
O(\epsilon^{g_2(r)})$ where $g_2(r) = \frac{1}{(3-r)}$ for $ 1 < r \le
2$ and $g_2(r) = \frac{1}{(r-1)^2}$ for $2 \le r < \infty $.  These
rates of convergence reduce to known results $\sn{1}{2}{\bu -
\bu^\epsilon}+\|p - p^{\epsilon}\|_{0,2} = O(\epsilon)$ for the Newtonian
case $ r = 2 $ as discussed in \cite{GR}, \cite{OKS}, \cite{R}, and
\cite{T}.  

 This is an important feature of the unconstrained
penalty minimization formulation which makes it convenient for error
analysis and numerical implementation. In contrast, the mixed weak
formulation requires the solution of a system of nonlinear equations and a
discrete \textit{LBB} condition.
Thus, we provide a 
mathematical analysis of the penalty
method applied to the power-law Stokes problem in the variational
formulation.  To our knowledge, this is a generalization of the  analysis available in the literature for Newtonian flows. 
Numerical experiments have been performed on power-law flow 
problems using the penalty method in the engineering literature \cite{LLH}, \cite{LWT}, \cite{R}, \cite{RP}, and \cite{Zi}. 
Since pressure must then be calculated from the computed
velocity field, the accuracy of the pressure is lower than
that of the velocity as shown in our error estimates.  It is interesting to note that the penalty term $\frac{1}{2\epsilon}\int_{\Omega}|\nabla\cdot \bu|^2 d\bx $ was used in \cite{RP} to approximate power-law flows instead of
$\frac{1}{r\epsilon}\int_{\Omega}|\nabla\cdot \bu|^r d\bx$, which is used in this work and reasonable numerical results were obtained without a mathematical analysis.
 
\section{Preliminaries}
We begin by establishing some notation.  Let $ L^r(\Omega)$ for $ 1 <
r < \infty$ be the space of real scalar functions defined on $\Omega$ whose
$r^{\hbox{th}}$ power is absolutely integrable with respect to
Lebesgue measure $d{\bx}=dx_1 \ldots dx_d$.  This is a Banach space with
the norm $\sn{0}{r}{u} = (\int_\Omega |u({\bx})|^r\, d{\bx})^{1/ r}$.  The
Sobolev space $W^{k,r}(\Omega)$ is the space of functions in $
L^r(\Omega)$ with distributional derivatives up to order $k$ also in $
L^r(\Omega)$.  The norm for this space is $ \sn{k}{r}{u} =
(\int_\Omega \sum_{|j|\le k}|D^j u({\bx})|^r \, d{\bx})^{1/ r}$, where we use
the standard multi-index notation.  That is, for $j = (j_1 \ldots
j_d)\in {\mathbb N}^d$, where ${\mathbb N}$ is the set of natural
numbers, define $|j|=\sum\limits_{i=1}^{d}j_i$ and write the partial
derivative $D^{j}
u({\bx})=\frac{\partial^{|j|}u}{\partial^{j_1}{x_{1}}\ldots
\partial^{j_d}{x_d}}$.
The closure of $ C^{\infty}_0(\Omega)$ in $W^{k,r}(\Omega)$ is denoted
by $W_0^{k,r}(\Omega)$.  For systems of equations, we need the product
spaces defined by ${\mathbf L}^r(\Omega)= [L^r(\Omega)]^d$, ${\mathbf
W}^{k,r}(\Omega)=[W^{k,r}(\Omega)]^d$, and ${\mathbf
W}_0^{k,r}(\Omega)=[W_0^{k,r}(\Omega)]^d$.  The norm for $\bv = (v_1,
\ldots, v_d)\in {\mathbf L} ^{r}(\Omega)$ is $\sn{0}{r}{\bv} = (
\int_{\Omega} \sum_{i=1}^d |v_i|^r\, d\bx )^{1/r}$.  For
$\bv\in \bW^{k,r}(\Omega)$ we have norm $\sn{k}{r}{\bv} = (
\int_{\Omega} \sum_{i=1}^d \sum_{|j|\le k}|D^j v_i|^r\,
d\bx)^{1/r}$. It is well known \cite{A}, that the seminorm $|\bv|_{k,r}
=( \int_{\Omega}(\sum_{i=1}^d \sum_{|j| = k}|D^j v_i|^r\,
d\bx)^{1/r}$ is equivalent to $\sn{k}{r}{\bv}$ for $\bv\in
\bW_0^{k,r}(\Omega)$.  In addition, by Korn's inequality, see \cite {A},
the norm $\| \bD(\bu)\|_{0,r}=( \int_\Omega \sum_{i,j=1}^d
|D_{ij}(\bu)|^r\, d\bx)^{1/r}$ is equivalent to $\|\bu\|_{1,r}$ in
$\bW_0^{1,r}(\Omega)$.   For $1<r<\infty$ let $r'$ satisfy
$\frac{1}{r}+ \frac{1}{r'} = 1$, which is equivalent to
$r'=\frac{r}{r-1}$.
 Let $\sn{-1}{r'}{\cdot}$ denote the norm on
$\bW^{-1,r'}(\Omega)$ which is the dual space of
$\bW_0^{1,r}(\Omega)$.  Let $\bphi(\bx) = |\bx|^{r-2}\bx$, where $\bx \in \mathbb{R}^d$.
Note that $\bphi^{-1}(\bx)= |\bx|^{r'-2}\bx$.   Finally, let $L^r_0(\Omega)=\{q\in L^r(\Omega):\int_\Omega q \, d\bx= 0\}$.

The following inequalities hold for all $\bx$, $\by\in \mathbb{R}^d$; 
the constant $C>0$ is independent of $\bx$ and $\by$.  
\begin{equation}
\gathered 
|\bx - \by|^2 \leq C (\bphi(\bx) - \bphi(\by))\cdot(\bx - \by)
(|\bx| + |\by|)^{2-r}, \\ 
|\bphi(\bx) - \bphi(\by)| \leq C
|\bx-\by|^{r -1},\, \text{for~} 1 < r < 2;
\endgathered \label{eq:pre1}
\end{equation}
\begin{equation}\gathered
|\bx -\by|^{r} \leq C \ (\bphi(\bx) - \bphi(\by))\cdot(\bx - \by), \\ 
|\bphi(\bx) - \bphi(\by)| \leq C |\bx-\by|(|\bx| + |\by|)^{r-2},\,
\text{for~} 2 \leq r < \infty.  
\endgathered\label{eq:pre2} 
\end{equation} 
They were proved for the case $d=2$ in Glowinski and Marroco \cite{GM} and were generalized by Barrett and Liu \cite{BL2}, see also \cite{Ch}.  A simple proof for
general $d$ is shown in DiBenedetto \cite{Di}. 
Finally, for completeness, we quote some important
results from convex analysis and functional analysis which will apply to our problem.  See \cite{C} or \cite{FK} for further details.
Let $X$ be a reflexive Banach space with dual space $X^*$.  Suppose the operator $T: X\to X^*$.  Let $\langle u^*, u\rangle$ denote the duality pairing between $u\in X$ and $u^*\in X^*$.  We say $T$ is \textit{bounded} if $\exists\, C>0$ such that $\Vert T u\Vert_{X^*} \leq C \Vert u\Vert_{X}$ for all $u\in X$.  The operator
$T$ is \textit{monotone} if $\langle T u - T v, u-v\rangle \ge 0$ for all $u, v\in X$.  $T$ is \textit{strictly monotone} if the inequality is strict for all $u, v\in X$ with $u\ne v$.  A \textit{coercive} operator $T$ satisfies $\lim_{\Vert u \Vert \to \infty} \frac{\langle T u, u\rangle}{\Vert u\Vert} = \infty$.
Finally, we say 
$T$ is \textit{potential} if $\exists$ a functional $J:X\to \mathbb{R}$ such that $J'(u) = T u$  for all $u\in X$ (i.e. $\langle J'(u), v\rangle = \langle T(u), v\rangle$
for all $u, v\in X$).

\begin{theorem}\label{thm:pre1}
Let $T: X\to X^*$ be a bounded, monotone, coercive, potential operator.  Then $TX= X^*$.  Thus, $Tu=f$ has a solution for every $f\in X^*$.  Moreover, if $T$ is strictly monotone, then $Tu=f$ has a unique solution.
\end{theorem}

\begin{theorem}\label{thm:pre2}
Let $J$ be a functional defined on $X$ such that $\lim\limits_{\Vert u\Vert \to \infty} J(u) = \infty$.  If $J$ is either (i) continuous and convex on $X$ or (ii) weakly lower semicontinuous on $X$ then $\inf_{u\in X} J(u) > -\infty$ and there exists at least one $u_0\in X$ such that $J(u_0) = \inf_{u\in X} J(u)$.  Moreover, if $J$ is continuous and strictly convex on $X$ then there is precisely one such $u_0$.  
\end{theorem}

\begin{theorem}\label{thm:pre3}
Let $J:X\to \mathbb{R}$ be a functional with a local extremum at $u_0\in X$.
If $\langle J'(u_0), v\rangle$ exists for some $v\in X$ then 
$\langle J'(u_0), v\rangle =0$.
\end{theorem}


\section{The Variational Formulation (VF) of the Stokes Problem}
In order to apply the penalty method, we first consider the
variational formulation of (\ref{eq:int5}) given in (\ref{eq:int6}).
The Euler-Lagrange equation associated to problem
(\ref{eq:int6}) is 
    \begin{equation}\label{eq:vf1} 
    \langle A(\bu),\bv
    \rangle = \langle{\mathbf f},\bv \rangle\, \quad \forall~\bv \in \bX
    \end{equation} 
where
$A:\bW_0^{1,r}(\Omega)\to \bW^{-1,r'}(\Omega)$ is defined by 
\[
\langle A(\bu),\bv \rangle = k \int_{\Omega}
\abs{\bD(\bu)}^{r-2}\bD(\bu):\bD(\bv )d\bx \quad \forall \bv\in
\bW_0^{1,r}(\Omega). 
\] We use  $\langle \cdot, \cdot\rangle$ to denote the
duality pairing between $\bW_0^{1,r}(\Omega)$ and
$\bW^{-1,r'}(\Omega)$ as well as between  ${\bf L}^{r}(\Omega)$ and  ${\bf L}^{r'
}(\Omega)$.  In particular, we only need to assume 
${\mathbf f}\in \bW^{-1,r'}(\Omega)$ for this formulation.

By using (\ref{eq:pre1}) and (\ref{eq:pre2}), we obtain the following where 
$C>0$
denotes a generic constant independent of $\bu$ and $\bv$.
\begin{equation} \gathered 
\|\bu - \bv\|_{1,r}^2 \leq C\langle A(\bu) - A(\bv ),{\bu-\bv}
\rangle (\|\bu\|_{1,r} + \|\bv\|_{1,r})^{2-r}, \\ 
 \| A(\bu) -
A(\bv )\|_{-1,r'} \le C \|\bu - \bv\|_{1,r}^{r-1},\text{ for~} 
1 < r \leq 2; \endgathered \label{eq:vf2} \end{equation}
\begin{equation}\gathered
\sn{1}{r}{\bu - \bv}^r \le C\langle A(\bu) - A(\bv ),
{\bu-\bv}\rangle,\\ 
\sn{-1}{r'}{A(\bu) - A(\bv )} \le C
\sn{1}{r}{\bu - \bv} (\sn{1}{r}{\bu} + \sn{1}{r}{\bv})^{r-2},
\text{ for~} 2 \leq r < \infty.
\endgathered\label{eq:vf3} 
\end{equation}
 From (\ref{eq:vf2}) and (\ref{eq:vf3}),  $A$ can be shown to be a bounded, monotone, coercive, potential operator on $\bX=\{\bu\in\bW^{1,r}_0(\Omega) : \nabla\cdot\bu = 0\}$. We conclude using Theorem \ref{thm:pre1} that (\ref{eq:vf1}) has a unique solution~$\bu$. It follows that $J:\bX \to \mathbb{R}$
is a continuous, strictly convex functional on $\bX$ and
$$\lim\limits_{\|\bu\|_{1,r} \to \infty} J(\bu) = \infty.$$  Thus, by
Theorem \ref{thm:pre2} problem (\ref{eq:int6}) has one and
only one solution $\bu$ and hence (\ref{eq:vf1}) and  (\ref{eq:int6}) are equivalent.
Note that in this formulation, the pressure
function $p$ does not appear. 

\section{The Mixed Weak Formulation (MWF) of the Stokes Problem}

For the mixed weak formulation of the Stokes problem (\ref{eq:int5}),
we suppose ${\mathbf f}\in  \bW^{-1,r'}(\Omega)$.  The problem is
then to simultaneously find $\bu \in \bW_0^{1,r}(\Omega)$ and $ p \in
L^{r'}_0(\Omega)$, such that 
\begin{equation}\label{eq:mwf1}
\gathered
 k \int_\Omega \abs{\bD(\bu)}^{r-2}\bD(\bu):\bD(\bv)\,d\bx -
\int_\Omega p\nabla\cdot \bv \,d\bx 
= \int_\Omega \mathbf f\cdot \bv \,d\bx\,\quad
\forall \bv \in \bW_0^{1,r}(\Omega) \\
 \int_\Omega q \nabla \cdot \bu \,d\bx
=0\,\quad\quad \forall q \in L^{r'}_0(\Omega).  \endgathered
\end{equation} 
If we let $b(p,\bv)$ be the bilinear form defined on
$L^{r'}_0(\Omega)\times \bW_0^{1,r}(\Omega)$ by $ b(p,\bv )
=\int_\Omega p\nabla \cdot\bv \,d\bx$, then the weak formulation
(\ref{eq:mwf1}) can be rewritten as the problem of finding $(\bu,p) \in
\bW_0^{1,r}(\Omega) \times L^{r'}_0(\Omega)$ such that
\begin{equation}\label{eq:mwf2} 
\begin{split} \langle A(\bu),\bv
\rangle - b(p,\bv ) &= \langle{\mathbf f},\bv \rangle\, \quad
\forall~\bv \in \bW_0^{1,r}(\Omega),\\ b(q,\bu) &= 0\, \quad \forall~q
\in L^{r'}_0(\Omega).\\ \end{split} \end{equation} 
The existence and uniqueness of solutions of (\ref{eq:mwf1}) and
(\ref{eq:mwf2}) was studied by J.~Ba\-ran\-ger and Najib \cite{BN} and 
J.~W. Barrett and W. B. Liu \cite{BL2}.  


This mixed weak formulation requires the pressure $p\in
L^{r'}_0(\Omega)$ and the
\textit{LBB} condition is a sufficient condition to guarantee $p\in L^{r'}_0(\Omega)$.
It is possible that the velocity field is well defined within $
\bW_{0}^{1,r}(\Omega)$, but the mixed weak formulation is not well-posed  when, e.g., when
the domain is nonconvex and the \textit{LBB} condition fails to hold.
In this case, good approximations of the pressure from the velocity field are not expected from the penalty method.


\section{The \textit{LBB} Condition and Equivalence of (VF) and (MWF)}

Baranger and Najib prove in \cite{BN} that (\ref{eq:mwf2}) is equivalent to
(\ref{eq:vf1}) (and hence (\ref{eq:int6})) for any $1<r<\infty$
provided $\Omega$ is 2-dimensional and $\partial\Omega$ is smooth.
They actually prove the following.

\begin{theorem}\label{thm:lbb1}   Suppose $\Omega\subset \mathbb{R}^2$ has a
smooth boundary.  Let $1<r<\infty$.  Problem (\ref{eq:mwf2}) has a unique
solution if and only if (\ref{eq:vf1}) has a solution and the
divergence operator $B=\nabla\cdot$ is surjective and satisfies the
following  condition 
\begin{equation}
 0 < \alpha \le \inf_{ q \in
L^{r'}_0(\Omega)}\sup_{ \bv \in \bW_0^{1,r}(\Omega)} \frac{\langle B
\bv , q\rangle}{\|q\|_{0,r'}\|\bv \|_{1,r}}. 
\end{equation}
\end{theorem}
The same result is also stated in \cite {BL2}.
The inequality in Theorem \ref{thm:lbb1} is often referred to
as the \textit{LBB condition} for the continuous model.
Amrouche and Girault stated \cite{AG2} the following generalization.

\begin{theorem}\label{thm:lbb2}
 Let $\Omega$ be a bounded, connected, Lipschitz-continuous domain in
$\mathbb{R}^d$ and let $r$ be any real number with $ 1 < r < \infty$, and $ r'$ its conjugate. There exists a constant $ \beta > 0$ such that
\begin{equation}\label{eq:lbb1}
 0< \beta \le \inf_{ q \in
L^{r'}_0(\Omega)}\sup_{ \bv \in \bW_0^{1,r}(\Omega)} \frac{\langle B
\bv , q\rangle}{\|q\|_{0,r'}\|\bv \|_{1,r}}. 
\end{equation}
\end{theorem}
This allows us to conclude the equivalence of (\ref{eq:vf1}) and
(\ref{eq:mwf2}) in our more general setting.    

\section{An \textit{A Priori} Bound}

Using Theorems \ref{thm:lbb1} and \ref{thm:lbb2}, we conclude, (\ref{eq:mwf2}) has an unique
solution  $(\bu,p)$ in which $\bu$ is the unique solution of (\ref{eq:int6}).
See, e.g, \cite{BN} and \cite {GR}.


\begin{lemma} \label{thm:apb1} Let $\bu$ be the solution of 
(\ref{eq:int6}), then $ \|\bu \|_{1,r} \le C$. Suppose further that 
(\ref{eq:lbb1})  holds, and let $(\bu,p) $ be the solution of
(\ref{eq:mwf2}).  Then $ \|\bu \|_{1,r} \le C$ and $\|p\|_{0,r'} \le C
$. These constants $C>0$ depend only on $r$, $\Omega$ and $\mathbf f$.
\end{lemma}

\begin{proof} In (\ref{eq:vf1}), let $\bv = \bu$. Then
 $\langle A(\bu),\bu\rangle = \langle {\mathbf f}, \bu\rangle$.  We have, 
 by (\ref{eq:vf2}), $\sn{1}{r}{\bu}^r \le C
 \langle A(\bu),\bu\rangle = C \langle {\mathbf f},\bu\rangle \le C
 \|{\mathbf f}\|_{-1,r'}\sn{1}{r}{\bu}$ which implies
\begin{equation}\label{eq:apb1}
\|\bu\|_{1,r} \le C\|{\mathbf  f}\|_{-1,r'}^\frac{1}{(r-1)}, 
\end{equation} 
for $2\le r\le\infty$.
Similarly, by (\ref{eq:vf3}), we have $\sn{1}{r}{\bu}^2 \le C \langle
A(\bu),\bu\rangle \sn{1}{r}{\bu}^{2-r}= C \langle {\mathbf
f},\bu\rangle \sn{1}{r}{\bu}^{2-r}$ which gives (\ref{eq:apb1}) for
$1< r\le2$.  By (\ref{eq:lbb1}) and (\ref{eq:mwf2})
\begin{align}\label{eq:apb2}
 \|p\|_{0,r'} &\le C \sup_{\bv \in
 \bW^{1,r}_{0}(\Omega)}\frac{\langle \nabla\cdot
 \bv,p\rangle}{\|\bv\|_{1,r}} \notag\\ 
&= C\sup_{\bv \in \bW^{1,r}_0(\Omega)}
 \frac{\langle A(\bu), \bv \rangle - \langle \mathbf{f}, 
\bv\rangle }{ \| \bv \|_{1,r}} \\
&\le C( \|A(\bu)\|_{-1,r'} + \|{\mathbf f}\|_{-1,r'}). \notag
\end{align} 
Upon applying (\ref{eq:vf2}) and
 (\ref{eq:vf3}) to the right hand side of (\ref{eq:apb2}) and using
 (\ref{eq:apb1}) we conclude that $\|p\|_{0,r'} \le C $ for $1<r<\infty$. \end{proof}

\section{The Penalty Formulation for the Stokes Problem}
 
Let $ \epsilon $ be a positive number and consider the following
functional \[ J_{\epsilon}(\bu) = J(\bu)+ \frac{1
}{r\epsilon}\int_{\Omega}|\nabla \cdot \bu |^r \,d\bx, \] where $J(\bu)$ is
defined in (\ref{eq:int6}).  The minimizer $\bu$ of  $J_{\epsilon}(\bu)$ over $\bW_0^{1,r}(\Omega)$ satisfies the Euler-Lagrange
equation 
\begin{equation}\label{eq:pf1} 
\langle A(\bu),\bv \rangle +
\frac{1}{\epsilon} \langle \bphi(\nabla \cdot \bu), \nabla \cdot \bv
\rangle = \langle {\mathbf f},\bv \rangle \quad \forall~ \bv \in
\bW_0^{1,r}(\Omega).
\end{equation}
Note that $J_{\epsilon}(\bu)$ is strictly convex and the operator $T_\epsilon:\bW_0^{1,r}(\Omega)\to \bW^{-1,r'}(\Omega)$ defined by $T_\epsilon(\bu) = J'_\epsilon(\bu)$
 is a bounded, coercive, and  strictly monotone operator on $\bW_0^{1,r}(\Omega)$, since $\bphi$ satisfies 
(\ref{eq:pre1}) and (\ref{eq:pre2}).
Therefore, by Theorem \ref{thm:pre1}, (\ref{eq:pf1}) has a solution $ {\bu}^{\epsilon}$ which is
the unique solution  of 
\begin{equation}\label{eq:pf2}
\min_{ \bu \in\bW_0^{1,r}(\Omega)} J_{\epsilon}(\bu).
\end{equation}
We now prove two main results related to the penalty approximations $\bu^\epsilon$ of solutions $\bu$ of (\ref{eq:int6}).  The first is a general convergence result, and the second is a more precise error estimate which holds provided the \textit{LBB} condition also holds.

\begin{theorem}\label{thm:pf1}
 Let $\epsilon>0$ be given and suppose that $\bu$ is the solution of (\ref{eq:int6}) and $ {\bu}^{\epsilon}$ is the solution of (\ref{eq:pf2}).   Then $ {\bu}^{\epsilon}$ converges strongly to $\bu$ in $\bW_0^{1,r}(\Omega)$
as $\epsilon \to 0$. Furthermore, $\exists\;C$ independent of $\epsilon$ such that $\| {\bu}^{\epsilon}\|_{1,r}\le C$ and 
$\|\nabla \cdot {\bu}^{\epsilon}\|_{0,r}\le C \epsilon^{\frac{1}{r}}$.  Therefore, it follows that  $\nabla \cdot {\bu}^{\epsilon} \to 0$ in $L^{r}(\Omega)$
as $\epsilon \to 0$.
\end{theorem}

\begin{proof} The proof that $ {\bu}^{\epsilon}$ converges strongly to $\bu$ in
 $\bW_0^{1,r}(\Omega)$
as $\epsilon \to 0$ follows along the  lines  of \cite[Theorem 46.D. and Corollary 46.7]{Z}. We only need to
check that the functional $G(\bv)=\|\nabla \cdot \bv\|_{0,r}$ is weakly sequentially continuous in
 $\bW_0^{1,r}(\Omega)$. To this end, let $ \bv_{n} \rightharpoonup \bv$ in $\bW^{1,r}_0(\Omega)$.
Then $\left<\nabla \cdot \bv_{n}, \eta\right>  \to \left<\nabla \cdot \bv, \eta\right>$ as $ n \to 
\infty$ for any $ \eta \in L^{r'}(\Omega)$.  Indeed, $C^\infty(\Omega)$ is dense in $L^{r'}(\Omega)$ and 
\[
\langle \nabla \cdot \bv_{n}, \eta\rangle = -
 \langle \bv_{n},\nabla \eta\rangle \to -\langle\bv, \nabla \eta\rangle = \langle\nabla \cdot \bv, \eta\rangle, \forall \eta\in C^\infty(\Omega).
\]
 Therefore, $\nabla \cdot \bv_{n} \rightharpoonup
 \nabla 
 \cdot \bv$ in $L^{r}(\Omega)$. It is well known that the norm $\|\cdot \|_{0,r}$ is
 weakly 
 sequentially continuous in $L^{r}(\Omega)$. We conclude that 
$\lim_{n\to
 \infty} 
 \|\nabla \cdot \bv_{n}\|_{0,r} = \|\nabla \cdot \bv\|_{0,r}$ and hence 
$\bu^\epsilon\to \bu$ in $\bW_0^{1,r}(\Omega)$ because of \cite[Theorem 46.D. and Corollary 46.7]{Z}. To complete the proof, 
let $\bu=\bv = \bu^\epsilon$ in (\ref{eq:pf1}).  We have 
\[
\sn{1}{r}{\bu^\epsilon}^r + \frac{1}{\epsilon} \sn{0}{r}{\nabla\cdot\bu^\epsilon}^r \le 
C\sn{-1}{r'}{\mathbf f} \sn{1}{r}{\bu^\epsilon}
\]
which implies $\sn{1}{r}{\bu^\epsilon}^{r-1} \le  
C\sn{-1}{r'}{\mathbf f} $ and $\sn{0}{r}{\nabla\cdot\bu^\epsilon}^r \le 
{\epsilon}C\sn{-1}{r'}{\mathbf f}^{r'}$.  
Therefore $\| {\bu}^{\epsilon}\|_{1,r}\le C$ and $\nabla \cdot
 {\bu}^{\epsilon} \to 0$ in $L^r(\Omega)$
as $\epsilon \to 0$.
\end{proof} 

\noindent Note that when $r=2$, Theorem \ref{thm:pf1} is a well-known result \cite{T}. A generalized version of it for convergence in higher order derivative norms can be found in \cite{AG2}.
\begin{lemma}\label{lemma:pf2}
For each $ \epsilon >0$,  let $\bu^{\epsilon}$ denote the unique minimizer of (\ref{eq:pf2}) and let
$p^{\epsilon} = c -\frac{1}{\epsilon}\bphi(\nabla \cdot \bu^\epsilon)$, where $ c=\frac{1}
{\epsilon|\Omega|} \int_{\Omega}\bphi(\nabla \cdot \bu^\epsilon)d\bx$.
If (\ref{eq:lbb1}) holds (which is  the
\textit{LBB} condition), then there exists $C>0$ which 
depends only on $r$, $\Omega$ and
$\mathbf f$ such that $\|\nabla \cdot {\bu}^{\epsilon}\|_{0,r}\le C \epsilon^{\frac{1}{r-1}}$ and $\|p^{\epsilon}\|_{0,r'}\le C.$ 
\end{lemma}

\begin{proof} 
The pair $(\bu^{\epsilon},p^{\epsilon})$ satisfies
\begin{equation}\label{eq:pf3}
\begin{split}
\langle A({\bu^{\epsilon}}),\bv \rangle - b( p^{\epsilon},\bv ) &=
\langle {\mathbf f},\bv \rangle,\quad \forall \bv \in
\bW_0^{1,r}(\Omega),
\end{split}
\end{equation}
 since $\bu^{\epsilon}$ satisfies (\ref{eq:pf1}) and 
 $\int_\Omega c\nabla \cdot \bv d\bx =c \int_{\partial\Omega}
\bv\cdot {\bf n} ds =0$ by Gauss' Theorem.
Moreover, $p^{\epsilon}\in L^{r'}_0(\Omega)$, and 
by (\ref{eq:lbb1})  and (\ref{eq:pf3})
\begin{align*} \sn{0}{r'}{p^\epsilon}
&\le C\sup_{\bv \in \bW^{1,r}_0(\Omega)} \frac{\langle \nabla\cdot\bv,
p^\epsilon\rangle }{ \| \bv \|_{1,r}}=C \sup_{\bv \in \bW^{1,r}_0(\Omega)} \frac{
b(p^\epsilon,\bv) }{ \| \bv \|_{1,r}} \\ 
&= C\sup_{\bv \in \bW^{1,r}_0(\Omega)} 
\frac{\langle A(\bu^\epsilon), \bv\rangle
-\langle{\mathbf f}, \bv\rangle}{ \| \bv \|_{1,r}} \\
&\le C(\sn{-1}{r'}{A(\bu^\epsilon)} + \sn{-1}{r'}{\mathbf f}).
\end{align*}
Using (\ref{eq:vf2}) and (\ref{eq:vf3}) we conclude $\|p^{\epsilon}\|_{0,r'}\le C
(\sn{1}{r}{\bu^\epsilon}^{r-1} + \sn{-1}{r'}{\mathbf f})$, therefore 
by Theorem \ref{thm:pf1}
\[ \|p^{\epsilon}\|_{0,r'}\le C \sn{-1}{r'}{\mathbf f}. \]
Let $\bv = \bu - \bu^{\epsilon}$ in (\ref{eq:mwf2}) and
(\ref{eq:pf3}) and then subtract (\ref{eq:pf3}) from
(\ref{eq:mwf2}) to get 
\begin{equation}\label{eq:pf4}
 \langle A(\bu)-A(\bu^{\epsilon}),\bu-\bu^{\epsilon}\rangle = b(p -
p^{\epsilon},\bu-\bu^{\epsilon}). 
\end{equation} 
Since $\nabla\cdot\bu =0$ and  $\int_\Omega \nabla\cdot\bu^\epsilon\, d\bx =0$, the above equation gives
\begin{equation*}
 \langle A(\bu)-A(\bu^{\epsilon}),\bu-\bu^{\epsilon}\rangle  + \frac{1}{\epsilon} \langle \bphi(\nabla \cdot \bu^{\epsilon}), \nabla \cdot  \bu^{\epsilon}
\rangle =- b(p,\bu^{\epsilon}). 
\end{equation*} 
Since $  \langle A(\bu)-A(\bu^{\epsilon}),\bu-\bu^{\epsilon}\rangle\ge 0$ and  $ |b(p,\bu^{\epsilon})|\le \|p\|_{0,r'} \|\nabla \cdot \bu^{\epsilon}\|_{0,r}$
we have $$\frac{1}{\epsilon}\|\nabla \cdot  \bu^{\epsilon}\|_{0, r}^r =\frac{1}{\epsilon} \langle \bphi(\nabla \cdot \bu^{\epsilon}), \nabla \cdot  \bu^{\epsilon}\rangle \le \|p\|_{0,r'} \|\nabla \cdot \bu^{\epsilon}\|_{0,r},$$
which gives $\|\nabla \cdot  \bu^{\epsilon}\|_{0,r}\le C \epsilon^{\frac{1}{r-1}}$ by using Lemma  \ref{thm:apb1} to bound $\|p\|_{0,r'}$.
 
\end{proof}


\begin{theorem}\label{thm:pf3} Suppose  (\ref{eq:lbb1})
(the \textit{LBB} condition) holds. 
Let $(\bu,p)$ be the unique solution of
(\ref{eq:mwf2}) and $\bu^{\epsilon}$ be a solution of
(\ref{eq:pf2}).  Let
$p^{\epsilon} = c -\frac{1}{\epsilon}\bphi(\nabla \cdot \bu^\epsilon)$, where $ c=\frac{1}
{\epsilon|\Omega|} \int_{\Omega}\bphi(\nabla \cdot \bu^\epsilon)d\bx$.
 Then there exists $C>0$ which depends only on $r$,
$\Omega$, and $\mathbf f$ such that 
\begin{align*} \|\bu -
\bu^{\epsilon}\|_{1,r}&\le C \epsilon^\frac{1}{ (r-1)(3-r)}\quad
\text{for~} 1 < r \le 2\\ \text{and}\quad \|\bu -
\bu^{\epsilon}\|_{1,r}&\le C \epsilon^\frac{1}{(r-1)^2} \quad
\text{for~} 2 \le r <\infty. 
\end{align*} 
Furthermore,
\begin{align*} \|p - p^{\epsilon}\|_{0,r'}&\le C
\epsilon^\frac{1}{(3-r)}\quad \text{for~} 1 < r \le 2\\
\text{and}\quad \|p - p^{\epsilon}\|_{0,r'}&\le C
\epsilon^\frac{1}{(r-1)^2}\quad \text{for~} 2 \le r < \infty.
\end{align*} 
\end{theorem}

\begin{proof}
By (\ref{eq:lbb1}), we have      
\begin{equation}\label{eq:pf6}
\sn{0}{r'}{p - p^\epsilon} \le C \sup_{\bv \in \bW^{1,r}_0(\Omega)}
\frac{b( p- p^{\epsilon}, \bv) }{\sn{1}{r}{\bv}}.  \end{equation}
Subtracting (\ref{eq:pf3}) from (\ref{eq:mwf2}) gives
\begin{equation}\label{eq:pf7} b(p-
p^{\epsilon}, \bv) = \langle A(\bu),\bv \rangle -\langle
A(\bu^{\epsilon}),\bv \rangle.  
\end{equation} 
Using (\ref{eq:pf6})
and (\ref{eq:pf7}) we get 
\begin{equation}\label{eq:pf8} 
\sn{0}{r'}{p
- p^\epsilon} \le C \|A(\bu) - A(\bu^{\epsilon})\|_{-1,r'}.
\end{equation}
By (\ref{eq:pf8}), (\ref{eq:vf2}), Lemma \ref{thm:apb1} and  Theorem \ref{thm:pf1}, we have 
\begin{equation}\label{eq:pf9} 
  \sn{0}{r'}{p -
  p^\epsilon} \le C \sn{1}{r}{\bu - \bu^\epsilon} \left(
  \sn{1}{r}{\bu} + \sn{1}{r}{\bu^\epsilon} \right)^{r-2}\le
  C\sn{1}{r}{\bu - \bu^\epsilon} 
\end{equation}
for $ 2 \le r <\infty$.
Similarly, using  (\ref{eq:pf8}) and (\ref{eq:vf3}) we get for $ 1 < r \le 2$ 
\begin{equation}\label{eq:pf10}
  \sn{0}{r'}{p - p^\epsilon} \le
   C\sn{1}{r}{\bu - \bu^\epsilon}^{r-1}. 
\end{equation} 
Since $(\bu, p)$ solves (\ref{eq:mwf2})
we have $\nabla\cdot \bu = 0$ and hence by (\ref{eq:pf4})
\begin {equation*}
\langle A(\bu) -A(\bu^\epsilon), \bu - \bu^\epsilon\rangle = -b(p-p^\epsilon, 
\bu^\epsilon).
\end{equation*}
Therefore, for $1<r\le 2$, by  (\ref{eq:vf3}), (\ref{eq:pf4}), and the uniform bounds on $\|\bu\|_{1,r}$ and $\|\bu^\epsilon\|_{1,r}$ in Lemma \ref{thm:apb1} and Theorem \ref{thm:pf1} we have
\begin{equation*}
\|\bu-\bu^\epsilon\|^2_{1,r}\le C \langle A(\bu) -A(\bu^\epsilon), 
\bu - \bu^\epsilon\rangle (\|\bu\|_{1,r} + \|\bu^\epsilon\|_{1,r})^{2-r} \le C |b(p-p^\epsilon, \bu^\epsilon)|.
\end{equation*}
Similarly, for $2\le r < \infty$
\begin{equation*}
\|\bu-\bu^\epsilon\|^r_{1,r}\le C \langle A(\bu) -A(\bu^\epsilon), 
\bu - \bu^\epsilon\rangle  \le C |b(p-p^\epsilon, \bu^\epsilon)|.
\end{equation*}
Applying Lemma \ref{lemma:pf2} and the bounds in (\ref{eq:pf9}), (\ref{eq:pf10}) we have for $2\le r < \infty$
\begin{equation}\label{eq:pf14} 
\|\bu -
\bu^{\epsilon}\|_{1,r}^{r-1} \le C\epsilon ^\frac{1}{r-1},
\end{equation} 
and
for $ 1< r \le 2$
\begin{equation}\label{eq:pf15}
\|\bu - \bu^{\epsilon}\|_{1,r}^{3-r}
\le C\epsilon ^\frac{1}{r-1}.  
\end{equation} 
This gives the desired estimates for $\|\bu-\bu^\epsilon\|_{1,r}$.
Using (\ref{eq:pf9}), (\ref{eq:pf10}) and (\ref{eq:pf14}),
(\ref{eq:pf15}) we have the error estimates for \sn{0}{r'}{p -
p^\epsilon}.  \end{proof}
\medskip


\noindent\textbf{Acknowledgements.}\ 
The authors would like to thank an anonymous referee, whose helpful comments 
led to an improved version of our results.


\bibliographystyle{amsplain} \begin{thebibliography}{10}
\bibitem{A} R.~A.  Adams, \textit{Sobolev Spaces}, Academic Press, San
Diego, 1978.

\bibitem{AG1} C.  Amrouche and V.  Girault, \textit{Propri{\'e}t{\'e}s
fonvtionnelles d'operateurs application au prob\-l{\`e}me de Stokes en
dimension quelconque}, Publications du labratoire d'Analye
Num{\'e}rique, no.~90025, Universit{\'e} Pierre et Marie Curie (1990).

\bibitem{AG2} C.  Amrouche and V.  Girault, \textit{Decomposition of vector spaces and application to the Stokes problem in arbitrary dimension}, Czech. Math. J. {\bf 44}, (1994), 109-140.

\bibitem{Ba} I. Bab\v uska, \textit{The finite element method with penalty
}, Tech. Note BN-710, The Institute for Fluid Dynamics and Applied 
Mathematics, University of Maryland, August 1971.

\bibitem{BN} J.  Baranger and K.~Najib, \textit{Analyse numerique des
ecoulements quasi-Newtoniens dont la viscosite obeit a la loi
puissance ou loi de carreau}, Numer.  Math., \textbf{58} (1990),
34--49.

\bibitem{BGN} J. Baranger, P.~Georget, and K. Najib, \textit{Error
estimates for a mixed finite element method for a non-Newtonian
flow},  J.~of non-Newtonian Fluid Mechanics, \textbf{23} (1987),
415--421. 

\bibitem{BL1} J. W. Barrett and W. B. Liu, \textit{Finite element
approximation of the $p$-Laplacian}, Math. Comp., \textbf{61} (1993),
523--537. 

\bibitem{BL2} J. W. Barrett and W. B. Liu, \textit{Finite element
error analysis of a quasi-Newtonian flow obeying the Carreau or power law}, 
Numer. Math., \textbf{64} (1993), 
433--453.

\bibitem{BL3} J.  W.  Barrett and W.  B.  Liu, \textit{Quasi-norm error
bounds for the finite element approximation of a non-Newtonian flow},
Numer.  Math., {\bf 68} (1994), 437--456.

\bibitem{Bh} T. Bhattacharya, \textit{Some Results concerning the
eigenvalue problem for the $p$--La\-placian},
Ann.~Acad.~Sci.~Fenn. Ser.~A, \#I Math, {\bf 14} (1989), 325--343. 

\bibitem{BBM} E.  Bernhardt, G.  Bertacchi, and A.  Moroni,
\textit{Modeling of flow in extruder Dies - fundamentals and
applications of the TMconcept-faBest finite element flow analysis}, in
Applications of computer modeling of extrusion and other continuous
polymer processes, Keith T.  O'Brien ed., Oxford University Press,
Munich, 1992.

\bibitem{BCH} D.  V.  Boger, A.  Cabelli and A.  L.  Halmos,
\textit{The behavior of a power-law fluid flowing through a sudden
expansion}, A.~I.~Ch.~E.  Journal, \textbf{21} (1975), 540--549.

\bibitem{Br} F.~Brezzi, \textit{On the Existence, Uniqueness, and
approximation of saddle-point problems arising from Lagrangian
multipliers} R.A.I.R.O.  Anal.~Numer., \textbf{8} (1974), 129--151.

\bibitem{C} J.  C\'ea, \textit{Optimization - Th\'eory and
Algorithms}, Dunod, Paris, 1971.

\bibitem{Ch} S.  S.  Chow, \textit{Finite element error estimates for
non-linear elliptic equations of monotone type}, Numerische
Mathematik, \textbf{54} (1989), 373--393.

\bibitem {Di} E.  DiBenedetto, \textit{ Degenerate Parabolic
Equations}, Springer Verlag, Berlin, 1994.

\bibitem{D} P. Dr\'abek, A. Kufner, and F.~Nicolosi, \textit{Nonlinear
Elliptic Equations, Singular and Degenerate Case}, University of West
Bohemia in Pilsen, 1996.

\bibitem{EJ} P. England and J. Jackson, \textit{Active deformation of the continents}, Ann.~Rev.~Earth Planet. Sci., \textbf{17} (1989), 197--226.

\bibitem{EM1} P. England and D. McKenzie, \textit{A thin viscous sheet model for continental deformation}, Geophys. J. R. Astr. Soc., \textbf{70} (1983), 295--321.

\bibitem{EM2} P. England and D. McKenzie, \textit{Correction to: A thin viscous
sheet model for continental deformation}, Geophys. J. R. Astr. Soc.,
\textbf {73} (1983), 523--532.

\bibitem{F1} R.~Falk, \textit{A finite element method for the stationary
Stokes equations using trial functions which do not have to satisfy
$\text{div}\,\textbf{v}=0$}, Math. Comp., \textbf{30}  (1976), 698--702.

\bibitem{F2} R. Falk, \textit{An analysis of the penalty method and 
extrapolation for the stationery Stokes problem},  in \textit{Advanced
 Computer Methods for Partial Differential Equations}, edited by
 R. Vichnevetsky, AICA Publication, (1975), 66--69.

\bibitem{FKg} R.~S.~Falk and J.~T.~King , \textit{A penalty and extrapolation
method for the stationery Stokes equation}, SIAM Journal of Numerical Analysis, \textbf{ 13} (1975), 814-829

\bibitem{FK} S.~Fucik and A.~Kufner, \textit{Nonlinear Differential
Equations}, Elsevier, Amsterdam, 1980.

\bibitem{GR} V.  Girault and P.  A.  Raviart, \textit{Finite Element
Metghods for Navier-Stokes Equations: Theory and Algorithms}, Springer
Series in Computational Mathematics {\bf 5}, Springer Verlag, New
York, 1986.
\bibitem{GM} R.~Glowinski and A.~Marroco, \textit{Sur l'approximation
par {\'e}l{\'e}ments finis d'ordre un, et la r{\'e}solution par
penalisation-dualit{\'e}, d'une clase des probl{\'e}mes de Dirichlet
non lin{\'e}aires}, R.A.I.R.O.  Anal.~Numer., \textbf{9} (1975),
41--76.

\bibitem{G} P. Grisvard, \textit{Elliptic Problems in Nonsmooth
Domains}, Pitman, Boston, 1985.

\bibitem{H} F.~K.~Hebeker, \textit{The penalty method applied to the
instationary Stokes equation}, Appl.~Anal., \textbf{14} (1982),  137--154.

\bibitem{HLB} T.  J.  R.  Hughes and W.  Liu, and A.  Brooks,
\textit{Finite element analysis of incompressible viscous flows by the
penalty function formulation}, J.  Comput.  Phys., \textbf{30} (1979),
1--60.

\bibitem{J} W.  Janna, \textit{Design of Fluid Thermal Systems}, PWS
Publishing, Boston, MA, 1993.

\bibitem{La} O.~A. Ladyzhenskaya, {\it The Mathematical Theory of
Viscous Incompressible Flow}. Revised 2nd ed., Gordon and Breach, New
York, 1969. 

\bibitem{LLH} T.~J.  Liu, H.~M.  Lin, C.~N.  Hong, \textit{Comparison
of two numerical methods for the solution of non-Newtonian flow in
ducts}, Int.~J.~Numer.~Methods Fluids, \textbf{8} (1988), 845--861
\bibitem{LWT} T.~J.  Liu, S.  Wen, J.  Tsou, \textit{Three-dimensional
finite element analysis of polymeric fluid flow in an extrusion die.
Part I:  Entrance effect}, Polymer Engineering Science, \textbf{34},
No.~10 (1994), 827--834.

\bibitem{LW2} L.~Lefton and D.~Wei,
\textit{Numerical approximation of the first eigenpair of the $p$-Laplacian
using finite elements and penalty method}, Numer. Funct. Anal. Optimiz.,
\textbf{18(3\&4)} (1997), 389--399.

\bibitem{MNZ} R. Manasevich, F.I. Njoku and F. Zanolin,\textit{Positive solutions for the one-dimensional $p$-Laplacian},  Differential and Integral Equations, \textbf
{8},(1989) 1--13.

\bibitem{OKS} J.  T.  Oden, N.  Kikuchi and Y.  J.  Song,
\textit{Penalty-finite element methods for the analysis of Stokesian
flows}, Comp.  Methods Appl.  Mech.  Engr., \textbf{31} (1982),
207--329.

\bibitem{Od1} J. Oden, \textit{Penalty method and reduced integration for the
analysis of fluids}, Penalty-Finite Element Methods in
Mechanics, ed.~J. Reddy, Amer. Soc. Mech. Eng., New York, 1982,
21--32. 

\bibitem{Od2} J. Oden, \textit{A theory of penalty methods for finite
element approximations of highly nonlinear problems in continuum
mechanics}, Computers and Struct. \textbf{8} (1978),  445-449. 

\bibitem{R} J.  N.  Reddy, \textit{On the accuracy and existence of
solutions to primitive variable models of viscous incompressible
fluids}, Internat.  J.  Engrg.  Sci., \textbf{16} (1978), 921--929.

\bibitem{R2} J.  N.  Reddy, \textit{On the finite element method with penalty for incompressible fluid flow problems}, The Mathematics of Finite Elements and Applications III, J. R. Whiteman ed., Academic Press, London, 1979, 277--235.

\bibitem{RP} J. Reddy and V. Padhye, \textit{A penalty finite element
model for axisymmetric flows of non-Newtonian fluids}, 
Numer. Meth. for Part. Diff. Eqns., \textbf{4} (1988), 33--56. 

\bibitem{SE} L.  J.  Sonder and P.  C.  England, \textit{Vertical
averages of rheology of the continental lithosphere}, Earth Planet.
Sci.  Lett., \textbf{77} (1986), 81--90.

\bibitem{T} R.  Temam, \textit{Navier-Stokes Equations},
North-Holland, Amsterdam, 1984.

\bibitem{W} D.~Wei, \textit{Finite element approximations of solutions for
$p$-harmonic equation with Dirichlet data},
Numer. Funct. Anal. Optimiz., \textbf{10(11 \& 12)} (1989), 1235--1251.

\bibitem{Wi} W.  Wilkinson, \textit{Non-Newtonian Fluids}, Pergamon
Press, New York, 1960.

\bibitem{Z} E.  Zeidler, \textit{Nonlinear Functional Analysis and its
Applications.  Volume III:  Variational Methods and Optimization},
Springer-Verlag, New York, 1985.

\bibitem{Zi} O. Zienkiewicz, \textit{Constrained variational
principles and penalty function methods in finite element analysis}, in
\textit{Lecture Notes in Mathematics}, ed. G. A. Watson, Springer Verlag,
 Berlin, 1974, 207--214.
\end{thebibliography}
\end{document}