\documentclass[reqno]{amsart}
\usepackage{hyperref}

\AtBeginDocument{{\noindent\small
\emph{Electronic Journal of Differential Equations},
Vol. 2009(2009), No. 83, pp. 1--22.\newline
ISSN: 1072-6691. URL: http://ejde.math.txstate.edu or http://ejde.math.unt.edu
\newline ftp ejde.math.txstate.edu}
\thanks{\copyright 2009 Texas State University - San Marcos.}
\vspace{9mm}}

\begin{document}
\title[\hfilneg EJDE-2009/83\hfil Optimal control for a thermistor problem]
{Optimal boundary control for a time dependent thermistor
problem}

\author[V. Hrynkiv \hfil EJDE-2009/83\hfilneg]
{Volodymyr Hrynkiv}

\address{Volodymyr Hrynkiv \newline
Department of Computer and Mathematical Sciences,
University of Houston-Down\-town, One Main St,
Houston, TX 77002, USA}
\email{HrynkivV@uhd.edu}

\thanks{Submitted December 10, 2008. Published July 10, 2009.}
\subjclass[2000]{49J20, 49K20}
\keywords{Optimal control; thermistor problem; elliptic systems}

\begin{abstract}
 An optimal control of a two dimensional time dependent thermistor
 problem is considered. The problem consists of two nonlinear
 partial differential equations coupled with appropriate boundary
 conditions which model the coupling of the thermistor to its
 surroundings. Based on physical considerations, an objective
 functional to be minimized is introduced and the convective
 boundary coefficient is taken to be the control. Existence of
 solutions to the state system and existence of the optimal control
 are proven. To characterize this optimal control, the optimality
 system consisting of the state and adjoint equations is derived.
\end{abstract}

\maketitle
\numberwithin{equation}{section}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\allowdisplaybreaks

\section{Introduction}

Thermistor is a thermally sensitive resistor whose electrical
conductivity changes drastically by orders of magnitude as the
temperature reaches a certain threshold. Thermistors are used as
temperature control elements in a wide variety of military and
industrial equipment ranging from space vehicles to air
conditioning controllers. They are also used in the medical field
for localized and general body temperature measurement, in
meteorology for weather forecasting as well as in chemical
industries as process temperature sensors \cite{Kwo,Mac}.

We consider the two-dimensional time-dependent
thermistor problem
\begin{equation} \label{statet} % \label{state}
\begin{gathered}
 u_t-\Delta u-{\sigma}(u)|\nabla\varphi|^{2} ={0  \quad
\text{in } \Omega\times(0,T),} \\
{\nabla\cdot(\sigma(u)\nabla\varphi) }= 0  \quad \text{in }
\Omega\times(0,T), \\
{\frac{\partial u}{\partial n}+\beta u}= 0  \quad \text{on }
 \Gamma_{N}\times(0,T), \\
{u}=0 \quad \text{on } \Gamma_{D}\times(0,T),\\
{\varphi}={\varphi_0  \quad\text{on }
 \partial\Omega\times(0,T),}\\
 {u} = u_0 \quad\text{on } \Omega\times\{0\},
\end{gathered}
\end{equation}
where $\varphi( x,t)$ is the electric potential, $u(x,t)$ is the
temperature, and $\sigma(u)$ is the electrical conductivity. Here
$n$ denotes the outward unit normal and $\partial/\partial
n=n\cdot\nabla$ is the normal derivative on $\partial\Omega$. The
elliptic-parabolic system \eqref{statet} describes the heating of
a conductor produced by an electric current. It is the consequence
of Ohm's law and Fourier's law
\begin{equation} \label{Ohm}
I=-\sigma(u)\nabla\varphi,\quad Q=-k(u)\nabla u,
\end{equation}
coupled with the conservation laws of energy and current
\begin{gather}
\label{conservation1}
\rho c\frac{\partial u}{\partial t}+\nabla\cdot Q=I\cdot E,\\
\label{conservation2}
 \nabla\cdot I=0,
\end{gather}
where $E$ denotes the electric field, $\rho$ is the density of the
conductor, and $c$ is its heat capacity. Since the nonlinearity
$k(u)$ varies only slightly with $u$, it is of secondary
importance, and therefore we can assume $k(u)=1$. As the density
$\rho$ of the material and its specific heat $c$ typically are
constants, we assume that $\rho c=1$ (see \cite{Ant}). A more
detailed discussion of equations \eqref{statet} can be found in
\cite{Ant,Cim, Fow,How,Shi}. Boundary conditions show how the
thermistor is connected thermally and electrically to its
surroundings. To describe boundary conditions for the temperature
we decompose the boundary $\partial\Omega$ into two disjoint sets
$\bar{\Gamma}_{D}\cup\bar{\Gamma}_N=\partial\Omega$. The Robin
boundary condition for temperature represents the physically
significant case of convection and/or radiation in situations
where $u$ is not too different from the ambient temperature. It is
precisely this boundary condition that will be used as a control.

It has been observed that large temperature gradients may cause
the thermistor to crack. Numerical experiments indicate (see
\cite{Fow, Zho}) that low values of the heat transfer coefficient
$\beta$ lead to small temperature variations. On the other hand,
low values of the heat transfer coefficient result in high
operating temperatures of a thermistor which are also undesirable.
This motivated us to take the heat transfer coefficient as a
control and to consider the optimal control problem of minimizing
the heat transfer coefficient while keeping the operating
temperature of the thermistor reasonably low. These physical
considerations lead us to the following objective functional
$$
J(\beta)=\int_0^T\int_\Omega
u\,dx\,dt+\int_{\Gamma_N\times(0,T)}\beta^2\,ds\,dt.
$$
Denoting the set of admissible controls by
$$
U_M=\{\beta\in
L^\infty(\partial\Omega\times(0,T)):\,0<\lambda\leq\beta\leq M\},
$$
the optimal control problem is
\begin{equation}
\text{Find $\beta^*\in U_M$ such  that
$J(\beta^*)=\min_{\beta\in U_M}J(\beta)$.} \label{OC}
\end{equation}
Henceforth we use the standard notation for Sobolev spaces, we
denote $\|\cdot\|_p=\|\cdot\|_{L^p(\Omega)}$ for each $p\in
[1,\infty]$; other norms will be clearly marked. We also denote
$Q:=\Omega\times (0,T)$ and $\Gamma_{N}^T:=\Gamma_N\times(0,T)$.

Analysis of both the steady-state and time-dependent thermistor
equations with different types of boundary and initial conditions
has received a great deal of attention. See
\cite{Ant,Cim,Cim1,Cim2,Fow,How,Shi,Xu1,Xu2,Yua} for existence of
weak solutions, existence/nonexistence of classical solutions,
uniqueness and related regularity results in different settings
with various assumptions on the coefficients.

We mention in passing that few authors consider a thermistor
problem which consists of two parabolic equations coupled with
some boundary conditions (see \cite{Lee, Rod1}). This fully
parabolic system comes from (\ref{Ohm}), (\ref{conservation1}),
and the time dependent version of charge conservation
(\ref{conservation2}), i.e.,
\begin{equation} \label{conservation3}
\frac{\partial q}{\partial t}+\nabla\cdot I=0.
\end{equation}
 However, the vast majority of the existing literature on
time dependent thermistor deals with the elliptic-parabolic system
\eqref{statet}. This reflects a feasible thermistor scenario where
the time derivative in $(\ref{conservation3})$ is dropped
considering that the relaxation time for the potential is very
small, hence giving the equation (\ref{conservation2}) (see
\cite{Cim4,Shi}). The first optimal control paper on a thermistor
problem is the paper by Lee and Shilkin \cite{Lee} where a fully
parabolic system is considered and the source is taken to be the
control. Optimal control of a nonlocal elliptic-parabolic case is
studied in \cite{Cim3}, where the applied potential is taken to be
the control. Optimal control of a steady state thermistor problem
is considered in \cite{Hry}, where the heat transfer coefficient
is the control.

The motivation for our work is threefold. First, the results from
\cite{Hry} are extended to the time dependent elliptic-parabolic
thermistor system. As it was mentioned before, such an
elliptic-parabolic thermistor model represents a reasonably
realistic situation where we neglect the time derivative in
(\ref{conservation3}) by taking into consideration the difference
in time scale with the thermal processes. One of the technical
difficulties in dealing with such an elliptic-parabolic system
lies in the fact that there is no information on time derivative
of the potential $\varphi$, and as a result one cannot use
directly the standard compactness result from \cite{Sim} to obtain
strong convergence of sequences of potentials in appropriate
spaces. To circumvent this difficulty one needs to incorporate the
structure of the system. This is different from \cite{Lee} in that
a fully parabolic system and a different type of control were
considered there. Secondly, we present a complete and
self-contained derivation of the optimality system, whereas in
\cite{Cim3} the optimality system was given only for the special
case of constant $\sigma(u)$. Thirdly, the choice of the
convective boundary condition as a control for this time dependent
problem seems to be quite appropriate from point of view of
practical applications.

In section \ref{apriori} we derive a priori estimates under the
assumption of small boundary data and prove existence of solutions
to the state system. In section \ref{exist} we prove existence of
an optimal control. The optimality system is derived and an
optimal control is characterized in section \ref{optimality}.

\section{A priori estimates and existence of solutions
to the state system}\label{apriori}

We make the following assumptions:
\begin{itemize}
\item $\Omega\subset{R}^{2}$ is a bounded domain with smooth
boundary;

\item $\sigma(s)\in C^1({R})$, $0<C_1\leq\sigma(s)\leq C_2$ for
all $s\in{R}$;

\item $\sigma(s)$ is Lipschitz: $|\sigma(s_1)-\sigma(s_2)|\leq
K\,|s_1-s_2|$ for all $s_1,s_2\in {R}$;

\item $\varphi{_{_0}}\in
W^{^{1,\infty}}(\partial\Omega\times(0,T))$;

\item Extending $\varphi_0$ to the whole domain
$\Omega\times(0,T)$ and using the same notation, i.e.,
$\varphi_0\in W^{^{1,\infty}}(Q)$, we assume that
$\|\varphi_0\|_{W^{1,\infty}(Q)}$ is sufficiently small.
\end{itemize}

The following lemma guarantees a better regularity for solutions
of equations in divergence form. It is taken from \cite{Xu3}. More
information about this lemma can be found in \cite{Rod}.

\begin{lemma}\label{meyers}
Let $\Omega\subset{R}^n$ be a bounded domain with a smooth boundary.
Assume that $g\in (L^2(\Omega))^n$ and $a\in C(\bar\Omega)$ with
$\min_{\bar\Omega}a>0$. Let $u$ be the weak solution of the
following problem
\begin{gather*}
-\nabla\cdot(a\nabla u)=\nabla\cdot g \quad\text{in }\Omega\\
u=0 \quad\text{on }\partial\Omega.
\end{gather*}
Then for each $p>2$ there exists a positive constant $c^*$ depending
only on $n, \Omega, a$, and $p$ such that if $g\in (L^2(\Omega))^n$
then we have
\[
\| \nabla u\|_p\leq c^* (\|g \|_p+\| \nabla u\|_2).
\]
\end{lemma}

Let $V_D:=\{v\in H^1(\Omega):v=0 \text{ on }\Gamma_D\}$.
By a weak solution to \eqref{statet} we mean a pair
$(u,\varphi)$ such that $u\in L^2(0,T;V_D(\Omega)),
\varphi-\varphi_0\in L^2(0,T;H_0^1(\Omega))$ and
\begin{equation}
\begin{gathered} \label{eq01}
\int_0^T\langle u_t,v\rangle +\int_Q\nabla u\cdot\nabla
v\,dx\,dt+\int_{\Gamma_{N}^T}\beta u
v\,ds\,dt=\int_Q\sigma(u)|\nabla\varphi|^2v\,dx\,dt,
\\
\int_Q\sigma(u)\nabla \varphi\cdot\nabla w\,dx\,dt=0,
\end{gathered}
\end{equation}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and $w\in
L^2(0,T;H^{1}_{0}(\Omega))$, where $\langle \cdot,\cdot\rangle$
denotes duality bracket between $V_D'$ and $V_D$, with
$V_D'$ being the dual of $V_D$.

The quadratic term on the right hand side of the first equation in
(\ref{eq01}) can be dealt with as in \cite{Xu4, Shi}, and we
obtain %for a.e. $t\in (0,T)$
\begin{equation} \label{eq1}
\begin{gathered}
\begin{aligned}
&\int_0^T\langle u_t,v\rangle\,dt+\int_Q\nabla u\cdot\nabla
v\,dx\,dt+\int_{\Gamma_N^T}\beta u v\,ds\,dt\\
&= \int_Q (\varphi_{0}-\varphi)\sigma(u)\nabla\varphi\cdot\nabla
v\,dx\,dt +\int_Q(\sigma(u)\nabla
\varphi\cdot\nabla\varphi_{0})\,v\,dx\,dt,
\end{aligned}\\
\int_Q\sigma(u)\nabla \varphi\cdot\nabla w\,dx\,dt= 0,
\end{gathered}
\end{equation}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and
$w\in L^2(0,T;H^{1}_{0}(\Omega))$. By taking $p=r$ in Lemma
\ref{meyers}, we can say that for each $r>2$, $\varphi(\cdot,t)\in
W^{1,r}(\Omega)$ for a.e. $t\in(0,T)$. Therefore, since
$\nabla\varphi(\cdot,t)\in L^{r}(\Omega)$ and
$\nabla\varphi_0(\cdot,t)\in L^{2}(\Omega)$ a.e. $t\in(0,T)$, it
follows that there exists $s'$ such that
$\nabla\varphi(\cdot,t)\cdot\nabla\varphi_0(\cdot,t)\in
L^{s'}(\Omega)$ and
\begin{equation} \label{conjugate1}
\frac{1}{s'}=\frac{1}{2}+\frac{1}{r}.
\end{equation}
Next, since $\Omega\subset{R}^2$ it follows $v(\cdot,t)\in
H^1(\Omega)\subset\subset L^s(\Omega)$ for $s\in[1,\infty)$,
conjugate of $s'$ in (\ref{conjugate1}):
\begin{eqnarray}\label{conjugate}
\frac{1}{s'}+\frac{1}{s}=1.
\end{eqnarray}
Also, by the weak maximum principle (see \cite[formula (2.14)]{Shi})
\begin{equation} \label{supremum}
\|\varphi\|_{L^\infty(Q)}\leq \sup_{\Gamma_D}|\varphi_0|.
\end{equation}
Note that $r>2$ is chosen from Lemma \ref{meyers}, then $s'$
and $s$ are determined and satisfy $s'<2<s$. First, we show
existence of solutions to the state system. Define
$$
W(0,T):=\{v\in L^2(0,T;V_D): v_t\in L^2(0,T;V_D')\} .
$$

\begin{theorem}\label{existence1}
There exists a weak solution to the state system \eqref{statet}.
\end{theorem}

We show existence of solutions to the state system by using the
Schauder's fixed point theorem (see, for example \cite{Gil}). Let
$\mathcal{R}: W(0,T)\to  W(0,T)$ be the operator defined as
follows $\mathcal{R}:=\mathcal{S}\circ\mathcal{T}:\vartheta\mapsto
u$, with $\mathcal{T}: L^2(0,T;H^{1}(\Omega))\cap
L^\infty(Q)\ni\vartheta\mapsto\varphi\in
L^2(0,T;H^{1}_{0}(\Omega))$ where for a given $\vartheta$, the
function $\varphi$ solves uniquely
\begin{equation} \label{schauder1}
\int_Q\sigma(\vartheta)\nabla \varphi\cdot\nabla w\,dx\,dt= 0,
\end{equation}
for all $w\in L^2(0,T;H^{1}_{0}(\Omega))$, %and a.e. $t\in(0,T)$,
and where $\mathcal{S}$ is defined as follows $\mathcal{S}:
L^2(0,T;H^{1}_{0}(\Omega))\times\break L^2(0,T;H^{1}(\Omega))\cap
L^\infty(Q)\ni(\varphi,\vartheta)\mapsto u\in
L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$, with $u$ being the unique
solution to
\begin{equation} \label{schauder2}
\begin{aligned}
&\int_0^T\langle u_t,v\rangle\,dt+\int_Q\nabla u\cdot\nabla
v\,dx\,dt+\int_{\Gamma_N^T}\beta u v\,ds\,dt\\
&=\int_Q (\varphi_{0}-\varphi)\sigma(\vartheta)\nabla\varphi\cdot\nabla
v\,dx\,dt
+\int_Q(\sigma(\vartheta)\nabla
\varphi\cdot\nabla\varphi_{0})\,v\,dx\,dt,
\end{aligned}
\end{equation}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$. Standard
theory implies that there exists a unique solution to
(\ref{schauder1}), (\ref{schauder2}) which means that the map
$\mathcal{R}$ is well-defined. Before we proceed with the proof of
the theorem, we need to derive a priori estimates.

\begin{lemma}\label{est}
Given $\vartheta\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$, let
$\varphi=\mathcal{T}(\vartheta)$ and $u=S(\varphi,\vartheta)$ be
solutions satisfying (\ref{schauder1}) and (\ref{schauder2}). Then
there exist constants $c_1, c_2, c_3$, and $ c_4$ independent of
$\vartheta$ such that
\begin{gather}
\label{apriori1}\|\varphi \|_{L^2(0,T;H^1(\Omega))}\leq  c_1,\\
\label{apriori2}\|u \|_{L^2(0,T;V_D(\Omega))}\leq  c_2,\\
\label{apriori3}\|u\|_{L^2(0,T;L^2(\Gamma_N))}\leq  c_3,\\
\label{apriori4}\|u_t \|_{L^2(0,T;V_D'(\Omega))}\leq  c_4.
\end{gather}
\end{lemma}

\begin{proof}[Proof of Lemma \ref{est}]
 Taking $w=\varphi-\varphi_0$ in
(\ref{schauder1}) and using properties of $\sigma$, we obtain
(\ref{apriori1}). Taking $v=u$ in (\ref{schauder2}), we have
\begin{align*}
&\int_0^T\langle u_t,u\rangle\,dt+\int_Q|\nabla
u|^2\,dx\,dt+\int_{\Gamma_N^T}\beta u^2 \,ds\,dt\\
&=\int_Q(\varphi_{0}-\varphi)\sigma(\vartheta)\nabla\varphi\cdot\nabla
u\,dx\,dt +\int_Q(\sigma(\vartheta)\nabla
\varphi\cdot\nabla\varphi_{0})\,u\,dx\,dt.
\end{align*}
Using properties of $\beta$ we obtain
\begin{align*}
&\int_{\Omega\times\{T\}}\frac{u^2}{2}-\int_{\Omega\times\{0\}}\frac{u^2}{2}
+\int_0^T\int_\Omega|\nabla
u|^2\,dx\,dt+\lambda\int_0^T\int_{\Gamma_N}u^2 \,ds\,dt\\
&\leq\int_0^T\int_\Omega(\varphi_{0}-\varphi)\sigma(\vartheta)\nabla\varphi\cdot\nabla
u\,dx\,dt +\int_0^T\int_\Omega(\sigma(\vartheta)\nabla
\varphi\cdot\nabla\varphi_{0})\,u\,dx\,dt.
\end{align*}
We can write
\begin{align*}
&\int_{\Omega\times\{T\}}\frac{u^2}{2}+\int_0^T\int_\Omega|\nabla
u|^2\,dx\,dt+\lambda\int_0^T\int_{\Gamma_N}u^2 \,ds\,dt \\
&\leq\int_{\Omega}\frac{u_0^2}{2}+C_3\int_0^T\int_\Omega|\nabla\varphi|\cdot|\nabla
u|\,dx\,dt
+C_4\int_0^T\int_\Omega|\nabla\varphi|\cdot |u|\,dx\,dt \\
&\leq C_5+C_6\Big(\int_0^T \{\,\|\nabla\varphi \|_2\cdot\|\nabla u
\|_2+\|\nabla\varphi \|_2\cdot\|u \|_2 \}\,dt\Big)\\
& \leq C_5+C_8\int_0^T\|\nabla\varphi\|_2^2\,dt
 +\frac{1}{2}\int_0^T\int_{\Omega}|\nabla u|^2\,dx\,dt,
\end{align*}
where we used Poincare inequality. We obtained
\begin{equation} \label{e1}
\begin{aligned}
&\int_{\Omega\times\{T\}}\frac{u^2}{2}+\frac{1}{2}\int_0^T\int_\Omega|\nabla
u|^2\,dx\,dt+\lambda\int_0^T\int_{\Gamma_N}u^2 \,ds\,dt \\
& \leq C_5+C_8\int_0^T\|\nabla\varphi\|_2^2\,dt\leq C_9,
\end{aligned}
\end{equation}
where the last inequality in (\ref{e1}) follows from
(\ref{apriori1}) (also, see the estimate (2.15) on p. 245 of
\cite{Shi}). This implies estimates (\ref{apriori2}) and
(\ref{apriori3}). Using the PDE and the previous estimates one can
show (\ref{apriori4}). {\it This ends the proof of Lemma
\ref{est}.}

We continue the proof of Theorem \ref{existence1}. Let
$W_0:=\{v\in W(0,T): \|v \|_{L^2(0,T;V_D)}\leq c_2, \|v
\|_{L^2(0,T;V_D')}\leq c_4, v(x,0)=u_0(x,0) \}$. We show
that the operator $\mathcal{R}: W_0\to  W_0$ is continuous
with respect to weak convergence in $W(0,T)$. Then, since $W_0$ is
weakly compact, it will follow that $\mathcal{R}$ has a fixed
point in $W_0$. Let $\{\vartheta_j\}\subset W_0$ be a sequence
such that $\vartheta_j\stackrel{w}\rightharpoonup \vartheta $ in
$W(0,T)$, and let $u_j=u(\vartheta_j)$ and
$\varphi_j=\varphi(\vartheta_j)$ be the corresponding sequences of
solutions. A priori estimates imply that on a subsequence
\begin{gather}
\vartheta_j \stackrel{w}\rightharpoonup  \vartheta \quad\text{in }
L^2(0,T;V_D(\Omega)),\label{c1}\\
 \frac{\partial\vartheta_j}{\partial
t} \stackrel{w}\rightharpoonup \frac{\partial\vartheta}{\partial
t} \quad\text{in }
L^2(0,T;V_D'(\Omega)),\label{c2}\\
u_j \stackrel{w}\rightharpoonup u \quad\text{in }
L^2(0,T;V_D(\Omega)),\label{c3}\\
\frac{\partial u_j}{\partial
t} \stackrel{w}\rightharpoonup \frac{\partial u}{\partial t}
\quad\text{in }L^2(0,T;V_D'(\Omega)),\label{c4}\\
\varphi_j \stackrel{w}\rightharpoonup \varphi \quad\text{in }
L^2(0,T;H^1(\Omega)),\label{c5}\\
u_j \stackrel{w}\rightharpoonup u \quad\text{in }
L^2(0,T;L^2(\Gamma_N))\label{c6}.
\end{gather}
Note that by the standard compactness result (see \cite{Sim}),
convergence in (\ref{c1}), (\ref{c2}), (\ref{c3}), and
(\ref{c4}), imply that on a subsequence, not relabeled, we have
\begin{gather}
\vartheta_j \stackrel{s}\to   \vartheta \quad\text{in }
L^2(Q),\label{c11}\\
u_j \stackrel{s}\to   u \quad\text{in } L^2(Q),\label{c21} \\
\sigma(\vartheta_j) \stackrel{s}\to  \sigma(\vartheta)
\quad\text{in } L^2(Q).\label{c31}
\end{gather}
Moreover, it can be shown that
\begin{gather}
\varphi_j \stackrel{s}\to   \varphi \quad\text{in }
L^2(Q),\label{c41}\\
\nabla\varphi_j \stackrel{s}\to   \nabla\varphi \quad\text{in }
L^2(Q).\label{c42}
\end{gather}
Note that (\ref{c41}) and (\ref{c42}) can be obtained by
incorporating the structure of the PDE system as we do not have
information on $\varphi_t$.  For the proof of (\ref{c41}) and
(\ref{c42}) the reader is referred to \cite{Ant}, p. 1132-1133.

We have
\begin{equation}  \label{eqq1} %\label{s1}
\begin{gathered}
\begin{aligned}
&\int_0^T\langle (u_j)_t,v\rangle\,dt+\int_Q\nabla u_j\cdot\nabla
v\,dx\,dt+\int_{\Gamma_N^T}\beta u_j v\,ds\,dt\\
&=\int_Q (\varphi_{0}-\varphi_j)\sigma(\vartheta_j)
\nabla\varphi_j\cdot\nabla v\,dx\,dt
+\int_Q(\sigma(\vartheta_j)\nabla
\varphi_j\cdot\nabla\varphi_{0})\,v\,dx\,dt,
\end{aligned}\\
\int_Q\sigma(\vartheta_j)\nabla \varphi_j\cdot\nabla w\,dx\,dt=0,
\end{gathered}
\end{equation}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and
$w\in L^2(0,T;H^{1}_{0}(\Omega))$. It is immediate that
\begin{gather*}
\int_0^T\langle u_t^j,v\rangle\,dt \to  \int_0^T\langle u_t,v\rangle\,dt,\\
\int_Q\nabla u_j\cdot\nabla v\,dx\,dt\to  \int_Q\nabla
u\cdot\nabla v\,dx\,dt,\\
\int_{\Gamma_N^T}\beta u_j v\,ds\,dt \to
\int_{\Gamma_N^T}\beta u v\,ds\,dt\quad\text{as } j\to \infty.
\end{gather*}
We show
\begin{equation} \label{t1}
\int_Q
(\varphi_{0}-\varphi_j)\sigma(\vartheta_j)\nabla\varphi_j\cdot\nabla
v\,dx\,dt\to  \int_Q
(\varphi_{0}-\varphi)\sigma(\vartheta)\nabla\varphi\cdot\nabla
v\,dx\,dt.
\end{equation}
We estimate the difference between the sequence terms and the
limit
\begin{align*}
&\big|\int_Q
(\varphi_{0}-\varphi_j)\sigma(\vartheta_j)\nabla\varphi_j\cdot\nabla
v\,dx\,dt-\int_Q
(\varphi_{0}-\varphi)\sigma(\vartheta)\nabla\varphi\cdot\nabla
v\,dx\,dt \big|\\
&\leq \big|\int_Q
\sigma(\vartheta_j)(\varphi_{0}-\varphi_j)\nabla\varphi_j\cdot\nabla
v-\sigma(\vartheta)(\varphi_{0}-\varphi_j)\nabla\varphi\cdot\nabla
v\,dx\,dt\big|\\
&\quad+\big|\int_Q
\sigma(\vartheta)(\varphi_{0}-\varphi_j)\nabla\varphi\cdot\nabla
v-\sigma(\vartheta)(\varphi_{0}-\varphi)\nabla\varphi\cdot\nabla
v\,dx\,dt\big|
:=A+B,
\end{align*}
where $ B:=\big|\int_Q
\sigma(\vartheta)(\varphi-\varphi_j)\nabla\varphi\cdot\nabla v
\,dx\,dt\big|\to  0 $ by the weak convergence
$\varphi_j\stackrel{w}\rightharpoonup \varphi$ in
$L^2(0,T;L^s(\Omega))$ (by Lemma \ref{meyers} and the fact that
$\sigma(\vartheta)\nabla\varphi\cdot\nabla v\in
L^2(0,T;L^{s'}(\Omega))$). For term $A$, we have
\begin{align*}
A:&=\big|\int_Q
(\varphi_{0}-\varphi_j)(\sigma(\vartheta_j)-\sigma(\vartheta))\nabla\varphi_j\cdot\nabla
v\,dx\,dt\big|\\
&\quad+ \big|\int_Q
(\varphi_{0}-\varphi_j)\sigma(\vartheta)(\nabla\varphi_j-\nabla\varphi)\cdot\nabla
v\,dx\,dt\big|=A_1+A_2.
\end{align*}
Note that
\begin{align*}
A_1:&= \big|\int_Q
(\varphi_{0}-\varphi_j)(\sigma(\vartheta_j)-\sigma(\vartheta))\nabla\varphi_j\cdot\nabla
v\,dx\,dt\big|\\
&\leq 2\tilde{M}K\int_0^T\int_{\Omega}|\vartheta_j
  -\vartheta|\cdot|\nabla\varphi_j|\cdot|\nabla v|\,dx\,dt  \\
&\leq  C\int_0^T\|\vartheta_j-\vartheta \|_s
\|\nabla\varphi_j \|_r \|\nabla v \|_2\, dt\\
&\leq C\sup_{t}\|\nabla\varphi_j
\|_r\int_0^T\|\vartheta_j-\vartheta \|_s \|\nabla v \|_2 \,
dt\\
&\leq C \|\vartheta_j-\vartheta \|_{L^2(0,T;L^s(\Omega))}\|\nabla v
\|_{L^2(Q)}\to  0
\end{align*}
by the Simon compactness result \cite{Sim}, i.e.,
$W(0,T)\subset\subset L^2(0,T;L^s(\Omega))$. Also, note
\begin{equation}\label{hlp_paper}
\sup_{t}\|\nabla\varphi_j \|_r\leq C_{\varphi_0},
\end{equation}
by \cite[formula (2.9)]{Hry}, and $C_{\varphi_0}$ here denotes a
constant which depends on the boundary data $\varphi_0$. Hence on
a subsequence $\vartheta_j \stackrel{s}\to \vartheta$ in
$L^2(0,T;L^s(\Omega))$. In the above, $C$ is a generic constant
and $\tilde{M}:=\sup_{Q}|\varphi_0|$. For $A_2$, we write
\begin{align*}
A_2&\leq \big|\int_Q
(\varphi_{0}-\varphi)\sigma(\vartheta)(\nabla\varphi_j-\nabla\varphi)\cdot\nabla
v\,dx\,dt\big|\\
&\quad +\big|\int_Q
(\varphi-\varphi_j)\sigma(\vartheta)(\nabla\varphi_j-\nabla\varphi)\cdot\nabla
v\,dx\,dt\big|\to  0
\end{align*}
by (\ref{c42}). Similarly, it can be shown that
\begin{gather*}
\int_Q \sigma(\vartheta_j)(\nabla\varphi_j\cdot\nabla\varphi_0)
v\,dx\,dt \to  \int_Q
\sigma(\vartheta)(\nabla\varphi\cdot\nabla\varphi_0) v\,dx\,dt,\\
\int_Q \sigma(\vartheta_j)\nabla\varphi_j\cdot\nabla
w\,dx\,dt \to  \int_Q
\sigma(\vartheta)\nabla\varphi\cdot\nabla w\,dx\,dt.
\end{gather*}
Letting  $j\to  \infty$ in \eqref{eqq1}, we conclude by the
Schauder theorem that $\mathcal{R}$ has a fixed point in $W_0$ and
hence $(u,\varphi)$ with $\varphi=\mathcal{T}(u)$ solves
\eqref{statet}.
\end{proof}

\section{Existence of an optimal control}\label{exist}

Now we proceed to the proof of existence of an optimal control.

\begin{theorem}\label{existencet}
There exists a solution to the optimal control problem (\ref{OC}).
\end{theorem}

\begin{proof}
Let $\{\beta_n\}_{n=1}^{\infty}\subset U_M$ be a minimizing
sequence,
$$
\lim_{n\to \infty}J(\beta_n)=\inf_{\beta\in U_M}J(\beta).
$$
 Let $u_n=u(\beta_n)$ and
$\varphi_n=\varphi(\beta_n)$ be the corresponding solutions to
\begin{equation} \label{ss2}
\begin{gathered}
\begin{aligned}
&\int_0^T\langle u_t^n,v\rangle\,dt+\int_Q\nabla u_n\nabla
v\,dx\,dt+\int_{\Gamma_N^T}\beta_n u_n v\,ds\,dt \\
&= \int_Q(\varphi_{0}-\varphi_n)\sigma(u_n)\nabla\varphi_n\nabla
v\,dx\,dt
+\int_Q(\sigma(u_n)\nabla
\varphi_n\cdot\nabla\varphi_{0})\,v\,dx\,dt,
\end{aligned}\\
\int_Q\sigma(u_n)\nabla \varphi_n\cdot\nabla w\,dx\,dt=
0,
\end{gathered}
\end{equation}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and $w\in
L^2(0,T;H^{1}_{0}(\Omega))$. Note that the existence of an optimal
control can be derived from the proof of Theorem \ref{existence1}
and the estimates (\ref{apriori1}), (\ref{apriori2}),
(\ref{apriori3}), and (\ref{apriori4}). In passing to the limit as
$n\to \infty$ in (\ref{ss2}), we only need to check the
term with $\beta_n$. We have (in addition to convergences from the
previous section):
\begin{gather*}
\beta_n \stackrel{w*}\rightharpoonup \beta^* \quad\text{in }
L^\infty(\partial\Omega\times(0,T)),\\
u_n \stackrel{s}\to  u^* \quad\text{in }L^2(0,T;L^2(\partial\Omega)).
\end{gather*}
This implies
$$
 \int_{\Gamma_N^T}\beta_n u_n v\,ds\to
\int_{\Gamma_N^T}\beta^* u^* v\,ds\quad\text{as}\ n\to \infty.
$$
Letting $n\to \infty$ in (\ref{ss2}), we obtain that
$(u^*,\varphi^*)$ is a weak solution associated with $\beta^*$,
i.e., $u^*=u(\beta^*)$ and $\varphi^*=\varphi(\beta^*)$. Using weak
lower semicontinuity of $J(\beta)$ with respect to $L^2$ norm one
can show that the infimum is achieved at $\beta^*$.
\end{proof}

\section{Derivation of the optimality system}\label{optimality}

To characterize an optimal control, we need to derive an
optimality system which consists of the original state system
coupled with an adjoint system. To obtain the necessary conditions
for the optimality system, we differentiate the objective
functional with respect to the control.  Since the objective
functional also depends on $u$ which depends on the control
$\beta$ and is coupled to $\varphi$ through the PDE \eqref{statet},
we will need to differentiate $u$ and $\varphi$ with respect to
control $\beta$.

\begin{theorem}[Sensitivities] \label{sensitivity}
 If the boundary data $\varphi_{_0}$ are
sufficiently small; i.e., if $\|\varphi_0\|_{W^{1,\infty}(Q)}$ is
small enough, then the mapping $\beta\mapsto(u,\varphi)$ is
differentiable in the following sense:
\begin{equation} \label{psionetwo}
\begin{gathered}
\frac{u(\beta+\varepsilon\ell)-u(\beta)}{\varepsilon} \stackrel{w}
\rightharpoonup \psi_1\quad\text{in } L^2(0,T;H^1_0(\Omega)),
\\
\frac{\varphi(\beta+\varepsilon\ell)-\varphi(\beta)}{\varepsilon}
 \stackrel{w}\rightharpoonup \psi_2\quad\text{in }
L^2(0,T;H^1_0(\Omega))\quad\text{as } \varepsilon\to  0
\end{gathered}
\end{equation}
for any $\beta\in U_M$ and $\ell\in L^\infty(\partial\Omega\times(0,T))$
such that
$(\beta+\varepsilon\ell)\in U_M$ for small $\varepsilon$.
Moreover, the sensitivities, $\psi_1\in L^2(0,T;H^1_0(\Omega))$
and $\psi_2\in L^2(0,T;H^1_0(\Omega))$, satisfy
\begin{equation} \label{sensitivityeqn}
\begin{gathered}
-(\psi_1)_t+\Delta\psi_1+\sigma'(u)|\nabla\varphi|^2\psi_1+2\sigma(u)
\nabla\varphi\cdot\nabla\psi_2
= 0\quad\text{in }\Omega\times(0,T),
\\
\nabla\cdot[\sigma'(u)\psi_1\nabla\varphi+\sigma(u)\nabla\psi_2]=0
\quad\text{in }\Omega\times(0,T),
\\
\frac{\partial\psi_1}{\partial n}+\beta\psi_1+\ell u = 0\quad
\text{on } \Gamma_N\times(0,T),\\
\psi_1 = 0\quad\text{on }\Gamma_D\times(0,T),\\
\psi_2 = 0\quad\text{on }\partial\Omega\times(0,T), \\
\psi_1  = 0\quad\text{on }\Omega\times\{0\}.
\end{gathered}
\end{equation}
\end{theorem}

\begin{proof}
Recall that we denoted: $u=u(\beta)$ and $\varphi=\varphi(\beta)$.
Now we also denote $u^\varepsilon=u(\beta^\varepsilon)$,
$\varphi^\varepsilon=\varphi(\beta^\varepsilon)$, where
$\beta^\varepsilon:=\beta+\varepsilon\ell$. The
weak formulation for $(u^\varepsilon,\varphi^\varepsilon)$ is
\begin{equation}  \label{eps2} %\label{eps1}
\begin{gathered}
\begin{aligned}
&\int_0^T\langle u_t^\varepsilon,v\rangle\,dt+\int_Q\nabla
u^\varepsilon\nabla v\,dx\,dt+\int_{\Gamma_N^T}\beta^\varepsilon
u^\varepsilon v\,ds\,dt\\
&= \int_Q (\varphi_{0}-\varphi^\varepsilon)\sigma(u^\varepsilon)
\nabla\varphi^\varepsilon\nabla v\,dx\,dt
+\int_Q(\sigma(u^\varepsilon)\nabla
\varphi^\varepsilon\cdot\nabla\varphi_{0})\,v\,dx\,dt,
\end{aligned}\\
\int_Q\sigma(u^\varepsilon)\nabla \varphi^\varepsilon\cdot\nabla
w\,dx\,dt= 0,
\end{gathered}
\end{equation}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and
$w\in L^2(0,T;H^{1}_{0}(\Omega))$. Similarly, for $(u,\varphi)$
\begin{equation}\label{eps4} %\label{eps3}
\begin{gathered}
\begin{aligned}
&\int_0^T\langle u_t,\tilde
 v\rangle\,dt+\int_Q\nabla u\nabla \tilde
v\,dx\,dt+\int_{\Gamma_N^T}\beta u \tilde v\,ds\,dt\\
&=\int_Q (\varphi_{0}-\varphi)\sigma(u)\nabla\varphi\nabla
\tilde v\,dx\,dt
+\int_Q(\sigma(u)\nabla
\varphi\cdot\nabla\varphi_{0})\,\tilde v\,dx\,dt,
\end{aligned}\\
\int_Q\sigma(u)\nabla \varphi\cdot\nabla \tilde w\,dx\,dt=
0,
\end{gathered}
\end{equation}
for all $\tilde v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and
$\tilde w\in L^2(0,T;H^{1}_{0}(\Omega))$. Take test functions
$(u^\varepsilon-u)/{\varepsilon}$ and
$(\varphi^\varepsilon-\varphi)/{\varepsilon}$, subtract the
corresponding equations, and divide by $\varepsilon$:
\begin{equation} \label{sensitivdifference}
\begin{gathered}
\begin{aligned}
&\int_0^T\big\langle\frac{u_t^\varepsilon-u_t}{\varepsilon},
 \frac{u^\varepsilon-u}{\varepsilon}\big\rangle\,dt
+\int_Q\nabla \big(\frac{u^\varepsilon-u}{\varepsilon}\big)\nabla
\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\,dx\,dt\\
&+\int_{\Gamma_N^T}
\beta\big(\frac{u^\varepsilon-u}{\varepsilon}\big)
\big(\frac{u^\varepsilon-u}
{\varepsilon}\big)\,ds\,dt\\
&=-\int_{\Gamma_N^T}\ell u^\varepsilon
\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\,ds\,dt\\
&\quad +\frac{1}{\varepsilon}\int_Q
\Big[(\varphi_{0}-\varphi^\varepsilon) \sigma(u^\varepsilon)
\nabla\varphi^\varepsilon-(\varphi_{0}-\varphi) \sigma(u)
\nabla\varphi\Big]\cdot\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}
\big)\,dx\,dt\\
&\quad +\frac{1}{\varepsilon}\int_Q\Big[\sigma(u^\varepsilon)\nabla
\varphi^\varepsilon-\sigma(u)\nabla
\varphi\Big]\cdot\nabla\varphi_{0}
\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\,dx\,dt,
\end{aligned}\\
 \frac{1}{\varepsilon}\int_\Omega\Big[\sigma(u^\varepsilon)\nabla
\varphi^\varepsilon\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)
-\sigma(u)\nabla\varphi\cdot\nabla\big(\frac{\varphi^\varepsilon
-\varphi}{\varepsilon}
\big)\Big]\,dx\,dt=0.
\end{gathered}
\end{equation}
We present the detailed derivation of the $L^2(0,T;H^1(\Omega))$
estimate for $(\varphi^\varepsilon-\varphi)/\varepsilon$. Since
$(\varphi^\varepsilon-\varphi)/\varepsilon\in
L^2(0,T;H_0^1(\Omega))$ it follows from Poincar\'{e}'s inequality
that it is enough to derive a bound on
$\|\nabla(\varphi^\varepsilon-\varphi)/\varepsilon\|_{L^2(Q)}$.

 From the second equation in (\ref{sensitivdifference}) we obtain
\begin{equation}\label{phiepsilon}
\int_Q\sigma(u^\varepsilon)\nabla
\big(\frac{\varphi^\varepsilon}{\varepsilon}\big)\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt
=\int_Q\sigma(u)\nabla
\big(\frac{\varphi}{\varepsilon}\big)\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt.
\end{equation}
Taking into account (\ref{phiepsilon}) we can write
\begin{equation} \label{phiestim}
\begin{aligned}
&\int_Q\sigma(u)\big|\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\big|^2\,dx\,dt\\
&=\int_Q\sigma(u)\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt\\
&=\int_Q\sigma(u)\nabla
\big(\frac{\varphi^\varepsilon}{\varepsilon}\big)\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt
-\int_Q\sigma(u)\nabla
\big(\frac{\varphi}{\varepsilon}\big)\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt\\
&=\int_Q\sigma(u)\nabla
\big(\frac{\varphi^\varepsilon}{\varepsilon}\big)\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt-
\int_Q\sigma(u^\varepsilon)\nabla\big(\frac{\varphi^\varepsilon}
{\varepsilon}\big)\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt\\
&=\int_Q\big(\frac{\sigma(u)-\sigma(u^\varepsilon)}{\varepsilon}\big)
\nabla \varphi^\varepsilon\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt.
\end{aligned}
\end{equation}
Using (\ref{phiepsilon}) and (\ref{phiestim}) we obtain
\begin{align*}
&C_1\int_0^T\int_\Omega\big|\nabla\big(\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big)\big|^2dx\,dt\\
&\leq \int_0^T\int_\Omega\sigma(u)\big|\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\big|^2\,dx\,dt\\
&=\int_Q\big(\frac{\sigma(u)-\sigma(u^\varepsilon)}{\varepsilon}\big)\nabla
\varphi^\varepsilon\cdot\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\,dx\,dt\\
&\leq K\int_0^T\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_s\cdot\|\nabla
\varphi^\varepsilon\|_r\cdot\big\|\nabla
\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\big\|_2\,dt\\
&\leq K\sup_{t}\|\nabla \varphi^\varepsilon\|_r\int_0^T
\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_s\cdot\big\|\nabla
\big(\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big)\big\|_2\,dt\\
&\leq C_{\varphi_0}\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{L^2(0,T;L^s(\Omega))}
\big\|\nabla\big(\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big)\big\|_{L^2(0,T;L^2(\Omega))},
\end{align*}
where we used (\ref{hlp_paper}). We can write
\begin{equation} \label{prelimsensitivity}
\big\|\nabla\big(\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big)\big\|_{L^2(Q)}
\leq C\big\|\frac{u^\varepsilon-u}{\varepsilon}
\big\|_{L^2(0,T;L^s(\Omega))}\leq
\tilde C\big\|\frac{u^\varepsilon-u}{\varepsilon}
\big\|_{L^2(0,T;H_0^1(\Omega))}.
\end{equation}
Since $L^2(0,T;H^1(\Omega))\subset L^2(0,T;L^s(\Omega))$,
it follows by Poincar\'{e}'s inequality
\begin{equation}\label{keysensitestim}
\big\|\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big\|_{L^2(0,T;H^1(\Omega))}\leq
C_{\varphi_0}\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{L^2(0,T;H^1(\Omega))}.
\end{equation}
Now we proceed to the derivation of $H^1$-norm estimate of
$(u^\varepsilon-u)/\varepsilon$ using
(\ref{sensitivdifference}):
\begin{align*}
& \frac{1}{2}\int_{\Omega\times\{T\}}
 \big(\frac{u^\varepsilon-u}{\varepsilon}\big)^2\,dx
+\int_Q\big|\nabla \big(\frac{u^\varepsilon-u}{\varepsilon}\big)
 \big|^2\,dx\,dt
+\lambda\int_{\Gamma_N^T}\big(\frac{u^\varepsilon-u}{\varepsilon}
\big)^2\,ds\,dt \\
&\leq \big|-\int_{\Gamma_N^T}\ell
u^\varepsilon\big(\frac{u^\varepsilon-u}{\varepsilon} \big)\,ds\,dt\\
&\quad +\frac{1}{\varepsilon}\int_Q
\Big[(\varphi_{_0}-\varphi^\varepsilon) \sigma(u^\varepsilon)
\nabla\varphi^\varepsilon-(\varphi_{_0}-\varphi) \sigma(u)
\nabla\varphi\Big]\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}
 \big)\,dx\,dt\\
&\quad +\frac{1}{\varepsilon}\int_Q\Big[\sigma(u^\varepsilon)\nabla
\varphi^\varepsilon-\sigma(u)\nabla
\varphi\Big]\nabla\varphi_{_0}\big(\frac{u^\varepsilon-u}{\varepsilon}
\big)\,dx\,dt\big|\\
&=\big|-\int_{\Gamma_N^T}\ell
u^\varepsilon\big(\frac{u^\varepsilon-u}{\varepsilon}
\big)\,ds\,dt+{\mathcal{C}}+\mathcal{D}\big|.
\end{align*}
We have
\begin{align*}
&\frac{1}{2}\int_{\Omega\times\{T\}}
\big(\frac{u^\varepsilon-u}{\varepsilon}\big)^2\,dx+\int_Q\big|\nabla
\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|^2\,dx\,dt+\lambda\int
_{\Gamma_N^T}\big(\frac{u^\varepsilon-u}{\varepsilon}
\big)^2\,ds\,dt \\
&\leq \int_{\Gamma_N^T}|\ell|\cdot
|u^\varepsilon|\cdot\big|\frac{u^\varepsilon-u}{\varepsilon}
\big|\,ds\,dt +|{\mathcal{C}}|+|\mathcal{D}|\leq M \int_{\Gamma_N^T}
|u^\varepsilon|\cdot\big|\frac{u^\varepsilon-u}{\varepsilon}
\big|\,ds\,dt +|{\mathcal{C}}|+|\mathcal{D}|\\
&\leq  c_7\|u^{\varepsilon}\|_{L^2(0,T;L^2(\Gamma_N))}
\cdot\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{L^2(0,T;L^2(\Gamma_N))}
+|{\mathcal{C}}|+|\mathcal{D}|\\
&\leq  c_8\|u^{\varepsilon}\|_{L^2(0,T;H^1(\Omega))}
\cdot\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{L^2(0,T;L^2(\Gamma_N))}+|{\mathcal{C}}|+|\mathcal{D}|.
\end{align*}
We estimate $\mathcal{C}$ term first
\begin{align*}
|\mathcal{C}|&\leq \int_Q
|\varphi_{_0}-\varphi|\cdot\big|\frac{\sigma(u^\varepsilon)
\nabla\varphi^\varepsilon- \sigma(u)\nabla\varphi}
{\varepsilon}\big|\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}
 \big)\big|\,dx\,dt\\
&\quad+ \int_Q\big|\frac{\varphi-\varphi^\varepsilon}{\varepsilon}\big|\,
 \sigma(u^\varepsilon
)|\nabla\varphi^\varepsilon|\cdot\big|\nabla
 \big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|\,dx\,dt\\
&\leq  2\tilde
M\int_Q\big|\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\big|
\cdot
|\nabla\varphi^\varepsilon|\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|\,dx\,dt\\
&\quad + 2\tilde
M\int_Q\sigma(u)\cdot\big|\nabla\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\big|
\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|\,dx\,dt\\
&\quad + C_2\int_Q\big|\frac{\varphi-\varphi^\varepsilon}{\varepsilon}\big|\cdot
|\nabla\varphi^\varepsilon|\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|\,dx\,dt\\
&\leq
c_2\int_Q\big|\frac{u^\varepsilon-u}{\varepsilon}\big| \cdot
|\nabla\varphi^\varepsilon|\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|\,dx\,dt\\
&\quad +
c_3\int_Q\big|\nabla\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)\big|
\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|\,dx\,dt\\
&\quad + c_4\int_Q\big|\frac{\varphi-\varphi^\varepsilon}{\varepsilon}\big|\cdot
|\nabla\varphi^\varepsilon|\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|\,dx\,dt.
\end{align*}
Note that $c_2, c_3, c_5,$ and $c_6$ depend on the data $\varphi_0$.
We continue estimating $|{\mathcal{C}}|$:
\begin{align*}
|{\mathcal{C}}|
&\leq  c_9
 \int_Q\big|\frac{u^\varepsilon-u}{\varepsilon}\big|\cdot\big|
 \nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big| \,dx\,dt
+c_3\int_Q\big|\nabla\big(\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big)\big|\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|
\,dx\,dt\\
&\quad+ c_{10}\int_Q\big|\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big|\cdot\big|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big|
\,dx\,dt\\
&\leq c_9 \int_0^T\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_2\cdot\big\|
 \nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_2\,dt
+ c_3 \int_0^T\big\|\nabla\big(\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big)\big\|_2\\
&\quad\times \big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_2
\,dt
 +c_{10}\int_0^T\big\|\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big\|_2\cdot\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_2
\,dt\\
&\leq c_{11}\int_0^T\big\|\nabla
 \big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_2\cdot
\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_2\,dt+
c_3\int_0^T\big\|\nabla\big(\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big)\big\|_2 \\
&\quad\times \big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_2
\,dt
+ c_{10}\int_0^T\big\|\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big\|_2\cdot\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_2
\,dt\\
&\leq c_{11}\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)
\big\|_{L^2(Q)}\cdot \big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)
\big\|_{L^2(Q)}
 + c_3\big\|\nabla\big(\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big)\big\|_{L^2(Q)}\\
&\quad \times \big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}
+c_{10}\big\|\frac{\varphi^\varepsilon-\varphi}
{\varepsilon}\big\|_{L^2(Q)}\cdot\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}\\
&\leq
c_{11}\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2+
c_{12}\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2+
c_{13}\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2\\
&\leq c_{14}\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2,
\end{align*}
where we used  Poincar\'{e}'s inequality on
$(u^\varepsilon-u)/\varepsilon$ and the Cauchy inequality.
Similarly, for $\mathcal{D}$ we obtain
\begin{align*}
|\mathcal{D}|&\leq
c_5\int_Q\big|\frac{u^\varepsilon-u}{\varepsilon}\big|\cdot|\nabla\varphi^\varepsilon|\cdot\big|
\frac{u^\varepsilon-u}{\varepsilon}\big|\,dx\,dt
+c_6\int_Q\big|\nabla\big(\frac{\varphi^\varepsilon-
\varphi}{\varepsilon}\big)\big|\cdot\big|\frac{u^\varepsilon-u}{\varepsilon}\big|\,dx\,dt\\
&\leq c_{15}\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2.
\end{align*}
Notice that $c_{14}$ and $c_{15}$ depend on $\varphi_0$. Hence, we
obtain
\begin{align*}
& \frac{1}{2}\int_{\Omega\times\{T\}}
\big(\frac{u^\varepsilon-u}{\varepsilon}\big)^2\,dx+
\big\|\nabla
\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2
+\lambda\int_{\Gamma_N^T}\big(\frac{u^\varepsilon-u}{\varepsilon}
\big)^2\,ds\,dt \\
&\leq c_8\|u^{\varepsilon}\|_{L^2(0,T;H^1(\Omega))}
\cdot\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{L^2(0,T;L^2(\Gamma_N))}+
c_{16}\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2\\
&\leq
c_{17}\|u^{\varepsilon}\|_{L^2(0,T;H^1(\Omega))}^2+\frac{\lambda}{2}
\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{L^2(0,T;L^2(\Gamma_N))}^2
+c_{16}\big\|\nabla\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2.
\end{align*}
Observe that $\|u^{\varepsilon}\|_{L^2(0,T;H^1(\Omega))}^2$ is
bounded by (\ref{apriori2}). Take $\varphi_0$ small enough to get
\begin{align*}
\frac{1}{2}\int_{\Omega\times\{T\}}\big(\frac{u^\varepsilon-u}{\varepsilon}\big)^2\,dx+
k\big\|\nabla
\big(\frac{u^\varepsilon-u}{\varepsilon}\big)\big\|_{L^2(Q)}^2+\frac{\lambda}{2}
\big\|\frac{u^\varepsilon-u}{\varepsilon}
\big\|_{L^2(0,T;L^2(\Gamma_N))}^2\leq c_{18},
\end{align*}
where $k:=1-c_{16}>0$. Collecting the estimates and using the pde
\eqref{eps2} to get the estimate of the time derivative quotient,
gives
\begin{gather*}
 \big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{L^2(0,T;H_0^1(\Omega))}
\leq c,\\
 \big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{L^2(0,T;L^2(\Gamma_N))}
\leq c,\\
 \big\|\frac{u_t^\varepsilon-u_t}{\varepsilon}\big\|_{L^2(0,T;H^{-1}(\Omega))}\leq
c,\\
 \big\|\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big\|_{L^2(0,T;H^1(\Omega))}\leq
c,
\end{gather*}
where the generic constant $c>0$ does not depend on $\varepsilon$.
Claim: we can derive a better estimate on
$(\varphi^\varepsilon-\varphi)/\varepsilon$ than we have. This better estimate is needed
to pass to the limit as $\varepsilon\to  0$ in the derivation of the sensitivity equations.
Indeed, we can write for a.e. $t\in (0,T)$
\begin{gather*}
 \nabla\cdot(\sigma(u^\varepsilon)\nabla\varphi^\varepsilon)=0 \quad
 \text{in } \Omega,\\
 \nabla\cdot(\sigma(u)\nabla\varphi)=0 \quad \text{in } \Omega.
\end{gather*}
Subtract these equations as well as appropriate boundary conditions
to obtain
\begin{equation} \label{star}
\begin{gathered}
\nabla\cdot\Big(\sigma(u)\nabla\big(\frac{\varphi^\varepsilon
-\varphi}{\varepsilon}\big)\Big)
= \nabla\cdot\Big[\frac{\sigma(u^\varepsilon)-\sigma(u))}
{\varepsilon}\nabla\varphi^\varepsilon\Big] \quad \text{in } \Omega,
\\
\frac{\varphi^\varepsilon-\varphi}{\varepsilon}=0 \quad \text{on }
\partial\Omega.
\end{gathered}
\end{equation}
By Lemma \ref{meyers}, for a.e. $t\in (0,T)$, we have
$\|\nabla\varphi^\varepsilon \|_{2r}\leq c$, where $r>2$ and
$\varphi^\varepsilon$ solves
\begin{gather*}
\nabla\cdot(\sigma(u^\varepsilon)\nabla\varphi^\varepsilon)
= 0 \quad \text{in } \Omega,\\
\varphi^\varepsilon = \varphi_0 \quad \text{on } \partial\Omega.
\end{gather*}
Also, since $\Omega\subset R^2$ it follows that
$H^1(\Omega)\subset L^q(\Omega)$ for any $q\in [1,\infty)$.
Thus, we can write for a.e. $t\in (0,T)$,
\begin{eqnarray}
\big\|\frac{u^\varepsilon-u}{\varepsilon} \big\|_{2r}\leq
\big\|\frac{u^\varepsilon-u}{\varepsilon} \big\|_{H^1(\Omega)}\leq
c.
\end{eqnarray}
Therefore, returning to (\ref{star}), we obtain
\begin{equation}\label{m1}
\big\|\nabla\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)
\big\|_{r}\leq
c^*\Big(\big\|\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\nabla\varphi^\epsilon
\big\|_r+\big\| \nabla\big(\frac{\varphi^\varepsilon-\varphi
}{\varepsilon}\big)\big\|_2 \Big).
\end{equation}
We can estimate the first term on the right hand side of (\ref{m1}),
\begin{align*}
\big\|\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\nabla\varphi^\epsilon
\big\|_r^r
&= \int_{\Omega}\big|
\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\big|^r\cdot
|\nabla\varphi^\varepsilon|^r\,dx\\
&\leq
C\int_{\Omega}\big|\frac{u^\varepsilon-u}{\varepsilon}\big|^r\cdot|\varphi^\varepsilon|^r\,dx\\
&\leq  C\Big(
\big\|\frac{u^\varepsilon-u}{\varepsilon}\big\|_{2r}^{2r}
+\|\nabla\varphi^\varepsilon
\|_{2r}^{2r}\Big)\leq c.
\end{align*}
Therefore,
\begin{equation} \label{r}
\big\|\nabla\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)
\big\|_{r}\leq  C,\quad
\big\|\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big\|_{r}
\leq C,
\end{equation}
where a generic constant $C>0$ is independent of $\varepsilon$. This
implies that
\begin{gather*}
\varphi^\varepsilon \stackrel{s}\to \varphi \quad \text{in }
L^r(\Omega),\\
\nabla\varphi^\varepsilon \stackrel{s}\to \nabla\varphi \quad \text{in }
 L^r(\Omega)
\end{gather*}
as $\varepsilon\to  0$. Moreover, by reiterating this
argument we can show that for a.e. $t\in (0,T)$
\begin{equation} \label{2r}
\big\|\nabla\big(\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big)
\big\|_{2r}\leq  C,\quad
\big\|\frac{\varphi^\varepsilon-\varphi}{\varepsilon}\big\|_{2r}\leq
C.
\end{equation}
This proves the claim. Now we can proceed to the derivation of
sensitivity equations, starting with
\begin{equation} \label{sen}
\begin{gathered}
\begin{aligned}
&\int_0^T\big\langle\frac{u_t^\varepsilon-u_t}{\varepsilon},
 v\big\rangle\,dt
+\int_Q\nabla \big(\frac{u^\varepsilon-u}{\varepsilon}\big)\nabla
v\,dx\,dt+\int_{\Gamma_N^T}
\beta\big(\frac{u^\varepsilon-u}{\varepsilon}\big)v\,ds\,dt\\
&=-\int_{\Gamma_N^T}\ell u^\varepsilon
v\,ds\,dt
+\frac{1}{\varepsilon}\int_Q
\Big[(\varphi_{0}-\varphi^\varepsilon) \sigma(u^\varepsilon)
\nabla\varphi^\varepsilon-(\varphi_{0}-\varphi) \sigma(u)
\nabla\varphi\Big]\cdot\nabla v\,dx\,dt\\
&\quad +\frac{1}{\varepsilon}\int_Q\Big[\sigma(u^\varepsilon)\nabla
\varphi^\varepsilon-\sigma(u)\nabla
\varphi\Big]\cdot\nabla\varphi_{0}v\,dx\,dt,
\end{aligned}\\
 \frac{1}{\varepsilon}\int_\Omega\Big[\sigma(u^\varepsilon)\nabla
\varphi^\varepsilon -\sigma(u)\nabla\varphi\Big]\cdot\nabla
w\,dx\,dt=0.
\end{gathered}
\end{equation}
The following convergence are immediate
\begin{gather*}
\int_0^T\big\langle\frac{u_t^\varepsilon-u_t}{\varepsilon},v\big\rangle\,dt
\to \int_0^T(\psi_1)_t,v\Big\rangle\,dt,\\
\int_Q\nabla \big(\frac{u^\varepsilon-u}{\varepsilon}\big)\nabla
v\,dx\,dt \to \int_Q \nabla\psi_1\cdot\nabla
v\,dx\,dt,\\
\int_{\Gamma_N^T}
\beta\big(\frac{u^\varepsilon-u}{\varepsilon}\big)v\,ds\,dt \to
\int_{\Gamma_N^T} \beta\psi_1 v\,ds\,dt,\\
\int_{\Gamma_N^T}\ell u^\varepsilon v\,ds\,dt \to
\int_{\Gamma_N^T}\ell u v\,ds\,dt.
\end{gather*}
We write the second term on the right hand side of (\ref{sen}) in
the form
\begin{align*}
& \frac{1}{\varepsilon}\int_Q
\Big[(\varphi_{_0}-\varphi^\varepsilon) \sigma(u^\varepsilon)
\nabla\varphi^\varepsilon\cdot\nabla
v-(\varphi_{_0}-\varphi) \sigma(u) \nabla\varphi\cdot\nabla
v\Big]\,dx\,dt\\
&= \frac{1}{\varepsilon}\int_Q
(\varphi_{_0}-\varphi^\varepsilon)\Big[\sigma(u^\varepsilon)\nabla\varphi^\varepsilon-
\sigma(u)\nabla\varphi\Big]\cdot\nabla
v\,dx\,dt\\
&\quad +\frac{1}{\varepsilon}\int_Q
(\varphi-\varphi^\varepsilon)\sigma(u)\nabla\varphi\cdot\nabla
v\,dx\,dt\\
&= \mathcal{G}+\mathcal{F}.
\end{align*}
As far as the term $\mathcal{F}$ is concerned, we have that
$\sigma(u)\nabla\varphi\cdot\nabla v\in L^2(Q)$ and
$(\varphi-\varphi^\varepsilon)/\varepsilon\stackrel{w}\rightharpoonup
-\psi_2$ in $L^2(Q)$, and therefore
\begin{align*}
\mathcal{F}\to -\int_Q \psi_2\sigma(u)\nabla\varphi\cdot\nabla
v\,dx\,dt
\end{align*}
as $\varepsilon\to  0$. Now we consider terms coming from
$\mathcal{G}$. We write
\begin{align*}
\mathcal{G}
&= \frac{1}{\varepsilon}\int_Q
(\varphi_{_0}-\varphi^\varepsilon)\Big[\sigma(u^\varepsilon)
 \nabla\varphi^\varepsilon-
\sigma(u)\nabla\varphi\Big]\cdot\nabla
v\,dx\,dt\\
&= \frac{1}{\varepsilon}\int_Q(\varphi_{_0}-\varphi^\varepsilon)
\Big[\sigma(u^\varepsilon)-\sigma(u)\Big]
\nabla\varphi^\varepsilon\cdot\nabla v\,dx\,dt\\
&\quad+\frac{1}{\varepsilon}\int_Q(\varphi_{_0}-\varphi^\varepsilon)
\sigma(u)\Big[\nabla\varphi^\varepsilon-\nabla\varphi\Big]\cdot\nabla
v\,dx\,dt
=\mathcal{G}_1+\mathcal{G}_2
\end{align*}
 Using (\ref{2r}) and the fact that
$\nabla(\varphi^\varepsilon-\varphi)/\varepsilon\stackrel{w}\rightharpoonup\nabla\psi_2$
in $L^2(Q)$, one can show
\begin{equation} \label{G2}
\mathcal{G}_2\to \int_Q(\varphi_{0}-\varphi)
 \sigma(u)\nabla\psi_2\cdot\nabla v\,dx\,dt\quad\text{as }
\varepsilon\to  0.
\end{equation}
To illustrate terms from $\mathcal{G}_1$, observe that
\begin{align*}
\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\stackrel{s}
\to \sigma'(u)\psi_1\quad\text{in }L^2(0,T;L^s(\Omega))\quad\text{as }
 \varepsilon\to  0.
\end{align*}
Indeed, since $N=2$ we have $H_0^1(\Omega)\subset\subset
L^q(\Omega)$ for any $q\in [1,\infty)$,
$L^2(0,T;H_0^1(\Omega))\subset L^2(0,T;L^s(\Omega))$, and
\begin{equation} \label{alpha0}
\big\|\frac{u^\varepsilon-u}{\varepsilon}
\big\|_{L^2(0,T;L^s(\Omega))}\leq
c\big\|\frac{u^\varepsilon-u}{\varepsilon}
\big\|_{L^2(0,T;H_0^1(\Omega))}\leq C.
\end{equation}
Therefore,
\begin{equation} \label{alpha}
 \big\|\frac{u_t^\varepsilon-u_t}{\varepsilon}
\big\|_{L^2(0,T;H^{-1}(\Omega))}\leq C,
\end{equation}
where (\ref{alpha}) can be derived from the PDE.
Since $\frac{1}{s}+\frac{1}{r}+\frac{1}{2}=1$ and $r>2$, we have
that $s=\frac{2r}{r-2}>2$.  If we define $\tilde W:=\{v: v\in
L^2(0,T;L^s(\Omega)), v_t\in L^2(0,T;H^{-1}(\Omega)) \}$, then,
because $H_0^1(\Omega)\subset\subset L^s(\Omega)\subset
H^{-1}(\Omega)$, it follows from \cite{Sim} that
$\tilde W\subset\subset L^2(0,T;L^s(\Omega))$. Hence, the estimates
(\ref{alpha}) imply that on a subsequence
\[
 \big\|\frac{u^\varepsilon-u}{\varepsilon}
-\psi_1\big\|_{L^2(0,T;L^s(\Omega))}\to  0\quad\text{as }
\varepsilon\to  0,
\]
and therefore,
\begin{equation} \label{sigma}
\big\|\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}
-\sigma'(u)\psi_1\big\|_{L^2(0,T;L^s(\Omega))}\to  0\quad\text{as }
 \varepsilon\to  0.
\end{equation}
Now, we are ready show
\begin{align*}%\label{G1}
\mathcal{G}_1&\stackrel{\rm def}=\int_Q(\varphi_{0}-\varphi^\varepsilon)
\big(\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\big)
\nabla\varphi^\varepsilon\cdot\nabla
v\,dx\,dt\\
& \to \int_Q(\varphi_{0}-\varphi)
 \sigma'(u)\psi_1\nabla\varphi\cdot\nabla v\,dx\,dt.
\end{align*}
To show this convergence, we estimate
\begin{align*}
& \big|\int_Q(\varphi_{0}-\varphi^\varepsilon)
\big(\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\big)
\nabla\varphi^\varepsilon\cdot\nabla
v\,dx\,dt-\int_Q(\varphi_{0}-\varphi)
\sigma'(u)\psi_1\nabla\varphi\cdot\nabla v\,dx\,dt\big|\\
&\leq \big|\int_Q(\varphi_{0}-\varphi)\Big\{
\big(\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\big)
\nabla\varphi^\varepsilon-\sigma'(u)\psi_1\nabla\varphi\Big\}
\cdot\nabla
v\,dx\,dt\big|\\
&\quad+\big|\int_Q(\varphi-\varphi^\varepsilon)
\big(\frac{\sigma(u^\varepsilon)
-\sigma(u)}{\varepsilon}-\sigma'(u)\psi_1+
\sigma'(u)\psi_1\big)\nabla\varphi^\varepsilon\cdot\nabla
v\,dx\,dt\big|\\
&\leq \big|\int_Q\Big\{(\varphi_{0}-\varphi)
\Big[\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}-\sigma'(u)\psi_1\Big]
\nabla\varphi^\varepsilon\cdot\nabla v\\
&\quad+
(\varphi_{0}-\varphi) \sigma'(u)\psi_1\nabla(\varphi^\varepsilon-\varphi)\cdot\nabla v\Big\}\,dx\,dt\big|\\
&\quad+\big|\int_Q(\varphi-\varphi^\varepsilon)
\big(\frac{\sigma(u^\varepsilon)
-\sigma(u)}{\varepsilon}-\sigma'(u)\psi_1\big)
\nabla\varphi^\varepsilon\cdot\nabla v\\
&\quad +
(\varphi-\varphi^\varepsilon) \sigma'(u)\psi_1\nabla\varphi^\varepsilon\cdot\nabla
v\,dx\,dt\big|\\
&\leq \big|\int_Q(\varphi_{0}-\varphi)\Big\{
\big(\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}\big)
-\sigma'(u)\psi_1\Big\}\nabla\varphi^\varepsilon\cdot\nabla
v\,dx\,dt\big|\\
&\quad+\big|\int_Q(\varphi_{0}-\varphi) \sigma'(u)\psi_1\nabla(\varphi^\varepsilon-\varphi)\cdot\nabla v\,dx\,dt\big|\\
&\quad+\big|\int_Q(\varphi-\varphi^\varepsilon)
\big(\frac{\sigma(u^\varepsilon)
-\sigma(u)}{\varepsilon}-\sigma'(u)\psi_1\big)\nabla\varphi^\varepsilon\cdot\nabla
v \,dx\,dt\big|\\
&\quad+\big|\int_Q(\varphi-\varphi^\varepsilon) \sigma'(u)\psi_1\nabla\varphi^\varepsilon\cdot\nabla
v\,dx\,dt\big|:=I+II+III+IV.
\end{align*}
We deal with the term $I$ first. We have
\begin{align*}
I&\leq  \|\varphi-\varphi_0 \|_{L^\infty(Q)}\int_0^T\int_\Omega\big|
\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}
-\sigma'(u)\psi_1\big|\cdot
|\nabla\varphi^\varepsilon|\cdot|\nabla v|\,dx\,dt\\
&\leq \|\varphi-\varphi_0 \|_{L^\infty(Q)}\int_0^T\big\|
\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}
-\sigma'(u)\psi_1\big\|_s\cdot
\|\nabla\varphi^\varepsilon\|_r\cdot\|\nabla v\|_2\,dt\\
&\leq \|\varphi-\varphi_0 \|_{L^\infty(Q)}\sup_{t}{\|
\nabla\varphi^\varepsilon\|_r}\int_0^T\big\|
\frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}
-\sigma'(u)\psi_1\big\|_s\cdot\|\nabla v\|_2\,dt\\
&\leq C\big\| \frac{\sigma(u^\varepsilon)-\sigma(u)}{\varepsilon}
-\sigma'(u)\psi_1\big\|_{L^2(0,T;L^s(\Omega))}\cdot\|\nabla
v\|_{L^2(Q)}\to  0\quad\text{as}\ \varepsilon\to  0,
\end{align*}
by (\ref{sigma}). Similarly, we obtain
\begin{align*}
II&:=\big|\int_0^T\int_\Omega(\varphi_{0}-\varphi) \sigma'(u)\psi_1
\nabla(\varphi^\varepsilon-\varphi)\cdot\nabla v\,dx\,dt\big|\\
&\leq C\int_0^T\|\psi_1\|_s\cdot\|\nabla(\varphi^\varepsilon-\varphi)\|_r\cdot\|\nabla
v\|_2\,dt\\
&\leq C\|\psi_1\|_{L^2(0,T;L^s(\Omega))}\cdot\|\nabla
v\|_{L^2(Q)}\cdot\sup_{t}{\|\nabla(\varphi^\varepsilon-\varphi)\|_r}\to
0\quad\text{as}\ \varepsilon\to  0,
\end{align*}
where we used (\ref{r}). Analogously, it can be shown that $III,
IV\to  0$ as $\varepsilon\to  0$.

One can show that the third term on the right hand side of
(\ref{sen}) satisfies
\begin{align*}%\label{lastterm}
& \frac{1}{\varepsilon}\int_Q\Big[\sigma(u^\varepsilon)\nabla
\varphi^\varepsilon-\sigma(u)\nabla
\varphi\Big]\cdot\nabla\varphi_{0}v\,dx\,dt\\
&\to \int_Q\sigma'(u)\psi_1\nabla\varphi\cdot\nabla\varphi_{0}v\,dx\,dt
+\int_Q\sigma(u)\nabla\psi_2\cdot\nabla\varphi_{0}v\,dx\,dt\quad
\text{as } \varepsilon\to  0.
\end{align*}

Finally, the equation for $\varphi$ in (\ref{sen}) can be shown to
satisfy
\begin{align*}%\label{sensitivityphi}
& \frac{1}{\varepsilon}\int_Q\Big[\sigma(u^\varepsilon)\nabla
\varphi^\varepsilon\cdot\nabla w -\sigma(u)\nabla\varphi\cdot\nabla
w\Big]\,dx\,dt\\
&\to \int_Q\sigma'(u)\psi_1\nabla \varphi\cdot\nabla
w\,dx\,dt +\int_Q\sigma(u)\nabla\psi_2\cdot\nabla w\,dx\,dt
\quad\text{as } \varepsilon\to  0.
\end{align*}
Letting $\varepsilon\to  0$ in (\ref{sen}), we obtain
\begin{equation} \label{sensitweakformul}
\begin{gathered}
\begin{aligned}
& \int_0^T\big\langle(\psi_1)_t,v\big\rangle\,dt+\int_Q\nabla
\psi_1\nabla v\,dx\,dt+\int_{\Gamma_N^T}
(\beta\psi_1+\ell u)v\,ds\,dt\\
&= \int_Q(\varphi_{0}-\varphi) \sigma'(u)\psi_1
\nabla\varphi\cdot\nabla
v\,dx\,dt+\int_Q(\varphi_{0}-\varphi) \sigma(u)\nabla\psi_2
\cdot\nabla v\,dx\,dt\\
&\quad - \int_Q\psi_2\sigma(u)\nabla\varphi\cdot\nabla v\,dx\,dt+
\int_Q\sigma'(u)\psi_1\nabla\varphi\cdot\nabla\varphi_0 v\,dx\,dt\\
&\quad+\int_Q\sigma(u)\nabla\psi_2\cdot\nabla\varphi_0 v\,dx\,dt,
\end{aligned}\\
 \int_Q(\sigma'(u)\psi_1\nabla\varphi+\sigma(u)\nabla\psi_2)\cdot
\nabla w\,dx\,dt=0
\end{gathered}
\end{equation}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and $w\in
L^2(0,T;H^{1}_{0}(\Omega))$. This leads to the sensitivity system
(\ref{sensitivityeqn}).

The weak formulation for (\ref{sensitivityeqn}) is given by
\begin{gather} \label{weakform1}
\begin{aligned}
& \int_0^T\big\langle-(\psi_1)_t,v\big\rangle\,dt
 +\int_Q\nabla\psi_1\cdot\nabla v\,dx\,dt
 +\int_{\Gamma_N^T}(\beta\psi_1+\ell u) v\,ds\,dt\\
&= \int_Q\sigma'(u)\psi_1|\nabla\varphi|^2v\,dx\,dt+2\int_Q
\sigma(u)\nabla\varphi\cdot\nabla\psi_2 v\,dx\,dt,
\end{aligned}\\
 \int_Q(\sigma'(u)\psi_1\nabla\varphi+\sigma(u)\nabla\psi_2)\cdot\nabla
w\,dx\,dt=0
\label{weakform2}
\end{gather}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and $w\in
L^2(0,T;H^{1}_{0}(\Omega))$ which can be shown to be equivalent to
(\ref{sensitweakformul}).
\end{proof}

To characterize the optimal control, we need to introduce
adjoint functions and the adjoint operator associated with
$\psi_1$ and $\psi_2$. Using $\frac{\partial p}{\partial
n}+\beta^*p=0$ on $\Gamma_N\times(0,T)$, $q=\psi_2=0$ on
$\partial\Omega\times(0,T)$, $p=0$ on $\Gamma_D\times(0,T)$,
$p(x,T)=0$, and the notation $\mathcal{L}$ for the operator in the
sensitivity system gives:
\begin{align*}
& \int_Q \begin{pmatrix} p & q\end{pmatrix}
 \mathcal{L}
\begin{pmatrix}
\psi_1\\
\psi_2 \end{pmatrix}
\,dx\,dt\\
&=\int_Q p\,(-\psi_1)_t\,dx\,dt+\int_Q p\,\Delta\psi_1\,dx\,dt
 + \int_Q p \sigma'(u) |\nabla\varphi|^2\psi_1\,dx\,dt\\
&\quad +2\int_Q p \sigma(u)\nabla\varphi\cdot\nabla\psi_2\,dx\,dt
 + \int_Q q\nabla\cdot[\sigma(u)\nabla\psi_2]\,dx\,dt
 + \int_Q q\nabla\cdot[\sigma'(u)\psi_1\nabla\varphi]\,dx\,dt\\
&=\int_Q \psi_1 p_t\,dx\,dt+\int_Q \psi_1\Delta p\,dx\,dt
 +\int_Q\psi_1\sigma'(u) |\nabla\varphi|^2 p\,dx\,dt\\
&\quad -2\int_Q\psi_2\nabla\cdot[p \sigma(u)\nabla\varphi]\,dx\,dt
 -\int_Q \psi_1\sigma'(u)\nabla\varphi\cdot\nabla q\,dx\,dt\\
&\quad +\int_Q\psi_2\nabla\cdot[\sigma(u)\nabla q]\,dx\,dt\\
&= \int_Q
\begin{pmatrix} \psi_1&\psi_2 \end{pmatrix}
 \mathcal{L}^*
\begin{pmatrix} p\\ q
\end{pmatrix} \,dx\,dt,
\end{align*}
where
\[ %\label{operatorLstar}
\mathcal{L}^* \begin{pmatrix}
p\\
q \end{pmatrix}
= \begin{pmatrix}
 p_t+\Delta p+\sigma'(u)|\nabla\varphi|^2p-\sigma'(u)
\nabla\varphi\cdot\nabla q\\
\nabla\cdot[-2p \sigma(u)\nabla\varphi+\sigma(u)\nabla q]
\end{pmatrix}.
\]
Thus, the adjoint system is given by
\begin{equation} \label{adjoint1}
\begin{gathered}
p_t+\Delta p+\sigma'(u)|\nabla\varphi|^2p-\sigma'(u)
\nabla\varphi\cdot\nabla q = 1\quad\text{in }\Omega\times(0,T),\\
\nabla\cdot[-2p \sigma(u)\nabla\varphi+\sigma(u)\nabla
q] = 0\quad\text{in }\Omega\times(0,T),\\
\frac{\partial p}{\partial n}+\beta^* p = 0\quad\text{on }
\Gamma_N\times(0,T),\\
p= 0\quad\text{on }\Gamma_D\times(0,T),\\
q= 0\quad\text{on }\partial\Omega\times(0,T),\\
p= 0\quad\text{on }\Omega\times\{T\},
\end{gathered}
\end{equation}
where the nonhomogeneous term ``1'' comes from differentiating the
integrand of $J(\beta)$ with respect to the state $u$.

\begin{theorem}\label{adjoint_theorem}
Let $\|\varphi_0 \|_{W^{1,\infty}(Q)}$ be sufficiently small.
Then, given an optimal control $\beta^*\in U_M$ and corresponding
states $u,\varphi$, there exists a solution $(p,q)\in
L^2(0,T;H_0^1(\Omega))\times L^2(0,T;H_0^1(\Omega))$ to the
adjoint system \eqref{adjoint1}. Furthermore, $\beta^*$ can be
explicitly characterized as:
\begin{equation}\label{characterization2}
\beta^*(x,t)=\min\Big(\max\big(-\frac{up}{2},\lambda\big),M\Big).
\end{equation}
\end{theorem}

\begin{proof}
The weak formulation of \eqref{adjoint1} is
\begin{equation} \label{adjointweak}
\begin{gathered}
\int_0^T\langle p_t,v\rangle\,dt
-\int_Q\nabla p\cdot\nabla v\,dx\,dt
-\int_{\Gamma_N\times(0,T)}\beta^* p\,v\,ds\,dt\\
+\int_Q\sigma'(u)|\nabla\varphi|^2p\,v\,dx\,dt
 -\int_Q\sigma'(u)(\nabla\varphi\cdot\nabla q)v\,dx\,dt
=\int_Q v\,dx\,dt,
\\
2\int_Q p \sigma(u)\nabla\varphi\cdot\nabla
w\,dx\,dt-\int_Q\sigma(u)\nabla q\cdot\nabla w\,dx\,dt=0
\end{gathered}
\end{equation}
for all $v\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and $w\in
L^2(0,T;H^{1}_{0}(\Omega))$.

We show that the solution to the adjoint system \eqref{adjoint1}
exists. Define the map $F:L^2(0,T;H^1(\Omega))\times
L^2(0,T;H_0^1(\Omega))\mapsto L^2(0,T;H^1(\Omega))\times
L^2(0,T;H_0^1(\Omega))$ with $F(V,W)=(p,q)$ as follows:
\begin{equation} \label{adjoint2}
\begin{gathered}
p_t+\Delta p+\sigma'(u)|\nabla\varphi|^2V-\sigma'(u)
\nabla\varphi\cdot\nabla W = 1\quad\text{in }\Omega\times(0,T),\\
\nabla\cdot[-2V \sigma(u)\nabla\varphi+\sigma(u)\nabla
q] = 0\quad\text{in }\Omega\times(0,T),\\
\frac{\partial p}{\partial n}+\beta^* p = 0\quad\text{on }
\Gamma_N\times(0,T),\\
p = 0\quad\text{on }\Gamma_D\times(0,T),\\
q = 0\quad\text{on }\partial\Omega\times(0,T),\\
p = 0\quad\text{on }\Omega\times\{T\},
\end{gathered}
\end{equation}
The weak formulation of \eqref{adjoint2} is
\begin{gather*}
\int_0^T\langle p_t,\Theta\rangle\,dt
-\int_Q\nabla p\cdot\nabla\Theta\,dx\,dt
-\int_{\Gamma_N^T}\beta^* p\Theta\,ds\,dt\\
+\int_Q\sigma'(u)|\nabla\varphi|^2V\Theta\,dx\,dt
-\int_Q\sigma'(u) (\nabla\varphi\cdot\nabla W) \Theta\,dx\,dt
=\int_Q\Theta\,dx\,dt,\\
2\int_Q V \sigma(u)\nabla\varphi\cdot\nabla
\Psi\,dx\,dt=\int_Q\sigma(u)\nabla q\cdot\nabla \Psi\,dx\,dt
\end{gather*}
for all $\Theta\in L^2(0,T;H^{1}(\Omega))\cap L^\infty(Q)$ and
$\Psi\in L^2(0,T;H^{1}_{0}(\Omega))$. We use the Banach fixed point
theorem. Let $F(V_1,W_1)=(p_1,q_1)$ and $F(V_2,W_2)=(p_2,q_2)$. We
show the contraction property
\begin{equation} \label{Banach}
\|p_1-p_2\|_{H^1}+\|q_1-q_2\|_{H_0^1}\leq
\delta\Big(\|V_1-V_2\|_{H^1}+\|W_1-W_2\|_{H_0^1}\Big)
\end{equation}
for some $0<\delta<1$. Indeed, for $F(V_1,W_1)=(p_1,q_1)$, we have
\begin{gather*} %\label{adjointweak2}
 \int_0^T\langle (p_1)_t,\Theta\rangle\,dt-\int_Q\nabla
p_1\cdot\nabla\Theta\,dx\,dt-\int_{\Gamma_N^T}\beta^*
p_1 \Theta\,ds\,dt\\
+\int_Q\sigma'(u)|\nabla\varphi|^2V_1 \Theta\,dx\,dt
-\int_Q\sigma'(u)
(\nabla\varphi\cdot\nabla W_1) \Theta\,dx\,dt=\int_Q\Theta\,dx\,dt,\\
2\int_Q V_1 \sigma(u)\nabla\varphi\cdot\nabla
\Psi\,dx\,dt=\int_Q\sigma(u)\nabla q_1\cdot\nabla \Psi\,dx\,dt,
\end{gather*}
and similarly, for $F(V_2,W_2)=(p_2,q_2)$. Let us denote
$\bar p:=p_1-p_2$, $\bar q:=q_1-q_2$,
$\bar W:=W_1-W_2$, and $\bar V:=V_1-V_2$. Take
$\Theta=\bar p$, then
equations for $p_1$ and $p_2$ give
\begin{align*}%\label{pexistence2}
&\int_0^T\langle (\bar p)_t,\bar
p\rangle\,dt+\int_Q\sigma'(u)|\nabla\varphi|^2\bar V\bar
p\,dx\,dt-\int_Q\sigma'(u) \nabla\varphi\cdot\nabla\bar W\bar
p\,dx\,dt\\
&=\int_Q|\nabla\bar p|^2dx\,dt+\int_{\Gamma_N^T}\beta^*
\bar p^2\,ds\,dt.\\
\end{align*}
Thus, we obtain
\begin{align*}%\label{pexistence3}
& \frac{1}{2}\int_{\Omega\times\{0\}}\bar{p}^2\,dx+\int_Q|\nabla\bar
p|^2\,dx\,dt+\lambda\int_{\Gamma_N^T}\bar
p^2\,ds\,dt\\
& \leq\Big|\int_Q\sigma'(u)|\nabla\varphi|^2\,\bar V\bar
p\,dx\,dt\Big|+\Big|\int_Q\sigma'(u) \nabla\varphi\nabla\bar
W\bar p\,dx\,dt\Big|.
\end{align*}
We have
\begin{align*}
\Big|\int_Q\sigma'(u)|\nabla\varphi|^2\,\bar V\bar
p\,dx\,dt\Big|
&\leq C\sup_{t}{\||\nabla\varphi|^2\|_r}\int_0^T\|\bar
V\|_s\cdot\|\bar p\|_2\,dt\\
&  \leq C_{\varphi}\|\bar V\|_{L^2(0,T;L^s(\Omega))}\|\bar
p\|_{L^2(Q)}\\
&\leq C_{\varphi}\|\bar V\|_{L^2(0,T;L^s(\Omega))}\|\bar
p\|_{L^2(0,T;H^1(\Omega))},
\end{align*}
and
\begin{align*}
\Big|\int_Q\sigma'(u) \nabla\varphi\cdot\nabla\bar W\bar p\,dx\,dt\Big|
\leq\tilde{C}_{\varphi}\|\nabla\bar
W\|_{L^2(Q)}\|\bar p\|_{L^2(0,T;H^1(\Omega))}.
\end{align*}
The above three inequalities give
\begin{equation} \label{p}
\|\bar p\|_{L^2(0,T;H^1(\Omega))}\leq C_{\varphi}\|\bar
V\|_{L^2(0,T;H^1(\Omega))}+\tilde C_{\varphi}\|\bar
W\|_{L^2(0,T;H^1(\Omega))},
\end{equation}
where the constants $ C_{\varphi}$ and $\tilde C_{\varphi}$ depend
on the boundary data $\varphi_0$. A similar estimate can be derived
for $\bar q$
\begin{eqnarray}\label{q}
\|\bar q\|_{L^2(0,T;H^1(\Omega))}\leq C_{\varphi}'\|\bar
V\|_{L^2(0,T;H^1(\Omega))}.
\end{eqnarray}
Adding (\ref{p}) and (\ref{q}) we get
\begin{align*}
\|\bar p\|_{L^2(0,T;H^1(\Omega))}+\|\bar
q\|_{L^2(0,T;H^1(\Omega))}\leq c_{\varphi}(\|\bar
V\|_{L^2(0,T;H^1(\Omega))}+\|\bar W\|_{L^2(0,T;H^1(\Omega))}).
\end{align*}
Now, choosing $\varphi_0$ so that
$\|\varphi_0\|_{W^{1,\infty}(Q)}$ is small, we obtain $\delta<1$.
This proves (\ref{Banach}), the contraction property, which gives
the desired fixed point of the map and therefore the existence of
solutions to the adjoint system.

Recall that for variation $\ell\in L^\infty(\Gamma_N^T)$, with
$\beta^*+\varepsilon\ell\in U_M$ for $\varepsilon$ small, the weak
formulation of the sensitivity system (\ref{sensitivityeqn}) is
given by equations (\ref{weakform1}) and (\ref{weakform2}).


Now we are ready to characterize the optimal control. Since the
minimum of $J$ is achieved at $\beta^*$ and for small
$\varepsilon>0$, $\beta^*+\varepsilon\ell\in U_M$, and denoting
$u^\varepsilon=u(\beta^*+\varepsilon\ell),\varphi^\varepsilon=
\varphi(\beta^*+\varepsilon\ell)$, we obtain
\begin{align*}
0&\leq \lim_{\varepsilon\to
0^+}\frac{J(\beta^*+\varepsilon\ell)-J(\beta^*)}{\varepsilon}\\
&= \int_Q\psi_1\,dx\,dt+\int_{\Gamma_N^T}2\beta^*\ell\,ds\,dt\\
&=\int_Q \begin{pmatrix} \psi_1&\psi_2
\end{pmatrix}
\begin{pmatrix}
1\\
0\end{pmatrix}\,dx\,dt
 +\int_{\Gamma_N^T}2\beta^*\ell\,ds\,dt\\
&= -\int_Q\langle(\psi_1)_t,p\rangle\,dt-\int_Q\nabla p\nabla
\psi_1\,dx\,dt-\int_{\Gamma_N^T}\beta^*
p\,\psi_1\,ds\,dt\\
&\quad +\int_Q\psi_1 \sigma'(u)|\nabla\varphi|^2p\,dx\,dt
-\int_Q\psi_1 \sigma'(u)\nabla\varphi\cdot\nabla q\,dx\,dt\\
&\quad +2\int_Q p \sigma(u)\nabla\varphi\cdot\nabla
\psi_2\,dx\,dt-\int_Q\sigma(u)\nabla q\cdot\nabla\psi_2\,dx\,dt
 +\int_{\Gamma_N^T}2\beta^*\ell\,ds\,dt \\
&=\Big\{-\int_Q\nabla
p\cdot\nabla \psi_1\,dx\,dt-\int_{\Gamma_N^T}\beta^*
p\,\psi_1\,ds\,dt\\
&\quad+\int_Q\psi_1 \sigma'(u)|\nabla\varphi|^2p\,dx\,dt
+2\int_Q p \sigma(u)\nabla\varphi\cdot\nabla\psi_2\,dx\,dt \Big\}\\
&\quad+\Big[-\int_Q\psi_1 \sigma'(u)\nabla\varphi\cdot\nabla
q\,dx\,dt
-\int_Q\sigma(u)\nabla q\cdot\nabla\psi_2\,dx\,dt\Big]
+\int_{\Gamma_N^T}2\beta^*\ell\,ds\,dt\\
&= \int_{\Gamma_N^T}\ell\,u
p\,ds\,dt+\int_{\Gamma_N^T}2\beta^*\ell\,ds\,dt,
\end{align*}
where we integrated by parts and used the sensitivity system
(\ref{weakform1}), (\ref{weakform2}) with test functions $p, q$.
Thus, we obtain
\begin{equation}\label{variation}
\int_{\Gamma_N^T}\ell(2\beta^*+up)\,ds\geq 0.
\end{equation}
We use this inequality with three cases to characterize
$\beta^*$:

(i) Take the variation $\ell$ to have support on the set
$\{x\in\Gamma_N^T:\lambda<\beta^*(x,t)<M\}$. The variation
$\ell(x,t)$ can be of any sign, therefore we obtain
$2\beta^*+up=0$ whence $\beta^*=-\frac{up}{2}\,$.

(ii) On the
set $\{(x,t)\in\Gamma_N\times(0,T):\beta^*(x,t)=M\}$, the
variation must satisfy $\ell(x,t)\leq 0$ and therefore we get
$2\beta^*+up\leq 0$ implying
$M=\beta^*(x,t)\leq-\frac{up}{2}$.

(iii) On the set
$\{(x,t)\in\Gamma\times(0,T):\beta^*(x,t)=\lambda\}$, the
variation must satisfy $\ell(x,t)\geq 0$. This implies
$2\beta^*+up\leq 0$ and hence
$\lambda=\beta^*(x)\geq-\frac{up}{2}\,$. Combining cases (i),
(ii), and (iii) gives us the explicit characterization
(\ref{characterization2}) of the optimal control $\beta$.
\end{proof}

Substituting (\ref{characterization2}) into the state system
\eqref{statet} and the adjoint equations \eqref{adjoint1} we
obtain the optimality system:
\begin{gather*} %\label{OS}
{u_t-\Delta u-{\sigma}(u)|\nabla\varphi|^{2} }
= {0  \quad \text{in }\Omega\times(0,T),} \\
{\nabla\cdot(\sigma(u)\nabla\varphi) } =  {0  \quad \text{in }
\Omega\times(0,T), }\\
p_t+\Delta p+\sigma'(u)|\nabla\varphi|^2p-\sigma'(u)
\nabla\varphi\cdot\nabla q = 1\quad\text{in }\Omega\times(0,T),\\
\nabla\cdot[-2p \sigma(u)\nabla\varphi+\sigma(u)\nabla
q] = 0\quad\text{in }\Omega\times(0,T),\\
\frac{\partial p}{\partial
n}+\min\Big(\max\big(-\frac{up}{2},\lambda\big),M\Big) p
= 0\quad\text{on } \Gamma_N\times(0,T),\\[-1.5ex]
{\frac{\partial u}{\partial
n}+\min\Big(\max\big(-\frac{up}{2},\lambda\big),M\Big) u}= {0
\quad \text{on }\Gamma_{N}\times(0,T),} \\
u=p= 0\quad\text{on }\Gamma_D\times(0,T),\\
{u} = {u_0 \quad \text{on } \Omega\times\{0\},}\\
{\varphi}= {\varphi_0  \quad\text{on }
 \partial\Omega\times(0,T),}\\
q = 0\quad\text{on }\partial\Omega\times(0,T),\\
p = 0\quad\text{on }\Omega\times\{T\},
\end{gather*}
Note that existence of solution to the above optimality system
follows from the existence of solution to the state
system \eqref{statet} and Theorem \ref{adjoint_theorem}.


\begin{thebibliography}{10}

\bibitem{Ant} { S.~N. Antontsev and M. Chipot},
\emph{The thermistor problem: existence, smoothness, uniqueness,
and blow up}, SIAM J. Math. Anal., 25 (1994), pp.~1128--1156.

\bibitem{Cim} { G. Cimatti},
\emph{Existence of weak solutions for the nonstationary problem of
the Joule heating of a conductor}, Ann. Mat. Pura Appl. (4), {162}
(1992), pp.~33--42.

\bibitem{Cim1}  { G. Cimatti and G. Prodi},
\emph{Existence results for a nonlinear elliptic system modeling a
temperature dependent electrical resistor}, Ann. Mat. Pura Appl.
(4), {63} (1989), pp.~227--236.

\bibitem{Cim2}  { G. Cimatti},
\emph{A bound for the temperature in the thermistor's problem}, IMA
J. Appl. Math., {40} (1988), pp.~15--22.

\bibitem{Cim3}  { G. Cimatti},
\emph{Optimal control for the thermistor problem with a current
limiting device}, IMA J. Math. Control Inform., {24} (2007),
pp.~339--345.

\bibitem{Cim4}  { G. Cimatti},
\emph{Existence of periodic solutionsfor a thermally dependent
resistor in a R-C circuit}, Int. J. Engng Sci., {vol. 32}, no. 1
(1994), pp.~69--78.

\bibitem{Fow} { A.~C. Fowler, I. Frigaard, and S.~D. Howison},
\emph{Temperature surges in current-limiting circuit devices}, SIAM
J. Appl. Math., {52} (1992), pp.~998--1011.

\bibitem{Gil} { D. Gilbarg and N.~S. Trudinger},
\emph{Elliptic partial differential equations of second order}, 2nd
edition, Springer-Verlag, Berlin, 1983.

\bibitem{How} { S.~D. Howison, J.~F. Rodrigues, and M. Shillor},
\emph{Stationary solutions to the thermistor problem}, J. Math.
Anal. Appl., {174} (1993), pp.~573--588.

\bibitem{Hry} { V. Hrynkiv, S. Lenhart, and V. Protopopescu},
\emph{Optimal control of a convective boundary condition in a
thermistor problem}, SIAM J. Control Optim., {47} (2008),
pp.~20-39

\bibitem{Kwo} { K. Kwok},
\emph{Complete guide to semiconductor devices}, McGraw-Hill, New
York, 1995.

\bibitem{Lee} { H-C. Lee and T. Shilkin},
\emph{Analysis of optimal control problem for the two-dimensional
thermistor system}, SIAM J. Control Optim., {44} (2005),
pp.~268--282.

\bibitem{Mac} { E.~D. Maclen},
\emph{Thermistors}, Electrochemical Publications, Glasgow, 1979.

\bibitem{Rod} { J.~F. Rodriguez},
\emph{Obstacle problems in mathematical physics}, North-Holland Math
Studies \#134, Elsevier Science Publishers B.V., Amsterdam, 1987.

\bibitem{Rod1} { J.~F. Rodriguez},
\emph{A nonlinear parabolic system arising in thermomechanics and
in thermomagnetism}, Math. Models Methods Appl. Sci., {vol. 2},
no. 3, (1992), pp.~271-281.

\bibitem{Sim} { J. Simon},
\emph{Compact sets in the space $L\sp p(0,T;B)$}, Ann. Mat. Pura
Appl. (4) , {146} (1987), pp.~65--96.

\bibitem{Shi} { P. Shi, M. Shillor, and X. Xu},
\emph{Existence of a solution to the Stefan problem with Joule's
heating}, J. Differential Equations, {105} (1993), pp.~239--263.

\bibitem{Xu1} { X. Xu},
\emph{Local regularity theorems for the stationary thermistor
problem}, Proc. Roy. Soc. Edinburgh, {134A} (2004), pp.~773--782.

\bibitem{Xu2} { X. Xu},
\emph{Exponential integrability of temperature in the thermistor
problem}, Differential Integral Equations, {17} (2004),
pp.~571--582.

\bibitem{Xu3} { X. Xu},
\emph{Local and global existence of continuous temperature in the
electrical heating of concuctors}, Houston Journal of Mathematics,
{22} (1996), pp.~435--455.

\bibitem{Xu4} { X. Xu},
\emph{A degenerate Stefan-like problem with Joule's heating}, SIAM J.
Math. Anal., {23} (1992), pp.~1417--1438.

\bibitem{Yua} { G. Yuan},
\emph{Regularity of solutions of the nonstationary thermistor
problem}, Appl. Anal., {53} (1994), pp.~149--164.

\bibitem{Zho} { S. Zhou and D.~R. Westbrook},
\emph{Numerical solution of the thermistor equations}, J. Comput.
Appl. Math., {79} (1997), pp.~101--118.

\end{thebibliography}

\end{document}