%\pdfoutput=1\relax\pdfpagewidth=8.26in\pdfpageheight=11.69in\pdfcompresslevel=9
\documentclass[twoside]{article}
\usepackage{amssymb,amsthm,amsmath} 
\pagestyle{myheadings}
\markboth{ Linearization via the Lie Derivative }
{ Carmen Chicone \& Richard Swanson }
\begin{document}
\title{\vspace{-1in}\parbox{\linewidth}{\footnotesize\noindent
Electron. J. Diff. Eqns., Monograph 02, 2000\newline
http://ejde.math.swt.edu or http://ejde.math.unt.edu
\newline ftp  ejde.math.swt.edu or ejde.math.unt.edu (login: ftp)}
 \vspace{\bigskipamount} \\
%
 Linearization via the Lie Derivative 
% 
\thanks{ {\em Mathematics Subject Classifications:} 34-02, 34C20, 37D05, 37G10.
 \hfil\break\indent 
{\em Key words:} Smooth linearization, Lie derivative, Hartman, Grobman, 
         hyperbolic rest point,  \hfil\break\indent
         fiber contraction, Dorroh smoothing.
 \hfil\break\indent
\copyright 2000 Southwest Texas State University. 
\hfil\break\indent Submitted November 14, 2000. Published December 4, 2000. } } 

\date{}
\author{ Carmen Chicone \& Richard Swanson }
\maketitle
\begin{abstract} 
The standard proof of the Grobman--Hartman linearization theorem for a
flow at a hyperbolic rest point proceeds by first establishing the
analogous result for hyperbolic fixed points of local diffeomorphisms. 
In this exposition we present a simple direct proof that avoids the 
discrete case altogether. We give new proofs for
Hartman's smoothness results: A $\mathcal{C}^2$ flow is $\mathcal{C}^1$ 
linearizable at a hyperbolic sink, and a $\mathcal{C}^2$ flow in the 
plane is$\mathcal{C}^1$ linearizable at a hyperbolic rest point.
Also, we formulate and prove some new results on smooth linearization
for special classes of quasi-linear vector fields where either
the nonlinear part is restricted or additional conditions 
on the spectrum of the linear part (not related to resonance conditions) 
are imposed.
\end{abstract}

\tableofcontents

 \newcommand{\abs}[1]{\lvert#1\rvert}
 \newcommand{\slip}{\operatorname{sLip}}
 \newcommand{\norm}[1]{\lVert#1\rVert}
 \newcommand{\omnorm}[1]{\lVert#1\rVert_{r,\mu}}
 \newcommand{\munorm}[1]{\lVert#1\rVert_\mu}
 \newcommand{\nunorm}[1]{\lVert#1\rVert_\nu}
 \newcommand{\bnorm}[1]{\lVert#1\rVert_{\mathcal{B}}}
 \newcommand{\hnorm}[1]{\lVert#1\rVert_{\mathcal{H}}}
 \newcommand{\range}{\operatorname{Range}}
 \newcommand{\id}{\operatorname{id}}

\newtheorem{thm}{Theorem}[section]
\newtheorem{prop}[thm]{Proposition}
\newtheorem{lemma}[thm]{Lemma}
\newtheorem{hyp}[thm]{Hypothesis}
\newtheorem{defn}[thm]{Definition}
\numberwithin{equation}{section}


\section{Introduction}\label{sec:intro}

This paper is divided into three parts. 
In the first part, a new proof is presented for 
the Grobman--Hartman linearization theorem: A $\mathcal{C}^1$ flow is 
$\mathcal{C}^0$ linearizable at a hyperbolic rest point. 
The second part is a discussion of Hartman's results on
smooth linearization where
smoothness of the linearizing transformation is proved in those cases where
resonance conditions are not required. For example, 
we will use the theory of ordinary differential equations 
to prove two main theorems: 
A $\mathcal{C}^2$ vector field is $\mathcal{C}^1$ linearizable at a hyperbolic sink; and,  
a $\mathcal{C}^2$ vector field in the plane is $\mathcal{C}^1$ linearizable at a 
hyperbolic rest point. 
In the third part, we will study a special
class of vector fields where the smoothness of the linearizing
transformation can be improved. 

The proof of the existence of a smooth linearizing transformation at
a hyperbolic sink is delicate. It uses a version of the
stable manifold theorem, consideration of the gaps in
the spectrum of the linearized vector field at the rest point,
carefully constructed Gronwall type estimates,
and an induction argument. The main lemma is a result about
partial linearization by near-identity transformations that
are continuously differentiable with H\"older derivatives.
The method of the proof requires 
the H\"older exponent of these derivatives
to be less than a
certain number, called the H\"older spectral exponent, that
is defined for linear maps as follows.
Suppose that
$\{-b_1,-b_2,\dots -b_N\}$
is the set of real parts of the
eigenvalues of the linear transformation $A:\mathbb{R}^n\to\mathbb{R}^n$
and
\begin{equation}\label{hse}
-b_N<-b_{N-1}<\dots<-b_1<0.
\end{equation} 
The \emph{H\"older spectral exponent} of $A$ is the number
\[
\frac{b_1(b_{j+1}-b_j)}{b_1(b_{j+1}-b_j)+b_{j+1}b_j}
\] 
where
\[
\frac{b_{j+1}-b_j}{b_{j+1}b_j}
=\min_{i\in\{1,2,\cdots,N-1\}} \frac{b_{i+1}-b_i}{b_{i+1}b_i}
\]
in case $N>1$; it is the number one in case $N=1$.
The H\"older spectral exponent of a linear transformation $B$
whose eigenvalues all have positive real parts is 
the H\"older spectral exponent of $-B$. 

Although a $\mathcal{C}^2$ flow in the plane is always
$\mathcal{C}^1$ linearizable at a hyperbolic rest point, a 
$\mathcal{C}^2$ flow in $\mathbb{R}^3$ may not be $\mathcal{C}^1$ linearizable at
a hyperbolic saddle point. For example, the flow of the system
\begin{equation}\label{notlin0}
\dot x=2 x,\quad \dot y=y+xz, \quad \dot z=-z
\end{equation} 
is not $\mathcal{C}^1$ linearizable at the origin
(see Hartman's example~\eqref{notlin}).
We will prove that a flow in $\mathbb{R}^n$ can be smoothly
linearized at a hyperbolic saddle 
if the spectrum of the corresponding
linearized system at the saddle point
satisfies the following condition 
introduced by Hartman in~\cite{Hartman60M}.
Note first that the real parts of the 
eigenvalues of the system matrix of the linearized 
system at a hyperbolic saddle lie in the union of two intervals,
say $[-a_L,-a_R]$ and $[b_L,b_R]$ where $a_L$, $a_R$, $b_L$, and $b_R$
are all positive real numbers. Thus, the system matrix can be written
as a direct sum $A\oplus B$ where the real parts of the eigenvalues
of $A$ are in  $[-a_L,-a_R]$ and the real parts of the eigenvalues
of $B$ are in $[b_L,b_R]$.  
Let $\mu$ denote the H\"older spectral exponent of $A$ and
$\nu$ the H\"older spectral exponent of $B$. 
If Hartman's spectral condition
\[
a_L-a_R<\mu  b_L,\qquad b_R-b_L<\nu a_R
\]
is satisfied,
then the $C^2$ nonlinear system is $C^1$ linearizable at
the hyperbolic saddle point. 
It follows that, unlike system~\eqref{notlin0}, the flow of 
\[
\dot x=2 x,\quad \dot y=y+xz, \quad \dot z=-4z
\]
is $\mathcal{C}^1$ linearizable at the origin.

In the case of hyperbolic saddles where
one of the H\"older spectral exponents is small, 
Hartman's spectral condition is satisfied only if
the  corresponding real parts of the eigenvalues of
the linear part of the field are contained in an accordingly small interval.
Although the situation cannot be improved for general vector fields,
stronger results (in the spirit of Hartman) are possible for
a restricted class of vector fields. There are at least two ways
to proceed: additional conditions can be imposed on the spectrum of
the linearization, or restrictions can be imposed on the nonlinear
part of the vector field. We will show that a $C^3$ vector field
in ``triangular form'' with a hyperbolic saddle point at 
the origin can be $\mathcal{C}^1$
linearized if Hartman's spectral condition is replaced by
the inequalities $a_L-a_R< b_L$ and $b_R-b_L< a_R$ 
(see Theorem~\ref{th:sltvf}).
Also, we will prove the following result:
Suppose that $X=\mathcal{A}+\mathcal{F}$ is a quasi-linear $\mathcal{C}^3$ vector field with    
a hyperbolic saddle at the origin, the set of negative real parts
of eigenvalues of $\mathcal{A}$ is given by 
$\{-\lambda_1,\ldots,-\lambda_p\}$, the set of positive real
parts is given by $\{\sigma_1,\ldots,\sigma_q\}$, and
\[
-\lambda_1<-\lambda_2<\cdots<-\lambda_p<0
                           <\sigma_q<\sigma_{q-1}<\cdots<\sigma_1.
\]
If $\lambda_{i-1}/\lambda_i>3$, for $i\in \{2,3,\ldots,p\}$, and
$\sigma_{i-1}/\sigma_i>3$, for $i\in \{2,3,\ldots,q\}$, and if
$\lambda_1-\lambda_p< \sigma_q$ and  $\sigma_1-\sigma_q<\lambda_p$,
then $X$ is $\mathcal{C}^1$ linearizable (see Theorem~\ref{th:slugc}).

The important dynamical behavior of a nonlinear system
associated with a hyperbolic sink is local: 
there is an open basin of attraction
and every trajectory that enters this set is asymptotically
attracted to the sink. This behavior is adequately explained by 
using a linearizing homeomorphism, that is, by using
the Grobman--Hartman theorem. On the other hand, the interesting
dynamical behavior associated with saddles is global; for example, 
limit cycles are produced by homoclinic loop bifurcations and 
chaotic invariant sets are found near transversal intersections of
homoclinic manifolds. 
Smooth linearizations at hyperbolic saddle points are used
to analyze these global phenomena. It turns out that
results on the smooth linearization at hyperbolic sinks
are key lemmas required to prove the existence of 
smooth linearization for hyperbolic saddles. In fact, this is the main
reason to study smooth linearization at hyperbolic sinks.

We treat only the case of rest points here, but we expect that
our method can be applied to the problem of linearization
near general invariant manifolds of differential equations. 

Hartman's article~\cite{Hartman60M} is the main reference for 
our results on smoothness of linearizations.
Other primary sources are the 
papers~\cite{Grobman}, \cite{Hartman60}, \cite{Hartman63}, and~\cite{stern}.
For historical remarks, additional references, and later work 
see~\cite{cl}, \cite{cll}, \cite{kp}, \cite{sell}, \cite{stowe}, and \cite{tan}.

\section{Continuous Conjugacy}\label{sec1}
A $\mathcal{C}^1$ vector field $X$ on $\mathbb{R}^n$ such that $X(0)=0$ is
called \emph{locally topologically conjugate} to its
linearization $A:=DX(0)$ at the origin if there is
a homeomorphism $h:U\to V$ of neighborhoods of the origin 
such that the flows of $X$ and $A$ are
locally conjugated by $h$; that is,  
\begin{equation} \label{conjugacy}
h(e^{tA} x)=X_t(h(x)) 
\end{equation}
whenever $x\in U$, $t\in \mathbb{R}^n$, and both
sides of the conjugacy equation are defined. 
A matrix is \emph{infinitesimally hyperbolic} if every one of
its eigenvalues has a nonzero real part. 
\begin{thm}[Grobman--Hartman]\label{th:gr-hart}
Let $X$ be a $\mathcal{C}^1$ vector field on $\mathbb{R}^n$ such that $X(0)=0$. 
If the linearization $A$ of $X$ at the origin is infinitesimally
hyperbolic, then $X$ is locally topologically conjugate to $A$
at the origin. 
\end{thm}
\begin{proof}
For each $r>0$ there is a smooth bump function $\rho:\mathbb{R}^n\to [0,1]$ 
with the following properties: 
$\rho(x)\equiv 1$ for $\abs{x}<r/2$,  
$\rho(x)\equiv 0$ for $\abs{x}>r$,  
and $\abs{d\rho(x)}<4/r$ for $x\in \mathbb{R}^n$.  
The vector field $Y=A+\xi$ where $\xi(x):=\rho(x)(X(x)-Ax)$ 
is equal to $X$ on the open ball of radius $r/2$ at the origin.
Thus, it suffices to prove that $Y$ is locally conjugate to $A$ at the
origin.

Suppose that in equation~\eqref{conjugacy} $h=id+\eta$ and
$\eta:\mathbb{R}^n\to \mathbb{R}^n$ is differentiable in the direction $A$. 
Rewrite equation~\eqref{conjugacy} in the form
\begin{equation}\label{eq:rearr}
e^{-tA} h(e^{tA} x)=e^{-tA} X_t(h(x))
\end{equation}
and differentiate both sides 
with respect to $t$ at $t=0$ to obtain 
the \emph{infinitesimal conjugacy equation}
\begin{equation}\label{inf_conj}
L_A\eta=\xi\circ(\id+\eta)
\end{equation}
where
\begin{equation}\label{dydeflie}
L_A\eta:=\frac{d}{dt}(e^{-tA} \eta(e^{tA} ))\big|_{t=0}
\end{equation}
is the Lie derivative of $\eta$ along $A$. 
(We note that if $h$ is a conjugacy, then the
right-hand-side of equation~\eqref{eq:rearr} is differentiable;
and therefore, the Lie derivative of $h$ in the direction $A$ is defined.) 

We will show that if $r>0$ is sufficiently small, then
the infinitesimal conjugacy equation has a
bounded continuous solution $\eta:\mathbb{R}^n\to \mathbb{R}^n$  
(differentiable along $A$) 
such that $h:=\id+\eta$  is a homeomorphism of $\mathbb{R}^n$ whose restriction
to the ball of radius $r/2$ at the origin is a local conjugacy as in
equation~\eqref{conjugacy}.

Since $A$ is infinitesimally hyperbolic, 
$A=A^{+}\oplus A^{-}$ having spectra,
respectively, to the left and to the right of the imaginary axis. 
Put ${\bf E}^{-}=\range(A^-)$ and ${\bf E}^{+}=\range(A^+)$. There are
positive constants $C$ and $\lambda$ such that 
\begin{equation}\label{hyest}
\abs{e^{tA}  v^+}\le C e^{-\lambda t}\abs{v^+},\qquad 
\abs{e^{-tA}  v^-} \le C e^{-\lambda t}\abs{v^-}
\end{equation}
for $t\ge 0$. 
The Banach space $\mathcal{B}$ of bounded (in the supremum norm)
continuous vector fields on $\mathbb{R}^n$ 
splits into the complementary subspaces $\mathcal{B}^+$ and $\mathcal{B}^-$ of 
vector fields with ranges, respectively, in ${\bf E}^+$ or ${\bf E}^-$. 
In particular, a vector field $\eta\in \mathcal{B}$ has a unique representation 
$\eta=\eta^+ +\eta^-$ where $\eta^+\in \mathcal{B}^+$ and $\eta^-\in \mathcal{B}^-$.

The function $G$ on $\mathcal{B}$ defined by
\begin{equation}\label{inverse}
G\eta(x)=\int_0^\infty e^{tA} \eta^{+} (e^{-tA} x) \, dt 
 -\int_0^\infty e^{-tA} \eta^{-}(e^{tA} x) \, dt
\end{equation}
is a bounded linear operator $G:\mathcal{B}\to\mathcal{B}$.
The boundedness of $G$ follows from the
hyperbolic estimates~\eqref{hyest}. 
The continuity of the function $x\mapsto G\eta(x)$ is
an immediate consequence of the following lemma from advanced
calculus---essentially the
Weierstrass $M$-test---and the hyperbolic estimates.
\begin{lemma}\label{pfcont}
Suppose that $f:[0,\infty)\times \mathbb{R}^n\to \mathbb{R}^m$, given
by $(t,x)\mapsto f(t,x)$, is continuous $($respectively,
the partial derivative $f_x$ is continuous$)$.  If for
each $y\in \mathbb{R}^n$ there is an open set $S\subset \mathbb{R}^n$
with compact closure $\bar S$ and
a function  $M:[0,\infty)\to \mathbb{R}$ such that
$y\in S$, the integral $\int_0^\infty M(t)\,dt$ converges,
and $\abs{f(t,x)}\le M(t)$ $($respectively, $\abs{f_x(t,x)}\le M(t)\,)$
whenever 
$t\in [0,\infty)$ and $x$ is in $\bar S$, 
then  $F:\mathbb{R}^n\to \mathbb{R}^m$ given by $F(x)=\int_0^\infty f(t,x) \,dt$ 
is continuous $($respectively, $F$ is continuously differentiable and
$DF(x)=\int_0^\infty f_x(t,x) \,dt\,)$.
\end{lemma}

Using the definition of $L_A$ in display~\eqref{dydeflie}
and the fundamental theorem of calculus, 
we have the identity  $L_A G=\id_{\mathcal{B}}$. 
As a consequence, if
\begin{equation}\label{fixed_point}
\eta=G(\xi\circ(\id+\eta)):=F(\eta),
\end{equation}
then $\eta$ is a solution of the 
infinitesimal conjugacy equation~\eqref{inf_conj}. 

Clearly,\label{cutoff} $F:\mathcal{B}\to \mathcal{B}$ 
and for $\eta_1$ and $\eta_2$ in $\mathcal{B}$
we have that
\begin{eqnarray*}
\norm{F(\eta_1)-F(\eta_2)}&\le& 
\norm{G}\,\norm{\xi\circ(\id+\eta_1)-\xi\circ(\id+\eta_2)} \\
&\le& \norm{G}\,\norm{D\xi}\,\norm{\eta_1-\eta_2}.
\end{eqnarray*}
Using the definitions of $\xi$ and the properties of
the bump function $\rho$, 
we have that
\[
\norm{D\xi}\le \sup_{\abs{x}\le r}\norm{DX(x)-A}
+\frac{4}{r}\sup_{\abs{x}\le r}\abs{X(x)-Ax}. 
\]
By the continuity of DX, there is some positive number $r$ such
that $\norm{DX(x)-A}<1/(10\norm{G})$ whenever $\abs{x}\le r$. 
By Taylor's theorem (applied to the $\mathcal{C}^1$ function $X$) and the
obvious estimate of the integral form of the remainder, if
$\abs{x}\le r$, then 
$\abs{X(x)-Ax}<r/(10\norm{G})$. For the number $r>0$ just chosen,
we have the estimate $\norm{G}\norm{D\xi}<1/2$; and therefore, 
$F$ is a contraction on $\mathcal{B}$. By the contraction mapping theorem
applied to the restriction of $F$ on the closed subspace $\mathcal{B}_0$ of $\mathcal{B}$
consisting of those elements that vanish at the origin, the
equation~\eqref{fixed_point}
has a unique solution $\eta\in \mathcal{B}_0$, which also satisfies the
infinitesimal conjugacy equation~\eqref{inf_conj}.

We will show that $h:=\id+\eta$ is a local conjugacy.
To do this recall the following elementary fact about Lie 
differentiation: If $U$, $V$, and $W$
are vector fields, $\phi_t$
is the flow of $U$, and $L_U V=W$, then
\[
\frac{d}{dt}D\phi_{-t}(\phi_t(x))V(\phi_t(x))
               =D\phi_{-t}(\phi_t(x))W(\phi_t(x)).
\]
Apply this result to the infinitesimal conjugacy equation~\eqref{inf_conj}
to obtain the identity
\[
\frac{d}{dt}(e^{-tA} \eta(e^{tA} x))=e^{-tA} \xi(h(e^{tA}  x)).
\]
Using the definitions of $h$ and $Y$, it follows immediately that
\[
\frac{d}{dt}(e^{-tA} h(e^{tA} x))=-e^{-tA} A h(e^{tA}  x)+ e^{-tA} Y(h(e^{tA}  x))
\]
and (by the product rule)
\[
e^{-tA} \frac{d}{dt}h(e^{tA} x)=e^{-tA} Y(h(e^{tA}  x)).
\] 
Therefore,
the function given by $t\mapsto h(e^{tA}  x)$ is the integral curve
of $Y$ starting at the point $h(x)$. 
But, by the definition of the flow $Y_t$ of $Y$, 
this integral curve is the function
$t\mapsto Y_t(h(x))$. By uniqueness, $h(e^{tA} x)=Y_t(h(x))$.
Because $Y$ is linear on the complement of a compact set, 
Gronwall's inequality can be used to show that the flow of $Y$ is complete.
Hence, the conjugacy equation holds for all $t\in \mathbb{R}$.

It remains to show that 
the continuous function $h:\mathbb{R}^n\to\mathbb{R}^n$  given by $h(x)=x+\eta(x)$
is a homeomorphism. 
Since $\eta$ is bounded on $\mathbb{R}^n$,
the map $h=\id+\eta$ is surjective.
To see this, choose $y\in\mathbb{R}^n$, note that the equation $h(x)=y$ 
has a solution of the form $x=y+z$ if $z=-\eta(y+z)$, and
apply Brouwer's fixed point theorem to the map
$z\mapsto -\eta(y+z)$ on the ball of radius $\norm{\eta}$ centered
at the origin. 
(Using this idea, it is also easy to prove that $h$ is proper;
that is, the inverse image under $h$  of every compact subset
of $\mathbb{R}^n$ is compact.) 
We will show that $h$ is injective.
If $x$ and $y$ are in $\mathbb{R}^n$ and $h(x)=h(y)$, then 
$Y_t(h(x))=Y_t(h(y))$ and, by the conjugacy relation,
$A_t x+\eta(A_tx)=A_ty+\eta(A_ty)$. By the linearity of
$A_t$, we have that
\begin{equation}\label{eq:18old}
\abs{A_t(x-y)}=\abs{\eta(A_ty)-\eta(A_tx)}.
\end{equation}
For each nonzero $u$ in $\mathbb{R}^n$, the function
$t\mapsto A_t u=e^{tA}u$ is unbounded on $\mathbb{R}$.
Hence, either $x=y$ or the left side of equation~\eqref{eq:18old}
is unbounded for $t\in \mathbb{R}$. 
Since $\eta$ is bounded, $x=y$; and therefore, the
map $h$ is injective. By Brouwer's theorem on invariance of
domain, the bijective continuous map $h$ is a homeomorphism. 
(Brouwer's theorem can be avoided by using instead the following
elementary fact: A continuous, proper, bijective map from
$\mathbb{R}^n$ to $\mathbb{R}^n$ is a homeomorphism.) 
\end{proof}
\section{Smooth Conjugacy}\label{sc}
In the classic paper~\cite{Hartman60M}, Hartman shows that
if $a>b>0$ and $c\ne 0$, then there is no $\mathcal{C}^1$ 
linearizing conjugacy at the origin for the analytic differential equation
\begin{equation}\label{notlin}
\dot x=a x,\quad \dot y=(a-b) y+ cxz, \quad \dot z=-b z.
\end{equation}
On the other hand,
he proved the following two important results.
(1) If a $\mathcal{C}^2$ vector field has a rest point such that
either all eigenvalues of its linearization have negative real parts or
all eigenvalues have positive real parts, then the vector field
is locally $\mathcal{C}^1$ conjugate to its linearization. (2)~If 
a $\mathcal{C}^2$ \emph{planar} vector field has a hyperbolic rest point, then
the vector field is locally $\mathcal{C}^1$ conjugate to its linearization.
Hartman proves the analogs of these theorems for maps and then
derives the corresponding theorems for vector fields as corollaries. 
We will work directly with vector fields and thereby use standard methods
from the theory of ordinary differential equations to obtain these results.
We also note that S. Sternberg proved that the analytic planar system
\begin{equation}\label{ex:stern}\dot x=-x,\qquad \dot y=-2y+x^2\end{equation}
is not $\mathcal{C}^2$ linearizable.
Hence, it should be clear that the 
proofs of Hartman's results on the existence
of (maximally) smooth linearizations will 
require some delicate estimates.
Nevertheless, as we will soon see, 
the strategy used in these proofs is easy to understand.

Although the starting point for the proof of Theorem~\ref{th:gr-hart}, 
namely, the differentiation with respect to $t$ of the desired conjugacy 
relation~\eqref{conjugacy} and the inversion of the operator
$L_A$ as in display~\eqref{inverse}, 
leads to the simple proof of the existence
of a conjugating homeomorphism given in Section~\ref{sec1}, 
it turns out this strategy does not produce smooth conjugaces. 
This fact is illustrated by linearizing
the scalar vector field given by $X(x)=-a x+f(x)$ where
$a>0$. Suppose that $f$ vanishes outside a sufficiently small
open subset of the origin with radius $r>0$ so that
$h(x)=x+\eta(x)$ is the continuous linearizing transformation where
\[
\eta(x)=\int_0^\infty e^{-at}f(e^{at}x+\eta(e^{at}x))\,dt
\] 
as in the proof of Theorem~\ref{th:gr-hart}.
With $F:=f\circ(\id+\eta)$, $u:=e^{at}$, and $x\ne 0$, the
function $\eta$ is given by
\[
\eta(x)=\frac{1}{a}\int_1^{r/\abs{x}} \frac{F(ux)}{u^2}\, du.
\]
Moreover, if $x>0$, then (with $w=ux$)
\[
\eta(x)=\frac{x}{a}\int_x^r \frac{F(w)}{w^2}\, dw,
\]
and if $x<0$, then
\[
\eta(x)=-\frac{x}{a}\int_{-r}^x \frac{F(w)}{w^2}\, dw.
\]
If $\eta$ were continuously differentiable in a neighborhood of the
origin, then we would have the identity 
\[
\eta'(x)=\frac{1}{a}\int_{x}^r \frac{F(w)}{w^2}\, dw-\frac{F(x)}{ax}
\]  
for $x>0$ and the identity
\[
\eta'(x)=-\frac{1}{a}\int_{-r}^x \frac{F(w)}{w^2}\, dw-\frac{F(x)}{ax} 
\]
for $x<0$. 
Because the left-hand and right-hand derivatives agree
at $x=0$, it would follow that
\[
\int_{-r}^r \frac{F(w)}{w^2}\, dw=0.
\]
But this equality is not true in general. For example, it is 
not true if  $f(x)=\rho (x) x^2$ where $\rho$ is a 
bump function as in the proof of Theorem~\ref{th:gr-hart}.
In this case, the integrand is nonnegative and not identically zero.

There are at least two ways to avoid the difficulty just described.
First, note that the operator $L_A$, for the case $Ax=-ax$, 
is formally inverted by
running time forward instead of backward. This leads to the
formal inverse given by  
\[(G\eta)(x):=-\int_0^\infty e^{at}\eta(e^{-at}x)\,dt\]
and the fixed point equation  
\[
\eta(x)=-\int_0^\infty e^{at}f(e^{-at}x+\eta(e^{-at}x))\,dt.
\]
In this case, no inconsistency arises from the
assumption that $\eta'(0)$ exists.
In fact, in the last chapter of this paper, we will show that
this method does produce a smooth conjucacy for certain ``special
vector fields'', for example, 
the scalar vector fields under consideration 
here (see Theorem~\ref{th:1d}).

Another idea that can be used to avoid the difficulty with smoothness
is to differentiate both sides of the conjugacy relation
\begin{equation}\label{opcon}
e^{tA}h(x)=h(X_t(x))
\end{equation}
with respect to $t$,
or equivalently for the scalar differential equation,
to use the change of coordinates $u=x+\eta(x)$. 
With this starting point, it is easy to see that
$\eta$ determines a linearizing transformation
if it is a solution of the first order partial differential
equation
\[
D\eta(x)X(x)+a\eta(x)=-f(x).
\] 
To solve it, replace $x$ by the integral curve $t\mapsto \phi_t(x)$ where
$\phi_t$ denotes the flow of $X$,  and note that (along this
characteristic curve)
\[
\frac{d}{dt}\eta(\phi_t(x))+a\eta(\phi_t(x))=-f(\phi_t(x)).
\]
By variation of constants, we have the identity
\[
\frac{d}{dt}e^{at}\eta(\phi_t(x))=-e^{at}f(\phi_t(x)),
\]
and (after integration on the interval $[0,t]$)
it follows that the function $\eta$ given by 
\begin{equation}\label{int:nw}
\eta(x)=\int_0^\infty e^{at} f(\phi_t(x))\,dt  
\end{equation}
determines a linearizing transformation $h=\id+\eta$
if the improper integral converges on some open interval containing
the origin.
The required convergence is not obvious in general
because the integrand of this integral 
contains the exponential growth factor $e^{at}$. 
In fact, to prove that $\eta$ is continuous, 
a uniform estimate is required for the growth rate
of the family of functions $t\mapsto \abs{f(\phi_t(x))}$, 
and to show that $\eta$ is continuously differentiable,
a uniform growth rate estimate is required for their derivatives.
The required estimates will be obtained in the next section where we
will show that $\eta$ is smooth for a hyperbolic sinks.
For the scalar case as in equation~\eqref{int:nw}, $f(x)$ is less than
a constant times $x^2$ near the origin, and  
the solution $\phi_t(x)$ is approaching the
origin like $e^{-at}x$. 
Because this quantity is squared by the function $f$, the integral converges. 

To test the validity of this method,
consider the example $\dot x=-a x+x^2$ 
where the flow can be computed explicitly and
the integral~\eqref{int:nw} can be evaluated to obtain the smooth
near-identity linearizing transformation $h:(-a,a)\to \mathbb{R}$ given by
\[
h(x)=x+\frac{x^2}{a-x}.
\]

\subsection{Hyperbolic Sinks}
The main result of this section is the following theorem.
\begin{thm}[Hartman]\label{hsth}
Let $X$ be a $\mathcal{C}^2$ vector field on $\mathbb{R}^n$ such that $X(0)=0$. 
If every eigenvalue of $DX(0)$
has negative real part,
then $X$ is locally $\mathcal{C}^1$ conjugate to its linearization
at the origin. 
\end{thm}

The full strength of the
natural hypothesis that $X$ is $\mathcal{C}^2$ is not used in the
proof; rather, we will use only the weaker hypothesis that $X$ is $\mathcal{C}^1$ 
and certain of its partial derivatives 
are H\"older on some fixed neighborhood of the origin.
A function $h$ is H\"older on a subset $U$ of its domain
if there is some (H\"older exponent) $\mu$ with $0<\mu \le 1$ and
some constant $M>0$ such that 
\[
\abs{h(x)-h(y)}\le M \abs{x-y}^\mu
\]
whenever  $x$ and $y$ are in $U$. In the special case
where $\mu=1$, the function $h$ is also called Lipschitz.  
As a convenient notation, let $\mathcal{C}^{1,\mu}$
denote the class of $\mathcal{C}^1$ functions whose first
partial derivatives are all H\"older with H\"older exponent $\mu$.


Recall the definition of H\"older spectral exponents given
in Section~\ref{sec:intro}.
We will prove the following 
generalization of Theorem~\ref{hsth}. 
%To state this result, we need a technical definition.
%Suppose that
%$\{-b_1,-b_2,\dots -b_N\}$ 
%is the set of real parts of the
%eigenvalues of the linear transformation $A:\mathbb{R}^n\to\mathbb{R}^n$ 
%and
%\begin{equation}\label{hse}
%-b_N<-b_{N-1}<\dots<-b_1<0.
%\end{equation}
%The \emph{H\"older spectral exponent} of $A$ is the number
%\[
%\frac{b_1(b_{j+1}-b_j)}{b_1(b_{j+1}-b_j)+b_{j+1}b_j}
%\]
%where  
%\[
%\frac{b_{j+1}-b_j}{b_{j+1}b_j}
%=\min_{i\in\{1,2,\cdots,N-1\}} \frac{b_{i+1}-b_i}{b_{i+1}b_i}.
%\]
%The H\"older spectral exponent of a linear transformation $B$ 
%all of whose eigenvalues have positive real parts is
%the H\"older spectral exponent of $-B$.
\begin{thm}[Hartman]\label{rhsth}
Let $X$ be a $\mathcal{C}^{1,1}$ vector field on $\mathbb{R}^n$ such that $X(0)=0$. 
If every eigenvalue of $DX(0)$
has negative real part and $\mu>0$ is smaller than the 
H\"older spectral exponent, 
then there is a near-identity 
$\mathcal{C}^{1,\mu}$-diffeomorphism defined on some neighborhood of
the origin that conjugates $X$ to its linearization at the origin.
\end{thm}

The strategy for the proof of Theorem~\ref{rhsth} is simple; in fact,
the proof is by a finite induction.  By a linear change of
coordinates, the linear part of the vector field at the origin
is transformed to a real Jordan canonical form where the
diagonal blocks are ordered according to the real parts of the corresponding
eigenvalues, and the vector field is decomposed into (vector) components
corresponding to these blocks.  A theorem from invariant manifold theory
is used to ``flatten'' the invariant manifold corresponding
to the block whose eigenvalues have the 
largest real part onto the corresponding linear subspace. 
This transforms the original vector field into a special
form which is then ``partially linearized'' by a near-identity diffeomorphism; 
that is, the
flattened---but still nonlinear---component of the vector field
is linearized by the transformation. This reduces the dimension of the
linearization problem by the dimension of the flattened manifold.
The process is continued until the system is completely linearized.
Finally, the inverse of the linear transformation to
Jordan form is applied to return to the original coordinates so that
the composition of all the coordinate transformations is a near-identity map.

We will show that the nonlinear part of each near-identity
partially linearizing transformation is given explicitly by 
an integral transform
\[\int_0^\infty e^{-tB}g(\varphi_t(x))\,dt\]
where $g$ is given by the nonlinear terms of
the component function of the vector field  corresponding
to the linear block $B$ and $\varphi_t$ is the nonlinear flow. 
The technical part of the proof is to demonstrate  that these
transformations maintain the required smoothness. This is
done by repeated applications of Lemma~\ref{pfcont} to
prove that ``differentiation under the integral sign'' is permitted. 
Because maximal smoothness is obtained, it is perhaps not
surprising that some of the estimates required to 
majorize the integrand of the integral transform are rather delicate.
In fact, the main difficulty is to prove that the exponential
rate of decay toward zero of the  functions
$t\mapsto g(\varphi_t(x))$  and $t\mapsto g_x(\varphi_t(x))$, defined
on some open neighborhood of $x=0$, 
is faster than the 
exponential rate at which the linear
flow $e^{tB}$ moves points away from
the origin in reverse time.

As in Section~\ref{sec1}, 
the original vector field $X$ can be
expressed in the ``almost linear'' form $X(x)=A x+(X(x)-A x)$. 
There is a linear change of coordinates in 
$\mathbb{R}^n$ such that $X$, in the new coordinates, is the almost linear  vector
field
$Y(x)=B x +(Y(x)-B x)$ where the matrix $B$ is in real Jordan canonical form
with diagonal blocks $B_1$ and $B_2$, every eigenvalue of $B_2$ has the
same negative real part $-b_2$, and every eigenvalue of $B_1$ has its real
part strictly smaller than $-b_2$. The corresponding ODE has the form
\begin{eqnarray}\label{sys:1}
\nonumber \dot x_1&=&B_1\, x_1+P_1(x_1,x_2),\\
\dot x_2&=&B_2\, x_1+P_2(x_1,x_2)
\end{eqnarray}
where $x=(x_1,x_2)$ and $(P_1(x_1,x_2),P_2(x_1,x_2))=(Y(x)-B x)$. 
Let $c$ be a real number such that $-b<-c<0$,  and note that
if the augmented system
\begin{eqnarray*}
\dot x_1&=&B_1\, x_1+P_1(x_1,x_2),\\
\dot x_2&=&B_2\, x_1+P_2(x_1,x_2),\\
\dot x_3&=&-c  x_3
\end{eqnarray*}
is linearized by a near-identity transformation of the form
\begin{eqnarray*}
u_1&=&x_1+\alpha_1(x_1,x_2,x_3),\\
u_2&=&x_2+\alpha_2(x_1,x_2,x_3),\\ 
u_3&=&x_3,
\end{eqnarray*}
then the ODE~\eqref{sys:1} is linearized by the transformation
\[
u_1=x_1+\alpha_1(x_1,x_2,0),\quad 
u_2=x_2+\alpha_2(x_1,x_2,0).
\]

More generally, let  $\mathcal{C}^{1,L,\mu}$ denote the class of
all systems of the form
\begin{eqnarray}\label{sys:3} 
\nonumber \dot x&=&A x+f(x,y,z),\\
\nonumber \dot y&=&B y+g(x,y,z),\\
\dot z &=& C z
\end{eqnarray}
where $x\in\mathbb{R}^k$, $y\in\mathbb{R}^\ell$, and $z\in \mathbb{R}^m$; where
$A$, $B$, and $C$ are square matrices of the corresponding
dimensions; $B$ is in real Jordan canonical form; 
\begin{itemize}
\item every eigenvalue of $B$ has real part $-b<0$;
\item every eigenvalue of $A$ has real part less than $-b$;
\item every eigenvalue of $C$ has real part in
an interval $[-c,-d]$ where\\ $-b<-c$ and $-d<0$;
\end{itemize}
and $F:(x,y,z)\mapsto (f(x,y,z),g(x,y,z))$
is a $\mathcal{C}^1$ function defined in a bounded product neighborhood 
\begin{equation}\label{omega}
\Omega=\Omega_{xy}\times \Omega_{z}
\end{equation}
of the origin in 
$(\mathbb{R}^k\times\mathbb{R}^\ell)\times \mathbb{R}^m$ 
such that
\begin{itemize}
\item $F(0,0,0)=0$ and $DF(0,0,0)=0$,\label{1to6} 
\item the partial derivatives $F_x$ and $F_y$ are Lipschitz in  $\Omega$, and
\item the partial derivative $F_z$ is 
Lipschitz in $\Omega_{xy}$  uniformly with respect to 
$z\in \Omega_z$ and H\"older in $\Omega_z$ uniformly with respect
to $(x,y)\in \Omega_{xy}$ with  H\"older exponent $\mu$. 
\end{itemize}  
System~\eqref{sys:3} satisfies
the \emph{$(1,\mu)$ spectral gap condition} if
$(1+\mu)c<b$.\label{1musgc}

%Note that $\mathcal{C}^{1,L,\mu}\subset \mathcal{C}^{1,\mu}$ because $\Omega$ is bounded.

We will show that system~\eqref{sys:3} can be linearized by
a $\mathcal{C}^1$ near-identity transformation of the form
\begin{eqnarray}\label{map:1}
\nonumber u&=&x+\alpha(x,y,z),\\
\nonumber v&=&y+\beta(x,y,z),\\
w&=&z.                      
\end{eqnarray}
The proof of this result is given in three main steps:
an invariant manifold theorem for a system with a spectral gap
is used to find a preliminary near-identity $\mathcal{C}^1$ map, as in
display~\eqref{map:1}, 
that transforms system~\eqref{sys:3} into a system of the same form
but with the new function $F=(f,g)$ ``flattened'' along the coordinate
subspace corresponding to the invariant manifold.  
Next, for the main part of the proof, a second 
near-identity transformation of the same form is constructed that
transforms the flattened system to the partially linearized form
\begin{eqnarray}\label{sys:4}
\nonumber \dot x&=&A x+p(x,y,z),\\
\nonumber \dot y&=&B y,\\
\dot z &=& C z
\end{eqnarray}
where $A$, $B$, and $C$ are the matrices in system~\eqref{sys:3}
and the function $p$ has the following properties:
\begin{itemize}
\item $p$ is $\mathcal{C}^1$ on an open neighborhood 
$\Omega=\Omega_x\times\Omega_{yz}$
of the origin in $\mathbb{R}^k\times(\mathbb{R}^\ell\times\mathbb{R}^m)$;\label{n16}
\item $p(0,0,0)=0$ and $Dp(0,0,0)=0$;
\item The partial derivative $p_x$ is Lipschitz in  $\Omega$;
\item The partial derivatives $p_y$ and $p_z$ are 
Lipschitz in $\Omega_{x}$  uniformly with respect to
$(y,z)\in \Omega_{yz}$ and H\"older in $\Omega_{yz}$ uniformly with respect
to $x\in \Omega_{x}$.
\end{itemize}

The final step of the proof consists of three observations:
The composition of $\mathcal{C}^1$ near-identity transformations of the form
considered here is again a $\mathcal{C}^1$ near-identity transformation; 
the dimension of the ``unlinearized'' part of the system
is made strictly smaller after applying
the partially linearizing transformation, and 
the argument can be repeated as long as the system is not linearized.
In other words, the proof is completed by a finite
induction.

The required version of the invariant manifold theorem is
a special case of a more general theorem 
(see, for example, Yu. Latushkin and B. Layton~\cite{ll}). 
For completeness, we will formulate and prove this special case.
Our proof can be modified to obtain the general result.

For notational convenience, let us view system~\eqref{sys:3} in the
compact form
\begin{eqnarray}\label{sys:5}
\nonumber \dot{\mathcal{X}}&=&\mathcal{A} \mathcal{X}+F(\mathcal{X},z),\\
\dot z &=& C z
\end{eqnarray}
where $\mathcal{X}=(x,y)$, 
$\mathcal{A}=\left(\begin{array}{cc} A & 0\\ 0 & B\end{array}\right)$,
and $F:=(f,g)$.
Hyperbolic estimates for the corresponding linearized
equations are used repeatedly. In particular, in view of the hypotheses
about the eigenvalues of $A$, $B$, and $C$, 
it follows that if $\epsilon>0$ and 
\[0<\lambda<d,\] then there is a constant $K>1$  such that
\begin{equation}\label{est:0}
\norm{e^{t\mathcal{A}}}\le K e^{-(b-\epsilon) t}, 
\qquad \norm{e^{tC}}\le K e^{-\lambda t}
\qquad \norm{e^{-tC}}\le K e^{(c+\epsilon)t}
\end{equation}
for all $t\ge 0$.

\begin{thm}\label{th:invman}
If the $(1,\mu)$ spectral gap condition 
holds for system~\eqref{sys:5}, then there is
an open set $\Omega_z\subset\mathbb{R}^m$ containing $z=0$ and a $\mathcal{C}^{1,\mu}$ function
$\gamma:\Omega_z\to \mathbb{R}^{k+\ell}$ 
such that $\gamma(0)=D\gamma(0)=0$ whose graph 
$($the set $\{(\mathcal{X},z)\in \mathbb{R}^{k+\ell}\times\mathbb{R}^m : \mathcal{X}=\gamma(z)\})$
is forward invariant.  
\end{thm}

As a remark, we mention that the smoothness of $\gamma$ cannot
in general be improved by simply requiring additional smoothness of
the vector field. Rather, the smoothness of the invariant manifold
can be improved only if additional requirements are made on the
smoothness of the vector field \emph{and} 
on the length of the spectral gap 
(see~\cite{ll} and the references therein). 
For these reasons, it seems that
the technical burden imposed by working with H\"older functions
cannot be avoided by simply requiring additional smoothness 
of the vector field unless additional hypotheses are made on the
eigenvalues of the linearization at the origin as well.
Also, we mention that our proof 
illustrates the full power of the fiber contraction principle 
introduced by M. Hirsch and C. Pugh in~\cite{hp} as a method
for proving the smoothness of functions obtained as fixed points
of contractions.

To describe the fiber contraction method in our setting,
let us consider a metric subspace $\mathcal{D}$ of a Banach space
of continuous functions defined 
on $\Omega\subset \mathbb{R}^m$ with values in $\mathbb{R}^p$, 
and let us suppose that
$\Gamma:\mathcal{D}\to \mathcal{D}$ is a contraction (on the complete metric space $\mathcal{D}$)
with fixed point $\gamma$.
(In the analysis to follow,
$\Gamma$ is given by an integral transform operator.)
We wish to show that $\gamma$ is differentiable. Naturally,
we start by formally differentiating both sides of the identity
$\eta(z)=\Gamma(\eta)(z)$ with respect to $z$ to obtain the identity
$D\gamma(z)=\Delta(\gamma,D\gamma)(z)$
where the map $\Phi\mapsto \Delta(\gamma,\Phi)$ is a linear
operator on a metric---not necessarily complete---subspace  
$\mathcal{J}$ of continuous functions from $\Omega$ to the
bounded linear maps from $\mathbb{R}^m$ to $\mathbb{R}^p$. 
We expect the derivative
$D\eta$, if it exists, to satisfy the equation 
\[\Phi=\Delta(\eta,\Phi).\]
Hence, $\mathcal{J}$ is a space of ``candidates for the derivative of $\gamma$''.

The next step is to show that the bundle map
$\Lambda:\mathcal{D}\times \mathcal{J}\to \mathcal{D}\times \mathcal{J}$ defined by
\[\Lambda(\gamma,\Phi)=(\Gamma(\gamma),\Delta(\gamma,\Phi))\]
is a fiber contraction; that is,
for each $\gamma\in \mathcal{D}$, the map
$\Phi\to \Delta(\gamma,\Phi)$ is a contraction on
$\mathcal{J}$ with respect to a contraction constant that does not depend
on the choice of $\gamma\in \mathcal{D}$.
The fiber contraction theorem
(see~\cite{hp} or, for more details, \cite{Chicone}) states that
\emph{if $\gamma$ is the globally attracting fixed point of $\Gamma$ and
if $\Phi$ is a fixed point of the map $\Phi\to \Delta(\gamma,\Phi)$,
then $(\gamma,\Phi)$ is the globally attracting fixed point
of $\Lambda$.}
The fiber contraction theorem does not require $\mathcal{J}$ to be
a complete metric space. This leaves open the possibility to
prove the existence of a fixed point in the fiber over $\gamma$ by
using, for example, Schauder's theorem. But, for our applications,
the space $\mathcal{J}$ will be chosen to be complete so that 
the existence of the fixed point $\Phi$ follows from 
an application of the contraction mapping theorem.   

After we show that $\Lambda$ is a fiber contraction, the following
argument can often be used to prove the desired equality $\Phi=D\gamma$.
Find a point $(\gamma_0, \Phi_0)\in \mathcal{D}\times\mathcal{J}$ such that
$D\gamma_0=\Phi_0$, define a sequence
$\{(\gamma_{j},\Phi_{j})\}_{j=0}^\infty$
in $\mathcal{D}\times \mathcal{J}$ by
\[
\gamma_{j}=\Gamma(\gamma_{j-1}),\qquad
  \Phi_{j}=\Delta(\gamma_{j-1},\Phi_{j-1}),
\]
and prove by induction that $D\gamma_{j}=\Phi_{j}$ for every
positive integer $j$. By the fiber contraction theorem,
the sequence $\{\gamma_j\}_{j=0}^\infty$ converges to $\gamma$
and the sequence $\{D\gamma_j\}_{j=0}^\infty$ converges to the
fixed point $\Phi$ of the map $\Phi\to \Delta(\gamma,\Phi)$.
If the convergence is \emph{uniform}, then by a standard theorem
from advanced calculus---\emph{the uniform
limit of a sequence of differentiable functions is
differentiable and equal to the limit of the derivatives of the
functions in the sequence provided that the sequence of derivatives
is uniformly convergent}---the function $\gamma$ is differentiable
and its derivative is $\Phi$.

Let us prove Theorem~\ref{th:invman}.
\begin{proof}
The graph of $\gamma$ is forward invariant if and only
if $\dot{\mathcal{X}}=D\gamma(z)\dot z$ whenever $\mathcal{X}=\gamma(z)$. 
Equivalently, the identity
\begin{equation}\label{id:invar}
D\gamma(z)Cz-\mathcal{A}\gamma(z)=F(\gamma(z),z)
\end{equation}
holds for all $z$ in the domain of $\gamma$. 

The function $\gamma$ will satisfy identity~\eqref{id:invar}
if $\gamma$ is $\mathcal{C}^1$ and 
\[
\frac{d}{d\tau}e^{-\tau\mathcal{A}}\gamma(e^{\tau C}z)=
e^{-\tau\mathcal{A}} F(\gamma(e^{\tau C}z),e^{\tau C}z).
\]
In this case, by integration on the interval $[-\tau,0]$ followed by a
change of variables in the integral on the right-hand side
of the resulting equation, it follows that
\[
\gamma(z)-e^{\tau\mathcal{A}}\gamma(e^{-\tau C}z)=
\int_0^\tau e^{t\mathcal{A}} F(\gamma(e^{-t C}z),e^{-t C}z)\,dt.
\]
If $\gamma$ is a $\mathcal{C}^1$ function
such that $\gamma(0)=D\gamma(0)=0$ and 
$\lim_{\tau\to\infty} \abs{e^{\tau\mathcal{A}}\gamma(e^{-\tau C}z)}=0$,
then the graph of $\gamma$ will be (forward) invariant
provided that
\begin{equation}\label{eq:g81}
\gamma(z)=\Gamma(\gamma)(z)
:=\int_0^\infty e^{t\mathcal{A}} F(\gamma(e^{-t C}z),e^{-t C}z)\,dt.
\end{equation}

For technical reasons, it is convenient to assume that
$\gamma$ is defined on all of $\mathbb{R}^m$ and that $F$ is ``cut off''
as in the proof of Theorem~\ref{th:gr-hart} to have support
in an open ball at the origin in $\mathbb{R}^{k+\ell+m}$ of radius $r>0$
so that the new function, still denoted
by the symbol $F$, is defined globally with $\norm{DF}$ bounded
by a small number $\rho>0$ to be determined. Recall that both
$r$ and $\rho$ can be chosen to be as small as we wish.
This procedure maintains the smoothness of the original function and
the modified function agrees with the original function on some 
open ball centered at the origin and with radius $r_0<r$.
Also, the graph of the restriction of
a function $\gamma$ that satisfies the equation~\eqref{eq:g81}
to an open subset of $\Omega_z$, containing the origin
and  inside the ball of radius $r_0$,
is forward invariant for system~\eqref{sys:5} because the
the modified differential equation agrees with the original differential
equation in the ball of radius $r_0$. 

We will show that there is a solution $\gamma$  of equation~\eqref{eq:g81}
such that $\gamma\in \mathcal{C}^{1,\mu}$ and $\gamma(0)=D\gamma(0)=0$.

Let $\mathcal{B}$ denote the Banach space of continuous functions
from  $\mathbb{R}^m$ to $\mathbb{R}^{k+\ell}$ that are bounded with respect to the
norm given by
\[
\bnorm{\gamma}:=
\sup\big\{\frac{\abs{\gamma(z)}}{\abs{z}}:z\in\mathbb{R}^m\setminus\{0\}\big\}.
\]
Also, note that convergence of a sequence in the $\mathcal{B}$-norm implies uniform
convergence of the sequence on compact subsets of $\mathbb{R}^m$.

Let $\mathcal{D}:=\{\gamma\in\mathcal{B}: \abs{\gamma(z_1)-\gamma(z_2)}\le \abs{z_1-z_2}\}$.
Note that if $\gamma\in \mathcal{D}$, then $\bnorm{\gamma}\le 1$. 
We will also show that $\mathcal{D}$ is a closed subset, and hence a complete
metric subspace, of $\mathcal{B}$. In fact, if $\{\gamma^k\}_{k=1}^\infty$ is
a sequence in $\mathcal{D}$ that converges to $\gamma$ in $\mathcal{B}$, then
\begin{eqnarray}\label{pttlim}
\nonumber \abs{\gamma(z_1)-\gamma(z_2)}&\le& \abs{\gamma(z_1)-\gamma^k(z_1)}
+\abs{\gamma^k(z_1)-\gamma^k(z_2)}+\abs{\gamma^k(z_2)-\gamma(z_2)} \\
&\le& \bnorm{\gamma-\gamma^k}\abs{z_1}^\mu
+\abs{z_1-z_2}+\bnorm{\gamma-\gamma^k}\abs{z_2}^\mu.
\end{eqnarray}
To see that $\gamma\in\mathcal{D}$, pass to the limit as $k\to \infty$.

We will show that if $\rho$ is sufficiently small, 
then $\Gamma$, the operator defined in display~\eqref{eq:g81},
is a contraction on $\mathcal{D}$.

Recall the hyperbolic estimates~\ref{est:0}, 
choose $\epsilon$ so small that $c-b+2\epsilon<0$, 
let $\gamma\in \mathcal{D}$, and note that 
\begin{eqnarray}\label{major}
\nonumber\abs{\Gamma(\gamma)(z_1)-\Gamma(\gamma)(z_2)}
&\le& \int_0^\infty Ke^{-(b-\epsilon)t}\norm{DF}
(\abs{\gamma(e^{-tC}z_1)-\gamma(e^{-tC}z_2)}\\
&&\mbox{}+\abs{e^{-tC}(z_1-z_2)})\,dt \\
\nonumber&\le& \int_0^\infty Ke^{-(b-\epsilon)t}\rho
(2\abs{e^{-tC}(z_1-z_2)})\,dt \\
&\le& \big(2K\rho \int_0^\infty e^{(c-b+2\epsilon)t}\,dt\big)
\abs{z_1-z_2}.
\end{eqnarray}
By taking $\rho$ sufficiently small, it follows that
\[\abs{\Gamma(\gamma)(z_1)-\Gamma(\gamma)(z_2)}\le \abs{z_1-z_2}.\]
Also, using similar estimates, 
it is easy to show that
\[
\abs{\Gamma(\gamma_1)(z)-\Gamma(\gamma_2)(z)}
\le \frac{K^2\rho}{b-c-2\epsilon} \bnorm{\gamma_1-\gamma_2}\abs{z}.
\]
Hence, if $\rho=\norm{DF}$ is sufficiently small, then
$\Gamma$ is a contraction on the complete metric space $\mathcal{D}$; 
and therefore,
it has a unique fixed point $\gamma_\infty\in \mathcal{D}$. 

We will use the fiber contraction principle to show that 
$\gamma_\infty\in \mathcal{C}^{1,\mu}$.

Before modification by the bump function, there is some
open neighborhood of the origin on which 
$F_\mathcal{X}$ is Lipschitz, 
$F_z$ is Lipschitz in $\mathcal{X}$,  and $F_z$ is H\"older in $z$.
Using this fact and the construction of the bump function,
it is not difficult to show that
there is a constant $\bar M>0$ 
such that (for the modified function $F$)
\begin{equation}\label{hefFz}
\abs{F_z(\mathcal{X}_1,z_1)-F_z(\mathcal{X}_2,z_2)}
\le\bar M(\abs{\mathcal{X}_1-\mathcal{X}_2}^\mu+\abs{z_1-z_2}^\mu)
\end{equation} 
and
\begin{equation}\label{hefFX}
\abs{F_\mathcal{X}(\mathcal{X}_1,z_1)-F_\mathcal{X}(\mathcal{X}_2,z_2)}
\le\bar M(\abs{\mathcal{X}_1-\mathcal{X}_2}^\mu+\abs{z_1-z_2}^\mu).
\end{equation}
Moreover, $\bar M$ is independent of $r$ as long as $r>0$ is
smaller than some preassigned positive number. 

In view of the $(1,\mu)$ spectral gap condition,
$-b+\epsilon +(1+\mu)(c+\epsilon)<0$ 
whenever $\epsilon>0$ is sufficiently small. For $\epsilon$
in this class, let 
\[\bar K:=K^3\int_0^\infty e^{(-b+\epsilon+(1+\mu)(c+\epsilon)) t}\,dt.\]

Define the Banach space $\mathcal{H}$ of continuous functions 
from $\mathbb{R}^m$ to the bounded linear maps from $\mathbb{R}^m$ to $\mathbb{R}^{k+\ell}$
that are bounded with respect to the norm
\[
\hnorm{\Phi}:=\sup_{z\in \mathbb{R}^m}\frac{\abs{\Phi(z)}}{\abs{z}^\mu},
\]
note that each element $\Phi$ in this space is such 
that $\Phi(0)=0$, 
and  
note that convergence of a sequence in the $\mathcal{H}$-norm implies uniform
convergence of the sequence on compact subsets of $\mathbb{R}^m$.

For $r>0$ the radius of the
ball containing the support of $F$  and for  $\rho<1/\bar K$, 
let $\mathcal{J}$ denote the metric subspace consisting
of those functions in $\mathcal{H}$ that satisfy the 
following additional properties: 
\begin{equation}\label{ballbound}
\hnorm{\Phi}\le \frac{2\bar K \bar M}{1-\bar K\rho},\qquad
\sup\{\abs{\Phi(z)}:z\in \mathbb{R}^m\}\le \frac{2\bar K \bar M K r}{1-\bar K\rho},
\end{equation}
and
\begin{equation}\label{ballholder}
\abs{\Phi(z_1)-\Phi(z_2)}\le H \abs{z_1-z_2}^\mu
\end{equation}
where 
$H:=(1-\bar K \rho+2 \bar K\bar M K r)(2\bar K \bar M)/(1-\bar K\rho)^2$.
Using estimates similar to estimate~\eqref{pttlim}, it is easy
to prove that $\mathcal{J}$ is a complete metric space.

Moreover, for $\gamma\in\mathcal{D}$ and $\Phi\in \mathcal{J}$, define the bundle map
\[\Lambda(\gamma,\Phi)=(\Gamma(\gamma),\Delta(\gamma,\Phi))\]
where
\begin{eqnarray*}
\Delta(\gamma,\Phi)(z)&:=&
\int_0^\infty e^{t\mathcal{A}}\big(F_\mathcal{X}(\gamma(e^{-t C}z),e^{-t C}z)\Phi(e^{-t C}z)\\
&&\mbox{}+F_z(\gamma(e^{-t C}z),e^{-t C}z)\big) e^{-tC} \,dt
\end{eqnarray*}
and note that the derivative of $\gamma_\infty$ is a formal solution of the
equation $\Phi=\Delta(\gamma_\infty,\Phi)$.

We will prove that $\Lambda$ is a fiber contraction on $\mathcal{D}\times\mathcal{J}$ 
in two main steps: (1) $\Delta(\gamma,\Phi)\in \mathcal{J}$ whenever
$\gamma\in\mathcal{D}$ and $\Phi\in\mathcal{J}$; (2) $\Phi\mapsto \Delta(\gamma,\Phi)$
is a contraction whose contraction constant is uniform for
$\gamma\in\mathcal{D}$.

Because $\abs{e^{tC} z}\le Ke^{-\lambda t} \abs{z}$ for $t\ge 0$,
By substituting $e^{-tC}z$ for $z$ in the hyperbolic estimate
$\abs{e^{tC} z}\le Ke^{-\lambda t} \abs{z}$, we have the inequality
$
\abs{e^{tC}z}\ge \frac{1}{K}\abs{z}
$
for all $t\ge 0$. Hence, if $\abs{z}\ge Kr$, then $\abs{e^{tC}z}\ge r$.

Note that if $(\gamma, \Phi)\in \mathcal{D}\times\mathcal{J}$ and 
$\abs{z}\ge Kr$, then $\Delta(\gamma,\Phi)(z)=0$.
On the other hand, for $\abs{z}< Kr$,
the H\"older estimate~\eqref{hefFz} can be used to obtain
the inequalities
\begin{eqnarray*}
\abs{\Delta(\gamma,\Phi)(z)}&\le& 
\int_0^\infty\abs{e^{t\mathcal{A}}}\abs{e^{-tC}}
\big(F_\mathcal{X}(\gamma(e^{-t C}z),e^{-t C}z)\abs{\Phi(e^{-tC}z)}\\
&&\mbox{} +\abs{F_z(\gamma(e^{-t C}z),e^{-t C}z)}\big)\,dt\\
&\le& 
\int_0^\infty K^2 e^{(c-b+2\epsilon)t}
\big(\norm{DF}\hnorm{\Phi}K^\mu e^{\mu(c+\epsilon) t}\abs{z}^\mu\\
&&\mbox{} +\bar M(\abs{\gamma(e^{-t C}z}^\mu +\abs{e^{-t C}z}^\mu\big)\,dt\\
&\le& 
\big(K^{2+\mu}\int_0^\infty e^{(-b+\epsilon+(1+\mu)(c+\epsilon))t}\,dt\big)
(\norm{DF} \hnorm{\Phi} +2 \bar M)\abs{z}^\mu\,dt\\
&\le& \bar K(\rho\hnorm{\Phi} +2 \bar M)\abs{z}^\mu\\
&\le& \frac{2\bar K \bar M}{1-\bar K\rho}\abs{z}^\mu.
\end{eqnarray*}
It follows that 
$\Delta(\gamma,\Phi)$ satisfies both inequalities
in display~\eqref{ballbound}.

To show that $\Delta(\gamma,\Phi)$ satisfies the 
H\"older condition~\eqref{ballholder}, estimate
\[Q:=\abs{\Delta(\gamma,\Phi)(z_1)-\Delta(\gamma,\Phi)(z_2)}\]
in the obvious manner, add and subtract 
$F_\mathcal{X}(\gamma(e^{-t C}z_1),e^{-t C}z_1)\Phi(e^{-t C}z_2)$,
and then use the triangle inequality 
to obtain the inequality
\begin{eqnarray*}
Q&\le& 
\int_0^\infty e^{(c-b+2\epsilon)t}\big(
\abs{F_\mathcal{X}(\gamma(e^{-tC}z_1),e^{-tC}z_1)}
   \abs{\Phi(e^{-tC}z_1)-\Phi(e^{-tC}z_2)} \\
&&\mbox+\abs{F_\mathcal{X}(\gamma(e^{-tC}z_1),e^{-tC}z_1)
                 -F_\mathcal{X}(\gamma(e^{-tC}z_2),e^{-tC}z_2)}\abs{\Phi(e^{-tC}z_2)}\\
&&\mbox+\abs{F_z(\gamma(e^{-tC}z_1),e^{-tC}z_1)
                 -F_z(\gamma(e^{-tC}z_2),e^{-tC}z_2}
\big)\,dt.
\end{eqnarray*}
The first factor involving $F_\mathcal{X}$ is bounded above by $\rho=\norm{DF}$;
the second factor involving $F_\mathcal{X}$ is bounded above using the H\"older
estimate~\eqref{hefFX} followed the Lipschitz estimate for $\gamma$ 
in the definition of $\mathcal{D}$. 
Likewise, the second factor
involving $\Phi$ is bounded using the supremum in display~\eqref{ballbound}; 
the first factor involving $\Phi$ is bounded using the
H\"older inequality~\eqref{ballholder}. 
The term involving $F_z$ is bounded above using the H\"older
estimate~\eqref{hefFz}.
After some manipulation using the hyperbolic estimate for $e^{-tC}$ and
in view of the definition of $\bar K$,
it follows that $Q$ is bounded above by 
\[
\bar K(\rho H +2\bar M\frac{2\bar K\bar M K r}{1-\bar K \rho}+2\bar M) 
\abs{z_1-z_2}^\mu\le H\abs{z_1-z_2}^\mu.
\]
This completes the proof that $\Delta(\gamma,\Phi)\in \mathcal{J}$.

Finally, by similar estimation procedures, it is easy to show that 
\begin{eqnarray*}
\lefteqn{\norm{\Delta(\gamma_1,\Phi_1)(z)-\Delta(\gamma_2,\Phi_2)(z)}
\le}\hspace{1in}\\
&& \bar K\big(
\rho\hnorm{\Phi_1-\Phi_2}+\bar M(1+\frac{2\bar K\bar M Kr}{1-\bar K \rho}
\bnorm{\gamma_1-\gamma_2}
\big)\abs{z}^\mu.
\end{eqnarray*}
Therefore, $\Delta$ and hence $\Lambda$ is continuous.
Also, because $\bar K\rho<1$,  
the map $\Phi\mapsto \Delta(\gamma,\Phi)$ is a contraction, and
the contraction constant is uniform over $\mathcal{D}$. 
This proves that $\Lambda$ is a fiber contraction on
$\mathcal{D}\times\mathcal{J}$.

Choose $(\gamma_0,\Phi_0)=(0,0)$ and define a sequence in
$\mathcal{D}\times\mathcal{J}$ inductively by
\[
(\gamma_{j+1},\Phi_{j+1}):=\Lambda(\gamma_{j},\Phi_j).
\]
In particular, by the contraction mapping theorem  
the sequence $\{\gamma_{j}\}_{j=0}^\infty$
converges to $\gamma_\infty$. Clearly,  
we have $D\gamma_0=\Phi_0$. Proceeding by induction,
let us assume that $\mathcal{D}\gamma_j=\Phi_j$.  
Since 
\[
\gamma_{j+1}(z)=\Gamma(\gamma_j)(z)=
\int_0^\infty e^{t\mathcal{A}}F(\gamma_j(e^{-t C}z),e^{-t C}z)\,dt
\]
and since differentiation under the integral sign is permitted by
Lemma~\ref{pfcont} because we have the majorization~\eqref{major},
it follows that
\[
D\gamma_{j+1}(z)=\int_0^\infty e^{t\mathcal{A}}
F_\mathcal{X}(\gamma_j(e^{-t C}z),e^{-t C}z) D\gamma_j(e^{-t C}z) e^{-t C}\,dt.
\]
By the induction hypothesis $D\gamma_j(e^{-t C}z)=\Phi_j(e^{-t C}z)$r.
Hence, by the definition of $\Delta$, we have that
$D\gamma_{j+1}=\Phi_{j+1}$. 
Finally, because convergence in the spaces $\mathcal{D}$ and $\mathcal{J}$ 
implies uniform converge on compact sets, 
by using the fiber contraction theorem and the theorem 
from advanced calculus on uniform limits of differentiable functions, 
it follows that $\gamma_\infty$ is $\mathcal{C}^1$ with its derivative
in $\mathcal{J}$. Thus, in fact, $\gamma_\infty\in \mathcal{C}^{1,\mu}$.
\end{proof}


We will now apply Theorem~\ref{th:invman}.
After this is done, the remainder of this section will be devoted
to the proof of the existence and smoothness of the
partially linearizing transformation for the flattened system. 

The mapping given by
\[U=\mathcal{X}-\gamma(z),\]
where the graph of $\gamma$ is the invariant manifold in
Theorem~\ref{th:invman}, transforms
system~\eqref{sys:5} into the form
\begin{eqnarray}\label{sys:6}
\nonumber \dot U&=&\mathcal{A} U +A\gamma(z) +F(U+\gamma(z),z)-D\gamma(z)Cz,\\
\dot z &=& C z.
\end{eqnarray}
Because the graph of $\gamma$ is invariant, $\dot U=0$ whenever $U=0$.   
In particular, 
\[
A\gamma(z)+F(\gamma(z),z)-D\gamma(z)Cz\equiv 0;
\]
and therefore, the system~\eqref{sys:6} has the form
\begin{eqnarray}\label{sys:7}
\nonumber \dot U&=&\mathcal{A} U +\mathcal{F}(U,z),\\         
\dot z &=& C z
\end{eqnarray}
where
\begin{equation}\label{eq:3}
\mathcal{F}(U,z):=F(U+\gamma(z),z)-F(\gamma(z),z).
\end{equation}
\begin{prop}\label{prop:1}
The function $\mathcal{F}$ in system~\eqref{sys:7} is $\mathcal{C}^{1,L,\mu}$ 
on a bounded neighborhood of the origin.
In addition, if $(U,z_i)$ and $(U_i,z_i)$ for $i\in\{1,2\}$ are
in this neighborhood, then 
there are constants $M>0$ and $0\le \vartheta < 1$ such that
\begin{eqnarray}
\nonumber \abs{\mathcal{F}(U_1,z_1)-\mathcal{F}(U_2,z_2)}&\le& 
M(\abs{U_1}+\abs{z_1}+\abs{U_2}+\abs{z_2})(\abs{U_1-U_2}+\abs{z_1-z_2}),\\
\label{F1}&& 
\end{eqnarray}
\begin{eqnarray}
\nonumber \abs{\mathcal{F}_z(U_1,z_1)-\mathcal{F}_z(U_2,z_2)}&\le& 
M(\abs{z_1-z_2}^{\mu}+\abs{U_1-U_2})^{1-\vartheta}(\abs{U_1}
+\abs{U_2})^\vartheta.\\
&&\label{F2}
\end{eqnarray}
\end{prop}
\begin{proof}
The function $\mathcal{F}$ is $\mathcal{C}^1$ because it is the composition of $\mathcal{C}^1$ functions.
Moreover, by definition~\eqref{eq:3} and because
$F(0,0)=DF(0,0)=0$, it is 
clear that  $\mathcal{F}(0,0)=D\mathcal{F}(0,0)=0$.

To show that the partial derivative $\mathcal{F}_U$ is Lipschitz in a neighborhood
of the origin, 
start with the equality $\mathcal{F}_U(U,z)=F_X(U+\gamma(z),z)$, 
note that $F_X$ is Lipschitz, and conclude that there is a 
constant $K>0$ such that
\[
\abs{\mathcal{F}_U(U_1,z_1)-\mathcal{F}_U(U_2,z_2)}\le K (
\abs{U_1-U_2}+\abs{\gamma(z_1)-\gamma(z_2)}+\abs{z_1-z_2}).
\]
By an application of the mean value theorem 
to the $\mathcal{C}^1$  function $\gamma$, it follows that
\[
\abs{\mathcal{F}_U(U_1,z_1)-\mathcal{F}_U(U_2,z_2)}\le K (1+\norm{D\gamma}) 
(\abs{U_1-U_2}+\abs{z_1-z_2}).
\]  
Since $\norm{D\gamma}$ is bounded in some neighborhood of the origin,
we have the desired result.

Similarly, in view of the equality 
\begin{eqnarray*}
\mathcal{F}_z(U,z) &=& F_\mathcal{X}(U+\gamma(z),z) D\gamma(z)-F_\mathcal{X}(\gamma(z),z)D\gamma(z)\\
        &&\mbox{} +F_z(U+\gamma(z),z)-F_z(\gamma(z),z),
\end{eqnarray*}
the properties of $F$, and the triangle law,  there
is a constant $K>0$ such that
\[
\abs{\mathcal{F}_z(U_1,z)-\mathcal{F}_z(U_2,z)}\le K \abs{U_1-U_2}
\]
uniformly for $z$ in a sufficiently small open neighborhood of $z=0$.

Several easy estimates are required to show that $\mathcal{F}_z$ is H\"older 
with respect to its second argument. For this, let
$T:=\abs{\mathcal{F}_z(U,z_1)-\mathcal{F}_z(U,z_2)}$ and note that
\begin{eqnarray*}
T &\le & 
\abs{F_\mathcal{X}(U+\gamma(z_1),z_1) D\gamma(z_1)
-F_\mathcal{X}(U+\gamma(z_2),z_2) D\gamma(z_2)}\\
&&\mbox{}
+\abs{F_\mathcal{X}(\gamma(z_1),z_1) D\gamma(z_1)-F_\mathcal{X}(\gamma(z_2),z_2) D\gamma(z_2)}\\
&& \mbox{} + \abs{F_z(U+\gamma(z_1),z_1)-F_z(U+\gamma(z_2),z_2)}\\
&& \mbox{} +\abs{F_z(\gamma(z_1),z_1)-F_z(\gamma(z_2),z_2)}.
\end{eqnarray*}
Each term on the right-hand side of this inequality is estimated
in turn. The desired result is obtained by combining these results.
We will show how to obtain the H\"older estimate for the first
term; the other estimates are similar.

To estimate the first term $T_1$, 
add and subtract $F_\mathcal{X}(U+\gamma(z_1),z_1) D\gamma(z_2)$
and use the triangle inequality to obtain the upper bound
\begin{eqnarray*}
T_1&\le& \norm{DF}\abs{D\gamma(z_1)-D\gamma(z_2)}\\
 &&\mbox{}+\norm{D\gamma}\abs{F_\mathcal{X}(U+\gamma(z_1),z_1)-F_\mathcal{X}(U+\gamma(z_2),z_2)}.
\end{eqnarray*}
Because $D\gamma$ is 
H\"older and $F_\mathcal{X}$ is Lipschitz, there is a constant $K>0$ such that
\begin{eqnarray*}
T_1 &\le& \norm{DF}K\abs{z_1-z_2}^\mu
+\norm{D\gamma}K(\abs{z_1-z_2}^\mu+\abs{z_1-z_2})\\
&\le&\norm{DF}K\abs{z_1-z_2}^\mu
+\norm{D\gamma}K\abs{z_1-z_2}^\mu(1+\abs{z_1-z_2}^{1-\mu}).
\end{eqnarray*}
Finally, by restricting to a sufficiently small neighborhood of the origin,
it follows that there is a constant $M>0$ such that
\[
T_1 \le M \abs{z_1-z_2}^\mu,
\]
as required.

To prove the estimate~\eqref{F1}, note that
$\mathcal{F}(0,0)=D\mathcal{F}(0,0)=0$ and, by Taylor's formula, 
\[\mathcal{F}(U,z)=\int_0^1(\mathcal{F}_U(tU,tz)U+\mathcal{F}_z(tU,tz)z)\, dt.\]
The desired  estimate for $\abs{\mathcal{F}(U_1,z_1)-\mathcal{F}(U_2,z_2)}$ 
is obtained by subtracting the integral expressions for
$\mathcal{F}(U_1,z_1)$ and $\mathcal{F}(U_2,z_2)$, adding and subtracting
$\mathcal{F}_U(U_1,z_1)U_2$ and $\mathcal{F}_z(U_1,z_1)z_2$, and then by using the triangle 
inequality, the Lipschitz estimates for $F_U$ and $F_z$, and the
observation that $\abs{t}\le 1$ in the integrand.  

For the estimate~\eqref{F2}, note that
the function $U\mapsto \mathcal{F}_z(U,z)$ is (uniformly) Lipschitz and
use the obvious triangle estimate to conclude that there is a constant
$M>0$ such that
\[
\abs{\mathcal{F}_z(U_1,z_1)-F_z(U_2,z_2)}\le M(\abs{U_1}+\abs{U_2}).
\]
Also, a different upper bound for the same quantity is obtained as follows:
\begin{eqnarray*}
\abs{\mathcal{F}_z(U_1,z_1)-F_z(U_2,z_2)}&\le&
\abs{\mathcal{F}_z(U_1,z_1)-F_z(U_1,z_2)}\\
&&\mbox{}+\abs{\mathcal{F}_z(U_1,z_2)-F_z(U_2,z_2)}\\
&\le&
M(\abs{z_1-z_2}^\mu +\abs{U_1-U_2})\\
\end{eqnarray*} 
The desired inequality~\eqref{F2} is obtained from these two
upper bounds and the following proposition:
\emph{Suppose that $a \ge 0$, $b > 0$,  and $c>0$. 
If $a\le \max\{b,c\}$ and $0\le\vartheta<1$, then 
$a\le b^\eta c^{1-\vartheta}$.}
The proposition is clearly valid in case $a=0$. On the other
hand, the case where  $a\ne 0$ is an immediate consequence of the 
inequality
\[
\ln a=\vartheta \ln a+(1-\vartheta) \ln a
\le \vartheta \ln b+(1-\vartheta) \ln c
\le \ln(b^\vartheta c^{1-\vartheta}).
\]
\end{proof}

As mentioned above, the main result of this section concerns
the partial linearization of system~\eqref{sys:7}. 
More precisely, let us fix the previously defined
square matrices $A$, $B$, and $C$, and consider
the system 
\begin{eqnarray}\label{sys:8}
\nonumber \dot x&=&A x +f(x,y,z),\\
\nonumber \dot y&=&B y +g(x,y,z),\\
\dot z &=& C z
\end{eqnarray}
where $\mathcal{F}:=(f,g)$ satisfies all of the properties
mentioned in Proposition~\ref{prop:1}.
Also, we will use the following hypothesis.
\begin{hyp}\label{hy:1}
Let $\Omega$ be an open neighborhood of the origin given
by $\Omega_{xy}\times \Omega_z$ 
as in display~\eqref{omega}, 
\[U, U_1, U_2\in \Omega_{xy},\qquad  z, z_1, z_2\in \Omega_{z},\qquad  
r:=\sup_{(U,z)\in \Omega} \abs{U},\] 
the numbers $K>1$ and $\lambda>0$ are the constants in display~\eqref{est:0}, 
the numbers $M>0$ and $\vartheta$ are the constants
in Proposition~\ref{prop:1}, and $-b$ is the real part of the
eigenvalues of the matrix $B$ in system~\eqref{sys:8}. 
In addition, $\Omega$ is a sufficiently small open set, 
$\epsilon$ is a sufficiently small positive real number, 
and $\vartheta$ is a sufficiently large number in the open
unit interval such that for $\delta:=2K^2Mr+\epsilon$ we have 
$\epsilon<1$, $-b+\delta<-\lambda$, $-\lambda+2 \delta<0$,
$(1-\vartheta)b-\lambda<0$, and $\delta-\lambda+(1-\vartheta)b<0$.
\end{hyp} 
\begin{thm}\label{th:pl}
There is a $\mathcal{C}^{1,L,\mu(1-\theta)}$ near-identity diffeomorphism defined on an open
subset of the origin that transforms 
system~\eqref{sys:8} into system~\eqref{sys:4}.
\end{thm}
\begin{proof}
We will show that there is
a near-identity transformation of the form 
\begin{eqnarray}\label{cov:1}
\nonumber u&=&x,\\
\nonumber v&=&y +\alpha(x,y,z),\\
w&=& z
\end{eqnarray}
that transforms system~\eqref{sys:8} into 
\begin{eqnarray}\label{sys:9}
\nonumber \dot u&=&A u +p(u,v,w),\\
\nonumber \dot v&=&B v, \\
\dot w &=& C w.
\end{eqnarray}

The map~\eqref{cov:1} transforms system~\eqref{sys:8} into a 
system in the form of system~\eqref{sys:9}  if and only if
\begin{eqnarray*}
&&A u + p(u,v,w)=A x+ f(x,y,z),\\  
&&B v =B y+ g(x,y,z)+D\alpha(x,y,z)V(x,y,z)
\end{eqnarray*}
where $V$ denotes the vector field given by
\[
(x,y,z)\mapsto (A x+f(x,y,z), B y+ g(x,y,z), C z).
\]
Hence, to obtain the desired transformation, it suffices to 
show that the (first order partial differential) equation 
\begin{equation}\label{PDE:1}
D\alpha V+g=B\alpha
\end{equation}
has a $\mathcal{C}^{1,L,\mu(1-\theta)}$ solution $\alpha$ with the additional property that
$\alpha(0)=D\alpha(0)=0$, and that $p$ has the properties listed
for system~\eqref{sys:4}.
%on page~\pageref{n16}.

To solve equation~\eqref{PDE:1}, let us seek a
solution along its characteristics; that is, let us seek
a function $\alpha$ such that
\begin{equation}\label{ODE:1}
\frac{d}{dt}\alpha(\varphi_t(x,y,z)) -B\alpha(\varphi_t(x,y,z)) 
=-g(\varphi_t(x,y,z))
\end{equation} 
where $\varphi_t$ is the flow of $V$.
Of course, by simply evaluating at $t=0$, it follows immediately that
such a function $\alpha$ is also a solution of the equation~\eqref{PDE:1}.

By variation of parameters, equation~\eqref{ODE:1} is equivalent to
the differential equation
\[
\frac{d}{dt} e^{-tB}\alpha(\varphi_t(x,y,z))=-e^{-tB} g(\varphi_t(x,y,z)).
\]
Hence, (after evaluation at $t=0$) it suffices to solve the equation
\[ J\alpha = -g\]
where $J$ is the (Lie derivative) operator defined by
\[
(J\alpha)(x,y,z)=\frac{d}{dt} e^{-tB}\alpha(\varphi_t(x,y,z))\big |_{t=0}.
\]
In other words, it suffices to prove that
the operator $J$ is invertible
in a space of functions containing $g$, 
that $\alpha:=J^{-1} g$ is in
$\mathcal{C}^{1,L,\mu(1-\theta)}$, and $\alpha(0)=D\alpha(0)=0$.

Formally, $J$ satisfies the ``Lie derivative property''; that is,
\[
\frac{d}{dt} e^{-tB}\alpha(\varphi_t(x,y,z))=e^{-tB} J \alpha(\varphi_t(x,y,z)).  
\]
Hence,
\begin{equation}\label{eq:4}
e^{-tB}\alpha(\varphi_t(x,y,z))-\alpha(x,y,z)
=\int_0^te^{-sB} J \alpha(\varphi_s(x,y,z))\,ds;
\end{equation}
and therefore, if $\alpha$ is in a function space where 
$\lim_{t\to \infty}\abs{e^{-tB}\alpha(\varphi_t(x,y,z))}=0$, then
the operator $E$ defined by
\begin{equation}\label{E}
(E\alpha)(x,y,z)=-\int_0^\infty e^{-tB} \alpha(\varphi_t(x,y,z))\,dt
\end{equation}
is the inverse of $J$. In fact, by passing to the limit as
$t\to \infty$  on both sides of equation~\eqref{eq:4} it follows
immediately that $E J=I$. The identity $JE=I$ is proved using a direct
computation and the fundamental theorem of calculus as follows:
\begin{eqnarray*}
JE \alpha(x,y,z) &=& -\frac{d}{ds}\int_0^\infty 
               e^{(t+s)B}\alpha(\varphi_{t+s}(x,y,z))\, dt\big |_{s=0}\\
&=& -\frac{d}{ds}\int_s^\infty 
               e^{uB}\alpha(\varphi_{u}(x,y,z))\, du\big |_{s=0}\\
&=& \alpha(x,y,z).
\end{eqnarray*}

Let $\mathcal{B}$ denote the Banach space consisting of all continuous
functions $\alpha:\Omega\to \mathbb{R}^\ell$  with the norm
\[
\bnorm{\alpha}:=\sup_{(x,y)\ne 0} \frac{\abs{\alpha(x,y,z)}}
                 {(\abs{x}+\abs{y})(\abs{x}+\abs{y}+\abs{z})}
\]
where $\Omega$ is an open neighborhood of the origin with compact
closure. We will show that $E$ is a bounded operator on $\mathcal{B}$. 
If $\Omega$ is sufficiently small so that the function $\mathcal{F}=(f,g)$ 
satisfies property~\eqref{F1} in Proposition~\ref{prop:1}, then
$g\in \mathcal{B}$. Thus, the near-identity transformation~\eqref{cov:1}, 
with $\alpha:=E g$,  is a candidate
for the desired transformation that partially linearizes system~\eqref{sys:8}.
The proof is completed by showing that $E g \in  \mathcal{C}^{1,L,\mu(1-\theta)}$.

Because of the special decoupled form of system~\eqref{sys:8} 
and for the purpose of distinguishing solutions from initial conditions, 
it is convenient to recast system~\eqref{sys:8} in the form
\begin{eqnarray}\label{sys:10}
\nonumber \dot{\mathcal{U}} &=& \mathcal{A}\mathcal{U}+\mathcal{F}(\mathcal{U},\zeta),\\
          \dot \zeta &=& C\zeta
\end{eqnarray}
so that we can write $t\mapsto (\mathcal{U}(t),\zeta(t))$ for the solution 
with the initial condition $(\mathcal{U}(0),\zeta(0))=(U,z)$. 
The next proposition states the growth estimates for the components of
the flow $\varphi_t$, its partial derivatives, and certain
differences of its components and partial derivatives that will be used
to prove that $E$ is a bounded operator on $\mathcal{B}$ and 
$E g\in \mathcal{C}^{1,L,\mu(1-\theta)}$. 
\begin{prop}\label{prop:est}
Suppose that for $i\in\{1,2\}$ the function
$t\mapsto (\mathcal{U}_i(t),\zeta_i(t))$ is the
solution of system~\eqref{sys:10} such that
$(\mathcal{U}_i(0),\zeta_i(0))= (U_i,z_i)$ and $t\mapsto (\mathcal{U}(t),\zeta(t))$ is
the solution such that $(\mathcal{U}(0),\zeta(0))= (U,z)$. 
If Hypothesis~\ref{hy:1} holds, then 
there are constants $\mathcal{K}>0$ and $\kappa>0$
such that
\begin{eqnarray}
\label{est:1}\abs{\zeta(t)}&\le& K e^{-\lambda t}\,\abs{z}, \\
\label{est:2}\abs{\mathcal{U}(t)}&\le& K e^{(\delta-b) t}\,\abs{U},\\
\label{est:12c}\abs{\mathcal{U}(t)}+\abs{\zeta(t)}&\le& 
              K e^{-\lambda t}\,(\abs{U}+\abs{z}),\\
\label{est:3}\abs{\zeta_1(t)-\zeta_2(t)}&\le& K e^{-\lambda t}\,\abs{z_1-z_2},\\
\label{est:4}\abs{\mathcal{U}_1(t)-\mathcal{U}_2(t)}&\le& 
              \mathcal{K} e^{(\delta-\lambda) t}\,(\abs{U_1-U_2}+\abs{z_1-z_2}),\\ 
\label{est:10}\abs{\mathcal{U}_U(t)}&\le& K e^{(\delta-b)t},\\
\label{est:11}\abs{\mathcal{U}_z(t)}&\le& 
              \mathcal{K} e^{(\delta-b)t}\,\abs{U},\\
\label{est:12}\abs{\mathcal{U}_{1U}(t)-\mathcal{U}_{2U}(t)}&\le& 
              \kappa e^{(\delta-b)t}\,(\abs{U_1-U_2}+\abs{z_1-z_2}).
\end{eqnarray}
Moreover, if $z_1=z_2$, then
\begin{eqnarray}
\label{est:13a}\abs{\mathcal{U}_{1}(t)-\mathcal{U}_{2}(t)}&\le& 
              K e^{(\delta-b)t}\,\abs{U_1-U_2},\\
\label{est:13}\abs{\mathcal{U}_{1z}(t)-\mathcal{U}_{2z}(t)}&\le& 
              \kappa e^{(\delta-b)t}\,\abs{U_1-U_2};
\end{eqnarray}
and, if $U_1=U_2$, then
\begin{equation}
\label{est:14}\abs{\mathcal{U}_{1z}(t)-\mathcal{U}_{2z}(t)} \le  
              \kappa e^{(\delta-b)t}\,\abs{z_1-z_2}^{\mu(1-\vartheta)}.
\end{equation}
\end{prop}
\begin{proof}
By the definition of $\delta$ and the inequality $K>1$, 
we have the inequalities  $KMr+\epsilon<\delta$ and $2KMr+\epsilon<\delta$.
(These inequalities are used so that 
the single quantity $\delta-b$, rather than three different exponents,
appears in the statement of the proposition.)

The estimate~\eqref{est:1}
follows immediately by solving the differential equation 
$\dot \zeta = C\zeta$ and using the hyperbolic estimate~\eqref{est:0}.
To prove the inequality~\eqref{est:2}, start with the 
variation of parameters formula 
\[
\mathcal{U}(t)=e^{t\mathcal{A}}\, U+\int_0^t e^{(t-s)\mathcal{A}}\,\mathcal{F}(\mathcal{U}(s),\zeta(s))\, ds,
\]
use the hyperbolic estimates to obtain the inequality
\[
\abs{\mathcal{U}(t)}\le Ke^{-(b-\epsilon)t}\, \abs{U}
         +\int_0^t K e^{-(b-\epsilon)(t-s)}\,\abs{\mathcal{F}(\mathcal{U}(s),\zeta(s))}\, ds, 
\]
and then use the estimate~\eqref{F1} to obtain the inequality
\[
\abs{\mathcal{U}(t)}\le Ke^{-(b-\epsilon)t}\, \abs{U}
         +\int_0^t rMK e^{-(b-\epsilon)(t-s)}\,\abs{\mathcal{U}(s)}\, ds. 
\]
Rearrange this last inequality to the equivalent form
\[
 e^{(b-\epsilon)t}\, \abs{\mathcal{U}(t)}\le K \abs{U}
         +\int_0^t rMK e^{(b-\epsilon)s}\,\abs{\mathcal{U}(s)}\, ds         
\]
and apply Gronwall's inequality to show 
\[ 
e^{(b-\epsilon)t}\, \abs{\mathcal{U}(t)}\le K e^{rMK t}\,\abs{U},
\]
an estimate that is equivalent to the desired result.

The inequality~\eqref{est:3} is easy to prove and
inequality~\eqref{est:12c} is a simple 
corollary of estimates~\eqref{est:1} and~\eqref{est:2}. 

To begin the proof for estimates~\eqref{est:4} and~\eqref{est:13a}, use
variation of parameters to obtain the inequality
\begin{eqnarray*}
\abs{\mathcal{U}_1(t)-\mathcal{U}_2(t)}&\le& Ke^{-(b-\epsilon) t}\,\abs{U_1-U_2}\\
 &&\mbox{}+\int_0^t KMe^{-(b-\epsilon)(t-s)}\,
\abs{\mathcal{F}(\mathcal{U}_1(t),\zeta_1(t))-\mathcal{F}(\mathcal{U}_2(t),\zeta_2(t))}\,ds.
\end{eqnarray*}
For estimate~\eqref{est:4} use the
inequalities~\eqref{F1} and $\delta-b<-\lambda$ to obtain the
upper bound
\[\abs{\mathcal{F}(\mathcal{U}_1(t),\zeta_1(t))-\mathcal{F}(\mathcal{U}_2(t),\zeta_2(t))}
\le 2MKr(\abs{\mathcal{U}_1(t)-\mathcal{U}_2(t)}+\abs{\zeta_1(t)-\zeta_2(t)}).
\]
Then, using this inequality, the estimates~\eqref{est:3}, and the
inequality
$-(b-\epsilon)\le -(\lambda-\epsilon)$, it is easy to see that 
\begin{eqnarray*}
e^{(\lambda-\epsilon)t}\,\abs{\mathcal{U}_1(t)-\mathcal{U}_2(t)} &\le &
K\abs{U_1-U_2}+\frac{2K^3Mr}{\epsilon}\abs{z_1-z_2}\\
&&\mbox{}
+\int_0^t e^{(\lambda-\epsilon)s} 2K^2Mr \abs{\mathcal{U}_1(s)-\mathcal{U}_2(s)}\,ds.
\end{eqnarray*}
The desired result follows by an application of Gronwall's inequality.
The proof of estimate~\eqref{est:13a} is similar. The only difference
is that the inequality
\[
\abs{\mathcal{F}(\mathcal{U}_1(t),\zeta(t))-\mathcal{F}(\mathcal{U}_2(t),\zeta(t))} \le 2MKr\abs{\mathcal{U}_1(t)-\mathcal{U}_2(t)}
\]
is used instead of inequality~\eqref{est:3}.

To obtain the bounds for the partial derivatives of solutions
with respect to the space variables,
note that the function $t\mapsto \mathcal{U}_U(t)$ is the solution of
the variational initial value problem
\begin{equation}\label{vode:1}
\nonumber \dot\omega =\mathcal{A} \omega+\mathcal{F}_U(\mathcal{U}(t),\zeta(t)) \omega, \qquad
\omega(0)= I
\end{equation}
whereas $t\mapsto \mathcal{U}_z(t)$ is the solution of
the variational initial value problem
\begin{equation}\label{vode:2}
\nonumber \dot\omega =\mathcal{A} 
\omega+\mathcal{F}_U(\mathcal{U}(t),\zeta(t))\omega+\mathcal{F}_z(\mathcal{U}(t),\zeta(t))e^{tC}, \qquad                
\omega(0)= 0. 
\end{equation}

The proofs of the estimates~\eqref{est:10} and~\eqref{est:11} are 
similar to the proof of estimate~\ref{est:2}. 
For~\eqref{est:10}, note that $\mathcal{F}_U$ is Lipschitz
and use the growth estimates
for $\abs{\mathcal{U}(t)}$ and $\abs{\zeta(t)}$  to obtain the inequality
$\abs{\mathcal{F}_U(\mathcal{U}(t),\zeta(t))}\le M r$. 
For estimate~\eqref{est:11} use
variation of parameters,  bound
the term containing $\mathcal{F}_z$ using the Lipschitz estimate, 
evaluate the resulting integral, and then apply Gronwall's inequality.

To prove estimate~\eqref{est:12}, subtract the two corresponding variational
equations, add and subtract $\mathcal{F}_U(\mathcal{U}_1,\zeta_1)\mathcal{U}_{2U}$, and
use variation of parameters to obtain the inequality
\begin{eqnarray*}
\abs{\mathcal{U}_{1U}-\mathcal{U}_{2U}} &\le& 
\int_0^t Ke^{-(b-\epsilon)(t-s)}\,\abs{\mathcal{F}_U(\mathcal{U}_1,\zeta_1)-\mathcal{F}_U(\mathcal{U}_2,\zeta_2)}
\abs{\mathcal{U}_{2U}}\,ds\\
&&+ \int_0^t Ke^{-(b-\epsilon)(t-s)}\,\abs{\mathcal{F}_U(\mathcal{U}_1,\zeta_1)} 
\abs{\mathcal{U}_{1U}-\mathcal{U}_{2U}}\,ds.
\end{eqnarray*}
The second integral is bounded by using the Lipschitz estimate
for $\mathcal{F}_U$, inequality~\eqref{est:12c}, and the diameter of $\Omega$.
For the first integral, a suitable bound is obtained by
again using the Lipschitz estimate
for $\mathcal{F}_U$ followed by estimates~\eqref{est:3} and~\eqref{est:4}, and by
using estimate~\eqref{est:10}.  After the replacement
of the factor $e^{-\lambda s}$ (obtained from~\eqref{est:3}) 
by $e^{(\delta-\lambda) s}$, multiplication of both sides of 
the inequality by $e^{(b-\epsilon)t}$, and an integration,  
it follows that
\begin{eqnarray*}
e^{(b-\epsilon) t}\,\abs{\mathcal{U}_{1U}-\mathcal{U}_{2U}} &\le& 
\frac{K\mathcal{K}(K+\mathcal{K})M}{\lambda-2\delta-\epsilon}(\abs{U_1-U_2}+\abs{z_1-z_2})\\
&&+ \int_0^t K^2 M r e^{(b-\epsilon)s}\,\abs{\mathcal{U}_{1U}-\mathcal{U}_{2U}}\,ds. 
\end{eqnarray*}
The desired result is obtained by an application of Gronwall's inequality.

For the proof of estimate~\eqref{est:13} subtract the two solutions
of the appropriate variational equation, add and subtract 
$\mathcal{F}_U(\mathcal{U}_1,\zeta)\mathcal{U}_{2z}$ and use variation of parameters. After the
Lipschitz estimates are employed, as usual, use inequality~\eqref{est:12c}
to estimate $\abs{\mathcal{U}_1}+\abs{\zeta}$ and 
use inequality~\eqref{est:13a}
to estimate $\abs{\mathcal{U}_1-\mathcal{U}_2}$. 

The proof of estimate~\eqref{est:14} again uses the same basic
strategy, that is, variation of parameters and Gronwall's inequality; 
but several estimates are required before Gronwall's inequality can
be applied. First, by the usual method, it is easy to see that
\begin{eqnarray*}
e^{(b-\epsilon) t}\,\abs{\mathcal{U}_{1z}(t)-\mathcal{U}_{2z}(t)}
&\le & \int_0^t Ke^{(b-\epsilon) s}\,\abs{\mathcal{F}_U(\mathcal{U}_1,\zeta_1)} 
         \abs{\mathcal{U}_{1z}-\mathcal{U}_{2z}}\,ds\\
&&\mbox{} +\int_0^t Ke^{(b-\epsilon) s}\,\abs{\mathcal{F}_U(\mathcal{U}_1,\zeta_1)-\mathcal{F}_U(\mathcal{U}_2,\zeta_2)} 
         \abs{\mathcal{U}_{2z}}\,ds\\
&&\mbox{} +\int_0^t K^2e^{(b-\epsilon-\lambda) s}\,
\abs{\mathcal{F}_z(\mathcal{U}_1,\zeta_1)-\mathcal{F}_z(\mathcal{U}_2,\zeta_2)}\,ds\\
\end{eqnarray*}
To complete the proof, use Lipschitz estimates 
for the terms involving partial derivatives
of $\mathcal{F}$ in the first two integrals, and use the estimate~\eqref{F2} in
the third integral. Next, use the obvious estimates
for the terms involving $\mathcal{U}_i$ and $\zeta_i$; but, for the application
of inequality~\eqref{est:4} in the second and third integrals, use
the hypothesis that $U_1=U_2$. 
Because $1-\vartheta$ is such
that $(1-\vartheta)b-\lambda<0$,
the third integral converges as $t\to \infty$.
By this observation together with some easy estimates and manipulations,
the second integral is bounded above by
a constant multiple of $\abs{z_1-z_2}^{\mu(1-\vartheta)}$.
Because the second integral converges as $t\to \infty$,
it is easy to show that the second integral
is bounded above by a constant multiple of $\abs{z_1-z_2}$,
a quantity that is itself bounded above by 
$r^{1-\mu(1-\vartheta)}\, \abs{z_1-z_2}^{\mu(1-\vartheta)}$ where $r$ is the
radius of $\Omega$. After the indicated estimates are made, 
the desired result follows in
the usual manner by an application of Gronwall's inequality.
\end{proof}

Let us return to the analysis of the
operator $E$ defined in display~\eqref{E}. 
We will show that if Hypothesis~\ref{hy:1} is satisfied
and $\alpha\in \mathcal{B}$, then $E \alpha\in \mathcal{B}$. The fundamental idea
here is to apply the Weierstrass $M$-test in the form stated in
Lemma~\ref{pfcont}. In particular, we will estimate the growth of
the integrand in the integral representation of $E$. 

Using the notation defined above, note first that
$\varphi_t(x,y,z)=(\mathcal{U}(t), \zeta(t))$. Also, recall that
the matrix $-B$ is in real Jordan canonical form; that is,
\[
-B=b I+B^{\text{rot}}+B^{\text{nil}}
\]
where the second summand has some diagonal or super-diagonal
blocks of $2\times 2$ infinitesimal rotation matrices of the form
\[
\left(\begin{array}{cc} 0& -\beta\\ \beta&0 \end{array}\right)
\]
where $\beta\ne 0$,
and the third summand is a nilpotent matrix whose $\ell$th power vanishes.
The summands in this decomposition pairwise commute; and therefore,
\[
e^{-tB}=e^{b t} Q(t)
\]
where the components of the
matrix $Q(t)$ are (real) linear combinations of functions given by
\[ q_1(t),\qquad q_2(t) \sin \beta t, \qquad  q_3(t) \cos \beta t\]
where $q_i$, for $i\in\{1,2,3\}$, is a 
polynomial of degree at most $\ell-1$.
It follows that there is a positive universal constant
$\upsilon$ such that 
\[
|e^{-t B}|\le e^{b t}\abs{Q(t)}\le \upsilon e^{b t} (1+|t|^{\ell-1})
\]
for all $t\in\mathbb{R}$. 

The integrand of $E$ is bounded above as follows:
\begin{eqnarray}\label{in:2} 
\abs{e^{-tB}\,\alpha(\mathcal{U}(t),\zeta(t))} &\le & 
e^{bt}\,\abs{Q(t)} \bnorm{\alpha} (\abs{\mathcal{U}(t)}+\abs{\zeta(t)})\abs{\mathcal{U}(t)}.
\end{eqnarray}
In view of estimates~\eqref{est:1} and~\eqref{est:12c}, we have
the inequality
\[
\abs{e^{-tB}\,\alpha(\mathcal{U}(t),\zeta(t))}  \le  
 K^2e^{(\delta-\lambda)t}\,\abs{Q(t)} \bnorm{\alpha} (\abs{x}+\abs{y}+\abs{z})
(\abs{x}+\abs{y}).
\]
Because $\abs{Q(t)}$ has polynomial growth and $\delta-b<0$,
\begin{equation}\label{N}
N:=\sup_{t\ge 0} e^{(\delta-b) t/2}\abs{Q(t)}<\infty.
\end{equation}
Hence, we have that
\[
\abs{e^{-tB}\,\alpha(\mathcal{U}(t),\zeta(t))}  \le  
 K^2 N e^{(\delta-\lambda)t/2}\,\bnorm{\alpha} (\abs{x}+\abs{y}+\abs{z})
(\abs{x}+\abs{y})       
\]
and
\[
\int_0^\infty \abs{e^{-tB}\,\alpha(\varphi_t(x,y,z))}\,dt 
\le \frac{2K^2 N}{\lambda-\delta}\bnorm{\alpha}
(\abs{x}+\abs{y}+\abs{z})(\abs{x}+\abs{y});
\]
and therefore,
$E\alpha$ is continuous in $\Omega$ and
\[\bnorm{E}\le \frac{2 K^2 N}{\lambda-\delta}.\]
As a result, 
the equation $J\alpha=-g$ has a unique solution $\alpha\in \mathcal{B}$, namely,
\[\alpha(x,y,z)=\int_0^\infty e^{-tB}\,g(\varphi_t(x,y,z))\,dt.\]
We will show that $\alpha\in \mathcal{C}^{1,L,\mu(1-\theta)}$. 

In view of the form of system~\eqref{sys:10},
to prove that $\alpha\in \mathcal{C}^1$ 
it suffices to demonstrate that 
the partial derivatives of $\alpha$ 
with respect to $U:=(x,y)$ and $z$ are both $\mathcal{C}^1$, a fact
that we will show by using Lemma~\ref{pfcont}.

The solution $t\mapsto (\mathcal{U}(t),\zeta(t))$ 
with initial condition $(U,z)$ is more precisely written
in the form $t\mapsto (\mathcal{U}(t,U,z),\zeta(t,z))$ where the dependence
on the initial conditions is explicit. Although
this dependence is suppressed in most of the formulas that follow,
let us note here that $\zeta$ does not depend on $U$. At any rate,
the partial derivatives of $\alpha$ are given formally by
\begin{eqnarray}
\label{eq:5}\alpha_U(U,z)&=&\int_0^\infty e^{-tB}\,
              g_U(\mathcal{U}(t),\zeta(t))\mathcal{U}_U(t)\,dt,\\
\label{eq:6}\alpha_z(U,z)&=&\int_0^\infty e^{-tB}\,
              (g_U(\mathcal{U}(t),\zeta(t))\mathcal{U}_z(t)+g_z(\mathcal{U}(t),\zeta(t))e^{tC})\,dt.
\end{eqnarray}
 
To prove that $\alpha_U$ is $\mathcal{C}^1$,
use  estimate~\ref{est:10}, the definition~\eqref{N} of $N$,
and note that $g_U$ is Lipschitz to show that
the integrand of equation~\eqref{eq:5} is majorized  by
\begin{eqnarray*}
\abs{e^{-tB}\, g_U(\mathcal{U}(t),\zeta(t))\mathcal{U}_U(t)}&\le& M e^{\delta t}\,\abs{Q(t)}
                                       (\abs{\mathcal{U}(t)}+\abs{\zeta(t)})\\
&\le& K M N e^{(\delta-\lambda)t/2}\,(\abs{U}+\abs{z}).
\end{eqnarray*}
By an application of
Lemma~\ref{pfcont}, $\alpha_U$ is $\mathcal{C}^1$. 
Moreover, because $\abs{\alpha_U(U,z)}$ is bounded above
by a constant multiple of $\abs{U}+\abs{z}$, we also have that 
$\alpha_U(0,0)=0$. 

The proofs to show that $\alpha_z$ is $\mathcal{C}^1$ and 
$\alpha_z(0,0)=0$ are similar. For example, 
the integrand of equation~\eqref{eq:6} is majorized by
\[
K^2 M N e^{(\delta-\lambda) t/2}\, \big(\frac{K M}{\lambda-\delta+\epsilon}
(\abs{U}+\abs{z})\abs{U}+\abs{U}\big).
\]

To prove that $\alpha_U$ is Lipschitz, let us note first that
by adding and subtracting $g_U(\mathcal{U}_1,\zeta_1) \mathcal{U}_{2U}$ 
and an application of the triangle law, we have the inequality
\begin{eqnarray*}
\abs{\alpha_U(U_1,z_1)-\alpha_U(U_2,z_2)} 
&\le & \int_0^\infty\abs{e^{-tB}}\, 
  \abs{g_U(\mathcal{U}_1,\zeta_1) \mathcal{U}_{1U}-g_U(\mathcal{U}_2,\zeta_2) \mathcal{U}_{2U}}\,dt\\
&\le & \int_0^\infty e^{bt}\,\abs{Q(t)}
\big( 
\abs{g_U(\mathcal{U}_1,\zeta_1)}\,\abs{\mathcal{U}_{1U}-\mathcal{U}_{2U}}\\
&&\mbox{}+\abs{g_U(\mathcal{U}_1,\zeta_1)-g_U(\mathcal{U}_2,\zeta_2)}\,\abs{\mathcal{U}_{2U}}\big )\,dt.
\end{eqnarray*} 
By using  the Lipschitz estimate for $g_U$ inherited from
$\mathcal{F}_U$, 
the obvious choices of the
inequalities in Proposition~\ref{prop:est},
and an easy computation, it follows that
the integrand in the above inequality is majorized
up to a constant multiple by
\[e^{(2\delta-\lambda) t/2}\, (\abs{U_1-U_2}+\abs{z_1-z_2}).\]
There are two key points in the proof of this fact: 
the estimates~\eqref{est:10}
and~\eqref{est:12} both contain the exponential decay 
factor $e^{-bt}$ to compensate for the growth of $e^{bt}$; and, 
after the cancelation of these factors, 
the majorizing integrand still contains the exponential
factor $e^{(2\delta-\lambda) t}$, the presence of which ensures that
the majorizing integral converges even though the factor $\abs{Q(t)}$
has polynomial growth.
After the majorization is established, 
the desired result follows from Lemma~\ref{pfcont}.

The proof that the function $U\mapsto \alpha_z(U,z)$ is (uniformly)
Lipschitz is similar to the proof that $\alpha_U$ is Lipschitz.
As before,  
by adding and subtracting $g_U(\mathcal{U}_1,\zeta) \mathcal{U}_{2z}$, it is
easy to obtain the basic estimate
\begin{eqnarray*}
\abs{\alpha_z(U_1,z)-\alpha_z(U_2,z)}
&\le & \int_0^\infty e^{bt}\,\abs{Q(t)}
\big(
\abs{g_U(\mathcal{U}_1,\zeta)}\,\abs{\mathcal{U}_{1z}-\mathcal{U}_{2z}}\\
&&\mbox{}+\abs{g_U(\mathcal{U}_1,\zeta)-g_U(\mathcal{U}_2,\zeta)}\,\abs{\mathcal{U}_{2z}}\\
&&\mbox{}+\abs{g_z(\mathcal{U}_1,\zeta)-g_z(\mathcal{U}_2,\zeta)}Ke^{-\lambda t}\big )\,dt.
\end{eqnarray*}
By first applying the Lipschitz estimates and then the 
inequalities~\eqref{est:3}, \eqref{est:13a}, and~\eqref{est:13},
the growth factor $e^{bt}$ is again canceled; and, up to
a constant multiple, the integrand is majorized by the integrable function
\[t\mapsto e^{(\delta-\lambda) t}\abs{Q(t)}\abs{U_1-U_2}.\] 

Finally, we will show that the function $z\mapsto \alpha_z(U,z)$ is
(uniformly) H\"older. 
In this case $U_1=U_2$. But this equality does not imply that 
$\mathcal{U}_1=\mathcal{U}_2$. Thus, the basic estimate in this case is given by
\begin{eqnarray*}
\abs{\alpha_z(U,z_1)-\alpha_z(U,z_2)}    
&\le & \int_0^\infty e^{bt}\,\abs{Q(t)}
\big(
\abs{g_U(\mathcal{U}_1,\zeta_1)}\,\abs{\mathcal{U}_{1z}-\mathcal{U}_{2z}}\\  
&&\mbox{}+\abs{g_U(\mathcal{U}_1,\zeta_1)-g_U(\mathcal{U}_2,\zeta_2)}\,\abs{\mathcal{U}_{2z}}\\
&&\mbox{}+\abs{g_z(\mathcal{U}_1,\zeta_1)-g_z(\mathcal{U}_2,\zeta_2)}Ke^{-\lambda t}\big )\,dt.
\end{eqnarray*}
Use Lipschitz estimates for the partial derivatives in the first
two terms in the (expanded) integrand and the estimate~\eqref{F2} for
the third term. Then, use the estimates~\eqref{est:12c},
\eqref{est:3}, and~\eqref{est:4} to show that 
the first term is majorized, up to a constant multiple, by 
\[e^{(\delta-\lambda)t} \abs{Q(t)}\abs{z_1-z_2}^{\mu(1-\vartheta)}.\]
The second term is bounded above by a similar function that
has the decay rate $2\delta-\lambda$. 
After some obvious manipulation, the third term
is majorized by a similar term with
the decay rate $\delta \vartheta-\lambda+b(1-\vartheta)$. 
By Hypothesis~\ref{hy:1}, this number is negative; and therefore, 
the integrand is majorized
by an integrable function multiplied by
the required factor $\abs{z_1-z_2}^{\mu(1-\vartheta)}$.

The final step of the proof is to show that the function
$p$ in system~\eqref{sys:9} has the properties 
listed for system~\eqref{sys:4}.
There is an essential observation: $p$ is obtained
by composing $f$ in system~\eqref{sys:8} with the inverse of
the transformation~\eqref{cov:1}. In particular, no derivatives
of $\alpha$ are used to define $p$.  More precisely, 
the inverse transformation has the form
\begin{equation} \label{linvt}
x=u,\qquad y=v+\beta(u,v,w),\qquad w=z
\end{equation}
and 
\[ p(u,v,w)=f(u, v+\beta(u,v,w),w).\]
Hence it is clear that $p(0,0,0)=0$ and
$Dp(0,0,0)=0$. 

Note that $\beta$ is $\mathcal{C}^1$ and therefore Lipschitz in a
bounded neighborhood of the origin. 
Because $\beta(u,v,w)=-\alpha(x,y,z)$, it follows that
$\beta_u=-\alpha_x+\alpha_y\beta_u$. 
By using a Neumann series representation, 
note that $I+\alpha_y$ is invertible (with Lipschitz inverse) if
$\alpha_y$ is restricted                               
to a sufficiently small neighborhood of the origin. Hence,
we have that
$\beta_u=-(I+\alpha_y)^{-1}\alpha_x$ and 
\[p_u=f_x+f_y \beta_u=f_x-f_y(I+\alpha_y)^{-1}\alpha_x\]
where the right-hand side is to viewed as a function composed
with the inverse transformation~\eqref{linvt}.
Moreover, since
sums, products, and compositions of bounded Lipschitz (respectively, H\"older) 
maps are bounded Lipschitz (respectively, H\"older) maps,
$p_u$ is Lipschitz.
Similarly,
\[p_v=f_y(I-(I+\alpha_y)^{-1}\alpha_y),\]
and it follows that $p_v$ is (uniformly) Lipschitz. Hence,
$p_v$ is (uniformly) Lipschitz with respect to its first argument and
(uniformly) H\"older with respect to its second and third arguments.  
Finally, we have that
\[p_w=-f_y(I+\alpha_y)^{-1}\alpha_w+f_z.\]                  
It follows that $p_w$ is (uniformly) Lipschitz with respect to its 
first and second arguments and (uniformly) H\"older with respect
to its third argument. Hence,  $p_w$ is (uniformly) Lipschitz
with respect to its first argument and  (uniformly) H\"older with respect  
to its second and third arguments.
\end{proof}

To complete the proof of Theorem~\ref{rhsth}, we will show
that it suffices to choose the H\"older exponent in the statement
of the theorem less than the H\"older spectral exponent of $DX(0)$.
Note that two conditions have been imposed on the H\"older
exponent $\mu(1-\vartheta)$ in Theorem~\ref{th:pl}:
the \emph{$(1,\mu)$ spectral gap condition} 
$(1+\mu)c<b$ (or $\mu<(b-c)/c$) and the inequality 
$b(1-\vartheta)-\lambda<0$ (or $1-\theta<\lambda/b$) in
Hypothesis~\ref{hy:1}. Because the real parts of the eigenvalues
of $C$ lie in the interval $[-c,-d]$ and $\lambda$ can be chosen
anywhere in the interval $(0,d)$, 
the numbers $\mu$ and $\vartheta$ can be chosen so that 
the positive quantity
\[
\frac{(b-c)d}{bc}-\mu(1-\vartheta)
\] 
is as small as we wish. We will choose $\mu(1-\vartheta)$,
the H\"older exponent, as large as possible under the constraints
imposed by the spectral gap condition and the inequality
\[
\mu(1-\vartheta)<\frac{(b-c)d}{bc}.
\]

Suppose that the real parts of the eigenvalues of $A:=DX(0)$ 
are as in display~\eqref{hse}.
At the first step of the finite induction on the dimension of the
``unlinearized'' part of the system, we artificially introduce
a scalar equation $\dot z=-cz$ where $0<c<b_1$. In this case
$c=d$, and the exponent $\mu(1-\vartheta)$ can be chosen to be
as close as we like to the number $(b_1-c)/b_1$. At the second
step, the real parts of the eigenvalues of the new matrix $C$ are
in the interval $[-b_1,-c]$, the new exponent can be
chosen as close as we like to 
the minimum of the numbers $(b_1-c)/b_1$ and $(b_2-b_1) c/(b_1b_2)$,
and so on. Hence, the H\"older exponent in Theorem~\ref{rhsth}
can be chosen as close as we like to
\[
\mbox{HSE}:=\max_{0<c<b_1}\min\Big\{
\frac{b_1-c}{b_1}, \frac{(b_2-b_1)c}{b_2b_1},\frac{(b_3-b_2)c}{b_3b_2},\ldots,
\frac{(b_N-b_{N-1})c}{b_Nb_{N-1}}
\Big\}.
\]
By treating the rational expressions as linear functions of $c$ defined
on the interval $[0,b_1]$, it is easy to show that 
$\mbox{HSE}$ is the H\"older spectral exponent for $DX(0)$.
This completes the proof of Theorem~\ref{rhsth}.
\subsubsection{Smooth Linearization on the Line}
By Theorem~\ref{rhsth}, a $\mathcal{C}^{1,1}$ vector field on the line  
is $\mathcal{C}^{1,\mu}$ linearizable at a hyperbolic rest point. 
But in this
case a stronger result is true (see~\cite{stern}).
\begin{thm}\label{th:1d}
If $X$ is a $\mathcal{C}^{1,1}$ vector field on $\mathbb{R}^1$ with
a hyperbolic rest point at the origin, then $X$ is locally
$\mathcal{C}^{1,1}$ linearizable at the origin by a near-identity transformation. 
If, in addition,
$X$ is $\mathcal{C}^k$ with $k>1$, then there is a
$\mathcal{C}^k$ near-identity linearizing transformation.
\end{thm} 
\begin{proof}
Near the origin, the vector field has the form
$X(x)=-ax+f(x)$ where
$a\ne 0$ and $f$ is a $\mathcal{C}^{1,1}$-function with $f(0)=f'(0)=0$. 
Let us assume that $a>0$. The proof for the case $a<0$ is similar.

We seek a linearizing transformation given by
\[
u=x+\alpha(x)
\]
where $\alpha(0)=\alpha'(0)=0$. 
Clearly, it suffices to prove that
\begin{equation}\label{1d:1}
\alpha'(x)(-ax+f(x))+a\alpha(x)=-f(x)
\end{equation}
for all $x$ in some open neighborhood of the origin.

Let $\phi_t$ denote the flow of $X$ and (in the usual manner)
note that if $\alpha \in \mathcal{C}^1$ is such that 
\[
\frac{d}{dt} \alpha(\phi_t(x))+a\alpha(\phi_t(x))=-f(\phi_t(x)),
\]
then $\alpha$ satisfies the identity~\eqref{1d:1}.
Using variation of constants, it follows that
$\alpha$ is given formally by
\begin{equation}\label{1d:0}
\alpha(x)=\int_0^\infty e^{at}f(\phi_t(x))\,dt.
\end{equation}
We will show that this formal expression defines a
sufficiently smooth choice for $\alpha$.

Using the assumption that $f'$ is Lipschitz and Taylor's theorem,
there is a constant $M>0$ such that
\begin{equation}\label{1d:2}
\abs{f(x)}\le M\abs{x}^2.
\end{equation}
Also, the solution $t\mapsto \mathcal{X}(t):=\phi_t(x)$ is bounded if
$x$ is sufficiently small; in fact, for $0<r<a/(2M)$, we have that
\[
\abs{\mathcal{X}(t)}\le r\qquad \mbox{whenever}\qquad  \abs{x}<r.
\]
To see this, write $X(x)=x(-a+(f(x)/x^2) x)$, 
use the inequality~\eqref{1d:2}, and note the direction of $X(x)$
for $x$ in the given interval.

By variation of constants and the inequality~\eqref{1d:2},
we have that
\[
e^{at}\abs{\mathcal{X}(t)}\le \abs{x}+\int_0^t e^{as}Mr\abs{\mathcal{X}(s)}\,ds.
\] 
Hence, by Gronwall's inequality, we have the estimate
\begin{equation}\label{1d:7}
\abs{\mathcal{X}(t)}\le \abs{x} e^{(Mr-a)t}.
\end{equation}

Note that the function given by
$t\mapsto \mathcal{X}_x(t)$ is the solution of the variational
initial value problem
\[
\dot w=-a w+f'(\mathcal{X}(t))w,\qquad w(0)=1,
\]
and in case $f\in\mathcal{C}^2$,  the function
given by $t\mapsto \mathcal{X}_{xx}(t)$ is the solution of
\[
\dot z=-a z+f'(\mathcal{X}(t))z+f''(\mathcal{X}(t))w^2(t),\qquad z(0)=0.
\]
Their are similar variational equations for the higher order derivatives
of $\mathcal{X}$ with respect to $x$. 

If $\rho>0$ is given,
there is a bounded open interval $\Omega$ containing the origin such
that $\abs{f'(x)}\le \rho$ whenever $x\in \Omega$.
Using this estimate, variation of constants, and Gronwall's inequality,
it is easy to show that the solution $\mathcal{W}$ of the first variational
equation is bounded above as follows:
\begin{equation}\label{1d:4}
\abs{\mathcal{W}(t)}\le e^{(\rho-a)t}.
\end{equation}
Likewise, if  $\abs{f''(x)}\le \sigma $ whenever $x\in \Omega$,
then, by a similar argument, we have that
\begin{equation}\label{1d:5}
\abs{\mathcal{Z}(t)}\le \frac{\sigma}{a-2\rho}e^{(\rho-a)t},
\end{equation}  
and so on for higher order derivatives.

We will show the smoothness of $\alpha$ up to order two, the proof
in the general case is similar.

Choose $\Omega$ with a sufficiently small radius so that $2\rho-a<0$.
(For the $\mathcal{C}^k$ case, the inequality $k\rho-a<0$ is required.)
To prove that $\alpha\in\mathcal{C}^0$, 
bound the absolute value of
the integrand in display~\eqref{1d:0}
using the growth estimate~\eqref{1d:7} and the inequality~\eqref{1d:2},
and then apply Lemma~\eqref{pfcont}.  

To show that $\alpha\in\mathcal{C}^1$, 
formally differentiate the integral representation
and then bound the absolute value of 
the resulting integrand  using the Lipschitz estimate for
$f'$ and the growth bound~\eqref{1d:4}. This results in the upper
bound
\[
M\abs{x} e^{(Mr+\rho-a)t}.
\]
For $r>0$ sufficiently
small, the exponential growth rate $Mr+\rho-a$ is negative and
the continuity of $\alpha_x$ follows from Lemma~\ref{pfcont}.
Also, by using the same estimate, it is clear that $\alpha\in\mathcal{C}^{1,1}$.

In case $f\in \mathcal{C}^2$, 
the second derivative of the integrand in the integral representation
of $\alpha$ is bounded above
by 
\[
\abs{e^{at}}(\abs{f''(\mathcal{X}(t))}\abs{\mathcal{X}_x(t)}^2
+\abs{f'(\mathcal{X}(t))}\abs{\mathcal{X}_{xx}(t)}).
\]
This term is majorized using the inequality $\abs{f''(x)}\le \sigma$,
the Lipschitz estimate for $f'$, and the growth bounds~\eqref{1d:4}
and~\eqref{1d:5}. The exponential growth rates of the resulting
upper bound are $2\rho-a$ and $Mr+\rho-a$. 
If $\Omega$ is chosen with a sufficiently small radius, 
then both rates are negative.
\end{proof}


\subsection{Hyperbolic Saddles}
The main result of this section is the following theorem.
\begin{thm}\label{thm:pc}
If $X$ is a $\mathcal{C}^2$ vector field on $\mathbb{R}^2$ such that $X(0)=0$
and $DX(0)$ is infinitesimally hyperbolic, then
$X$ is locally $\mathcal{C}^1$ conjugate to its linearization at the origin. 
\end{thm}
\noindent 
We will formulate and prove a slightly
more general result about the linearization of systems on $\mathbb{R}^n$
with hyperbolic saddle points. 

Consider a linear map $\mathcal{A}:\mathbb{R}^n\to\mathbb{R}^n$ and suppose that 
there are positive numbers $a_L$, $a_R$, $b_L$, and $b_R$ such that the real  
parts of the eigenvalues of $\mathcal{A}$ are contained in the union of the
intervals $[-a_L,-a_R]$ and $[b_L,b_R]$. 
By a linear change of coordinates, 
$\mathcal{A}$ is transformed to a block diagonal matrix with two diagonal blocks: 
$\mathcal{A}^s$, a matrix whose eigenvalues have their real parts in the
interval $[-a_L,-a_R]$, 
and $\mathcal{A}^u$, a matrix whose eigenvalues have their real parts in the    
interval  $[b_L,b_R]$. Suppose that $0<\mu<1$ and  every quasi-linear
$C^{1,1}$ vector field of the form $\mathcal{A}^s +F$
(that is, $F(0)=DF(0)=0$) is $\mathcal{C}^{1,\mu}$ linearizable at the origin.
Likewise, suppose that $0<\nu<1$ and  every quasi-linear 
$\mathcal{C}^{1,1}$ vector field of the form $\mathcal{A}^u +G$
is $\mathcal{C}^{1,\nu}$ linearizable at the origin. 
In particular, this is true if
$\mu$ is the H\"older spectral exponent~\eqref{hse}  
associated with the real parts of eigenvalues
in the interval $[-a_L,-a_R]$ and $\nu$ is the H\"older spectral 
exponent associated with the real parts of eigenvalues 
in the interval $[b_L,b_R]$. 
We say that $\mathcal{A}$ satisfies 
\emph{Hartman's $(\mu,\nu)$-spectral condition} if
\[
a_L-a_R<\mu  b_L,\qquad b_R-b_L<\nu a_R.
\]

A linear transformation of $\mathbb{R}^2$ with one negative
and one positive eigenvalue satisfies Hartman's $(\mu,\nu)$-spectral condition
for every pair of H\"older exponents $(\mu,\nu)$. Hence, Theorem~\ref{thm:pc}
is a corollary of the following more general result.

\begin{thm}\label{th:hsct}
If $X$ is a $\mathcal{C}^{1,1}$ vector field on $\mathbb{R}^n$ such that $X(0)=0$ and
$DX(0)$ satisfies Hartman's $(\mu,\nu)$-spectral condition, 
then $X$ is locally $\mathcal{C}^1$ conjugate to its linearization at the origin.
\end{thm}

The proof of Theorem~\ref{th:hsct} has two main ingredients:
a change of coordinates into a normal form where the stable and
unstable manifolds of the saddle point at the origin are flattened
onto the corresponding linear subspaces of $\mathbb{R}^n$ in such a way that the system
is linear on each of these invariant subspaces, and  the
application of a linearization procedure for systems
in this normal form.

A vector field on $\mathbb{R}^n$ with a hyperbolic
saddle point at the origin is in $(\mu,\nu)$-\emph{flattened normal form}
if it is given by
\begin{equation}\label{vfnn}
(x,y)\mapsto (Ax+f(x,y), y=By+g(x,y))
\end{equation}
where $(x,y)\in \mathbb{R}^k\times\mathbb{R}^\ell$ for $k+\ell=n$, all eigenvalues
of $A$ have negative real parts, all eigenvalues of $B$ have positive
real parts, $F:=(f,g)$ is a $\mathcal{C}^1$ function defined on an
open subset $\Omega$  of the origin in 
$\mathbb{R}^k\times\mathbb{R}^\ell$ with $F(0,0)=DF(0,0)=0$,
and there are real numbers $M$, $\mu$, and $\nu$ with
$M>0$, $0<\mu\le 1$, and $0<\nu\le 1$ such that
for $(x,y)\in \Omega$,
\begin{eqnarray}
\label{fnnl} 
\abs{f_y(x,y)}&\le& M\abs{x},
       \hspace{.42in} \abs{g_x(x,y)}\le M\abs{y}, \\
\label{fnni}
\abs{f_x(x,y)}&\le& M\abs{y}^\mu,
       \hspace{.35in} \abs{g_y(x,y)}\le M\abs{x}^\nu, \\
\label{fnn}
\abs{f(x,y)}&\le& M\abs{x}\abs{y},
       \hspace{.3in}\abs{g(x,y)}\le M\abs{x}\abs{y}.
\end{eqnarray}


Theorem~\ref{th:hsct} is a corollary of the following two results.
\begin{thm}\label{thm:fnn}
If $X$ is a $\mathcal{C}^{1,1}$ vector field on $\mathbb{R}^n$ such that $X(0)=0$,
the linear transformation $DX(0)$ 
satisfies Hartman's $(\mu,\nu)$-spectral condition, and
$0< \upsilon <\min \{\mu,\nu\}$,
then there is an open neighborhood of the origin
on which $X$ is $\mathcal{C}^{1,\upsilon}$ 
conjugate to a vector field in $(\mu,\nu)$-flattened normal form. 
\end{thm}

\begin{thm}\label{thm:lfnn}
If $X$ is a vector field on $\mathbb{R}^n$ such that $X(0)=0$,
the linear transformation $DX(0)$
satisfies Hartman's $(\mu,\nu)$-spectral condition,
and $X$ is in $(\mu,\nu)$-flattened normal form,
then there is an open neighborhood of the origin 
on which $X$ is $\mathcal{C}^1$ conjugate to its linearization at the origin.
\end{thm}

 
The proof of Theorem~\ref{thm:fnn} uses three results: the stable
manifold theorem, Dorroh smoothing, and Theorem~\ref{rhsth}.
The required version of the
stable manifold theorem is a standard result, which is 
a straightforward generalization of the statement and the proof
of Theorem~\ref{th:invman}. 
On the other hand, since Dorroh smoothing is perhaps less familiar,
we will formulate and prove the required result. 

Suppose that $X$ is a $\mathcal{C}^k$ vector field on $\mathbb{R}^n$ and $h:\mathbb{R}^n\to \mathbb{R}^n$ 
is a $\mathcal{C}^k$ diffeomorphism. The flow of $X$ is automatically $\mathcal{C}^k$.  
If we view $h$ as a change of coordinates $y=h(x)$, then 
the vector field in the new coordinate,  given by
the push forward of $X$ by $h$, is
\[ y\mapsto Dh(h^{-1}(y))X(h^{-1}(y));\]
and, because the derivative of $h$ appears in the transformation formula,
the maximal smoothness of the
representation of $X$ in the new coordinate is (generally) at most
$\mathcal{C}^{k-1}$. 
But the transformed flow, 
given by $h(\phi_t(h^{-1} (y))$, is $\mathcal{C}^{k}$. 
Thus, in the transformation theory for differential equations,
it is natural encounter vector fields that are
less smooth than their flows. Fortunately, 
this problem is often proved to be inessential
by applying the following version of a 
result of J. R. Dorroh~\cite{dorroh}. 
\begin{thm}\label{th:dorroh}
Suppose that $X$ is a $\mathcal{C}^0$ vector field on $\mathbb{R}^n$ and
$k$ is a positive integer. 
If $X$ has a $\mathcal{C}^{k,\mu}$ (local) flow $\phi_t$ and
$\phi_t(0)\equiv 0$, then there is a number $T>0$ and 
an open set $\Omega\subset \mathbb{R}^n$ with $0\in \Omega$ such that 
\[
h(x):=\frac{1}{T}\int_0^T \phi_t(x)\,dt 
\]
defines a $\mathcal{C}^{k,\mu}$-diffeomorphism in $\Omega$
that conjugates $X$ to a $\mathcal{C}^{k,\mu}$ vector field $Y$ on
$h(\Omega)$. In particular, $Y(0)=0$ and $Y$ has a $\mathcal{C}^{k,\mu}$-flow
that is $\mathcal{C}^{k,\mu}$ conjugate to $\phi_t$.
\end{thm}

\begin{proof}
Because the flow is $\mathcal{C}^{k,\mu}$,
the function $h$ defined in the statement
of the theorem is $\mathcal{C}^{k,\mu}$ for each fixed $T>0$. 
Also, we have that
\[
Dh(0)=\frac{1}{T}\int_0^T D\phi_t(0)\,dt.
\]
Recall that the space derivative of a flow satisfies the cocycle
property, that is, the identity
\[
D\phi_t(\phi_s(x))D\phi_s(x)=D\phi_{t+s}(x).
\]
In particular, because $x=0$ is a rest point,
$D\phi_t(0)D\phi_s(0)=D\phi_{t+s}(0)$, and
because $\phi_0(x)\equiv x$, we also have that
$D\phi_0(0)=I$. Hence, $\{D\phi_t(0)\}_{t\in \mathbb{R}}$ is a one-parameter
group of linear transformations on $\mathbb{R}^n$. Moreover,
the function $t\mapsto D\phi_t(0)$ is continuous. 

If the function $t\mapsto D\phi_t(0)$ were differentiable and
$C:=\frac{d}{dt} D\phi_t(0)|_{t=0}$, 
then
\[
\frac{d}{dt} D\phi_t(0)=\frac{d}{ds} D\phi_{s+t}(0)\big |_{s=0}
=\frac{d}{ds} D\phi_s(0)D\phi_t(0)\big |_{s=0}=C  D\phi_t(0);
\]
and therefore,
$D\phi_t(0)=e^{tC}$. 
Using elementary semigroup theory, this result follows without
the a priori assumption that $t\mapsto D\phi_t(0)$ is differentiable
(see, for example, \cite[p. 614]{ds}).

There is a constant $M>0$ such that
\[
\norm{e^{tC}-I}\le M\abs{t}
\]
whenever $\abs{t}\le 1$. Hence, we have 
\begin{eqnarray*}
Dh(0)&=&\frac{1}{T}\int_0^T e^{tC}\,dt\\
     &=&\frac{1}{T}\big(\int_0^T I\,dt+ \int_0^T (e^{tC}-I)\,dt\big)\\
     &=&I+\frac{1}{T}\int_0^T (e^{tC}-I)\,dt.
\end{eqnarray*} 
For  $0<T<1$, the norm of the operator 
\[
B:=\frac{1}{T}\int_0^T (e^{tC}-I)\,dt
\]
is bounded by $MT$. If $T<1/M$, then
$Dh(0)=I+B$ with $\norm{B}<1$. It follows that 
$Dh(0)$ is invertible, and
by the inverse function theorem, $h$ is a $\mathcal{C}^{k,\mu}$-diffeomorphism
defined on some neighborhood of the origin. (The ``usual''
inverse function theorem does not mention H\"older derivatives.
But the stated result can be proved with an easy modification
of the standard proof that uses the contraction principle. For example,
use the fiber contraction method to prove smoothness and
note that the fiber contraction preserves
a space of candidate derivatives that also satisfy the
H\"older condition.) 

Let us note that
\[
\frac{d}{ds} \phi_t(\phi_s(x))=D\phi_t(\phi_s(x)) F(\phi_s(x))
\]
and
\[
\frac{d}{ds} \phi_t(\phi_s(x))= \frac{d}{ds} \phi_{t+s}(x)=
F(\phi_{s+t}(x)).
\]
Hence, we have the identity
\[
D\phi_t(\phi_s(x))F(\phi_s(x))=F(\phi_{s+t}(x))
\]
and, at $s=0$, 
\[
D\phi_t(x)F(x)=F(\phi_{t}(x)). 
\]
It follows that
\begin{eqnarray*}
Dh(x)F(x)&=&\frac{1}{T}\int_0^T D\phi_t(x)F(x)\,dt\\
&=&\frac{1}{T}\int_0^T F(\phi_t(x))\,dt\\
&=&\frac{1}{T}\int_0^T \frac{d}{dt}\phi_t(x)\,dt\\
&=&\frac{1}{T}(\phi_T(x)-x);
\end{eqnarray*}
and therefore,
$x\mapsto Dh(x)F(x)$ is a $\mathcal{C}^{k,\mu}$-function. 
The push forward of $X$ by $h$, namely, 
\[y\mapsto Dh(h^{-1}(y))F(h^{-1}(y))\]
is the composition of two $\mathcal{C}^{k,\mu}$ functions, hence
it is $\mathcal{C}^{k,\mu}$.
\end{proof}

As a remark, we mention another variant of Dorroh's theorem:
A $\mathcal{C}^1$ vector field with a $\mathcal{C}^k$ flow is locally conjugate
to a $\mathcal{C}^k$ vector field. In particular, this result is valid
at an arbitrary point $p$ in the phase space, which we may as
well assume is $p=0$. The essential part of
the proof is to show that $Dh(0)$ is invertible where
$h$ is the function defined in the statement of Theorem~\ref{th:dorroh}.
In fact, by choosing a 
bounded neighborhood $\Omega$ of the origin so that
$M:=\sup\{\norm{DF(x)}: x\in \Omega\}$ is sufficiently small,
by using Gronwall's lemma to obtain the estimate
$\abs{D\phi_t(0)}\le e^{Mt}$, and by also choosing
$T>0$ sufficiently small, 
it is easy to show that
$\norm{I-Dh(0)}<1$. Hence, because $Dh(0)=I-(I-Dh(0))$, the inverse
of $Dh(0)$ is given by the Neumann series $\sum_{i=0}^\infty (I-Dh(0))^i$.

One nice feature of Dorroh's theorem is the explicit formula
for the smoothing diffeomorphism $h$. In particular, since
$h$ is an average over the original flow, most dynamical
properties of this flow are automatically inherited by
the smoothed vector field. For example, invariant sets of
the flow  are also $h$-invariant. This fact will be used
in the following proof of Theorem~\ref{thm:fnn}.

\begin{proof} 
Suppose that the
vector field~\eqref{vfnn} is such that $F:=(f,g)$ is a $\mathcal{C}^1$ function.
If the inequalities~\eqref{fnnl}-\eqref{fnni} are satisfied, then
so are the inequalities~\eqref{fnn}. In fact, by using~\eqref{fnni} we have
the identity $f(x,0)\equiv 0$ and by Taylor's theorem the estimate
\[
\abs{f(x,y)}\le \abs{f_y(x,0)}\abs{y}
+\int_0^1 (\abs{f_y(x,ty)}\abs{y}-\abs{f_y(x,0)}\abs{y})\,dt.
\]
The first inequality in display~\eqref{fnn} is an immediate
consequence of this estimate and the first inequality in
display~\eqref{fnnl}. The second inequality in display~\eqref{fnn} is
proved similarly.

By an affine change of coordinates, the differential
equation associated with $X$ has the representation
\begin{equation}\label{s:1}
\dot p=\tilde A p+f_4(p,q),\qquad \dot q=\tilde B q+g_4(p,q)
\end{equation}
where $(p,q)\in \mathbb{R}^k\times\mathbb{R}^\ell$ with $k+\ell=n$,
all eigenvalues of $\tilde A$ have negative real parts, all eigenvalues
of $\tilde B$ have positive real parts, and $F_4:=(f_4,g_4)$ is 
$\mathcal{C}^{1,1}$ with $F_4(0,0)=DF_4(0,0)=0$.

By the (local) stable manifold theorem, there are 
open sets $U_4\subset \mathbb{R}^k$ and $V_4\subset \mathbb{R}^\ell$ such
that  $(0,0)\in  U_4\times V_4$ 
and $\mathcal{C}^{1,1}$ functions
$\eta:V_4\to\mathbb{R}^k$ and $\gamma:U_4\to\mathbb{R}^\ell$ 
such that
$\eta(0)=D\eta(0)=0$, $\gamma(0)=D\gamma(0)=0$, the
set $\{(p,q):p=\eta(q)\}$ is overflowing invariant, and 
$\{(p,q):q=\gamma(p)\}$ is inflowing invariant.

By the inverse function theorem, the restriction of the
near-identity transformation
given by
\[
u=p-\eta(q),\qquad v=q-\gamma(p)
\]
to a sufficiently small open set containing the origin
in $\mathbb{R}^k\times\mathbb{R}^\ell$ is a $\mathcal{C}^{1,1}$ diffeomorphism. 
Moreover, the differential equation~\eqref{s:1} is transformed to
\begin{equation}\label{s:4.5}
\dot u=\tilde A u+f_3(u,v), \qquad
\dot v=\tilde B v+g_3(u,v)
\end{equation}
where $F_3:=(f_3,g_3)$ is $\mathcal{C}^{0,1}$ with
$F_3(0,0)=DF_3(0,0)=0$. In view of the fact
that the stable and unstable manifolds are invariant, we also have
the identities
\begin{equation}\label{s:5}
f_3(0,v)\equiv 0,\qquad g_3(u,0)\equiv 0.
\end{equation}
Hence, the transformed invariant manifolds lie on the 
respective coordinate planes.

Because system~\eqref{s:4.5} has a $\mathcal{C}^{1,1}$ flow, 
Dorroh's smoothing transformation $h$ (defined in Theorem~\ref{th:dorroh})
conjugates system~\eqref{s:4.5} to a $\mathcal{C}^{1,1}$ system. Moreover,
by the definition of $h$, it is 
clear that it preserves the coordinate planes in the open
neighborhood of the origin where it is defined. In fact,
$h$ is given by
$h(u,v)=(\bar h_1(u,v),\bar h_2(u,v))$ where
\[
\bar h_1(0,v)\equiv 0,\qquad \bar h_2(u,0)\equiv 0.
\]

The invertible derivative of $h$ at the origin
has the block diagonal form 
\[
Dh(0,0)=\left(
\begin{array}{cc}
C_1 & 0 \\
0 & C_2 \\
\end{array}\right).
\]
Hence, the diffeomorphism $h$ is given by
\[
(\xi,\zeta)=h(u,v)=(C_1u+h_1(u,v), C_2 v+h_2(u,v))
\]
where 
$C_1$ and $C_2$ are invertible, $\tilde H:=(h_1,h_2)$ is
$\mathcal{C}^{1,1}$ with $\tilde H(0,0)=0$, $D\tilde H(0,0)=0$, and 
\begin{equation}\label{s:6}
 h_1(0,v)\equiv 0,\qquad h_2(u,0)\equiv 0.
\end{equation}

The system~\eqref{s:4.5} is transformed by $h$ to
\begin{equation}\label{s:8}
\dot \xi=\bar A\xi +f_2(\xi,\zeta), \qquad
\dot \zeta=\bar B\zeta+g_2(\xi,\zeta)
\end{equation}
where  $\bar A=C_1\tilde A C_1^{-1}$, 
$\bar B=C_2\tilde B C_2^{-1}$, 
$F_2:=(f_2,g_2)$ is $\mathcal{C}^{1,1}$ with
$F_2(0,0)=DF_2(0,0)=0$, and 
\begin{equation}\label{s:9} 
f_2(0,\zeta)\equiv 0,\qquad g_2(\xi,0)\equiv 0.
\end{equation}

In view of the identities~\eqref{s:9}, the dynamical system~\eqref{s:8}
restricted to a neighborhood of the 
origin in $\mathbb{R}^k\times\{0\}$ is given by the $\mathcal{C}^{1,1}$ system
\[
\dot \xi=\bar A\xi+f_2(\xi,0).
\]
Moreover, since this system satisfies the
hypotheses of Theorem~\ref{rhsth}, it is linearized by a 
near-identity $\mathcal{C}^{1,\mu}$ diffeomorphism 
$H_1:\xi\mapsto \xi+h_3(\xi)$. 
Likewise, there is a  
near-identity $\mathcal{C}^{1,\nu}$ diffeomorphism  $H_2$, given by
$\zeta \mapsto \zeta+h_4(\zeta)$, that linearizes
\[
\dot \zeta=\bar B\zeta+g_2(0,\zeta).
\]
These maps define a diffeomorphism $H:=(H_1,H_2)$
that transforms system~\eqref{s:8} to
a system of the form
\begin{equation}\label{s:12}
\dot \psi=\bar A \psi +f_1(\psi,\omega), \qquad
\dot \omega=\bar B \omega +g_1(\psi,\omega)
\end{equation}
where
$F_1:=(f_1,g_1)$ is $\mathcal{C}^0$ with
$F_1(0,0)=DF_1(0,0)=0$, 
\begin{equation}\label{s:13} 
f_1(0,\omega)\equiv 0,\qquad g_1(\psi,0)\equiv 0,
\end{equation}
and
\begin{equation}\label{s:13a} 
f_1(\psi,0)\equiv 0,\qquad g_1(0,\omega)\equiv 0.
\end{equation}
Let $\phi_t=(\phi_t^1,\phi_t^2)$ 
denote the flow of system~\eqref{s:8} (even though
the same notation has been used to denote other flows).  
The first component of the flow of system~\eqref{s:12} is
given by
\[
H_1(\phi_t^1(H_1^{-1}(\psi),H_2^{-1}(\omega))).
\]
Its partial derivative with respect to $\psi$ is clearly in $\mathcal{C}^\mu$,
where for notational convenience we use $\mu$ and $\nu$ to denote H\"older
exponents that are strictly smaller than the corresponding
H\"older spectral exponents. 
On the other hand, 
its partial derivative with respect to $\omega$ is bounded 
by a constant times
\[
\abs{
\big(\frac{\partial}{\partial \omega} 
\phi_t^1\big)(H_1^{-1}(\psi),H_2^{-1}(\omega))
}.
\]
Because $f_2(0,\zeta)\equiv 0$, it follows that
$\phi_t^1(0,\zeta)\equiv 0$, and therefore,
\[
\big(\frac{\partial}{\partial \zeta} \phi_t^1\big)(0,\zeta)\equiv 0.
\]
Because system~\eqref{s:8} is in $\mathcal{C}^{1,1}$, there is a
constant $M>0$ such that
\[
\abs{\big(\frac{\partial}{\partial \zeta} \phi_t^1\big)(\xi,\zeta)}
\le M\abs{\xi},
\]
and consequently,
\[
\abs{
\big(\frac{\partial}{\partial \omega}
\phi_t^1\big)(H_1^{-1}(\psi),H_2^{-1}(\omega))}
\le M\abs{H_1^{-1}(\psi)}\le M\norm{DH_1^{-1}}\abs{\psi}.
\]
Similarly, the partial derivative with respect to $\psi$ 
of the second component of
the flow is bounded above by a constant times $\abs{\omega}$.

By a second application of Dorroh's theorem~\eqref{th:dorroh}, 
there is a  $\mathcal{C}^1$ diffeomorphism, whose partial derivatives
satisfy H\"older and Lipschitz conditions corresponding to those
specified for the flow of system~\eqref{s:12},
that transforms system~\eqref{s:12}
to a system of the form
\begin{equation}\label{s:15}
\dot x=A x +f(x,y), \qquad
\dot y=B y +g(x,y)
\end{equation}
where $A$ is similar to $\bar A$ and $B$ is similar to $\bar B$,
where $F:=(f,g)$ is $\mathcal{C}^1$ with
$F_1(0,0)=DF_1(0,0)=0$ and with corresponding H\"older partial
derivatives, and where 
\begin{equation}\label{s:16}
f(0,y)\equiv 0,\qquad g(x,0)\equiv 0,
\end{equation}
and
\begin{equation}\label{s:17} 
f(x,0)\equiv 0,\qquad g(0,y)\equiv 0.   
\end{equation}
The identities~\eqref{s:16} are equivalent to the invariance of
the coordinate planes, whereas the identities~\eqref{s:17} are equivalent 
to the linearity of the system on each coordinate plane.
The preservation of linearity on the coordinate planes by
the Dorroh transformation is clear from its definition; to wit, a linear
flow produces a linear Dorroh transformation.

Because $f(x,0)\equiv 0$, it follows that $f_x(x,0)\equiv 0$.
Also, $f_x\in \mathcal{C}^\mu$. Hence, there is some $M>0$ such that
\[
\abs{f_x(x,y)}=\abs{f_x(x,y)-f_x(x,0)} \le M \abs{y}^\mu.
\]
Likewise, we have the estimate
\[
\abs{g_y(x,y)} \le M \abs{x}^\nu.
\]

Because $f(0,y)\equiv 0$, it follows that $f_y(0,y)\equiv 0$,
and because $f_y$ is Lipschitz, there is a constant
$M>0$ such that 
\[
\abs{f_{y}(x,y)}=\abs{f_y(x,y)-f_y(0,y)}
\le M \abs{x}.
\]
Similarly, we have that $\abs{g_x(x,y)}\le M\abs{y}$.
\end{proof}

For a $\mathcal{C}^2$ vector field on the plane with a hyperbolic
saddle point at the origin, there is a stronger version
of Theorem~\ref{thm:fnn}. In fact, in this case, the vector field
is conjugate to a $\mathcal{C}^2$ vector field in  flattened normal form. 
To prove this, use the $\mathcal{C}^2$ stable manifold theorem 
to flatten the stable and the unstable manifolds
onto the corresponding coordinate axes. 
Dorroh smoothing conjugates
the resulting vector field to a $\mathcal{C}^2$ vector field 
that still has invariant coordinate axes. 
Apply Theorem~\ref{th:1d} to $\mathcal{C}^2$ linearize on
each coordinate axis, and then use Dorroh smoothing to obtain
a $\mathcal{C}^2$ vector field that is also linearized on each coordinate
axis.

The following proof of Theorem~\ref{thm:lfnn} is similar to a portion
of the proof of Theorem~\ref{rhsth}. 
In particular, an explicit integral formula for
the nonlinear part of the linearizing transformation is obtained and
its smoothness is proved using Lemma~\ref{pfcont}. 

\begin{proof}
Let $X$ denote the vector field~\eqref{vfnn}
in flattened normal form. We will construct a smooth
near-identity linearizing transformation given by
\begin{equation}\label{slt}
u=x+\alpha(x,y),\qquad v=y+\beta(x,y).
\end{equation}

The smooth transformation~\eqref{slt} 
linearizes the vector field $X$ if and only if the 
pair of functions $\alpha, \beta$ satisfies the system of partial
differential equations
\[
D\alpha X-A\alpha=-f,\qquad D\beta X-B\beta=-g.
\]
The first equation is equivalent to the differential equation
\[
\frac{d}{dt} e^{tA} \alpha (\phi_{-t}(x,y))=e^{tA}f(\phi_{-t}(x,y));
\]
and therefore, it has the solution
\begin{equation}\label{eq:inteq}
\alpha(x,y)=-\int_0^\infty e^{tA}f(\phi_{-t}(x,y))\,dt
\end{equation} 
provided that the improper integral converges.
Similarly, the second equation is equivalent to
the differential equation
\[
\frac{d}{dt} e^{-tB} \beta (\phi_{t}(x,y))=-e^{-tB}g(\phi_{t}(x,y)) 
\]
and has the solution
\[
\beta(x,y)=\int_0^\infty e^{-tB}g(\phi_{t}(x,y))\,dt.
\]

We will prove that $\alpha$, as defined in display~\eqref{eq:inteq},
is a $\mathcal{C}^1$ function. The proof for $\beta$ is similar.

By using a smooth bump function as in Section~\ref{sec1},
there is no loss of generality if we assume that $X$ is bounded 
on $\mathbb{R}^n$. Under this assumption, $f$ is bounded; and because
$A$ is a stable matrix, it follows immediately
that $\alpha$ is a continuous function defined on an open ball $\Omega$
at the origin with radius $r$.

Let $t\mapsto (\mathcal{X}(t),\mathcal{Y}(t))$ denote the solution of the system
\begin{eqnarray}
\label{rtode1}\dot x&=&-Ax-f(x,y),\\
\label{rtode2}\dot y&=&-By-g(x,y)
\end{eqnarray}
with initial condition $(\mathcal{X}(0),\mathcal{Y}(0))=(x,y)$ and
note that (formally)
\begin{equation}\label{alsubx}
\alpha_x(x,y)=-\int_0^\infty e^{tA}(f_x(\mathcal{X}(t),\mathcal{Y}(t))\mathcal{X}_x(t)+
f_y(\mathcal{X}(t),\mathcal{Y}(t))\mathcal{Y}_x(t))\,dt
\end{equation}
where $t\mapsto(\mathcal{X}_x(t),\mathcal{Y}_x(t))$ is the solution of the
variational initial value problem
\begin{eqnarray}
\nonumber \dot w&=&-Aw-f_x(\mathcal{X}(t),\mathcal{Y}(t)) w-f_y(\mathcal{X}(t),\mathcal{Y}(t)) z,\\
\label{cve} \dot z&=&-Bz-g_x(\mathcal{X}(t),\mathcal{Y}(t)) w-g_y(\mathcal{X}(t),\mathcal{Y}(t)) z,\\
\nonumber w(0)&=& I,\\
\nonumber z(0)&=& 0.
\end{eqnarray}
We will show that $\alpha_x$ is a continuous function defined
on an open neighborhood of the origin. The proof for
$\alpha_y$ is similar.

Several (Gronwall) estimates are required.
To set notation, let $F:=(f,g)$ and  
$\rho:=\sup\{\norm{DF(x,y)}:(x,y)\in\Omega\}$; and
assume that every eigenvalue of $A$ has its real part 
in the interval $[-a_L,-a_R]$ where $0<a_R<a_L$ and
every eigenvalue of $B$ has its real part in
the interval  $[b_L,b_R]$ where $0<b_L<b_R$. 
As before, if $a$, $b$, $\lambda$, and $\sigma$
are numbers with
$0<\lambda<a_R<a_L<a$ and $0<\sigma<b_L<b_R<b$, then
there is a number $K>0$ 
such that the following hyperbolic estimates hold
whenever $t\ge 0$:
\begin{eqnarray*}
\abs{e^{tA}x}&\le& K e^{-\lambda t}\abs{x},\qquad
\abs{e^{-tA}x}\le K e^{at}\abs{x},\\
\abs{e^{tB}y}&\le& K e^{bt}\abs{y},\hspace{.45in}
\abs{e^{-tB}y}\le K e^{-\sigma t}\abs{y}.
\end{eqnarray*}
We will show that there is a constant $\mathcal{K}>0$ such that
the following Gronwall estimates hold:
\begin{eqnarray}
\label{gw:1}
\abs{\mathcal{X}(t)}&\le& \mathcal{K} e^{(K\rho+a)t},\\
\label{gw:2}
\abs{\mathcal{Y}(t)}&\le& \mathcal{K}\abs{y} e^{(KMr-\sigma)t},\\
\label{gw:3}
\abs{\mathcal{X}_x(t)}&\le& \mathcal{K} e^{(K\rho+a)t},\\
\label{gw:4}
\abs{\mathcal{Y}_x(t)}&\le& \mathcal{K}\abs{y} e^{(KMr+2K\rho+a-\sigma)t}
\end{eqnarray}
where $M$ is the constant that appears in the definition
of the flattened normal form.

The inequality~\eqref{gw:1} is proved in the usual manner:
apply the variation of constants formula
to equation~\eqref{rtode1} to derive the estimate
\begin{eqnarray*}
\abs{\mathcal{X}(t)} &\le& \abs{e^{-tA} x}
    +\int_0^t \abs{e^{(s-t)A}}\abs{f(\mathcal{X}(s),\mathcal{Y}(s))}\,ds\\
&\le& Ke^{at}\abs{x}+\int_0^t Ke^{a(t-s)}\rho(\abs{\mathcal{X}(s)}+r)\,ds,
\end{eqnarray*}
rearrange and integrate to obtain the estimate
\begin{equation}
e^{-at}\abs{\mathcal{X}(t)} \le  Kr+\frac{K\rho r}{a}+
\int_0^tK\rho e^{-as}\abs{\mathcal{X}(t)}\,dt,
\end{equation}
and then apply Gronwall's inequality.

The proof of inequality~\eqref{gw:2} is similar to the proof
of inequality~\eqref{gw:1} except that 
the estimate in display~\eqref{fnn} is used for $\abs{g(\mathcal{X}(t),\mathcal{Y}(t))}$
instead of the mean
value estimate used for $\abs{f(\mathcal{X}(t),\mathcal{Y}(t))}$. 

The estimates~\eqref{gw:3} and~\eqref{gw:4} are proved in two main steps.
First, define $\mathcal{A}$ to be the block diagonal matrix with
blocks $A$ and $B$,  $U:=(x,y)$,  and $F:=(f,g)$
so that the system~\eqref{rtode1}--\eqref{rtode2} is expressed in
the compact form 
\begin{equation}\label{compacteq}
\dot U=-\mathcal{A} U-F(U),
\end{equation}
and the corresponding variational equation (also corresponding
to equation~\eqref{cve}) is
\[
\dot V=-\mathcal{A} V-DF(\mathcal{U}(t))V
\]
where $t\mapsto \mathcal{U}(t)$ is the solution of system~\eqref{compacteq}
with initial condition $\mathcal{U}(0)=U$. An easy Gronwall estimate
shows that 
\[
\abs{\mathcal{V}(t)} \le Ke^{(K\rho+a)t}
\]
where  $t\mapsto \mathcal{V}(t)$ is the corresponding solution of the
variational equation. Because $\abs{V}$ can be defined to be
$\abs{w}+\abs{z}$, it follows that
\begin{equation}
\label{zwest}
\abs{\mathcal{W}(t)} \le Ke^{(K\rho+a)t},\qquad 
\abs{\mathcal{Z}(t)} \le Ke^{(K\rho+a)t}.
\end{equation}
Next, the estimate for $\mathcal{Z}$ is improved.
In fact, using equation~\eqref{cve}, the corresponding
initial condition for $\mathcal{Z}(t)$, and variation of constants,
we have that
\begin{eqnarray}
\nonumber \abs{\mathcal{Z}(t)} 
&\le& \int_0^t\abs{e^{(s-t)B}}\abs{g_x(\mathcal{X}(s),\mathcal{Y}(s))} \abs{\mathcal{W}(s)}\,ds\\
\nonumber &&\mbox{}+
 \int_0^t\abs{e^{(s-t)B}}\abs{g_y(\mathcal{X}(s),\mathcal{Y}(s))} \abs{\mathcal{Z}(s)}\,ds\\
\nonumber &\le& \int_0^t K e^{-\sigma (t-s)}M\abs{\mathcal{Y}(s)} \mathcal{K} e^{(K\rho+a)s}\,ds\\
\label{lastin} &&\mbox{}+
\int_0^t Ke^{-\sigma (t-s)} \rho \abs{\mathcal{Z}(s)}\,ds.
\end{eqnarray}
The inequality 
\begin{eqnarray*}
e^{\sigma t}\abs{\mathcal{Z}(t)} 
&\le&\int_0^t K\mathcal{K}^2Mr e^{\sigma s}e^{(KMr-\sigma)s} e^{(K\rho+a)s}\,ds\\
&&\mbox{}+ \int_0^t K\rho e^{\sigma s} \abs{\mathcal{Z}(s)}\,ds
\end{eqnarray*}
is obtained by rearrangement of inequality~\eqref{lastin}
and by using the hyperbolic estimate~\eqref{gw:2}. After the first
integral is bounded above by its value  on the interval 
$[0,\infty)$, the desired
result is obtained by an application of Gronwall's inequality.

To show that $\alpha_x$ is continuous,
it suffices to show that the absolute value of the
integrand $J$ of its formal
representation~\eqref{alsubx} is majorized by
an integrable function. In fact, 
\begin{eqnarray*}
J &\le& Ke^{-\lambda t} (\abs{f_x(\mathcal{X}(t),\mathcal{Y}(t))}\abs{\mathcal{W}(t)}
+\abs{f_y(\mathcal{X}(t),\mathcal{Y}(t))}\abs{\mathcal{Z}(t)})\\
&\le& Ke^{-\lambda t}(M\abs{\mathcal{Y}(t)}^\mu\abs{\mathcal{W}(t)}
+\rho\abs{\mathcal{Z}(t)})\\
&\le& Ke^{-\lambda t}(MK^\mu r^\mu e^{(K M r \mu-\sigma\mu)t}
\mathcal{K} e^{(K\rho+a)t}
+\mathcal{K}\rho e^{(KMr+2K\rho+a-\sigma)t}).
\end{eqnarray*}
Thus, we have proved that $J$ is bounded by a function with
two exponential growth rates:
\[
K M r \mu+K\rho+a-\lambda-\sigma\mu,\qquad
K M r \mu+ 2K\rho+a-\lambda-\sigma.
\]
Note that $a-\lambda-\sigma<a-\lambda-\sigma\mu$, and
recall Hartman's spectral condition             
$a_L-a_R-\mu  b_L<0$.
By choosing admissible values of $a$, $\lambda$, and 
$\sigma$ 
such that the three quantities
$\abs{a-a_L}$, $\abs{\lambda-a_R}$, and 
$\abs{\sigma-b_L}$ are sufficiently small,
it follows that
$a-\lambda-\sigma\mu<0$.
Moreover, once this inequality is satisfied,
if $r>0$ and $\rho>0$ are sufficiently small,
then the two rate factors are both negative.
This proves that $\alpha\in\mathcal{C}^1$.
\end{proof}

\section{Linearization of Special Vector Fields}
As we have seen, a $\mathcal{C}^{1,1}$ vector field is
$\mathcal{C}^{1,\mu}$ linearizable at a hyperbolic sink if the H\"older
exponent $\mu$ is less than the H\"older spectral exponent of its
linearization.
Also, a $\mathcal{C}^{1,1}$ vector field is $\mathcal{C}^{1}$ linearizable at
a hyperbolic saddle point
if Hartman's $(\mu,\nu)$-spectral condition is satisfied. 
Can these results be improved?

In view of Sternberg's example~\eqref{ex:stern}, 
there is no hope of improving the 
smoothness of the linearization at a hyperbolic
sink from class $\mathcal{C}^1$ to class $\mathcal{C}^2$ even for polynomial vector fields. 

For hyperbolic saddle points, on the other hand, 
the existence of a $\mathcal{C}^1$ linearization is in doubt unless
Hartman's $(\mu,\nu)$-spectral condition is satisfied.
In view of Hartman's example~\eqref{notlin}, 
it is not possible to remove this condition.
Note, however, that this spectral condition is imposed, in the
course of the proof of the linearization theorem, under the assumption
that the nonlinear part of the vector field at the rest point is
arbitrary. Clearly, this result can be improved by restricting the 
type of nonlinearities that appear. As a trivial example, 
note that no restriction is necessary if the vector field is linear. 
It can also be improved by placing further restrictions on
the spectrum of the linear part of the vector field.

We will define a class of nonlinear vector fields with hyperbolic
sinks at the origin where there is
a linearizing transformation of class
$\mathcal{C}^{1,\mu}$ for every $\mu\in (0,1)$. In particular, the
size of the H\"older exponent $\mu$ of the
derivative of the linearizing transformation 
is not restricted by the H\"older spectral exponent
of the linear part of the vector field at the origin. 
This result will be used to enlarge the class of nonlinear vector
fields with hyperbolic saddles at the origin that can be proved
to be  $\mathcal{C}^1$ linearizable. 

Vector fields corresponding to systems of differential equations
of the form
\begin{eqnarray*}  
\dot u_1&=&-a_1 u_1+f_{11}(u_1,\ldots,u_n)u_1,\\
\dot u_2&=&-a_2 u_2+f_{21}(u_1,\ldots,u_n)u_1
                       +f_{22}(u_1,\ldots,u_n)u_2,\\
&\vdots&\\
\dot u_n&=&-a_n u_n+f_{n1}(u_1,\ldots,u_n)u_1
                       +f_{n2}(u_1,\ldots,u_n)u_2\\
&&\mbox{}+\cdots+f_{nn}(u_1,\ldots,u_n)u_n
\end{eqnarray*}
where $a_1>a_2>\cdots a_n>0$ and the functions $f_{ij}$ are all
of class $\mathcal{C}^2$ with $f_{ij}(0)=0$
are in the special class. They are $\mathcal{C}^{1,\mu}$ linearizable
at the origin for every $\mu\in (0,1)$.


\subsection{Special Vector Fields}
The next definition lists the properties of the special vector
fields that will be used in the proofs of the results in this section.
The following propositions give simple and explicit criteria that
can be easily checked 
to determine if a $\mathcal{C}^3$ vector field and some vector
field in this special class are equal when restricted to
some open neighborhood of the origin.

We will use the notation $D_jH$ to denote the partial derivative
of the function $H$ with respect to its $j$th variable. 
Also, for $r>0$, let 
\[\Omega_r:=\{x\in \mathbb{R}^n: \abs{x}<r\}.\]
Sometimes we will view $\Omega_r$
as a subset of $\mathbb{R}^{n_1}\times\cdots\times \mathbb{R}^{n_p}$ where
$n_1+\cdots+n_p=n$. In this case, a point $x\in \Omega_r$ is expressed
in components as $x=(x_1,\ldots,x_p)$. 

Let $\mathcal{P}_r$ denote the set of all vector fields on $\Omega_r$ of the form
\begin{equation} \label{llnf}
(x_1,\ldots,x_p)\mapsto (A_1 x_1+F_1(x_1,\ldots,x_p),\ldots,
                         A_p x_p+F_p(x_1,\ldots,x_p)),
\end{equation} 
with the following additional properties:
\begin{itemize}
\item[(1)] There are real numbers  
$\lambda_1>\lambda_2>\cdots>\lambda_p>0$ such that, for
each $i\in \{1,2,\ldots,p\}$,  
every eigenvalue of the matrix
$A_i$ has real part $-\lambda_i$. 
\item[(2)] For each $i\in\{1,2,\ldots,p\}$, 
the function $F_i:\Omega_r\to\mathbb{R}^{n_i}$ 
is in class $\mathcal{C}^1(\Omega_r)$. 
(In particular, $F_i$ and $DF_i$ are bounded functions).
\item[(3)] 
There is a constant $M>0$ such that
\[
\abs{F_i(x)-F_i(y)}\le M\big((\abs{x}+\abs{y})\sum_{k=1}^i \abs{x_k-y_k}
       +\abs{x-y}\sum_{k=1}^i(\abs{y_k}+\abs{x_k})\big)
\]
whenever $x,y\in \Omega_r$.
\item[(4)] There is a constant $M>0$ such that
\[\abs{D_jF_i(x)-D_jF_i(y)}\le M\abs{x-y}\]
whenever $i,j\in\{1,2,\ldots,p\}$ and 
$x,y\in \Omega_r$. 
(In particular, 
\[\abs{D_jF_i(x_1,\ldots,x_p)}\le M\abs{x}\]
whenever $i\in \{1,2,\ldots,p\}$, $j\in\{1,2,\ldots,i\}$,
and $x\in \Omega_r$.)
\item[(5)]
There is a constant $M>0$ such that
\[\abs{D_jF_i(x)-D_jF_i(y)}\le M\big(\sum_{k=1}^i \abs{x_k-y_k}
       +\abs{x-y}\sum_{k=1}^i(\abs{x_k}+\abs{y_k})\big)\]
whenever $i\in\{1,2,\ldots,p\}$, $j\in\{i+1,i+2,\ldots,p\}$,
and $x\in \Omega_r$.
(In particular, 
$\abs{D_jF_i(x_1,\ldots,x_p)}\le M(\abs{x_1}+\cdots+\abs{x_i})$
whenever $i\in\{1,2,\ldots,p\}$, $j\in\{i+1,i+2,\ldots,p\}$, 
and $x\in \Omega_r$.)
\end{itemize}

\begin{defn}
A vector field $Y$, given by
\begin{equation}\label{gvf}
(x_1,\ldots,x_p)\mapsto (A_1 x_1+G_1(x_1,\ldots,x_p),\ldots,
                         A_p x_p+G_p(x_1,\ldots,x_p))
\end{equation}
where the matrices $A_1,\ldots,A_p$ satisfy the
property $(1)$ listed in the definition of $\mathcal{P}_r$,
the function $G:=(G_1,\ldots,G_p)$ is
defined on an open neighborhood $U$ of
the origin in $\mathbb{R}^n$, and $G(0)=DG(0)=0$, 
is called \emph{lower triangular} if for each  $i\in\{1,2,\ldots,p-1\}$
\[
G_i(0,0,\ldots,0,x_{i+1},x_{i+2},\ldots,x_p)\equiv 0. 
\]
\end{defn}
For a quasi-linear vector field in the form of $Y$, as given
in display~\eqref{gvf}, let $A$ denote the block diagonal matrix
with diagonal blocks $A_1, A_2,\ldots,A_p$ so that $Y$ is expressed 
in the compact form $Y=A+G$.
\begin{prop}\label{c22nor}
If the $\mathcal{C}^3$ vector field $Y=A+G$ is lower triangular on the open
set $U$ containing the origin and the closure of $\Omega_r$ is in $U$,
then there is a vector field of the form $X=A+F$ in $\mathcal{P}_r$ such that 
the restrictions of the vector fields $X$ and $Y$ to $\Omega_r$ are equal.
\end{prop}
\begin{proof}
Fix $r>0$ such that the closure of $\Omega_r$ is contained in $U$. 
Because $Y$ is $\mathcal{C}^3$, there is a constant $K>0$ such that 
$G$ and its first three derivatives are bounded by $K$ on $\Omega_r$.

Because $DG$ is $\mathcal{C}^1$, the mean value theorem implies that
\[
\abs{D_jG_i(x)-D_jG_i(y)}\le K\abs{x-y}
\]
whenever $x,y\in \Omega_r$. This proves property~(4).

Note that (as in the proof of Taylor's theorem) 
\[
G_i(x_{1}, x_2,\ldots,x_{p})=
\int_0^1 \sum_{k=1}^i D_kG_i(tx_1,tx_2,\ldots,tx_{i},x_{i+1},x_{i+2},
\ldots,x_{p})x_k\,dt.
\]
Hence, with 
\[
\begin{array}{cc}
u:=(x_1,x_2,\ldots,x_{i}),&\qquad  v:=(x_{i+1},x_{i+2},\ldots,x_{p}), \\
w:=(y_1,y_2,\ldots,y_{i}),&\qquad  z:=(y_{i+1},y_{i+2},\ldots,y_{p}), 
\end{array}
\]
we have 
\[
\abs{G_i(x)-G_i(y)}\le
\int_0^1 \sum_{k=1}^i 
\abs{D_kG_i(tu,v)x_k-D_kG_i(tw,z)y_k}\,dt.
\]
Using the mean value theorem applied to the $\mathcal{C}^1$ function 
$f:=D_kG_i$, we have the inequalities
\begin{eqnarray}\label{eq:propf}
\nonumber \abs{f(tu,v)x_k-f(tw,z)y_k}&\le & \abs{f(tu,v)x_k-f(tu,v)y_k}\\
\nonumber &&\mbox{} +\abs{f(tu,v)y_k-f(tw,z)y_k}\\
\nonumber &\le & \abs{f(tu,v)}\abs{x_k-y_k}+\abs{f(tu,v)-f(tw,z)}\abs{y_k}\\
\nonumber &\le & K( (\abs{t}\abs{u}+\abs{v})\abs{x_k-y_k}\\
\nonumber &&\mbox{} +(\abs{t}\abs{u-w}+\abs{v-z})\abs{y_k})\\
&\le & K(\abs{x}\abs{x_k-y_k}+\abs{x-y}\abs{y_k});
\end{eqnarray}
and as a consequence,
\begin{eqnarray}\label{property3}
\nonumber \abs{G_i(x)-G_i(y)}&\le&
K\big(\abs{x} \sum_{k=1}^i \abs{x_k-y_k}+\abs{x-y}\sum_{k=1}^i \abs{y_k}\big)\\
\abs{G_i(x)-G_i(y)}&\le&
K\big((\abs{x}+\abs{y}) \sum_{k=1}^i \abs{x_k-y_k}\\
&&\mbox{}+\abs{x-y}\sum_{k=1}^i (\abs{x_k}+\abs{y_k})\big)
\end{eqnarray}
whenever $x,y\in\Omega_r$. This proves property~(3).

Using the integral representation of $G$, note that
\[
D_jG_i(x_{1}, x_2,\ldots,x_{p})=
\int_0^1 \sum_{k=1}^i(t(D_jD_kG_i)(tu,v)x_k
+G_i(tu,v)D_j x_k)\,dt.
\]
If $j>i$, then $D_j x_k=0$; and therefore, the estimate for
$\abs{D_jG_i(x)-D_jG_i(y)}$ required to prove property~(5) is
similar to the proof of estimate~\eqref{property3}. The only
difference in the proof occurs because  the 
corresponding function $f$ is not required to vanish at the origin. 
For this reason, the estimate $\abs{f(tu,v)}<K$
is used in place of the Lipschitz
estimate for $\abs{f(tu,v)}$ in the chain of inequalities~\eqref{eq:propf}.
\end{proof}

\begin{defn}
Suppose that $A$ is an $n\times n$ real matrix, 
$\lambda_1>\lambda_2>\cdots>\lambda_p>0$, and the real part
of each eigenvalue of $A$ is one of the real numbers $-\lambda_i$
for $i\in\{1,2,\ldots,p\}$.
The matrix $A$ has the \emph{$k$-spectral gap condition}
if $\lambda_{i-1}/\lambda_{i}>k$ for $i\in\{2,3,\ldots,p\}$. 
\end{defn}

\begin{prop}\label{prop:gap}
Suppose that $Y=A+G$ is a quasi-linear $\mathcal{C}^3$ vector field defined on
the open set $U$ containing the origin. 
If the matrix $A$ has the \emph{$3$-spectral gap condition} and
the closure of $\Omega_r$ is contained in $U$, 
then there is a vector field of the form $X$ in $\mathcal{P}_r$ such that 
the restrictions of the vector fields $X$ and $Y$ to $\Omega_r$ are equal.
\end{prop}
\begin{proof}
We will outline the proof, the details are left to the reader.

There is a linear change of coordinates such that the linear
part of the transformed vector field is block diagonal with
diagonal blocks $A_i$, for $i\in\{1,2,\ldots,p\}$, such that $-\lambda_i$
is the real part of each eigenvalue of the matrix $A_i$ and
$\lambda_1>\lambda_2>\cdots>\lambda_p>0$. Thus, without loss of
generality, we may as well assume that $A$ has this block diagonal
form. 

Consider the vector field $Y$ in the form
\[ (B x+G_1(x,y), A_py+G_2(x,y))\]
where $B$ is block diagonal with blocks $A_1, A_2,\ldots,A_{p-1}$.
Because the $3$-gap condition is satisfied for the
$\mathcal{C}^3$ vector field $Y$ viewed in this form, an application of the
smooth spectral gap theorem (see~\cite{ll}) can be used to
obtain a $\mathcal{C}^3$ function $\phi$, defined for $\abs{y}$ sufficiently small,
such that $\phi(0)=0$ and  
$\{(x,y): x=\phi(y)\}$ is an invariant manifold. 

In the new coordinates $u=x-\phi(y)$ and $v=y$, the vector field $Y$
is given by
\[(B u+G_2(u,v), A_p v+G_3(u,v))\]
where $G_2$ and $G_3$ are $\mathcal{C}^2$ and $G_2(0,v)\equiv 0$.
By an application of Dorroh's theorem 
(as in the proof of Theorem~\ref{thm:fnn}), this system is $\mathcal{C}^3$ conjugate
to a $\mathcal{C}^3$ vector field $Y_1$ of the same form. 

Next, consider the vector field $Y_1$ in the form
\[(C u+G_4(u,v,w), A_{p-1}v+G_5(u,v,w), A_p w+G_6(u,v,w))\]
where the variables are renamed. We have already proved that
$G_5(0,0,w)\equiv 0$. By an application of the smooth spectral gap
theorem, there is a $\mathcal{C}^3$ function $\psi$ such that
$\{(u,v,w): u=\psi(v,w)\}$ is an invariant manifold. 
In the new coordinates $a= u-\psi(v,w)$, $b=v$, and $c=w$, the vector
field has the form
\[(Ca+G_7(a,b,c), A_{p-1}b+G_8(a,b,c), A_p w+G_9(a,b,c))\]
where $G_8(0,0,c)\equiv 0$ and $G_7(0,b,c)\equiv 0$. 
By Dorroh smoothing we can assume that the functions
$G_7$, $G_8$, and $G_9$ are class $\mathcal{C}^3$. 

To complete the proof, repeat the argument 
to obtain a lower triangular vector field
and then apply Proposition~\ref{prop:gap}.
\end{proof}


We will prove the following theorem.
\begin{thm}\label{th:lotri}
For each $\mu\in (0,1)$, a
$\mathcal{C}^3$ lower triangular vector field (or a $\mathcal{C}^3$ quasi-linear vector
field whose linear part satisfies the $3$-spectral gap condition)
is linearizable at the origin by a $\mathcal{C}^{1,\mu}$ 
near-identity diffeomorphism. 
\end{thm}
In particular, for the restricted class of vector fields
mentioned in Theorem~\ref{th:lotri}, the 
H\"older exponent of the linearizing transformation
is not required to be less than the H\"older
spectral exponent of $A$; rather, the H\"older exponent 
can be chosen as close to the number one as we wish. 

\subsection{Saddles}
Theorem~\ref{th:lotri} together with Theorem~\ref{th:hsct} can
be used, in the obvious manner, 
to obtain improved results on the smooth linearization of 
special systems with hyperbolic saddles. 
For example, suppose that $X=\mathcal{A}+\mathcal{F}$ is quasi-linear such that
$\mathcal{A}$ is in block diagonal form $\mathcal{A}=(\mathcal{A}^s,\mathcal{A}^u)$ where
all eigenvalues of $\mathcal{A}^s$ have negative real parts and
all eigenvalues of $\mathcal{A}^u$ have positive real parts. 
In this case, the vector field has the form 
$X(x,y)=(\mathcal{A}^sx+G(x,y), \mathcal{A}^u y+H(x,y))$ where $\mathcal{F}=(G,H)$. 
The vector field $X$ is called \emph{triangular} if
$G(0,y)\equiv 0$ and $H(x,0)\equiv 0$, and the vector fields
$x\mapsto \mathcal{A}^sx+G(x,0)$ and $y\mapsto -(\mathcal{A}^u y+H(0,y))$ are both
lower triangular. 
\begin{thm}\label{th:sltvf}
Suppose that $X=\mathcal{A}+\mathcal{F}$ is a quasi-linear $\mathcal{C}^3$ 
triangular vector field and 
there are positive numbers $a_L$, $a_R$, $b_L$, and $b_R$ such that the real
parts of the eigenvalues of $\mathcal{A}$ are contained in the union of the
intervals $[-a_L,-a_R]$ and $[b_L,b_R]$.
If $a_L-a_R< b_L$ and  $b_R-b_L< a_R$,
then $X$ is $\mathcal{C}^1$ linearizable.
\end{thm}
The next theorem replaces the requirement that the vector field
be triangular with a spectral gap condition.
\begin{thm}\label{th:slugc}
Suppose that $X=\mathcal{A}+\mathcal{F}$ is a quasi-linear $\mathcal{C}^3$ vector field with
a hyperbolic saddle at the origin, the set of negative real parts
of eigenvalues of $\mathcal{A}$ is given by 
$\{-\lambda_1,\ldots,-\lambda_p\}$, the set of positive real
parts is given by $\{\sigma_1,\ldots,\sigma_q\}$, and
\[
-\lambda_1<-\lambda_2<\cdots<-\lambda_p<0
                           <\sigma_q<\sigma_{q-1}<\cdots<\sigma_1.
\]
If $\lambda_{i-1}/\lambda_i>3$, for $i\in \{2,3,\ldots,p\}$, and
$\sigma_{i-1}/\sigma_i>3$, for $i\in \{2,3,\ldots,q\}$, and if
$\lambda_1-\lambda_p> \sigma_q$ and $\sigma_1-\sigma_q< \lambda_p$,
then $X$ is $\mathcal{C}^1$ linearizable.
\end{thm}

\subsection{Infinitesimal Conjugacy and Fiber Contractions}
Recall from Section~\ref{sec1} that the near-identity map 
$h=\id+\eta$ on $\mathbb{R}^n$ conjugates the quasi-linear 
vector field $X=A+F$ to the linear vector field given by $A$ if
$\eta$ satisfies the infinitesimal conjugacy equation
\[L_A\eta=F\circ(\id+\eta).\]
In case the nonlinear vector field $X$ is in $\mathcal{P}_r$,
we will invert the Lie derivative operator
$L_A$ on a Banach space $\mathcal{B}$ of continuous functions, defined
on an open neighborhood $\Omega$ of the origin, that also satisfy
a H\"older condition at the origin. The inverse $G$ of $L_A$ 
is used to obtain a fixed point equation, 
\[
\alpha=G(F\circ(\id+\alpha)),
\]
that can be solved by the contraction principle. Its unique fixed
point $\eta$ is a solution of the infinitesimal conjugacy equation
and $h=\id+\eta$ is the desired near-identity 
continuous linearizing transformation.
To show that $\eta$ is smooth, we will use fiber contraction
(see the discussion following Theorem~\ref{th:invman}).

The candidates for the (continuous) derivative of $\eta$ belong to
the space $\mathcal{H}$ of continuous functions from $\Omega$ to
the bounded linear operators on $\mathcal{B}$. Moreover, the derivative
of $\eta$, if it exists, satisfies the fixed point equation  
\[
\Psi=\mathcal{G}(DF\circ(\id+\eta)(I+\Psi))
\]
on $\mathcal{H}$,
where $\mathcal{G}$ is an integral operator that inverts the
differential operator $\mathcal{L}_A$ given by
\begin{equation}\label{defsL}
\mathcal{L}_A\Psi(x)=\frac{d}{dt}e^{-tA} \Psi(e^{tA}  x)e^{tA} \Big|_{t=0}.
\end{equation}  
For appropriately defined subsets $\mathcal{D}\subset \mathcal{B}$ and $\mathcal{J}\subset\mathcal{H}$, 
we will show that the bundle map
$\Lambda:\mathcal{D}\times\mathcal{J}\to \mathcal{D}\times\mathcal{J}$
given by
\[
\Lambda(\alpha,\Psi)=(G(F\circ(\id+\alpha)),\mathcal{G}(DF\circ (\id+\alpha)(I+\Psi)))
\]
is the desired fiber contraction.
   

\subsection{Sources and Sinks}
Theorem~\ref{th:lotri} is an immediate consequence of 
Proposition~\ref{c22nor} and the following result.
\begin{thm}\label{thm:sc1}
If $\mu\in (0,1)$ and $X$ is in $\mathcal{P}_r$,
then $X$ is linearizable at the
origin by a $\mathcal{C}^{1,\mu}$ near-identity diffeomorphism.
\end{thm}

The remainder of this section is devoted to the proof of
Theorem~\ref{thm:sc1}

By performing a linear change of coordinates (if necessary),
there is no loss of generality if we assume that the block matrices 
$A_1,\ldots A_p$ on the diagonal of $A$
are each in real Jordan canonical form. 
In this case, it is easy to see that there is a real valued function
$t\mapsto Q(t)$, 
given by $Q(t)=C_Q(1+\abs{t}^{n-1})$ where $C_Q$ is a constant,
such that
\begin{equation}\label{qbounda}
\abs{e^{tA_i}}\le e^{-\lambda_i t} Q(t)
\end{equation}
for each $i\in\{1,\ldots,p\}$. 
Also, for each $\lambda$ with
\begin{equation}\label{llll}
-\lambda_1<-\lambda_2<\cdots<-\lambda_p<-\lambda<0,
\end{equation}
there is an adapted norm on $\mathbb{R}^n$ such that
\begin{equation}\label{adext}
\abs{e^{tA} x}\le e^{-\lambda t}\abs{x}
\end{equation}
whenever $x\in \mathbb{R}^n$ and $t\ge 0$ (see, for example, \cite{Chicone}
for the standard construction of the adapted norm).

Unfortunately, the adapted norm is not necessarily natural with
respect to the decomposition $\mathbb{R}^{n_1}\times\cdots\times \mathbb{R}^{n_p}$
of $\mathbb{R}^n$. The natural norm is the $\ell_1$-norm. 
For $i\in \{1,2,\ldots,p\}$, we will use the notation
\begin{equation}\label{inot}
\abs{x}_i:=\sum_{k=1}^i\abs{x_k}.
\end{equation}
In particular, $\abs{\ }_p$ is a norm on $\mathbb{R}^n$ that does respect
the decomposition. It is also 
equivalent to the adapted norm; that is,
there is a constant $K>1$ such that
\begin{equation}\label{eon}
\frac{1}{K}\abs{x}_p\le \abs{x}\le K\abs{x}_p.
\end{equation}

Because $A$ is block diagonal and in view of the ordering
of the real parts of the eigenvalues in display~\eqref{llll}, 
we have the useful estimate
\begin{eqnarray}\label{useest}
\nonumber
\abs{e^{tA}x}_i&=&\sum_{k=1}^i\abs{e^{tA_k}x_k}\\
\nonumber &=&\sum_{k=1}^ie^{-\lambda_k t}Q(t)\abs{x_k}\\
&\le &e^{-\lambda_i t}Q(t)\abs{x}_i.
\end{eqnarray}

Recall that for $r>0$, $\Omega_r:=\{x\in \mathbb{R}^n: \abs{x}<r\}$. 
Also, note that a function
$\alpha:\Omega_r\to \mathbb{R}^n$ is given by
$\alpha=(\alpha_1,\ldots,\alpha_p)$ corresponding to the decomposition
$x=(x_1,\ldots, x_p)$.

For $r>0$ and $0<\mu<1$, 
let $\mathcal{B}_{r,\mu}$ denote the space of all continuous functions 
from $\Omega_r$ to $\mathbb{R}^n$ such that the norm
\[
\omnorm{\alpha}:=\max_{i\in \{1,\ldots,p\}}\sup_{0<\abs{x}<r}
\frac{\abs{\alpha_i(x)}}{\abs{x}_i \abs{x}^\mu}
\]  
is finite.  

\begin{prop} 
The set $\mathcal{B}_{r,\mu}$ endowed with the norm $\omnorm{\ }$ 
is a Banach space. 
\end{prop}

For $\alpha\in \mathcal{B}_{r,\mu}$, define
\begin{equation}\label{G} 
(G\alpha)(x):=-\int_0^\infty e^{-tA}\alpha(e^{tA} x)\,dt. 
\end{equation}
Note that the ``natural'' definition of $G$---in view of
the definitions in Section~\ref{sec1}---would be
$\int_0^\infty e^{tA}\alpha(e^{-tA} x)\,dt$. Although this
definition does lead to the existence of a
linearizing homeomorphism, the
homeomorphism thus obtained may not be smooth.


\begin{prop}\label{prop:Gbound}
The function $G$ is a bounded linear operator on 
$\mathcal{B}_{r,\mu}$; and, for fixed $\mu$, the operator norms are uniformly bounded
for $r>0$. 
Moreover,  $L_AG=I$ on $\mathcal{B}_{r,\mu}$,  
and $GL_A$, restricted to the domain of $L_A$ on $\mathcal{B}_{r,\mu}$, is the identity.
\end{prop}
\begin{proof}
The $k$th component of $G\alpha$ is given by
\[
(G\alpha)_i(x)=-\int_0^\infty e^{-tA_i}\alpha_i(e^{tA} x)\,dt.
\]
Since we are using an adapted norm on $\mathbb{R}^n$, if $x\in \Omega_r$, then so
is $e^{tA} x$.
Using this fact, the definition of the space $\mathcal{B}_{r,\mu}$, and 
the inequalities~\eqref{qbounda} and~\eqref{useest}, we have the estimate
\begin{eqnarray*}
\abs{e^{-tA_i}\alpha_i(e^{tA} x)}&\le & e^{\lambda_i t} Q(t)
\frac{\abs{\alpha_i(e^{tA} x)}}{\abs{e^{tA} x}_i\abs{e^{tA} x}^\mu} 
     \abs{e^{tA} x}_i\abs{e^{tA} x}^\mu\\
&\le & e^{-\lambda\mu t}Q^2(t)\omnorm{\alpha}\abs{x}_i \abs{x}^\mu.
\end{eqnarray*}
Because $Q(t)$ has polynomial growth, there is a universal constant
$c>0$ such that
\[
e^{-\lambda \mu t/2} Q^2(t)\le c
\]
whenever $t\ge 0$. Hence, it follows that
\begin{eqnarray*}
\abs{e^{-tA_i}\alpha_i(e^{tA} x)}
&\le & c\, e^{-\lambda\mu t/2}\omnorm{\alpha}\abs{x}_i \abs{x}^\mu.
\end{eqnarray*}
By Lemma~\ref{pfcont}, the function $x\mapsto (G\alpha)_i(x)$
is continuous in $\Omega_r$ and clearly
\[
\sup_{0<\abs{x}<r} \frac{\abs{(G\alpha)_i(x)}}{\abs{x}_i \abs{x}^\mu} 
< \frac{2c}{\lambda\mu}\omnorm{\alpha}. 
\]

The fundamental theorem of calculus and the properties of the Lie
derivative are used to show that $G$ is a right inverse of $L_A$ and
that $GL_A$ is the identity operator on the domain of $L_A$.
\end{proof}

\begin{prop}\label{prop:range}
If $\alpha\in \mathcal{B}_{r,\mu}$, 
then $F\circ(\id+\alpha)\in \mathcal{B}_{r,\mu}$.
Moreover, if $\epsilon>0$ is given and $r>0$ is sufficiently small, then
the map $\alpha\mapsto F\circ (\id+\alpha)$ restricted to the closed
unit ball in $\mathcal{B}_{r,\mu}$ has range in the ball with radius $\epsilon$ 
centered at the origin.
\end{prop}
\begin{proof}
Clearly the function $F\circ (\id+\alpha)$ is continuous in 
$\Omega_r$. We will show first that this function is in $\mathcal{B}_{r,\mu}$.

The $i$th component of $F\circ (\id+\alpha)$ is
\[
[F\circ (\id+\alpha)]_i=F_i\circ(\id+\alpha)
\]
Using property~(3) in the definition of $\mathcal{P}_r$, the equivalence
of norms, and the triangle law, we have the estimate
\begin{eqnarray*}
\abs{F_i(x+\alpha(x))} 
&\le& KM (\abs{x}_p+\abs{\alpha(x)}_p) (\abs{x}_i+\abs{\alpha(x)}_i),
\end{eqnarray*}
and for $k\in \{1,2, \ldots, p\}$ we have the inequality 
\begin{eqnarray}\label{impin}
\nonumber \abs{\alpha(x)}_k&\le& 
   \omnorm{\alpha}\abs{x}^\mu \sum_{\ell=1}^k\abs{x}_\ell\\
                 &\le& p\omnorm{\alpha}\abs{x}^\mu \abs{x}_k.
\end{eqnarray}
By combining these estimates and restricting $x$ to lie in $\Omega_r$
where $\abs{x}<r$, it follows that
\[
\abs{F_i(x+\alpha(x))}
\le MK^2 (1+p\omnorm{\alpha}r^\mu)^2  \abs{x} \abs{x}_i;
\]
and therefore,
\[
\omnorm{F\circ (\id+\alpha)}\le MK^2(1+p\omnorm{\alpha}r^\mu)^2 r^{1-\mu}.
\]  
This proves the first statement of the proposition.
The second statement of the proposition
follows from this norm estimate because $0<\mu<1$. 
\end{proof}

The \emph{special Lipschitz number} for $\alpha\in \mathcal{B}_{r,\mu}$ is defined
as follows:
\[ 
\slip(\alpha):= \max_{i\in\{1,2,\ldots,p\}}
\sup\big\{\frac{\abs{\alpha(x)-\alpha(y)}_i}{\abs{x-y}_i}:
x,y\in \Omega_r; x\ne y \big\}.
\]
Also, let $\mathcal{D}$ denote the set of all $\alpha$ in $\mathcal{B}_{r,\mu}$ such that
$\omnorm{\alpha}\le 1$ and $\slip(\alpha)\le 1$.
\begin{prop}\label{prop:lip}
The set $\mathcal{D}$ is a complete metric subspace of $\mathcal{B}_{r,\mu}$.
\end{prop}
\begin{proof}
Suppose that $\{\alpha^m\}_{m=1}^\infty$ is a sequence in $\mathcal{D}$
that converges to $\alpha$ in $\mathcal{B}_{r,\mu}$. To show
$\slip(\alpha)\le 1$, use the inequality~\eqref{impin} to obtain
the estimate
\begin{eqnarray*}
\abs{\alpha(x)-\alpha(y)}_i
&\le& \abs{\alpha(x)-\alpha^m(x)}_i+\abs{\alpha^m(x)-\alpha^m(y)}_i
+\abs{\alpha^m(y)-\alpha(y)}_i \\
&\le& 2p\omnorm{\alpha-\alpha^m}r^{1+\mu}+\slip(\alpha^m)\abs{x-y}_i
\end{eqnarray*}
and pass to the limit as $m\to \infty$.
\end{proof}

\begin{prop}\label{prop:Fctrtion}
If $r>0$ is sufficiently small, then the function
\begin{equation}\label{cont:svf}
\alpha\mapsto G(F\circ(\id+\alpha))
\end{equation}
is a contraction on 
$\mathcal{D}$.
\end{prop}
\begin{proof}
Let $R:=1/\omnorm{G}$ and suppose that $\omnorm{\alpha}\le 1$.
By Proposition~\ref{prop:range}, 
if $r>0$ sufficiently small, then
\[
\omnorm{G(F\circ(\id+\alpha))}\le \omnorm{G}\omnorm{F\circ(\id+\alpha)}\le 1.
\]
Hence, the closed unit ball in $\mathcal{B}_{r,\mu}$ is an invariant set for the
map $\alpha\mapsto G(F\circ(\id+\alpha))$. 

To prove that $\mathcal{D}$ is an invariant set,
we will show the following proposition: If $\alpha\in \mathcal{D}$ and 
$r>0$ is sufficiently
small, then $\slip(G(F\circ(\id+\alpha)))<1$.

Start with the basic inequality
\begin{eqnarray*}
\lefteqn{\abs{(G(F\circ(\id+\alpha)))_i(x)-(G(F\circ(\id+\alpha)))_i(y)}
\le}\hspace*{1in}\\
&& \int_0^\infty e^{\lambda_i t}Q(t)
\abs{F_i(e^{tA}x+\alpha(e^{tA}x))- F_i(e^{tA}y+\alpha(e^{tA}y))}\,dt,
\end{eqnarray*}
and then use property~(3) in the definition of $\mathcal{P}_r$
to estimate the third factor of the integrand. 
Note that the resulting estimate has two terms, each of the
form $\abs{\ }\,\abs{\ }_i$. 
After making the obvious triangle law estimates using
the linearity of $e^{tA}$, 
the inequality $\abs{\alpha(x)-\alpha(y)}_i\le \abs{x-y}_i$, 
and the inequality~\eqref{useest}; 
it is easy to see that the first factor is majorized
by a bounded multiple of $e^{-\lambda t}$ and
the second factor is majorized
by a bounded multiple of $e^{-\lambda_i t}Q(t)$. 
One of the multipliers is bounded above by a constant multiple of $r$; 
the other is bounded above by a constant multiple of $\abs{x-y}$. 
The integral converges because its integrand is thus
majorized by a constant (in $t$) multiple
of $e^{-\lambda t}Q(t)$. In fact, there
is a constant $c>0$ such that
\[
\abs{(G(F\circ(\id+\alpha)))(x)-(G(F\circ(\id+\alpha)))(y)}
\le c r \abs{x-y};
\]
and therefore, if $r>0$ is sufficiently small, then
$\slip(G(F\circ(\id+\alpha)))<1$, as required. 

We have just established that the complete metric space 
$\mathcal{D}$ is an invariant set for the map
$\alpha\mapsto G(F\circ(\id+\alpha))$. 
To complete the proof, we will show that this map is a contraction 
on $\mathcal{D}$.

Fix $\alpha$ and $\beta$  such that
$\omnorm{\alpha}\le 1$ and $\omnorm{\beta}\le 1$,
and note that 
\begin{eqnarray}\label{ineq1}
\nonumber\lefteqn{
\omnorm{G(F\circ(\id+\alpha))-G(F\circ(\id+\beta))}\le}\hspace*{1.5in}\\
&&\omnorm{G}\omnorm{F\circ(\id+\alpha)-F\circ(\id+\beta)}.
\end{eqnarray}

The $i$th component function of the function
\[x\mapsto F\circ(\id+\alpha)-F\circ(\id+\beta)\]
is given by
\[
C_i:=F_i(x+\alpha(x))-F_i(x+\beta(x)).
\]
Using the inequality~\eqref{impin} and property~(3) of the
definition of $\mathcal{P}_r$, 
we have the estimate
\begin{eqnarray*}
\abs{C_i}&\le& MK(\abs{x+\alpha(x)}_p \abs{\alpha(x)-\beta(x)}_i
+ \abs{\alpha(x)-\beta(x)}_p \abs{x+\beta(x)}_i)\\
&\le& 
KM\big((\abs{x}_p+p\omnorm{\alpha}r^\mu\abs{x}_p)p\omnorm{\alpha-\beta}\abs{x}^\mu\abs{x}_i\\
&&\mbox{}+p\omnorm{\alpha-\beta}\abs{x}^\mu \abs{x}_p 
  (\abs{x}_i+p\omnorm{\alpha}r^\mu\abs{x}_i)\big)\\
&\le& 
2K^2Mp(1+pr^\mu) r \omnorm{\alpha-\beta}\abs{x}^\mu\abs{x}_i.
\end{eqnarray*}
Hence, there is a constant $\bar M>0$ such that 
\[
\omnorm{F(x+\alpha(x))-F(x+\beta(x))}\le \bar M r\omnorm{\alpha-\beta}.
\]

Using the inequality~\eqref{ineq1} and Proposition~\ref{prop:Gbound},
it follows that if $r>0$ is sufficiently small, then
the map $\alpha\mapsto G(F\circ(\id+\alpha))$ is a contraction
on the closed unit ball in $\mathcal{B}_{r,\mu}$.
\end{proof}

We will prove that the 
unique fixed point $\eta$ of the contraction~\eqref{cont:svf} 
is $\mathcal{C}^{1,\mu}$  for all $\mu\in (0,1)$.

Let $\mathcal{H}_{r,\mu}$  denote the space of
all continuous maps $\Psi:\Omega_r\to{\bf L}(\mathbb{R}^n,\mathbb{R}^n)$ such that, for $j\le i$,
\[
\sup_{0<\abs{x}<r}\frac{\abs{\Psi_{ij}(x)}}{\abs{x}^\mu}<\infty
\]
and, for $j>i$,
\[
\sup_{0<\abs{x}<r}\frac{\abs{\Psi_{ij}(x)}}{\abs{x}^\mu_i}<\infty                \]
where the subscripts refer to the components of the matrix valued
function $\Psi$ with respect to the decomposition
$\mathbb{R}^n=\mathbb{R}^{n_1}\times\cdots\times\mathbb{R}^{n_p}$. 
Also, the norm $\munorm{\Psi}$ of $\Psi\in \mathcal{H}_{r,\mu}$ is defined
to be the maximum of these suprema.

\begin{prop}
The space $\mathcal{H}_{r,\mu}$ endowed with the norm $\munorm{\ }$ is a Banach space.
\end{prop}

For $\Psi\in \mathcal{H}_{r,\mu}$, define
\begin{equation}\label{calG}
(\mathcal{G} \Psi)(x):= -\int_0^\infty e^{-tA}\Psi(e^{tA} x)e^{tA}\,dt.
\end{equation}

\begin{prop}\label{prop:calG}
If $\mu<1$ is sufficiently large, then
the operator $\mathcal{G}$ is a bounded linear operator on $\mathcal{H}_{r,\mu}$.
Also, $\munorm{\mathcal{G}}$ is uniformly bounded with respect to $r$.
\end{prop}
\begin{proof}
In view of the inequality~\eqref{qbounda} and for $x\in \Omega_r$, 
the $ij$-component of the integrand in the definition
of $\mathcal{G}$ is bounded above as follows:
\[
\abs{e^{-tA_i} \Psi_{ij}(e^{tA} x)e^{tA_j}}
 \le e^{\lambda_i t} Q(t) \abs{\Psi_{ij}(e^{tA} x)} 
                                 e^{-\lambda_j t} Q(t).
\]

For $j\le i$, we have the inequality
\begin{eqnarray*}
\abs{e^{-tA_i} \Psi_{ij}(e^{tA} x)e^{tA_j}}&\le& e^{(\lambda_i-\lambda_j)t}Q^2(t)
                                 \munorm{\Psi}\abs{e^{tA} x}^\mu\\
&\le& e^{(\lambda_i-\lambda_j-\mu\lambda) t}Q^3(t)\munorm{\Psi}\abs{x}^\mu\\
&\le& e^{-\mu\lambda t}Q^3(t)\munorm{\Psi}\abs{x}^\mu;
\end{eqnarray*}
and for $j>i$, by using the estimate~\eqref{useest}, we have
\begin{eqnarray*}
\abs{e^{-tA_i} \Psi_{ij}(e^{tA} x)e^{tA_j}}&\le& e^{(\lambda_i-\lambda_j)t}Q^2(t)
                                 \munorm{\Psi}\abs{e^{tA} x}_i^\mu\\
&\le& e^{((1-\mu)\lambda_i-\lambda_j) t}Q^3(t)\munorm{\Psi}\abs{x}_i^\mu.
\end{eqnarray*}

Because $Q$ has polynomial growth, if $\mu<1$ is sufficiently large,
then the integrals
\[
\int_0^\infty e^{((1-\mu)\lambda_i-\lambda_j) t}Q^3(t)\,dt, \qquad
\int_0^\infty e^{-\lambda_j t}Q^3(t)\,dt
\]
both converge. By the definition of the norm, 
$\munorm{\mathcal{G}}$ is bounded by a constant that does not depend on $r$.
\end{proof}

The hypothesis of Proposition~\ref{prop:calG} is the first instance
where  $\mu<1$ is required to be sufficiently large. This restriction
is compatible with the conclusion of Theorem~\ref{thm:sc1}. 
Indeed, if a function is H\"older on a bounded set with H\"older
exponent $\mu$, then it is H\"older with exponent $\nu$ whenever
$0<\nu\le\mu$.

A map $\Psi\in \mathcal{H}_{r,\mu}$ is called \emph{special $\mu$-H\"older}
if, for all $i,j\in\{1,2,\ldots,p\}$,
\begin{equation}\label{smh1}
\abs{\Psi_{ij}(x)-\Psi_{ij}(y)}\le \abs{x-y}^\mu
\end{equation} 
and, for all $j>i$,
\begin{equation}\label{smh2}
\abs{\Psi_{ij}(x)-\Psi_{ij}(y)}\le \abs{x-y}_i^\mu
  +\abs{x-y}^\mu(\abs{x}_i^\mu+\abs{y}_i^\mu)
\end{equation}
whenever $x,y\in\Omega_r$. 


Let $\mathcal{J}$ denote the subset of $\mathcal{H}_{r,\mu}$ consisting of those
functions in $\mathcal{H}_{r,\mu}$ such that $\Psi$ is special $\mu$-H\"older and
$\munorm{\Psi}\le 1$. 
Also, for $\alpha \in \mathcal{D}$ and $\Psi\in \mathcal{J}$, let 
\begin{eqnarray}\label{Gamma}
\nonumber \Gamma(\alpha)&:=&G(F\circ(\id+\alpha)), \\
\nonumber \Upsilon(\alpha,\Psi)&:=&DF\circ(\id+\alpha)(I+\Psi),\\
\nonumber \Delta(\alpha,\Psi)&:=&\mathcal{G}\Upsilon(\alpha,\Psi),\\
\Lambda(\alpha,\Psi)&:=&(\Gamma(\alpha),\Delta(\alpha,\Psi)).
\end{eqnarray}

\begin{prop}\label{prop:smuhcom}
The set $\mathcal{J}$ is a complete metric subspace of $\mathcal{H}_{r,\mu}$. 
\end{prop}
\begin{proof}
It suffices to show that $\mathcal{J}$ is closed in  $\mathcal{H}_{r,\mu}$.
The proof of this fact is similar to the 
proof of Proposition~\ref{prop:lip}. 
\end{proof}

\begin{prop}\label{prop:fc}
If $r>0$ is sufficiently small and $\mu<1$ is sufficiently large,
then the bundle map $\Lambda$ defined in display~\eqref{Gamma} 
is a fiber contraction on $\mathcal{D}\times \mathcal{J}$. 
\end{prop}
\begin{proof}
We will show first that there is a constant $C$ such that 
\[\munorm{DF\circ(\id+\alpha)(I+\Psi)}\le C r^{1-\mu}\]
for all $\alpha\in \mathcal{D}$ and $\Psi\in \mathcal{J}$.

Using the properties listed in the definition of $\mathcal{P}_r$,
note that if $j\le i$, then
\begin{eqnarray}\label{fc:ext1}
\nonumber 
\abs{D_jF_i(x+\alpha(x))} &\le & M\abs{x+\alpha(x)}\\
\nonumber &\le & MK(\abs{x}_p+\abs{\alpha(x)}_p)\\
\nonumber &\le & MK^2(1+p\omnorm{\alpha}r^\mu)\abs{x}\\
&\le & MK^2(1+pr^\mu)r^{1-\mu}\abs{x}^\mu,
\end{eqnarray}
and if $j>i$, then
\begin{eqnarray}\label{fc:ext2}
\nonumber 
\abs{D_jF_i(x+\alpha(x))} &\le & MK(\abs{x}_i+\abs{\alpha(x)}_i)\\
&\le & MK^{1+\mu}(1+pr^\mu)r^{1-\mu}\abs{x}_i^\mu. 
\end{eqnarray}
It follows that there is a constant $c_1>0$ such that
\begin{equation}\label{eq:normsize}
\munorm{DF\circ(\id+\alpha)}\le c_1 r^{1-\mu}.
\end{equation}

Suppose that $\Phi$ and $\Psi$ are in $\mathcal{H}_{r,\mu}$. We will show that
there is a constant $c_2>0$ such that
\begin{equation}\label{prule}
\munorm{\Phi\Psi}\le c_2 r^{\mu} \munorm{\Phi}\munorm{\Psi}.
\end{equation}
First, note that
\[
\abs{(\Phi\Psi)_{ij}}\le \sum_{k=1}^p\abs{\Phi_{ik}}\abs{\Psi_{kj}}.
\]
There is a constant $\bar c$ such that
\begin{equation}\label{est:jm}
\abs{\Phi_{ij}}\le \bar c \munorm{\Phi}\abs{x}^\mu
\end{equation}
for all $i,j\in \{1,2,\ldots,p\}$.
In fact, for $j\le i$, this estimate is immediate from the definition
of the norm; for $j>i$, it is a consequence of the inequality
\[
\abs{\Phi_{ij}}\le\munorm{\Phi}\abs{x}_i^\mu
                \le K^\mu \munorm{\Phi}\abs{x}_i^\mu.
\]
Using estimate~\eqref{est:jm}, it follows that
\begin{eqnarray*}
\abs{(\Phi\Psi)_{ij}}&\le& 
\sum_{k=1}^p\bar c^2 \munorm{\Phi}\munorm{\Psi} \abs{x}^{2\mu} \\
&\le& 
(p\bar c^2 \abs{r}^{\mu}) \munorm{\Phi}\munorm{\Psi} \abs{x}^{\mu}
\end{eqnarray*}
whenever $j\le i$,
and 
\begin{eqnarray*}
\abs{(\Phi\Psi)_{ij}}&\le&
\sum_{k=1}^i\bar c^2 \munorm{\Phi} \abs{x}^{\mu}\munorm{\Psi} \abs{x}_i^{\mu} 
+\sum_{k=i+1}^p \munorm{\Phi} \abs{x}_i^{\mu}\bar c\munorm{\Psi}\abs{x}^{\mu}\\
&\le& 
(p\bar c \abs{r}^{\mu}) \munorm{\Phi}\munorm{\Psi} \abs{x}_i^{\mu}   
\end{eqnarray*}
whenever $j>i$. This completes the proof of estimate~\eqref{prule}.

It is now clear that there is a constant $C>0$ such that 
\begin{equation}\label{inJ}
\munorm{DF\circ(\id+\alpha)(I+\Psi)}\le \munorm{DF\circ(\id+\alpha)}
     +\munorm{DF\circ(\id+\alpha)\Psi}\le C r^{1-\mu}
\end{equation}
for all $\alpha\in \mathcal{D}$ and $\Psi\in \mathcal{J}$.
Hence, if $r$ is sufficiently small, then
\[
\munorm{\Delta(\alpha,\Psi)}\le 
\munorm{\mathcal{G}}\munorm{DF\circ(\id+\alpha)(I+\Psi)}\le 1
\]
for all $\alpha\in \mathcal{D}$ and $\Psi\in \mathcal{J}$.

To complete the proof that $\mathcal{J}\times\mathcal{D}$ is an invariant set
for $\Lambda$, we will prove the following proposition:   
If $r>0$ is sufficiently small, then $\Delta(\alpha,\Psi)$ 
is special $\mu$-H\"older whenever $\alpha\in \mathcal{D}$ and $\Psi\in \mathcal{J}$.

Recall from display~\eqref{Gamma} that
\[\Upsilon(\alpha,\Psi):=DF\circ(\id+\alpha)(I+\Psi).\]
We will use the following uniform estimates:
There is a constant $c>0$ such that
\begin{equation}\label{sml1}
\abs{\Upsilon(\alpha,\Psi)_{ij}(x)-\Upsilon(\alpha,\Psi)_{ij}(y)}
\le cr^\mu\abs{x-y}^\mu
\end{equation} 
and, for all $j>i$,
\begin{equation}\label{sml2}
\abs{\Upsilon(\alpha,\Psi)_{ij}(x)-\Upsilon(\alpha,\Psi)_{ij}(y)}\le
 c(r^\mu+r^{1-\mu})\abs{x-y}_i^\mu
  +\abs{x-y}^\mu(\abs{x}_i^\mu+\abs{y}_i^\mu)
\end{equation}
whenever $0<r<1$, $x,y\in\Omega_r$, $\alpha\in \mathcal{D}$, and $\Psi\in \mathcal{J}$. 

To prove the inequalities~\eqref{sml1}
and~\eqref{sml2} we will show (the key observation) that
for $\alpha\in\mathcal{D}$ there are Lipschitz (hence H\"older)
estimates for $DF\circ(\id+\alpha)$.
In fact, using property~(4) in the definition of $\mathcal{P}_r$,
there is a constant $\bar M$ such that
\begin{eqnarray}\label{slesa}
\nonumber \abs{D_j F_i(x+\alpha(x))-D_j F_i(y+\alpha(y))} &\le &
\nonumber M(\abs{x-y}+\abs{\alpha(x)-\alpha(y)})\\
\nonumber &\le & M(\abs{x-y}+K\abs{\alpha(x)-\alpha(y)}_p)\\
\nonumber &\le & M(\abs{x-y}+K^2\slip(\alpha)\abs{x-y})\\
\nonumber &\le & M(1+K^2)\abs{x-y}\\
\nonumber &\le & M(1+K^2)\abs{x-y}^{1-\mu}\abs{x-y}^\mu\\
\nonumber &\le & M(1+K^2)2^{1-\mu}r^{1-\mu}\abs{x-y}^\mu\\
&\le & \bar Mr^{1-\mu}\abs{x-y}^\mu
\end{eqnarray}
for all $i,j\in \{1,2,\ldots,p\}$ and $x,y\in\Omega_r$.
Using property~(5) in the definition of $\mathcal{P}_r$ and the special
Lipschitz estimates for $\alpha$, 
we have the following similar result for $j>i$:
\begin{eqnarray}\label{slesb}
\nonumber
\lefteqn{\abs{D_j F_i(x+\alpha(x))-D_j F_i(y+\alpha(y))}\le} \hspace*{1.5in}\\
&&\bar M r^{1-\mu}(\abs{x-y}_i^\mu
  +\abs{x-y}^\mu(\abs{x}_i^\mu+\abs{y}_i^\mu)).
\end{eqnarray}

We have just obtained ``special H\"older''
estimates the first summand in the representation
\[\Upsilon(\alpha,\Psi) = DF\circ(\id+\alpha)+DF\circ(\id+\alpha)\Psi;\]
to obtain estimates for the second
summand, and hence for $\Upsilon(\alpha,\Psi)$, 
let $\Phi:=DF\circ(\id+\alpha)$ and note that
\begin{eqnarray*}
\abs{(\Phi\Psi)_{ij}(x)-(\Phi\Psi)_{ij}(y)}&\le& 
\sum_{k=1}^p \abs{\Phi_{ik}(x) \Psi_{kj}(x)-\Phi_{ik}(y) \Psi_{kj}(y)}\\
&\le& \sum_{k=1}^p \Xi_k
\end{eqnarray*}
where
\[
\Xi_k:=
\abs{\Phi_{ik}(x)}\abs{\Psi_{kj}(x)-\Psi_{kj}(y)}
+\abs{\Phi_{ik}(x)-\Phi_{ik}(y)} \abs{\Psi_{kj}(y)}.
\]
Because $\Psi$ and $\Phi$ are in $\mathcal{J}$ and $0<r<1$, 
there is a constant $\bar c>0$ such that 
\begin{eqnarray}\label{est:svf1}
\nonumber \Xi_k&\le& \munorm{\Phi} \abs{x}^\mu \abs{x-y}^\mu
+ \bar Mr^{1-\mu}\abs{x-y}^\mu\munorm{\Psi} \abs{y}^\mu\\
\nonumber &\le& (1+\bar M)\abs{r}^\mu \abs{x-y}^\mu\\
&\le& \bar c \abs{r}^\mu \abs{x-y}^\mu.
\end{eqnarray}
The desired inequality~\eqref{sml1} is obtained by summing over
$k$ and adding the result to the estimate~\eqref{slesa}.

Suppose that $j>i$. 
If $k\le i$, then $j>k$ and 
\begin{eqnarray*}
\Xi_k
&\le&
\munorm{\Phi} \abs{x}^\mu
(\abs {x-y}_k^\mu+\abs{x-y}^\mu(\abs {x}_k^\mu+\abs {y}_k^\mu))
+\bar M r^{1-\mu}\abs{x-y}^\mu\munorm{\Psi} \abs{y}_k^\mu\\
&\le&
\abs{r}^\mu \abs {x-y}_k^\mu
+\abs{r}^\mu\abs{x-y}^\mu(\abs {x}_k^\mu+\abs {y}_k^\mu)
+\bar M r^{1-\mu}\abs{x-y}^\mu(\abs {x}_k^\mu+\abs {y}_k^\mu)\\
&\le& (1+\bar M)(r^\mu+r^{1-\mu})
(\abs {x-y}_k^\mu+\abs{x-y}^\mu(\abs {x}_k^\mu+\abs {y}_k^\mu))\\
&\le& (1+\bar M)(r^\mu+r^{1-\mu})
(\abs {x-y}_i^\mu+\abs{x-y}^\mu(\abs {x}_i^\mu+\abs {y}_i^\mu));
\end{eqnarray*}
and if $k>i$, then 
\begin{eqnarray*}
\Xi_k
&\le&
\bar M r^{1-\mu}(\abs{x}_i^\mu+\abs{x}^\mu\abs{x}_i^\mu) \abs {x-y}^\mu\\
&&\mbox{}
+\bar M r^{1-\mu}
(\abs{x-y}_i^\mu+ \abs{x-y}^\mu(\abs {x}_i^\mu+\abs {y}_i^\mu))\abs{y}^\mu\\ 
&\le&
\bar M \abs{r}^{1-\mu}(
\abs {x-y}_i^\mu+
(1+r^\mu)\abs{x-y}^\mu(\abs {x}_i^\mu+\abs {y}_i^\mu)\\
&&\mbox{}+
r^\mu r^{1-\mu}\abs{x-y}^\mu(\abs {x}_i^\mu+\abs {y}_i^\mu))\\
&\le& 2\bar Mr^{1-\mu}
(\abs {x-y}_i^\mu+\abs{x-y}^\mu(\abs {x}_i^\mu+\abs {y}_i^\mu)).
\end{eqnarray*}
The desired inequality~\eqref{sml2} is obtained by summing over
$k$ and adding the result to the estimate~\eqref{slesb}.


Note that
\begin{eqnarray*}
\lefteqn{\abs{(\mathcal{G}\Upsilon(\alpha,\Psi))_{ij}(x)-(\mathcal{G}\Upsilon(\alpha,\Psi))_{ij}(y)}}
\hspace*{1in}\\
&\le&
\int_0^\infty\abs{e^{-tA_i}}
\abs{\Upsilon(\alpha,\Psi)_{ij}(e^{tA}x)-\Upsilon(\alpha,\Psi)_{ij}(e^{tA}y)}
             \abs{e^{tA_j}}\,dt\\
&\le&
\int_0^\infty e^{(\lambda_i-\lambda_j) t} Q^2(t)
\abs{\Upsilon(\alpha,\Psi)_{ij}(e^{tA}x)-\Upsilon(\alpha,\Psi)_{ij}(e^{tA}y)}\,dt.
\end{eqnarray*}
Using the estimates~\eqref{sml1} and~\eqref{adext}, 
for $j\le i$ we have
\begin{eqnarray*}
\lefteqn{
\abs{(\mathcal{G}\Upsilon(\alpha,\Psi)_{ij})(x)-(\mathcal{G}\Upsilon(\alpha,\Psi)_{ij})(y)}
\le}\hspace{2.0in}\\
&&\big(
cr^{1-\mu} \int_0^\infty  e^{(\lambda_i-\lambda_j-\mu\lambda) t} Q^2(t)\,dt
\big)
\abs{x-y}^\mu
\end{eqnarray*}
with $\lambda_i-\lambda_j-\mu\lambda<0$.
On the other hand, using 
inequality~\eqref{sml2} and~\eqref{useest} for the case $j>i$, 
it follows that 
\[
\abs{(\mathcal{G}\Upsilon(\alpha,\Psi)_{ij})(x)-(\mathcal{G}\Upsilon(\alpha,\Psi)_{ij})(y)} 
\le
\bar M (\abs{x-y}_i^\mu +\abs{x-y}^\mu(\abs{x}_i^\mu+\abs{y}_i^\mu))
\]
where 
\[
\bar M:=c(r^{1-\mu}+r^\mu)
\int_0^\infty  e^{((1-\mu)\lambda_i-\lambda_j) t} Q^{2+\mu}(t)\,dt
\]
Hence, if $\mu<1$ is sufficiently large, then
$\mathcal{G}\Psi_{ij}$ satisfies the inequality~\eqref{smh2}; and
because 
\[
\abs{x-y}_i^\mu +\abs{x-y}^\mu(\abs{x}_i^\mu+\abs{y}_i^\mu)\le
(1+2 r^\mu)\abs{x-y}^\mu,
\]
the previous estimate also shows that $\mathcal{G}\Psi_{ij}$ is
$\mu$-H\"older. This completes the proof that $\mathcal{D}\times\mathcal{J}$ is
an invariant set for the fiber contraction.


To show that the function $\Psi\mapsto \Delta(\alpha,\Psi)$ is a uniform
contraction, use the linearity of $\mathcal{G}$ together with
inequalities~\eqref{eq:normsize} and~\eqref{prule} to obtain the
estimate
\begin{eqnarray*}
\munorm{\Delta(\alpha,\Psi_1)-\Delta(\alpha,\Psi_2)}
&\le&\munorm{\mathcal{G}}\nunorm{DF\circ(\id+\alpha)(\Psi_1-\Psi_2)}\\
&\le&\munorm{\mathcal{G}}cr^\mu\nunorm{DF\circ(\id+\alpha)}\munorm{(\Psi_1-\Psi_2)}\\
&\le&\munorm{\mathcal{G}}c^2r\munorm{(\Psi_1-\Psi_2)}.
\end{eqnarray*}
Hence, if $r>0$ is sufficiently small, 
then $\Delta$ is a uniform contraction; 
and therefore, $\Lambda$ is a fiber contraction on $\mathcal{D}\times\mathcal{H}_{r,\mu}$.
\end{proof}

\begin{prop}\label{prop:dandc}
If $\alpha\in \mathcal{D}$, $D\alpha\in \mathcal{J}$, then
$D(G(F\circ(\id+\alpha)))\in \mathcal{J}$ and
\[
D(G(F\circ(\id+\alpha)))=\mathcal{G} (DF\circ(\id+\alpha)(I+D\alpha)).
\]
\end{prop}
\begin{proof}
Let
\[
(G(F\circ(\id+\alpha)))(x):=-\int_0^\infty e^{-tA} F(e^{tA} x+\alpha(e^{tA} x))\,dt.
\]
Since $D\alpha$ exists, the integrand is differentiable.
Moreover, the derivative of the integrand has the form
$e^{-tA} \Psi(e^{tA} x)e^{tA}$ where, by the estimate~\eqref{inJ},
\[
\Psi:=DF\circ(\id+\alpha)(I+D\alpha)
\]
is in $\mathcal{J}$. 
Using the same estimates as in Proposition~\ref{prop:calG}, 
it follows that the derivative of the original integrand is
majorized by an integrable function. The result now follows from 
an application of Lemma~\ref{pfcont} and the definition of $\mathcal{G}$.
\end{proof}

We are now ready to prove Theorem~\ref{thm:sc1}
\begin{proof}
By Proposition~\ref{prop:fc},
$\Gamma$ is a fiber contraction.

Choose a function $\alpha_0\in \mathcal{D}$ such that
$\Psi_0:=D\alpha_0\in \mathcal{J}$---for example, take $\alpha_0=0$, 
and consider the sequence in $\mathcal{D}\times\mathcal{J}$ given by the
forward $\Lambda$-orbit of $(\alpha_0, \Psi_0)$, namely,  the sequence
$\{(\alpha_k,\Psi_k)\}_{k=0}^\infty$ where
\[
\alpha_k:=\Gamma(\alpha_{k-1}),\qquad \Psi_k:=\Delta(\alpha_{k-1},\Psi_{k-1}).
\]
We will prove, by induction, that $\Psi_k=D\alpha_k$. 

By definition,
\[
\alpha_k=G(F\circ(\id+\alpha_{k-1}))
\]
Also, by the induction hypothesis, $\Psi_{k-1}=D\alpha_{k-1}$.
Because $\alpha_{k-1}\in \mathcal{D}$ and $D\alpha_{k-1}\in \mathcal{J}$, by an
application of Proposition~\ref{prop:dandc} we have that
\begin{eqnarray*}
D\alpha_k &=& \mathcal{G} DF\circ(\id+\alpha_{k-1})(I+\Psi_{k-1})\\
&=&\Psi_k,
\end{eqnarray*}
as required.

By an application of the fiber contraction theorem, if $\eta$ is the
fixed point of $\Gamma$ and $\Phi$ is the fixed point of
the map $\Psi\to \Delta(\eta,\Psi)$, then  
\[
\lim_{k\to\infty} \alpha_k=\eta,\qquad 
\lim_{k\to\infty} D\alpha_k=\Phi
\]
where the limits exist in the respective spaces $\mathcal{D}$ and $\mathcal{J}$. 

The following lemma will be used to finish the proof.
\begin{lemma}
If a sequence converges in either of the spaces $\mathcal{B}_{r,\mu}$ or $\mathcal{H}_{r,\mu}$,
then the sequence converges uniformly. 
\end{lemma}
To prove the lemma, 
recall that the functions in the spaces $\mathcal{B}_{r,\mu}$ and $\mathcal{H}_{r,\mu}$ are
continuous functions defined on $\Omega_r$, 
the ball of radius $r$ at the origin
in $\mathbb{R}^n$ with respect to the adapted norm. 
Also, by the equivalence of the norms (see display~\eqref{eon})
there is a positive constant $K$ such that  $\abs{x}_i<K\abs{x}$.

If $\lim_{k\to\infty}\alpha_k=\alpha$ in $\mathcal{B}_{r,\mu}$, then
for each $\epsilon>0$ there is an integer 
$\kappa>0$ such that 
\[
\frac{\abs{(\alpha_k)_i(x)-\alpha_i(x)}}
{\abs{x}_i\abs{x}^\mu}<\frac{\epsilon}{Kr^{1+\mu}}
\]
whenever $0<\abs{x}<r$, $k\ge \kappa$, and $i\in \{1,\ldots,p\}$.
Using the inequality $\abs{x}_i<K\abs{x}$ and the norm equivalence~\eqref{eon},
it follows that 
\[
\norm{\alpha_k-\alpha}<\epsilon 
\]
whenever $0<\abs{x}<1$ and $k\ge \kappa$; that is, the sequence of
continuous functions
$\{\alpha_k\}_{k=0}^\infty$ converges uniformly to $\alpha$.
The proof of the uniform convergence of a convergent sequence
in $\mathcal{H}_{r,\mu}$ is similar.

As mentioned previously, the equality
$\Psi=D\eta$ follows from the uniform convergence and
a standard result in advanced calculus on the differentiability
of the limit of a uniformly convergent sequence of functions.
Thus, the conjugating homeomorphism $h=\id+\eta$ is continuously
differentiable. Moreover, using the equality $Dh(0)=I$ and 
the inverse function theorem, the conjugacy
$h$ is a diffeomorphism when restricted to a sufficiently small
open ball at the origin. 
\end{proof}


\begin{thebibliography}{99999}
\bibitem[C99]{Chicone}
C. Chicone, 
\emph{Ordinary Differential Equations with Applications},
New York: Springer-Verlag, 1999 
\bibitem[CL88]{cl}
S-N. Chow and K. Lu, 
$\mathcal{C}^k$ centre unstable manifolds,
\emph{Proc. Roy. Soc. Edinburgh}, {\bf 108A} (1988) 303--320.

\bibitem[CLL91]{cll}
S-N. Chow, X-B. Lin, and K. Lu, 
Smooth invariant foliations in infinite-dimensional spaces,
\emph{J. Diff. Eqs.}, {\bf 94} (2) (1988) 266--291.

\bibitem[D71]{dorroh}
J. R. Dorroh, 
Local groups of differentiable transformations,
\emph{Math. Ann.}, {\bf 192} (1971) 243--249.

\bibitem[DS58]{ds} N. Dunford and J. Schwartz,
\emph{Linear Operators,} Vol. 1,
New York: Interscience Pub. 1958.

\bibitem[G59]{Grobman} 
D. Grobman, 
Homeomorphisms of systems of differential equations, 
\emph{Dokl. Akad. Nauk., SSSR} {\bf 128} (1959) 880--881.

\bibitem[H60]{Hartman60} 
P. Hartman, 
A lemma in the theory of structural stability of differential equations, 
\emph{Proc. Amer. Math. Soc.} {\bf 11} (1960) 610--620.
\bibitem[H60M]{Hartman60M} 
P. Hartman, 
On local homeomorphisms of Euclidean space,
\emph{Bol. Soc. Mat. Mexicana} {\bf 5} (2) (1960) 220--241.
\bibitem[H63]{Hartman63} 
P. Hartman, 
On the local linearization of differential equations, 
\emph{Proc. Amer. Math. Soc.} {\bf 14} (1963) 568--573.

\bibitem[HP70]{hp} M. Hirsch and C. Pugh,
Stable manifolds and hyperbolic sets,
In Global Analysis XIV, Amer. Math. Soc. (1970) 133--164.

\bibitem[KP90]{kp} U. Kirchgraber and K. Palmer, \emph{Geometry
in the Neighborhood of Invariant Manifolds of Maps and Flows and
Linearization,} New York: Wiley 1990.

\bibitem[LL99]{ll} Yu. Latushkin and B. Layton,
The optimal gap condition for invariant manifolds,
\emph{Disc. Cont. Dy. Sys.} {\bf 5} (2) (1999) 233--268.

\bibitem[Se85]{sell} G. Sell, Smooth linearization near a fixed point,
\emph{Amer. J. Math.}, {\bf 107}(1985) 1035--1091.

\bibitem[St57]{stern} S. Sternberg, Local contractions and
a theorem of Poincar\'e, \emph{Amer. J. Math.},
{\bf 79}(1957), 809--824.

\bibitem[St89]{stowe} D. Stowe,
Linearization in two dimensions,
\emph{J. Diff. Eqs.}, {\bf 63} (1989) 183--226.

\bibitem[T99]{tan} B. Tan,
$\sigma$-H\"older continuous linearizations near hyperbolic fixed points
in $\mathbb{R}^n$, Preprint, Georgia Tech., 1999. 
\end{thebibliography}

\bigbreak

\noindent{\sc Carmen Chicone}\\
Department of Mathematics,
University of Missouri, \\
Columbia, MO 65211, USA \\
e-mail: carmen@chicone.math.missouri.edu\smallskip

\noindent{\sc Richard Swanson} \\
Department of Mathematical Sciences, 
Montana State University, \\ 
Bozeman, MT 59717-0240, USA \\
e-mail: rswanson@math.montana.edu 
\end{document}