\documentclass[reqno]{amsart} \usepackage{hyperref} \AtBeginDocument{{\noindent\small \emph{Electronic Journal of Differential Equations}, Vol. 2012 (2012), No. 03, pp. 1--8.\newline ISSN: 1072-6691. URL: http://ejde.math.txstate.edu or http://ejde.math.unt.edu \newline ftp ejde.math.txstate.edu} \thanks{\copyright 2012 Texas State University - San Marcos.} \vspace{9mm}} \begin{document} \title[\hfilneg EJDE-2012/03\hfil Newton's method for stochastic differential equations] {Newton's method for stochastic differential equations and its probabilistic second-order error estimate} \author[K. Amano \hfil EJDE-2012/03\hfilneg] {Kazuo Amano} \address{Kazuo Amano \newline Department of Mathematics, Faculty of Engineering, Gunma University, Kiryu, 376-8515, Japan} \email{kamano@gunma-u.ac.jp} \thanks{Submitted June 27, 2011. Published January 4, 2012.} \subjclass[2000]{60H10, 65C30} \keywords{Newton's method; stochastic differential equation; \hfill\break\indent second order error estimate} \begin{abstract} Kawabata and Yamada \cite{Kawabata} proposed an implicit Newton's method for nonlinear stochastic differential equations and proved its convergence. Later Amano \cite{Amano2} gave an explicit formulation of the method and showed its direct error estimate. In this article, we prove a probabilistic second-order error estimate which has been an open problem since 1991. \end{abstract} \maketitle \numberwithin{equation}{section} \newtheorem{theorem}{Theorem}[section] \newtheorem{lemma}[theorem]{Lemma} \newtheorem{remark}[theorem]{Remark}
%\allowdisplaybreaks
\section{Introduction} Let $$a(t,x)$$ and $$b(t,x)$$ be real-valued bounded $$C^2$$ smooth functions defined in the two-dimensional Euclidean space $${\mathbb R}^2$$. 
We assume that there exist nonnegative constants $$A_1$$, $$A_2$$, $$B_1$$ and $$B_2$$ satisfying $\big|\frac{\partial a}{\partial x}(t,x)\big|\le A_1\,, \quad \big|\frac{\partial^2 a}{\partial x^2}(t,x)\big|\le A_2$ and $\big|\frac{\partial b}{\partial x}(t,x)\big|\le B_1\,, \quad \big|\frac{\partial^2 b}{\partial x^2}(t,x)\big|\le B_2$ in $${\mathbb R}^2$$. Let $$w(t)$$, $$t\ge 0$$ be a standard Brownian motion on a probability space $$(\Omega,\mathcal{F}, P)$$ and let $$\mathcal{F}_t$$, $$t\ge 0$$ be the natural filtration of $$\mathcal{F}$$. We assume that $$\xi(t)$$, $$t\ge 0$$ is a solution of the initial value problem for stochastic differential equation $$\label{problem0} d\xi(t)=a\bigl(t,\xi(t)\bigr)\,dt +b\bigl(t,\xi(t)\bigr)\,dw(t)\,,\quad \xi(0)=\xi_0\,,$$ where $$\xi_0$$ is a bounded random variable independent of $$\mathcal{F}_t$$, $$t\ge 0$$. Without loss of generality, we may assume that $$\xi(t)$$ is continuous with respect to $$t\ge 0$$. For $$T > 0$$ and $$1 \le p < \infty$$, $$L_w^p[0,T]$$ stands for the class of all separable non-anticipative functions $$f(t)$$, $$t\ge 0$$ with respect to $$\{\mathcal{F}_t\}$$ satisfying $P\Bigl[\int_0^T |f(t)|^p\,dt<\infty\Bigr]=1$ and $$M_w^p[0,T]$$ denotes the subset of $$L_w^p[0,T]$$ consisting of all functions $$f(t)$$ with $E\Bigl[\int_0^T |f(t)|^p\,dt\Bigr]<\infty\,.$ It is well-known that $$\xi(t)\in M_w^2[0,T]$$ for any $$T>0$$ (see, for example, \cite{Friedman}). 
The explicit Newton's scheme for \eqref{problem0} is formulated as follows (see \cite{Amano2}): We define a sequence $$\{\xi_n(t)\}$$ by $$\xi_0(t)=\xi_0$$ and \begin{align*} &\xi_{n + 1}(t)\\ &= e^{\eta_n(t)}\Bigl(\xi_0 +\int_0^t\bigl(a_{0,n}(s)-b_{0,n}(s) b_{1,n}(s)\bigr) e^{-\eta_n(s)}ds +\int_0^t b_{0,n}(s)e^{-\eta_n(s)}dw(s)\Bigr) \end{align*} for $$n = 0,1,2,\dots$$, where \begin{gather*} \eta_n(t) = \int_0^t\Bigl(a_{1,n}(s)-\frac{1}{2}\,(b_{1,n}(s))^2\Bigr)\,ds +\int_0^t b_{1,n}(s)\,dw(s)\,,\\ a_{0,n}(t) = a(t,\xi_n(t))-\frac{\partial a}{\partial x} \bigl(t,\xi_n(t)\bigr)\,\xi_n(t)\,,\\ a_{1,n}(t) = \frac{\partial a}{\partial x}\bigl(t,\xi_n(t)\bigr)\,,\\ b_{0,n}(t) = b(t,\xi_n(t))-\frac{\partial b}{\partial x} \bigl(t,\xi_n(t)\bigr)\,\xi_n(t)\,,\\ b_{1,n}(t) = \frac{\partial b}{\partial x}\bigl(t,\xi_n(t)\bigr)\,. \end{gather*} In this article, we shall estimate the approximation errors $$\label{errors} \varepsilon_n(t)=\xi_n(t)-\xi(t),\quad n=0,1,2,\dots\,.$$ \begin{theorem}\label{theorem1} For any $$T>0$$, there exists a nonnegative constant $$C$$ depending only on $$T$$, $$A_1$$, $$A_2$$, $$B_1$$ and $$B_2$$ such that $P\Bigl[\sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho \text{ \rm implies } \sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|\le R\rho^2\Bigr] \ge 1-C\,R^{- 1 / 2}$ for all $$R \ge 1$$, $$0 < \rho \le 1$$ and every $$n = 0,1,2,\dots$$. \end{theorem} Our symbolic Newton's method may open a new possibility for the study of computer algebraic methods in stochastic analysis (see, for example, \cite{Cyganowsky}). If the positive constants $$T_0$$, $$A_1$$, $$A_2$$, $$B_1$$ and $$B_2$$ are given, then a repeated use of Theorem \ref{theorem1} gives an approximate solution of \eqref{problem0} in terms of multiple stochastic integrals. 
For example, we first note that, seeing the proof of Theorem \ref{theorem1}, we can choose $$R\ge 1$$ sufficiently large so that $CR^{- 1 / 2}<\frac{1}{100},\quad \text{where }C=C(A_1,A_2,B_1,B_2,T)$ for all positive $$T\le T_0$$. Second, we take a small $$\rho>0$$ so as to satisfy $R\rho<\frac{1}{10}\,.$ Third, by using a martingale inequality (Lemma \ref{lemma6}), we take a sufficiently small $$T>0$$ such that $P\Bigl[ \sup_{0\le t\le T}|\varepsilon_0(t)|\le\rho \Bigr]\ge 1-\frac{1}{100}\,.$ Now, a repeated use of Theorem \ref{theorem1} and $R^{- 1}\bigl(R\rho\bigr)^{2^{10}}\le\frac{1}{10^{2^{10}}}$ show that $P\Bigl[ \sup_{0\le t\le T}|\varepsilon_{10}(t)|\le\frac{1}{10^{1024}} \Bigr] \ge 1-\frac{1}{10}\,.$ It is clear, by the definition of $$\{\xi_n(t)\}$$, that the approximate solution $$\xi_{10}(t)$$ has a multiple stochastic integral representation. \section{Preliminaries} The following two lemmas follow immediately from It\^o's formula. \begin{lemma}\label{lemma1} If $$\alpha(t)\in L_w^1[0,T]$$ and $$\beta(t)\in L_w^2[0,T]$$ for any $$T>0$$ and if $d\eta(t)=\alpha(t)\,dt+\beta(t)\,dw(t)\,,$ then $de^{\eta(t)} =e^{\eta(t)}\,d\eta(t)+\frac{1}{2}\,\beta^2 (t)e^{\eta(t)}\,dt\,.$ \end{lemma} \begin{proof} For a function $$f(x) = e^x$$, It\^o's formula gives $d\,f(\eta(t))=\Bigl(f'(\eta(t))\alpha(t) +\frac{1}{2}\,f''(\eta(t))\beta^2 (t)\Bigr)dt +f'(\eta(t))\beta(t)\,dw(t)\,;$ this implies the desired formula. 
\end{proof} \begin{lemma}\label{lemma2} If $$\alpha_1(t), \alpha_2(t)\in L_w^1[0,T]$$ and $$\beta_1(t), \beta_2(t)\in L_w^2[0,T]$$ for any $$T>0$$ and if $d\xi_i(t)=\alpha_i(t)\,dt+\beta_i(t)\,dw(t)\,,\quad i=1,2\,,$ then $d \bigl(\xi_1(t)\,\xi_2(t)\bigr) =\xi_2(t)\,d\xi_1(t)+\xi_1(t)\,d\xi_2(t)+\beta_1(t)\beta_2(t)\,dt\,.$ \end{lemma} \begin{proof} Applying It\^o's formula for a 2-dimensional diffusion process $$(\xi_1(t), \xi_2(t))$$ and a function $$f(x_1,x_2) = x_1x_2$$, we have \begin{align*} &d f\bigl(\xi_1(t),\xi_2(t)\bigr)\\ &= \Bigl(\sum_{i=1}^2\frac{\partial f}{\partial x_i} \bigl(\xi_1(t),\xi_2(t)\bigr)\alpha_i(t) +\frac{1}{2}\sum_{i,j=1}^2\frac{\partial^2 f}{\partial x_i\partial x_j} \bigl(\xi_1(t),\xi_2(t)\bigr)\beta_i(t)\beta_j(t)\Bigr)dt\\ &\quad+\sum_{i=1}^2\frac{\partial f}{\partial x_i} \bigl(\xi_1(t),\xi_2(t)\bigr)\beta_i(t)\,dw(t)\,;\\ \end{align*} this completes the proof. \end{proof} Lemmas \ref{lemma1} and \ref{lemma2} show the following three key lemmas. \begin{lemma} \label{lemma3} For $$n=1,2,3,\dots$$ and $a_0(t)=a_{0,n}(t)\,,\quad a_1(t)=a_{1,n}(t)\,, \quad b_0(t)=b_{0,n}(t)\,,\quad b_1(t)=b_{1,n}(t)\,,$ the initial value problem for the linear stochastic differential equation $d\xi(t)=\bigl(a_0(t)+a_1(t)\,\xi(t)\bigr)\,dt +\bigl(b_0(t)+b_1(t)\,\xi(t)\bigr)\,dw(t)\,, \quad \xi(0)=\xi_0$ has an explicit solution $\zeta(t)=e^{\eta(t)}\Bigl( \xi_0+\int_0^t\bigl(a_0(s)-b_0(s)b_1(s)\bigr)e^{-\eta(s)}\,ds +\int_0^tb_0(s)e^{-\eta(s)}\,dw(s)\Bigr)\,,$ where $\eta(t)=\int_0^t\Bigl(a_1(s)-\frac{1}{2} b_1^2(s)\Bigr)\,ds +\int_0^t b_1(s)\,dw(s)\,.$ \end{lemma} \begin{proof} Since Lemma \ref{lemma1} gives $de^{\eta(t)} =e^{\eta(t)}d\eta(t)+\frac{1}{2} b_1^2(t)e^{\eta(t)}\,dt =a_1(t)e^{\eta(t)}\,dt+b_1(t)e^{\eta(t)}\,dw(t)\,,$ Lemma \ref{lemma2} shows \begin{align*} d\zeta(t) &=d\,\Bigl(e^{\eta(t)}\Bigl( \xi_0+\int_0^t\bigl(a_0(s)-b_0(s)b_1(s)\bigr)e^{-\eta(s)}\,ds +\int_0^tb_0(s)e^{-\eta(s)}\,dw(s)\Bigr)\Bigr)\\ &=\Bigl( 
\xi_0+\int_0^t\bigl(a_0(s)-b_0(s)b_1(s)\bigr)e^{-\eta(s)}\,ds +\int_0^t b_0(s)e^{-\eta(s)}\,dw(s) \Bigr)\,de^{\eta(t)}\\ &\quad +e^{\eta(t)}\Bigl( \bigl(a_0(t)-b_0(t)b_1(t)\bigr)e^{-\eta(t)}\,dt +b_0(t)e^{-\eta(t)}\,dw(t)\Bigr)\\ &\quad +\bigl(b_1(t)e^{\eta(t)}\bigr)\bigl(b_0(t)e^{-\eta(t)}\bigr)\,dt\\ &= \zeta(t)\bigl(a_1(t)\,dt+b_1(t)\,dw(t)\bigr)\\ &\quad +\bigl(a_0(t)-b_0(t)b_1(t)\bigr)\,dt+b_0(t)\,dw(t) +b_1(t) b_0(t)\,dt\\ &=\bigl(a_0(t)+a_1(t)\,\zeta(t)\bigr)\,dt +\bigl(b_0(t)+b_1(t)\,\zeta(t)\bigr)\,dw(t)\,. \end{align*} \end{proof} \begin{remark}\label{remark1} \rm It follows immediately from the definition of $$\xi_{n + 1}(t)$$ and Lemma \ref{lemma3} that $$\xi_{n + 1}(0) = \xi_0$$ and $d\xi_{n + 1}(t) =\bigl(a_{0,n}(t)+a_{1,n}(t)\,\xi_{n + 1}(t)\bigr)\,dt +\bigl(b_{0,n}(t)+b_{1,n}(t)\,\xi_{n + 1}(t)\bigr)\,dw(t)$ for $$n=0,1,2,\dots$$. Therefore, $$\{\xi_n(t)\}$$ is exactly the same sequence introduced by Kawabata and Yamada \cite{Kawabata}; this implies the convergence $\lim_{n\to\infty} E\Bigl[ \sup_{0\le t\le T}|\,\xi_n(t)-\xi(t)\,|^2\Bigr]=0$ for any $$T>0$$. By their result, we have only to concentrate on the estimation of errors. \end{remark} \begin{lemma}\label{lemma4} For $$n=0,1,2,\dots$$, we have $\varepsilon_{n + 1}(t) =e^{\eta_n(t)}\Bigl(\int_0^t\bigl(\alpha_{0,n}(s) -\beta_{0,n}(s) b_{1,n}(s)\bigr)e^{-\eta_n(s)}ds +\int_0^t \beta_{0,n}(s)e^{-\eta_n(s)}dw(s)\Bigr)\,,$ where \begin{gather*} \alpha_{0,n}(t) = \varepsilon_n^2(t)\int_0^1 (\theta-1)\,\frac{\partial^2 a}{\partial x^2} \bigl(t,\xi_n(t)-\theta \varepsilon_n(t)\bigr)\,d\theta\,,\\ \beta_{0,n}(t) = \varepsilon_n^2(t)\int_0^1 (\theta-1)\,\frac{\partial^2 b}{\partial x^2} \bigl(t,\xi_n(t)-\theta \varepsilon_n(t)\bigr)\,d\theta\,. 
\end{gather*} \end{lemma} \begin{proof} Since $$\xi_{n + 1}(t)$$ is a solution of the linear stochastic differential equation in Remark \ref{remark1}, by \eqref{problem0} and \eqref{errors}, we have \begin{align*} &d \varepsilon_{n + 1}(t)\\ &= d\,\xi_{n + 1}(t)-d\,\xi(t)\\ &= \Bigl(a(t,\xi_n(t)) -\frac{\partial a}{\partial x}\bigl(t,\xi_n(t)\bigr)\varepsilon_n(t) -a\bigl(t,\xi_n(t) - \varepsilon_n(t)\bigr) +a_{1,n}(t) \varepsilon_{n + 1}(t)\Bigr)\,dt\\ &\quad +\Bigl(b(t,\xi_n(t)) -\frac{\partial b}{\partial x}\bigl(t,\xi_n(t)\bigr)\varepsilon_n(t) -b\bigl(t,\xi_n(t) - \varepsilon_n(t)\bigr) +b_{1,n}(t) \varepsilon_{n + 1}(t)\Bigr)\,dw(t)\,. \end{align*} Let us consider an auxiliary function $F(\theta)=a\bigl(t,\xi_n(t)-\theta \varepsilon_n(t)\bigr)\,, \quad 0\le\theta\le 1\,.$ Then, integration by parts shows $F(1)=F(0)+F'(0)+\int_0^1 (1-\theta)\,F''(\theta)\,d\theta\,;$ this gives $\alpha_{0,n}(t) =a(t,\xi_n(t)) -\frac{\partial a}{\partial x}\bigl(t,\xi_n(t)\bigr)\varepsilon_n(t) -a\bigl(t,\xi_n(t) - \varepsilon_n(t)\bigr)\,.$ Similarly, we have $\beta_{0,n}(t) =b(t,\xi_n(t)) -\frac{\partial b}{\partial x}\bigl(t,\xi_n(t)\bigr)\varepsilon_n(t) -b\bigl(t,\xi_n(t) - \varepsilon_n(t)\bigr)\,.$ Therefore, we obtain $d\varepsilon_{n + 1}(t) =\bigl(\alpha_{0,n}(t) +a_{1,n}(t) \varepsilon_{n + 1}(t)\bigr)\,dt +\bigl(\beta_{0,n}(t) +b_{1,n}(t) \varepsilon_{n + 1}(t)\bigr)\,dw(t)\,;$ seeing the proof of Lemma \ref{lemma3} for $$a_0(t)=\alpha_{0,n}(t)$$, $$a_1(t)=a_{1,n}(t)$$, $$b_0(t)=\beta_{0,n}(t)$$ and $$b_1(t)=b_{1,n}(t)$$, this completes the proof. \end{proof} \begin{lemma}\label{lemma5} For any $$t > 0$$, we obtain \begin{gather*} E\bigl[|e^{\eta_n(t)}-1|^2\bigr] \leq 4t(A_1\sqrt{t}+B_1)^2e^{4t(A_1\sqrt{t}+B_1)^2}\,,\\ E\bigl[|e^{-\eta_n(t)}-1|^2\bigr] \leq 4t\bigl((A_1+B_1^2)\sqrt{t}+B_1\bigr)^2 e^{4t((A_1+B_1^2)\sqrt{t}+B_1)^2}\,. 
\end{gather*} \end{lemma} \begin{proof} Since Lemma \ref{lemma1} implies $de^{\eta_n(t)}=a_{1,n}(t)e^{\eta_n(t)}dt +b_{1,n}(t)e^{\eta_n(t)}dw(t)\,,$ we easily have \begin{gather*} e^{\eta_n(t)}-1 = \int_0^t a_{1,n}(s)e^{\eta_n(s)}ds +\int_0^t b_{1,n}(s)e^{\eta_n(s)}dw(s)\,,\\ \big|\,a_{1,n}(s)e^{\eta_n(s)}\big| \leq A_1+A_1|e^{\eta_n(s)}-1|\,,\\ \big| b_{1,n}(s)e^{\eta_n(s)}\big| \leq B_1+B_1|e^{\eta_n(s)}-1|\,. \end{gather*} Hence, the stochastic Gronwall inequality \cite{Amano1} shows one of the desired estimates. Similarly, by Lemma \ref{lemma1}, we obtain $e^{-\eta_n(t)}-1 =-\int_0^t \bigl(a_{1,n}(s)-(b_{1,n}(s))^2\bigr)e^{-\eta_n(s)}ds -\int_0^t b_{1,n}(s)e^{-\eta_n(s)}dw(s)$ and a simple calculation gives \begin{gather*} \big|\bigl(a_{1,n}(s)-(b_{1,n}(s))^2\bigr)e^{-\eta_n(s)}\big| \leq (A_1+B_1^2)+(A_1+B_1^2)|e^{-\eta_n(s)}-1|\,,\\ \big| b_{1,n}(s)e^{-\eta_n(s)}\big| \leq B_1+B_1|e^{-\eta_n(s)}-1|\,. \end{gather*} Therefore, by the stochastic Gronwall inequality \cite{Amano1}, we obtain the remaining inequality. \end{proof} \begin{remark}\label{remark2} \rm By Fubini's theorem and Lemma \ref{lemma5}, we can show that $e^{\pm\eta_n(t)}=(e^{\pm\eta_n(t)}-1)+1\in M_w^2[0,T]$ and \begin{gather*} E\Bigl[\int_0^T e^{2\eta_n(t)}dt\Bigr] \leq 2\int_0^T 4t(A_1\sqrt{t}+B_1)^2 e^{4t(A_1\sqrt{t}+B_1)^2} dt+2T\,,\\ E\Bigl[\int_0^T e^{-2\eta_n(t)}dt\Bigr] \leq 2\int_0^T 4t\bigl((A_1+B_1^2)\sqrt{t}+B_1\bigr)^2 e^{4t((A_1+B_1^2)\sqrt{t}+B_1)^2}dt+2T \end{gather*} for any $$T > 0$$. \end{remark} Martingale inequalities (see, for example, \cite{Friedman}) play important roles in the proof of our error estimate. \begin{lemma}\label{lemma6} If $$f(t) \in M_w^2[0,T]$$, $$T > 0$$, then $P\Bigl[\sup_{0\le t\le T}\Big|\int_0^t f(s)\,dw(s) \,\Big|>\alpha\Bigr] \le\frac{1}{\alpha^2}\,E\Bigl[\int_0^T f^2(s)\,ds\Bigr]$ for any positive number $$\alpha$$. 
\end{lemma} \begin{lemma} \label{lemma7} If $$f(t) \in L_w^2[0,T]$$, $$T > 0$$, then $P\Bigl[\sup_{0\le t\le T}\Bigl( \int_0^t f(s)\,dw(s)-\frac{\alpha}{2}\int_0^t f^2(s)\,ds \Bigr)>\beta\Bigr] \le e^{-\alpha\beta}$ for any positive numbers $$\alpha$$ and $$\beta$$. \end{lemma} \begin{remark}\label{remark3} \rm Since $$b_{1,n}(t) \in L_w^2[0,T]$$, \begin{align*} \eta_n(t) &= \int_0^t a_{1,n}(s)\,ds+\int_0^t b_{1,n}(s)\,dw(s) -\frac{1}{2}\,\int_0^t \bigl(b_{1,n}(s)\bigr)^2 ds\\ &\leq A_1 t+\int_0^t b_{1,n}(s)\,dw(s) -\frac{1}{2}\,\int_0^t \bigl(b_{1,n}(s)\bigr)^2 ds \end{align*} and \begin{align*} -\eta_n(t) &= \int_0^t (-a_{1,n}(s))\,ds+\int_0^t \bigl(b_{1,n}(s)\bigr)^2 ds\\ &\quad +\int_0^t \bigl(-b_{1,n}(s)\bigr)\,dw(s) -\frac{1}{2}\,\int_0^t \bigl(-b_{1,n}(s)\bigr)^2 ds\\ &\leq (A_1+B_1^2)\,t+\int_0^t \bigl(-b_{1,n}(s)\bigr)\,dw(s) -\frac{1}{2}\,\int_0^t \bigl(-b_{1,n}(s)\bigr)^2 ds\,,\\ \end{align*} it follows from Lemma \ref{lemma7} that \begin{align*} &P\bigl[\;\sup_{0\le s\le t}e^{\eta_n(s)}>R\,\bigr]\\ &\le P\Bigl[\sup_{0\le s\le t} \Bigl(\int_0^s b_{1,n}(u)\,dw(u) -\frac{1}{2}\,\int_0^s \bigl(b_{1,n}(u)\bigr)^2 du\Bigr) >-A_1t+\log R\Bigr]\\ &\le e^{A_1 t}R^{- 1} \end{align*} and \begin{align*} &P\bigl[\;\sup_{0\le s\le t}e^{-\eta_n(s)}>R\,\bigr]\\ &\le P\Bigl[\sup_{0\le s\le t}\Bigl(\int_0^s \bigl(-b_{1,n}(u)\bigr)\,dw(u) -\frac{1}{2}\,\int_0^s \bigl(-b_{1,n}(u)\bigr)^2 du\Bigr) >-(A_1 + B_1^2)\,t+\log R\Bigr]\\ &\le e^{(A_1+B_1^2)\,t}\,R^{- 1} \end{align*} for all $$R \ge 1$$ and $$0 \le t \le T$$. \end{remark} \section{Proof of Theorem \ref{theorem1}} \begin{proof} Let us take real numbers $$R \ge 1$$ and $$0 < \rho \le 1$$ arbitrarily. 
Then, by Lemma \ref{lemma4}, we can show that $\sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|>R\rho^2 \quad\text{and}\quad \sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho$ imply $\sup_{0\le t\le T}e^{\eta_n(t)}>\sqrt{R}$ or $\sup_{0\le t\le T}\int_0^t \big|\alpha_{0,n}(s)-\beta_{0,n}(s) b_{1,n}(s)\big|e^{-\eta_n(s)}ds >\frac{\sqrt{R}}{2}\rho^2$ or $\sup_{0\le t\le T} \,\Big|\int_0^t\beta_{0,n}(s)e^{-\eta_n(s)}dw(s)\Big| >\frac{\sqrt{R}}{2}\rho^2$ for every $$n = 0,1,2,\dots$$\,. In fact, we have to use only an argument of contradiction. By Remark \ref{remark3}, we easily have $P\bigl[\,\sup_{0\le t\le T}e^{\eta_n(t)}>\sqrt{R}\;\bigr] \le e^{TA_1}R^{- 1 / 2}\,.$ By $\big|\,\alpha_{0,n}(s)-\beta_{0,n}(s) b_{1,n}(s)\,\big| \le\frac{1}{2}(A_2+B_1B_2) \varepsilon_n^2(s)\,,$ Remark \ref{remark3} and direct computation, it follows that \begin{align*} &P\Big[\,\sup_{0\le t\le T}\int_0^t \big|\alpha_{0,n}(s)-\beta_{0,n}(s) b_{1,n}(s)\big|e^{-\eta_n(s)}ds >\frac{\sqrt{R}}{2}\rho^2 \text{ and }\sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho\Bigr]\\ &\le T(A_2+B_1B_2)e^{T(A_1+B_1^2)}\,R^{- 1 / 2}\,. 
\end{align*} Since $\beta_{0,n}(s)=\min\bigl(\varepsilon_n^2(s), \rho^2\bigr) \int_0^1 (\theta-1)\,\frac{\partial^2 b}{\partial x^2} \bigl(s,\xi_n(s)-\theta\varepsilon_n(s)\bigr)\,d\theta$ for $$0\le s\le T$$ when $$\sup_{0\le t\le T}|\varepsilon_n(t)| \le \rho$$, Lemma \ref{lemma6} and Remark \ref{remark2} show \begin{align*} &P\Bigl[\sup_{0\le t\le T} \Big|\int_0^t\beta_{0,n}(s)e^{-\eta_n(s)}dw(s)\Big| >\frac{\sqrt{R}}{2}\rho^2\text{ and } \sup_{0\le t\le T}|\varepsilon_n(t)| \le \rho\Bigr]\\ & \le P\Bigl[\sup_{0\le t\le T}\,\Big|\;\int_0^t \Bigl(\min\bigl(\varepsilon_n^2(s), \rho^2\bigr) \int_0^1 (\theta-1)\,\frac{\partial^2 b}{\partial x^2} \bigl(s,\xi_n(s)-\theta\varepsilon_n(s)\bigr)\,d\theta\Bigr)\\ &\quad \times e^{-\eta_n(s)}dw(s)\;\Big|>\frac{\sqrt{R}}{2}\rho^2\Bigr]\\ &\le 2B_2^2\Bigl(\int_0^T 4t\bigl((A_1+B_1^2)\sqrt{t}+B_1\bigr)^2 e^{4t((A_1+B_1^2)\sqrt{t}+B_1)^2}dt+T\Bigr) R^{- 1}\,. \end{align*} Combining the above estimates, we can show that there exists a nonnegative constant $$C=C(A_1,A_2,B_1,B_2,T)$$ independent of $$R$$, $$\rho$$ and $$n$$ such that $P\Bigl[\sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|>R\rho^2 \text{ and } \sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho\Bigr]\le C\,R^{- 1 / 2}$ for $$n = 0,1,2,\dots$$. Consequently, we have $P\Bigl[\sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho \text{ implies } \sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|\le R\rho^2\Bigr] \ge 1-C\,R^{- 1 / 2}$ for all $$R \ge 1$$, $$0 < \rho \le 1$$ and every $$n = 0,1,2,\dots$$. \end{proof} Finally, we give a slight improvement of Theorem \ref{theorem1}. 
At the beginning of the proof of Theorem \ref{theorem1} where we have made a classification of an event, if we replace the lower bounds $\sqrt{R}\,,\quad \frac{\sqrt{R}}{2}\rho^2\,, \quad \frac{\sqrt{R}}{2}\rho^2$ with $T^{-1 / 3}\sqrt{R}\,,\quad \frac{T^{1 / 3}\sqrt{R}}{2}\rho^2\,, \quad \frac{T^{1 / 3}\sqrt{R}}{2}\rho^2$ respectively, then we can show that the above constant $$C(A_1,A_2,B_1,B_2,T)=O(T^{1 / 3})$$ as $$T \to 0$$. Therefore, our Newton's method may work better when it is used with a small time interval. \begin{thebibliography}{0} \bibitem{Amano1} K. Amano; A stochastic Gronwall inequality and its applications, \emph{J. Ineq. Pure Appl. Math.}, \textbf{6}(2005), Issue 1, Article 17, 1--5. \bibitem{Amano2} K.~Amano; Newton's method for stochastic differential equations and its error estimate, \emph{Proc. Japan Acad.}, \textbf{84}(2008), Ser.A, 1--3. \bibitem{Cyganowsky} S.~Cyganowsky, J.~Ombach and P.~E.~Kloeden; \emph{From Elementary Probability to Stochastic Differential Equations with MAPLE\/}, Springer, 2001. \bibitem{Friedman} A.~Friedman; \emph{Stochastic Differential Equations and Applications, Volume I}, Academic Press, 1975. \bibitem{Kawabata} S.~Kawabata and T.~Yamada; On Newton's method for stochastic differential equations, \emph{S\'eminaire de Probabilit\'es}, XXV(1991), 121--137. \end{thebibliography} \end{document}