% vim: tw=50 % 26/11/2022 11AM \newpage \section{The Second Derivative} We'll start with a result in partial derivatives \[ \frac{\partial^2 f}{\partial x \partial y} = \frac{\partial^2 f}{\partial y \partial x} \] \begin{flashcard}[symmetry-of-second-partial-derivatives] \begin{theorem}[Symmetry of second partial derivatives] \cloze{ Let $f : \RR^n \to \RR^m$, $a \in \RR^n$ and $\eps > 0$. Suppose $D_iD_j f$ and $D_j D_i f$ exist on $B_\eps(a)$ and are continuous at $a$. Then $D_i D_j f(a) = D_j D_i f(a)$. } \end{theorem} \end{flashcard} \begin{proof} WLOG $m = 1$, $n = 2$, $a = (x, y)$, $i = 1$, $j = 2$. \\ Let \begin{align*} \Delta_h &= f(x + h, y + h) - f(x, y + h) - f(x + h, y) + f(x, y) \\ &= g(y + h) - g(y) \end{align*} where $g(t) = f(x + h, t) - f(x, t)$. Let $0 < |h| < \eps/\sqrt{2}$ (so that all points considered below lie in $B_\eps(a)$). Then \begin{align*} \Delta_h &= hg'(y + \theta_h h) &&(\theta_h \in (0, 1)) \\ &= h(D_2 f(x + h, y + \theta_h h) - D_2 f(x, y + \theta_h h)) \\ &= h^2 D_1D_2 f(x + \phi_h h, y + \theta_h h) &&(\phi_h \in (0, 1)) \end{align*} Similarly, $\Delta_h = h^2 D_2D_1f(x + \zeta_h h, y + \xi_h h)$ for some $\zeta_h, \xi_h \in (0, 1)$. Hence \[ D_1D_2 f(x + \phi_h h, y + \theta_h h) = D_2 D_1 f(x + \zeta_h h, y + \xi_h h) \] So let $h \to 0$ and use continuity of $D_1D_2 f$ and $D_2D_1 f$ at $(x, y)$ \[ D_1 D_2 f(x, y) = D_2 D_1f(x, y) \qedhere \] \end{proof} \begin{hiddenflashcard}[symmetry-of-partial-derivatives-proof] Proof of symmetry of partial derivatives? \\ \cloze{ WLOG work $\RR^2 \to \RR$. \\ Define \[ \Delta_h = f(x + h, y + h) - f(x + h, y) - f(x, y + h) + f(x, y) \] Expand this in two ways using MVT to get \[ h^2 D_1D_2 f(x + \phi_h h, y + \theta_h h) = h^2 D_2D_1 f(x + \zeta_h h, y + \xi_h h) \] Divide by $h^2$ then take a limit as $h \to 0$ to get \[ D_1 D_2 f(x, y) = D_2 D_1 f(x, y) \] } \end{hiddenflashcard} \myskip What is the second derivative really? \\ Let $f : \RR^n \to \RR^m$ be everywhere differentiable. 
For each $x \in \RR^n$, $Df|_x \in \mathcal{L}(\RR^n, \RR^m)$. Define $F : \RR^n \to \mathcal{L}(\RR^n, \RR^m) \sim \RR^{nm}$ by $F(x) = D f|_x$. If $F$ is differentiable at $a \in \RR^n$ then we say $f$ is \emph{twice differentiable} at $a$ and the \emph{second derivative} of $f$ at $a$ is $D^2 f|_a = DF|_a$. What is $D^2 f|_a$? \[ D^2 f|_a \in \mathcal{L}(\RR^n, \mathcal{L}(\RR^n, \RR^m)) \sim \operatorname{Bil}(\RR^n \times \RR^n, \RR^m) \] So $D^2 f|_a$ is a bilinear map from $\RR^n \times \RR^n \to \RR^m$. If $f$ is twice differentiable at $a$, this says \[ Df|_{a + h} = Df|_a + D^2 f|_a(h) + o(h) \] (Everything in this expression is a linear map), i.e. \[ Df|_{a + h}(k) = Df|_a(k) + \ub{D^2 f|_a(h, k)}_{\text{bilinear in $h, k$}} + \ub{o_k(h)}_{\text{for fixed $k$, this is $o(h)$}} \] \begin{example*} $f : \mathcal{M}_n \to \mathcal{M}_n$, $f(A) = A^3$. \begin{align*} f(A + K) &= (A + K)^3 \\ &= \ub{A^3}_{f(A)} + \ub{A^2K + AKA + KA^2}_{\text{linear in $K$}} + \ub{\text{terms involving $K^2$}}_{o(K)} \end{align*} So $f$ everywhere differentiable with \[ Df|_A(K) = A^2K + AKA + KA^2 \] Now \begin{align*} Df|_{A + H}(K) &= (A+H)^2K + (A+H)K(A+H) + K(A+H)^2 \\ &= \ub{A^2K + AKA + KA^2}_{Df|_A(K)} \\ &~~~~+ \ub{AHK + HAK + AKH + HKA + KAH + KHA}_{\text{Bilinear}} \\ &~~~~+ \ub{H^2K + HKH + KH^2}_{o_K(H)} \end{align*} So $f$ is twice differentiable at $A$ and \[ D^2 f|_A(H, K) = AHK + HAK + AKH + HKA + KAH + KHA \] \end{example*} \begin{remark*} For definition to work, enough to have $f$ differentiable on some neighbourhood of $a$. \end{remark*} \noindent How does $D^2 f|_a$ relate to the $D_iD_jf(a)$? Suppose $f : \RR^n \to \RR$ is twice differentiable at $a \in \RR^n$. Then, with $e_1, \dots, e_n$ the standard basis, \begin{align*} \frac{D_jf(a + he_i) - D_jf(a)}{h} &= \frac{D^2f|_a(he_i, e_j) + o(h)}{h} \\ &= D^2 f|_a(e_i, e_j) + o(1) \\ &\to D^2 f|_a (e_i, e_j) \end{align*} So $D_iD_jf(a) = D^2 f|_a(e_i, e_j)$. 
So if $H$ is the $n \times n$ matrix representing the bilinear form $D^2 f|_a$, we have \[ H_{ij} = D_iD_jf(a) \] We call $H$ the \emph{Hessian} matrix of $f$. If $f : \RR^n \to \RR^m$, could do this for each $f_i : \RR^n \to \RR$ ($i = 1, \dots, m$), or think about matrices whose entries are elements of $\RR^m$. \begin{definition*} Let $f : \RR^n \to \RR^m$ and $a \in \RR^n$. We say $f$ is \emph{continuously differentiable} at $a$ if $Df|_x$ exists for all $x$ in some ball $B_\delta(a)$ ($\delta > 0$) and the function $x \mapsto Df|_x$ is continuous at $a$. \end{definition*} \noindent If $f$ is twice differentiable at $a$ then Theorem 10 tells us that $H$ is a symmetric matrix. Hence under this condition, $D^2 f|_a$ is a symmetric bilinear form. \\ An application: \begin{definition*} Let $f : \RR^n \to \RR$, $a \in \RR^n$. We say $a$ is a \emph{local maximum} (respectively \emph{minimum}) for $f$ if there is some $\delta > 0$ such that for all $x \in B_\delta(a)$ we have $f(x) \le f(a)$ (respectively $f(x) \ge f(a)$). \end{definition*} \begin{proposition} Let $f : \RR^n \to \RR$ and let $a$ be a local maximum / minimum for $f$. Suppose $f$ differentiable at $a$. Then $Df|_a$ is the zero map. \end{proposition} \begin{proof} Let $u \in \RR^n$. For each $\lambda \neq 0$ in $\RR$, \begin{align*} \frac{f(a + \lambda u) - f(a)}{\lambda} &= \frac{Df|_a(\lambda u) + o(\lambda)}{\lambda} \\ &\to Df|_a(u) \end{align*} as $\lambda \to 0$. Assume WLOG $a$ is a maximum (otherwise consider $-f$). Then \[ \frac{f(a + \lambda u) - f(a)}{\lambda} \begin{cases} \ge 0 & \text{if $\lambda < 0$} \\ \le 0 & \text{if $\lambda > 0$} \end{cases} \] Hence $Df|_a(u) = 0$. \end{proof} \myskip Converse of course does not hold: for example $f : \RR \to \RR$, $f(x) = x^3$, $a = 0$. \begin{flashcard}[second-order-taylor-theorem] \begin{lemma}[Second-order Taylor Theorem] \cloze{ Let $f : \RR^n \to \RR$ be twice differentiable at $a \in \RR^n$. 
Then \[ f(a + h) = f(a) + Df|_a(h) + \half D^2 f|_a(h, h) + o(\|h\|^2) \] } \end{lemma} \prompt{ \begin{proof} \cloze{ Consider $g : [0, 1] \to \RR$ defined by \[ g(t) = f(a + th) - f(a) - tDf|_a(h) - \frac{t^2}{2} D^2f|_a(h, h) \] Note $g$ continuous, differentiable on $(0, 1)$ with easy to calculate derivative, and that $g(0) = 0$. Now use MVT and let $h \to 0$. } \end{proof} } \end{flashcard} \begin{proof} Define $g : [0, 1] \to \RR$ by \[ g(t) = f(a + th) - f(a) - tDf|_a(h) - \frac{t^2}{2} D^2 f|_a(h, h) \] Clearly $g$ is continuous on $[0, 1]$, $g(0) = 0$ and $g$ is differentiable on $(0, 1)$ with \[ g'(t) = Df|_{a + th}(h) - Df|_a(h) - tD^2 f|_a(h, h) \] By Mean Value Theorem, $\exists t \in (0, 1)$ such that $g(1) - g(0) = g'(t)$. Hence \begin{align*} \frac{\big|f(a + h) - f(a) - Df|_a(h) - \half D^2 f|_a(h, h) \big|}{\|h\|^2} &= \frac{\big| Df|_{a + th} (h) - Df|_a(h) - tD^2 f|_a(h, h) \big|}{\|h\|^2} \\ &= \frac{\big| D^2f|_a(th, h) + o(\|h\|^2) - tD^2f|_a(h, h)\big|}{\|h\|^2} \\ &= \frac{|o(\|h\|^2)|}{\|h\|^2} \\ &\to 0 \end{align*} as $h \to 0$. \end{proof} \begin{theorem} Let $f : \RR^n \to \RR$ and $a \in \RR^n$. Suppose $f$ is twice differentiable at $a$ (so, in particular, $D^2 f|_a$ is a symmetric bilinear form) and $Df|_a = 0$. Then \[ D^2 f|_a \text{ positive definite} \implies \text{$a$ local minimum} \] and \[ D^2 f|_a \text{ negative definite} \implies \text{$a$ local maximum} \] \end{theorem} \begin{proof} Suppose WLOG $D^2 f|_a$ positive definite (otherwise consider $-f$). Then with respect to some orthonormal basis $D^2 f|_a$ has diagonal matrix with strictly positive elements on the leading diagonal. Have $\forall x \in \RR^n$, $D^2 f|_a(x, x) \ge \mu\|x\|^2$ where $\mu > 0$ is the least eigenvalue of $D^2 f|_a$. By Lemma 12, \begin{align*} \frac{f(a + h) - f(a)}{\|h\|^2} &= \half \frac{D^2f|_a(h, h)}{\|h\|^2} + o(1) \\ &\ge \half \mu + o(1) \\ &\to \half \mu \end{align*} as $h \to 0$. 
But $\half \mu > 0$ so for $h$ sufficiently small, \[ \frac{f(a + h) - f(a)}{\|h\|^2} > 0 \] so $f(a + h) - f(a) > 0$ so $a$ is a local minimum for $f$. \end{proof} \newpage \section{Ordinary Differential Equations} \begin{lemma} Let $A \subset \RR^n$, $B \subset \RR^m$ with $A$ compact and $B$ closed. Let $X = \mathcal{C}(A, B) = \{f : A \to B \mid \text{$f$ continuous}\}$ with uniform metric \[ d(f, g) = \sup_{x \in A} \|f(x) - g(x)\| \] Then $X$ is a complete metric space. \end{lemma} \begin{proof} As $A$ compact, $d$ is well-defined. Let $(f_n)$ be a Cauchy sequence in $X$. Then $(f_n)$ is uniformly Cauchy so uniformly convergent by General Principle of Uniform Convergence on each coordinate. So $f_n \to f$ uniformly for some $f : A \to \RR^m$. Uniform limit of continuous functions is continuous so $f$ is continuous. And $\forall x \in A$, $f_n(x) \to f(x)$ so, as $B$ closed, $f(x) \in B$. So $f \in X$ and $d(f_n, f) \to 0$. \end{proof} \myskip Often we want to solve an ODE but can't find a closed-form solution. \begin{itemize} \item Numerical Methods \item Phase plane portraits \end{itemize} \emph{But} this is silly if the ODE has no solution. So want a general result telling us under appropriate conditions ODEs have unique solutions. Typical ODE: $\dfrac{dy}{dx} = \phi(x, y)$, subject to $y = y_0$ when $x = x_0$. Useful to think about things $\RR^n \to \RR^n$. Want to solve the initial value problem: \[ f : \RR \to \RR^n \] \[ f'(t) = \phi(t, f(t)) \] for all $t$ in some interval containing $t_0$, and $f(t_0) = y_0$.