% vim: tw=50 % 26/11/2022 11AM \newpage \section{The Second Derivative} We'll start with a result in partial derivatives \[ \frac{\partial^2 f}{\partial x \partial y} = \frac{\partial^2 f}{\partial y \partial x} \] \begin{flashcard}[symmetry-of-second-partial-derivatives] \begin{theorem}[Symmetry of second partial derivatives] \cloze{ Let $f : \RR^n \to \RR^m$, $a \in \RR^n$ and $\eps > 0$. Suppose $D_iD_j f$ and $D_j D_i f$ exist on $B_\eps(a)$ and are continuous at $a$. Then $D_i D_j f(a) = D_j D_i f(a)$. } \end{theorem} \end{flashcard} \begin{proof} WLOG $m = 1$, $n = 2$, $a = (x, y)$, $i = 1$, $j = 2$. \\ Let \begin{align*} \Delta_h &= f(x + h, y + h) - f(x, y + h) - f(x + h, y) + f(x, y) \\ &= g(y + h) - g(y) \end{align*} where $g(t) = f(x + h, t) - f(x, t)$. Let $0 < |h| < \eps/\sqrt{2}$ (so that all points considered below lie in $B_\eps(a)$). Then \begin{align*} \Delta_h &= hg'(y + \theta_h h) &&(\theta_h \in (0, 1)) \\ &= h(D_2 f(x + h, y + \theta_h h) - D_2 f(x, y + \theta_h h)) \\ &= h^2 D_1D_2 f(x + \phi_h h, y + \theta_h h) &&(\phi_h \in (0, 1)) \end{align*} Similarly, $\Delta_h = h^2 D_2D_1f(x + \zeta_h h, y + \xi_h h)$ for some $\zeta_h, \xi_h \in (0, 1)$. Hence \[ D_1D_2 f(x + \phi_h h, y + \theta_h h) = D_2 D_1 f(x + \zeta_h h, y + \xi_h h) \] So let $h \to 0$ and use continuity of $D_1D_2 f$ and $D_2D_1 f$ at $(x, y)$ \[ D_1 D_2 f(x, y) = D_2 D_1f(x, y) \qedhere \] \end{proof} \begin{hiddenflashcard}[symmetry-of-partial-derivatives-proof] Proof of symmetry of partial derivatives? \\ \cloze{ WLOG work $\RR^2 \to \RR$. \\ Define \[ \Delta_h = f(x + h, y + h) - f(x + h, y) - f(x, y + h) + f(x, y) \] Expand this in two ways using MVT to get \[ h^2 D_1D_2 f(x + \phi_h h, y + \theta_h h) = h^2 D_2D_1 f(x + \zeta_h h, y + \xi_h h) \] Divide by $h^2$ then take a limit as $h \to 0$ to get \[ D_1 D_2 f(x, y) = D_2 D_1 f(x, y) \] } \end{hiddenflashcard} \myskip What is the second derivative really? \\ Let $f : \RR^n \to \RR^m$ be everywhere differentiable. 
For each $x \in \RR^n$, $Df|_x \in \mathcal{L}(\RR^n, \RR^m)$. Define $F : \RR^n \to \mathcal{L}(\RR^n, \RR^m) \sim \RR^{nm}$ by $F(x) = D f|_x$. If $F$ is differentiable at $a \in \RR^n$ then we say $f$ is \emph{twice differentiable} at $a$ and the \emph{second derivative} of $f$ at $a$ is $D^2 f|_a = DF|_a$. What is $D^2 f|_a$? \[ D^2 f|_a \in \mathcal{L}(\RR^n, \mathcal{L}(\RR^n, \RR^m)) \sim \operatorname{Bil}(\RR^n \times \RR^n, \RR^m) \] So $D^2 f|_a$ is a bilinear map from $\RR^n \times \RR^n \to \RR^m$. If $f$ is twice differentiable at $a$, this says \[ Df|_{a + h} = Df|_a + D^2 f|_a(h) + o(h) \] (Everything in this expression is a linear map), i.e. \[ Df|_{a + h}(k) = Df|_a(k) + \ub{D^2 f|_a(h, k)}_{\text{bilinear in $h, k$}} + \ub{o_k(h)}_{\text{for fixed $k$, this is $o(h)$}} \] \begin{example*} $f : \mathcal{M}_n \to \mathcal{M}_n$, $f(A) = A^3$. \begin{align*} f(A + K) &= (A + K)^3 \\ &= \ub{A^3}_{f(A)} + \ub{A^2K + AKA + KA^2}_{\text{linear in $K$}} + \ub{\text{terms involving $K^2$}}_{o(K)} \end{align*} So $f$ everywhere differentiable with \[ Df|_A(K) = A^2K + AKA + KA^2 \] Now \begin{align*} Df|_{A + H}(K) &= (A+H)^2K + (A+H)K(A+H) + K(A+H)^2 \\ &= \ub{A^2K + AKA + KA^2}_{Df|_A(K)} \\ &~~~~+ \ub{AHK + HAK + AKH + HKA + KAH + KHA}_{\text{Bilinear}} \\ &~~~~+ \ub{H^2K + HKH + KH^2}_{o_K(H)} \end{align*} So $f$ is twice differentiable at $A$ and \[ D^2 f|_A(H, K) = AHK + HAK + AKH + HKA + KAH + KHA \] \end{example*} \begin{remark*} For definition to work, enough to have $f$ differentiable on some neighbourhood of $a$. \end{remark*} \noindent How does $D^2 f|_a$ relate to the $D_iD_jf(a)$? Suppose $f : \RR^n \to \RR$ is twice differentiable at $a \in \RR^n$. Then, with $e_1, \dots, e_n$ the standard basis, \begin{align*} \frac{D_jf(a + he_i) - D_jf(a)}{h} &= \frac{D^2f|_a(he_i, e_j) + o(h)}{h} \\ &= D^2 f|_a(e_i, e_j) + o(1) \\ &\to D^2 f|_a (e_i, e_j) \end{align*} So $D_iD_jf(a) = D^2 f|_a(e_i, e_j)$. 
So if $H$ is the $n \times n$ matrix representing the bilinear form $D^2 f|_a$, we have \[ H_{ij} = D_iD_jf(a) \] We call $H$ the \emph{Hessian} matrix of $f$. If $f : \RR^n \to \RR^m$, could do this for each $f_i : \RR^n \to \RR$ ($i = 1, \dots, m$), or think about matrices whose entries are elements of $\RR^m$. \begin{definition*} Let $f : \RR^n \to \RR^m$ and $a \in \RR^n$. We say $f$ is \emph{continuously differentiable} at $a$ if $Df|_x$ exists for all $x$ in some ball $B_\delta(a)$ ($\delta > 0$) and the function $x \mapsto Df|_x$ is continuous at $a$. \end{definition*} \noindent If $f$ is twice differentiable at $a$ then Theorem 10 tells us that $H$ is a symmetric matrix. Hence under this condition, $D^2 f|_a$ is a symmetric bilinear form. \\ An application: \begin{definition*} Let $f : \RR^n \to \RR$, $a \in \RR^n$. We say $a$ is a \emph{local maximum} (respectively \emph{minimum}) for $f$ if there is some $\delta > 0$ such that for all $x \in B_\delta(a)$ we have $f(x) \le f(a)$ (respectively $f(x) \ge f(a)$). \end{definition*} \begin{proposition} Let $f : \RR^n \to \RR$ and let $a$ be a local maximum / minimum for $f$. Suppose $f$ differentiable at $a$. Then $Df|_a$ is the zero map. \end{proposition} \begin{proof} Let $u \in \RR^n$. For each $\lambda \neq 0$ in $\RR$, \begin{align*} \frac{f(a + \lambda u) - f(a)}{\lambda} &= \frac{Df|_a(\lambda u) + o(\lambda)}{\lambda} \\ &\to Df|_a(u) \end{align*} as $\lambda \to 0$. Assume WLOG $a$ is a maximum (otherwise consider $-f$). Then \[ \frac{f(a + \lambda u) - f(a)}{\lambda} \begin{cases} \ge 0 & \text{if $\lambda < 0$} \\ \le 0 & \text{if $\lambda > 0$} \end{cases} \] Hence $Df|_a(u) = 0$. \end{proof} \myskip Converse of course does not hold: for example $f : \RR \to \RR$, $f(x) = x^3$, $a = 0$. \begin{flashcard}[second-order-taylor-theorem] \begin{lemma}[Second-order Taylor Theorem] \cloze{ Let $f : \RR^n \to \RR$ be twice differentiable at $a \in \RR^n$. 
Then \[ f(a + h) = f(a) + Df|_a(h) + \half D^2 f|_a(h, h) + o(\|h\|^2) \] } \end{lemma} \prompt{ \begin{proof} \cloze{ Consider $g : [0, 1] \to \RR$ defined by \[ g(t) = f(a + th) - f(a) - tDf|_a(h) - \frac{t^2}{2} D^2f|_a(h, h) \] Note $g$ continuous, differentiable on $(0, 1)$ with easy to calculate derivative, and that $g(0) = 0$. Now use MVT and let $h \to 0$. } \end{proof} } \end{flashcard} \begin{proof} Define $g : [0, 1] \to \RR$ by \[ g(t) = f(a + th) - f(a) - tDf|_a(h) - \frac{t^2}{2} D^2 f|_a(h, h) \] Clearly $g$ is continuous on $[0, 1]$, $g(0) = 0$ and $g$ is differentiable on $(0, 1)$ with \[ g'(t) = Df|_{a + th}(h) - Df|_a(h) - tD^2 f|_a(h, h) \] By Mean Value Theorem, $\exists t \in (0, 1)$ such that $g(1) - g(0) = g'(t)$. Hence \begin{align*} \frac{\big|f(a + h) - f(a) - Df|_a(h) - \half D^2 f|_a(h, h) \big|}{\|h\|^2} &= \frac{\big| Df|_{a + th} (h) - Df|_a(h) - tD^2 f|_a(h, h) \big|}{\|h\|^2} \\ &= \frac{\big| D^2f|_a(th, h) + o(\|h\|^2) - tD^2f|_a(h, h)\big|}{\|h\|^2} \\ &= \frac{|o(\|h\|^2)|}{\|h\|^2} \\ &\to 0 \end{align*} as $h \to 0$. \end{proof} \begin{theorem} Let $f : \RR^n \to \RR$ and $a \in \RR^n$. Suppose $f$ is twice differentiable at $a$ (so, in particular, $D^2 f|_a$ is a symmetric bilinear form) and $Df|_a = 0$. Then \[ D^2 f|_a \text{ positive definite} \implies \text{$a$ local minimum} \] and \[ D^2 f|_a \text{ negative definite} \implies \text{$a$ local maximum} \] \end{theorem} \begin{proof} Suppose WLOG $D^2 f|_a$ positive definite (otherwise consider $-f$). Then with respect to some orthonormal basis $D^2 f|_a$ has diagonal matrix with strictly positive elements on the leading diagonal. Have $\forall x \in \RR^n$, $D^2 f|_a(x, x) \ge \mu\|x\|^2$ where $\mu > 0$ is the least eigenvalue of $D^2 f|_a$. By Lemma 12, \begin{align*} \frac{f(a + h) - f(a)}{\|h\|^2} &= \half \frac{D^2f|_a(h, h)}{\|h\|^2} + o(1) \\ &\ge \half \mu + o(1) \\ &\to \half \mu \end{align*} as $h \to 0$. 
But $\half \mu > 0$ so for $h$ sufficiently small, \[ \frac{f(a + h) - f(a)}{\|h\|^2} > 0 \] so $f(a + h) - f(a) > 0$ so $a$ is a local minimum for $f$. \end{proof} \newpage \section{Ordinary Differential Equations} \begin{lemma} Let $A \subset \RR^n$, $B \subset \RR^m$ with $A$ compact and $B$ closed. Let $X = \mathcal{C}(A, B) = \{f : A \to B \mid \text{$f$ continuous}\}$ with uniform metric \[ d(f, g) = \sup_{x \in A} \|f(x) - g(x)\| \] Then $X$ is a complete metric space. \end{lemma} \begin{proof} As $A$ compact, $d$ is well-defined. Let $(f_n)$ be a Cauchy sequence in $X$. Then $(f_n)$ is uniformly Cauchy so uniformly convergent by General Principle of Uniform Convergence on each coordinate. So $f_n \to f$ uniformly for some $f : A \to \RR^m$. Uniform limit of continuous functions is continuous so $f$ is continuous. And $\forall x \in A$, $f_n(x) \to f(x)$ so, as $B$ closed, $f(x) \in B$. So $f \in X$ and $d(f_n, f) \to 0$. \end{proof} \myskip Often we want to solve an ODE but can't find a closed-form solution. \begin{itemize} \item Numerical Methods \item Phase plane portraits \end{itemize} \emph{But} this is silly if the ODE has no solution. So want a general result telling us under appropriate conditions ODEs have unique solutions. Typical ODE: $\dfrac{dy}{dx} = \phi(x, y)$, subject to $y = y_0$ when $x = x_0$. Useful to think about things $\RR^n \to \RR^n$. Want to solve the initial value problem: \[ f : \RR \to \RR^n \] \[ f'(t) = \phi(t, f(t)) \] for all $t$ in some interval containing $t_0$, and $f(t_0) = y_0$.