% vim: tw=50 % 22/11/2022 11AM \noindent Partial derivatives \emph{can} still be useful for computation: \begin{proposition} Let $f : \RR^n \to \RR^m$ and $a \in \RR^n$. Write \[ f = \begin{pmatrix} f_1 \\ \vdots \\ f_m \end{pmatrix} \] where for each $i$, $f_i : \RR^n \to \RR$. Then \begin{enumerate}[(a)] \item $f$ is differentiable at $a$ if and only if each $f_i$ is differentiable at $a$, in which case \[ Df|_a = \begin{pmatrix} Df_1|_a \\ \vdots \\ Df_m|_a \end{pmatrix} \] and \item if $f$ is differentiable at $a$ and $A$ is the matrix of $Df|_a$ in terms of the standard bases then $A_{ij} = D_j f_i(a)$. \end{enumerate} \end{proposition} \begin{proof} \begin{enumerate}[(a)] \item \begin{enumerate}[$\Rightarrow$] \item[$\Rightarrow$] Write \[ f(a + h) = f(a) + Df|_a(h) + \eps(h) \|h\| \] where $\eps(h) \to 0$ as $h \to 0$. Then \[ f_i(a + h) = f_i(a) + (Df|_a)_i(h) + \eps_i(h) \|h\| \] where $(Df|_a)_i : \RR^n \to \RR$ is linear and $|\eps_i(h)| \le \|\eps(h)\| \to 0$ as $h \to 0$. \item[$\Leftarrow$] For each $i$, write \[ f_i(a + h) = f_i(a) + Df_i|_a(h) + \eps_i(h) \|h\| \] where $\eps_i(h) \to 0$ as $h \to 0$. Then \[ f(a + h) = f(a) + \alpha(h) + \eps(h) \|h\| \] where \[ \alpha = \begin{pmatrix} Df_1|_a \\ \vdots \\ Df_m|_a \end{pmatrix} : \RR^n \to \RR^m \] is linear and \[ \|\eps(h)\| = \left\| \begin{pmatrix} \eps_1(h) \\ \vdots \\ \eps_m(h) \end{pmatrix} \right\| = \sqrt{\sum_{i = 1}^m \eps_i(h)^2} \to 0 \] as $h \to 0$. \end{enumerate} \item Write \[ f(a + h) = f(a) + Df|_a(h) + \eps(h)\|h\| \] where $\eps(h) \to 0$ as $h \to 0$. Let $e_1, \dots, e_n$ be the standard basis of $\RR^n$. Then \[ \frac{f(a + ke_j) - f(a)}{k} = \frac{Df|_a(ke_j) + \eps(ke_j) \|ke_j\|}{k} = Df|_a(e_j) + \frac{|k|}{k} \eps(ke_j) \to Df|_a(e_j) \] as $k \to 0$ (since $\left| \frac{|k|}{k} \right| = 1$). So all partial derivatives of $f$ exist at $a$ and $D_jf(a) = Df|_a(e_j)$. \qedhere \end{enumerate} \end{proof} \begin{definition*} The matrix $A$ in (b) is called the \emph{Jacobian} matrix of $f$ at $a$. 
\end{definition*} \begin{hiddenflashcard}[JacobianMatrix] What is the Jacobian matrix? \[ A_{ij} = \cloze{D_jf_i(a)} \] \end{hiddenflashcard} \begin{theorem}[The Chain Rule] Let $f : \RR^p \to \RR^n$ be differentiable at $a \in \RR^p$, and let $g : \RR^n \to \RR^m$ be differentiable at $f(a) \in \RR^n$. Then $g \circ f$ is differentiable at $a$ with \[ D(g \circ f)|_a = Dg|_{f(a)} \circ Df|_a \] \end{theorem} \begin{remark*} In principle this should be obvious: if $f$ is approximately linear near $a$ and $g$ is approximately linear near $f(a)$, then $g \circ f$ is approximately linear near $a$, and the linear approximation to $g \circ f$ near $a$ is the obvious composite. \\ Proof looks a bit messy - calculation to make sure error terms behave. \end{remark*} \begin{proof} Write \[ f(a + h) = f(a) + \alpha(h) + \eps(h) \|h\| \] and \[ g(f(a) + k) = g(f(a)) + \beta(k) + \eta(k) \|k\| \] where $\alpha = Df|_a$, $\beta = Dg|_{f(a)}$ are linear, $\eps(h) \to 0$ as $h \to 0$ and $\eta(k) \to 0$ as $k \to 0$. Now: \begin{align*} g(f(a + h)) &= g(f(a) + \alpha(h) + \eps(h)\|h\|) \\ &= g(f(a)) + \beta(\alpha(h) + \eps(h)\|h\|) + \eta(\alpha(h) + \eps(h)\|h\|) \|\alpha(h) + \eps(h) \|h\| \| \\ &= g(f(a)) + \ub{\beta(\alpha(h))}_{\text{linear}} + \ub{\zeta(h) \|h\|}_{\text{small}} \end{align*} where \[ \zeta(h) = \beta(\eps(h)) + \eta(\alpha(h) + \eps(h) \|h\|) \left\| \frac{\alpha(h)}{\|h\|} + \eps(h) \right\| \] Now, $\eps(h) \to 0$ as $h \to 0$ and $\beta$ linear, so continuous, so $\beta(\eps(h)) \to \beta(0) = 0$ as $h \to 0$. Next, $\alpha$ linear so continuous so $\alpha(h) \to \alpha(0) = 0$ as $h \to 0$. And $\eps(h) \|h\| \to 0 \times 0 = 0$ as $h \to 0$. So $\alpha(h) + \eps(h) \|h\| \to 0$ as $h \to 0$. WLOG $\eta(0) = 0$ so $\eta$ continuous at $0$. Then $\eta(\alpha(h) + \eps(h) \|h\|) \to 0$ as $h \to 0$. 
Finally, \begin{align*} \left\| \frac{\alpha(h)}{\|h\|} + \eps(h) \right\| &\le \frac{\|\alpha(h)\|}{\|h\|} + \|\eps(h)\| \\ &\le \frac{\|\alpha\|\|h\|}{\|h\|} + \|\eps(h)\| \\ &= \|\alpha\| + \|\eps(h)\| \\ &\to \|\alpha\| \end{align*} as $h \to 0$. Hence $\zeta(h) \to 0$ as $h \to 0$. \end{proof} \subsubsection*{Examples} \begin{enumerate}[(1)] \item Suppose $f$ is constant. Then $f(a + h) = f(a) + 0 + 0\|h\|$, so $f$ is everywhere differentiable with derivative the zero map. \item Suppose $f$ is linear. Then \[ f(a + h) = f(a) + f(h) + 0\|h\| \] so $f$ everywhere differentiable with $Df|_a = f$ for all $a$. \item Suppose $f : \RR \to \RR^m$. As remarked earlier for $a \in \RR$, $f$ is differentiable in old sense at $a$ if and only if it is differentiable in new sense, in which case $Df|_a(h) = hf'(a)$. \item Using the above together with Chain Rule, get lots of differentiable functions, for example \[ f : \RR^2 \to \RR^2, \qquad f\left( \begin{pmatrix} x \\ y \end{pmatrix} \right) = \begin{pmatrix} e^{x + y} \\ \cos(xy) \end{pmatrix} \] is differentiable. Why? The projection maps $\pi_1, \pi_2 : \RR^2 \to \RR$, $\pi_1(x, y) = x, \pi_2(x, y) = y$ are linear so differentiable. So by Chain Rule: \[ f_1(z) = e^{\pi_1(z) + \pi_2(z)}, \qquad f_2(z) = \cos(\pi_1(z)\pi_2(z)) \] are differentiable. So by Proposition 5(a), $f$ is differentiable. \\ What is derivative of $f$ at $z = (x, y)$? It's some linear map $\RR^2 \to \RR^2$. By Proposition 5(b), the matrix of the derivative is given by the partial derivatives: \[ Df|_{(x, y)} = \begin{pmatrix} e^{x + y} & e^{x + y} \\ -y\sin xy & -x \sin xy \end{pmatrix} \] \item Let $\mathcal{M}_n$ be the vector space of $n \times n$ real matrices. So $\mathcal{M}_n \sim \RR^{n^2}$ so can consider differentiability of $f : \mathcal{M}_n \to \mathcal{M}_n$. Recall that the definition still same if we replace the Euclidean norm by the operator norm, so write $\|\bullet\|$ for operator norm on $\mathcal{M}_n$. 
Define $f : \mathcal{M}_n \to \mathcal{M}_n$ by $f(A) = A^2$. Then: \[ f(A + H) = (A + H)^2 = \ub{A^2}_{f(A)} + \ub{AH + HA}_{\text{linear}} + \ub{H^2}_{\text{higher order}} \] where \[ \left\| \frac{H^2}{\|H\|} \right\| \le \frac{\|H\|^2}{\|H\|} = \|H\| \to 0 \] as $H \to 0$. So $f$ everywhere differentiable and \[ Df|_A(H) = AH + HA \] \item We have $\det : \mathcal{M}_n \to \RR$. We have: \begin{align*} \det(I + H) &= \begin{vmatrix} 1 + H_{11} & H_{12} & \cdots & H_{1n} \\ H_{21} & 1 + H_{22} & \cdots & H_{2n} \\ \vdots & \vdots & \ddots & \vdots \\ H_{n1} & H_{n2} & \cdots & 1 + H_{nn} \end{vmatrix} \\ &= \ub{1}_{\det I} + \ub{\Trace(H)}_{\text{linear in $H$}} + \ub{\text{other terms involving two or more $H_{ij}$ multiplied together.}}_{\text{higher order}} \end{align*} Note \[ \left| \frac{H_{ij} H_{kl}}{\|H\|_2} \right| \le |H_{kl}| \to 0 \] as $H \to 0$. ($\|H\|_2$ is the Euclidean norm). So $\det$ is differentiable at $I$ with $D\det|_I(H) = \Trace(H)$. Suppose $A \in \mathcal{M}_n$ invertible. Then \begin{align*} \det(A + H) &= \det(A) \det(I + A^{-1}H) \\ &= \det A (1 + \Trace(A^{-1} H) + \eps(A^{-1}H) \|A^{-1}H\|) \\ &= \det A + (\det A)(\Trace A^{-1} H) + (\det A) \eps(A^{-1} H) \|A^{-1} H\| \end{align*} where $\eps(K) \to 0$ as $K \to 0$. And \begin{align*} \left| \frac{(\det A) \eps(A^{-1}H) \|A^{-1}H\|}{\|H\|} \right| &\le |\det A| \, |\eps(A^{-1}H)| \, \|A^{-1}\| \\ &\to 0 \end{align*} as $H \to 0$. So $\det$ differentiable at $A$ with $D\det|_A(H) = (\Trace A^{-1} H) (\det A)$. \end{enumerate}