% vim: tw=50
% 24/11/2022 11AM
\noindent Recall: If $f : \RR \to \RR$ differentiable with zero derivative everywhere then $f$ is constant. This followed from the mean value theorem.
\begin{flashcard}[MeanValueIneqRn]
\begin{theorem}[Mean value inequality]
Let $f : \RR^n \to \RR^m$. Suppose $f$ is differentiable on an open set $X \subset \RR^n$ with $a, b \in X$. Suppose further that
\[ \cloze{[a, b] = \{a + t(b - a) \mid 0 \le t \le 1\} \subset X} .\]
Then
\[ \|f(b) - f(a)\| \le \|b - a\| \sup_{z \in (a, b)} \|D f|_z\| \]
where $(a, b) = \cloze{[a, b] \setminus \{a, b\}}$.
\end{theorem}
\end{flashcard}
\begin{hiddenflashcard}[MVI-proof]
Proof of Mean Value Inequality? \\
\cloze{
Define $\phi : [0, 1] \to \RR$ by
\[ \phi(t) = f(a + t(b - a)) \cdot (f(b) - f(a)) \]
Note $\phi$ differentiable and we can compute its derivative because we can write $\phi = \alpha \circ f \circ \beta$ where
\[ \alpha(x) = x \cdot (f(b) - f(a)) \qquad \beta(t) = a + t(b - a) \]
Now apply the (real) mean value theorem to $\phi$:
\[ \|f(b) - f(a)\|^2 \le \cdots \le \|f(b) - f(a)\| \|Df|_{a + t(b - a)}\| \|b - a\| \]
(and divide by $\|f(b) - f(a)\|$).
}
\end{hiddenflashcard}
\begin{proof}
Define $\phi : [0, 1] \to \RR$ by $\phi(t) = f(a + t(b - a)) \cdot (f(b) - f(a))$. Then $\phi = \alpha \circ f \circ \beta$ where $\beta : [0, 1] \to \RR^n$, $\beta(t) = a + t(b - a)$, and $\alpha(x) = x \cdot (f(b) - f(a))$. Clearly $\phi$ is continuous on $[0, 1]$. Now $\alpha$ is a linear map so is everywhere differentiable with $D\alpha|_x = \alpha$. Next, $\beta([0, 1]) \subset X$ and $f$ is differentiable on $X$. Finally, if $t \in (0, 1)$ then $\beta$ differentiable at $t$ with $\beta'(t) = b - a$, i.e. $D\beta|_t(h) = h(b - a)$.
Hence by the Chain Rule, if $t \in (0, 1)$ then $\phi$ is differentiable at $t$ and
\begin{align*}
D \phi|_t(h) &= D\alpha|_{f(\beta(t))}(Df|_{\beta(t)}(D \beta|_t(h))) \\
&= \alpha(Df|_{a + t(b - a)}(h(b - a))) \\
&= (f(b) - f(a)) \cdot (hDf|_{a + t(b - a)}(b - a)) \\
&= h((f(b) - f(a)) \cdot Df|_{a + t(b - a)} (b - a))
\end{align*}
That is,
\[ \phi'(t) = (f(b) - f(a)) \cdot Df|_{a + t(b - a)} (b - a) \]
So, by the Mean Value Theorem,
\begin{align*}
\|f(b) - f(a)\|^2 &= (f(b) - f(a)) \cdot f(b) - (f(b) - f(a)) \cdot f(a) \\
&= \phi(1) - \phi(0) \\
&= \phi'(t) &&\text{for some $t \in (0, 1)$} \\
&= (f(b) - f(a)) \cdot Df|_{a + t(b - a)}(b - a) \\
&\le \|f(b) - f(a)\| \|Df|_{a + t(b - a)} (b - a) \| &&\text{Cauchy--Schwarz} \\
&\le \|f(b) - f(a)\| \|D f|_{a + t(b - a)} \| \|b - a\|
\end{align*}
Hence
\[ \|f(b) - f(a)\| \le \|b - a\| \|Df|_{a + t(b - a)} \| \qedhere \]
\end{proof}
\begin{corollary}
Let $X \subset \RR^n$ be open and connected, and let $f : X \to \RR^m$ be differentiable with $Df|_x$ the zero map for all $x \in X$. Then $f$ is constant on $X$.
\end{corollary}
\begin{proof}
By Mean Value Inequality, $f$ is `locally constant': for each $x \in X$, there is some $\delta > 0$ such that $B_\delta(x) \subset X$ and so $f$ is constant on $B_\delta(x)$. (Since $B_\delta(x)$ is convex so contains line segments joining each pair of points.) \\
Note that as $X$ is open, if $U \subset X$ then $U$ open in $X$ if and only if $U$ is open in $\RR^n$. If $X = \emptyset$ then done. Suppose not. Fix $a \in X$. Let
\[ U = \{x \in X \mid f(x) = f(a)\} .\]
\begin{itemize}
\item $U \neq \emptyset$ because $a \in U$.
\item $U$ is open: If $b \in U$ then there is some $\delta > 0$ such that $B_\delta(b) \subset X$ and $f$ constant on $B_\delta(b)$ so $B_\delta(b) \subset U$.
\item $U$ is closed in $X$: if $b \in X \setminus U$ then there is some $\delta > 0$ such that $B_\delta(b) \subset X$ and $f$ constant on $B_\delta(b)$ so $B_\delta(b) \subset X \setminus U$.
So $X \setminus U$ open in $\RR^n$, so open in $X$. So $U$ is closed in $X$.
\end{itemize}
But $X$ is connected, so $U = X$.
\end{proof}
\myskip
We've seen if $f$ differentiable at $a$ then partial derivatives exist at $a$ and the matrix ($*$) of $Df|_a$ is given by the partial derivatives. \\
But, on the other hand, we can have all partial derivatives existing at $a$ but $f$ not differentiable at $a$. \\
However, there is a partial converse to ($*$).
\begin{flashcard}[deducing-differentiable-theorem]
\begin{theorem}[Continuous Partial Derivatives Implies Differentiable]
\cloze{
Let $f : \RR^n \to \RR^m$ and let $a \in \RR^n$. Suppose there is some \fcemph{neighbourhood} of $a$ such that the partial derivatives $D_i f$ ($1 \le i \le n$) all exist and are \fcemph{continuous at $a$}. Then $f$ is differentiable at $a$.
}
\end{theorem}
\end{flashcard}
\begin{hiddenflashcard}[deducing-differentiable-theorem-proof]
Proof that if partial derivatives exist and are continuous, then $f$ differentiable? \\
\cloze{
This is a proof for $f : \RR^2 \to \RR$. \\
Let $a = (x, y)$. Using MVT, we have for some $\theta_{h,k}, \phi_h \in (0, 1)$ functions of $h$ and $k$:
\begin{align*}
f(x + h, y + k) - f(x, y) &= (f(x + h, y + k) - f(x + h, y)) + (f(x + h, y) - f(x, y)) \\
&= kD_2 f(x + h, y + \theta_{h,k} k) + hD_1 f(x + \phi_h h, y)
\intertext{Since $D_1, D_2$ continuous, $D_2 f(x + h, y + \theta_{h, k} k) \to D_2 f(x, y)$ as $(h, k) \to 0$. Similarly for $D_1$. So defining appropriate $\eta, \zeta$ that $\to 0$ as $(h, k) \to 0$, we get:}
&= kD_2 f(x, y) + k\eta(h, k) + hD_1 f(x, y) + h\zeta(h, k) \\
&= hD_1 f(x, y) + kD_2 f(x, y) + (k\eta(h, k) + h\zeta(h, k))
\end{align*}
the first part is linear so just want to show that the other part tends to $0$ fast enough. Note:
\[ \left\|\frac{h\zeta(h, k) + k\eta(h, k)}{\sqrt{h^2 + k^2}} \right\| \le |\zeta(h, k)| + |\eta(h, k)| \to 0 \]
as $(h, k) \to 0$. So $f$ differentiable at $a$.
}
\end{hiddenflashcard}
\noindent How can we prove this?
For simplicity, we'll just prove this when $n = 2$, $m = 1$. So $f : \RR^2 \to \RR$. Write $a = (x, y)$. Want to think about $f(x + h, y + k)$ for small $h, k$. Now, by definition of partial derivatives,
\[ f(x + h, y + k) = f(x + h, y) + kD_2 f(x + h, y) + o(k) \tag{$*$} \]
and
\[ f(x + h, y) = f(x, y) + hD_1 f(x, y) + o(h) \]
Hence
\begin{align*}
f(x + h, y + k) &= f(x, y) + hD_1 f(x, y) + k D_2 f(x + h, y) + o(h) + o(k) \\
&= f(x, y) + hD_1 f(x, y) + k(D_2f(x, y) + o(1)) + o(h) + o(k) \\
&= f(x, y) + \ub{hD_1f(x, y) + kD_2 f(x, y)}_{\text{linear in $(h, k)$}} + \ub{o(h) + o(k)}_{o((h, k))}
\end{align*}
Unfortunately, this is nonsense. In particular, the $o(k)$ in ($*$) is actually also dependent on $h$. Call it $\eta(h, k)$. We need $\frac{\eta(h, k)}{k} \to 0$ as $(h, k) \to (0, 0)$. But we only know that for each fixed $h$, $\frac{\eta(h, k)}{k} \to 0$ as $k \to 0$, and this is weaker. \\
In fact, to write a proof that actually works, we need the Mean Value Theorem.
\begin{proof}
For simplicity, $n = 2$, $m = 1$. $a = (x, y)$. Take $(h, k)$ small. Then by MVT,
\[ f(x + h, y + k) - f(x + h, y) = kD_2 f(x + h, y + \theta_{h, k} k) \]
for some $\theta_{h, k} \in (0, 1)$. Again by MVT,
\[ f(x + h, y) - f(x, y) = hD_1 f(x + \phi_h h, y) \]
for some $\phi_h \in (0, 1)$. Hence
\[ f(x + h, y + k) - f(x, y) = k D_2 f(x + h, y + \theta_{h, k} k) + hD_1f(x + \phi_h h, y) \]
As $(h, k) \to (0, 0)$ we have $(x + h, y + \theta_{h, k} k) \to (x, y)$ and $(x + \phi_h h, y) \to (x, y)$, so by continuity of $D_1, D_2$ at $(x, y)$, we have
\[ D_2 f(x + h, y + \theta_{h, k} k) \to D_2 f(x, y) \]
and
\[ D_1 f(x + \phi_h h, y) \to D_1 f(x, y) \]
Write $D_2f(x + h, y + \theta_{h, k} k) = D_2 f(x, y) + \eta(h, k)$ and $D_1 f(x + \phi_h h, y) = D_1 f(x, y) + \zeta(h, k)$ where $\eta(h, k), \zeta(h, k) \to 0$ as $(h, k) \to (0, 0)$.
Then
\[ f(x + h, y + k) = f(x, y) + hD_1 f(x, y) + k D_2 f(x, y) + h\zeta(h, k) + k\eta(h, k) \]
Now $(h, k) \mapsto hD_1 f(x, y) + k D_2 f(x, y)$ is linear, and
\[ \left| \frac{h\zeta(h, k) + k\eta(h, k)}{\sqrt{h^2 + k^2}} \right| \le |\zeta(h, k)| + |\eta(h, k)| \to 0 \]
as $(h, k) \to (0, 0)$. So $f$ is differentiable at $a = (x, y)$.
\end{proof}
\subsubsection*{Remarks}
\begin{enumerate}[(1)]
\item Same proof basically does $f : \RR^n \to \RR$ for general $n$ (with more notation). Then get $f : \RR^n \to \RR^m$ by looking at each $f_i : \RR^n \to \RR$ ($1 \le i \le m$).
\item If you try to prove something like this and don't use MVT it's probably wrong.
\end{enumerate}