% vim: tw=50
% 24/11/2022 11AM
\noindent Recall: If $f : \RR \to \RR$ differentiable with zero derivative everywhere then $f$ is constant. This followed from the mean value theorem.
\begin{flashcard}[MeanValueIneqRn]
\begin{theorem}[Mean value inequality]
Let $f : \RR^n \to \RR^m$. Suppose $f$ is differentiable on an open set $X \subset \RR^n$ with $a, b \in X$. Suppose further that
\[ \cloze{[a, b] = \{a + t(b - a) \mid 0 \le t \le 1\} \subset X} .\]
Then
\[ \|f(b) - f(a)\| \le \|b - a\| \sup_{z \in (a, b)} \|D f|_z\| \]
where $(a, b) = \cloze{[a, b] \setminus \{a, b\}}$.
\end{theorem}
\end{flashcard}
\begin{hiddenflashcard}[MVI-proof]
Proof of Mean Value Inequality? \\
\cloze{
Define $\phi : [0, 1] \to \RR$ by
\[ \phi(t) = f(a + t(b - a)) \cdot (f(b) - f(a)) \]
Note $\phi$ differentiable and we can compute its derivative because we can write $\phi = \alpha \circ f \circ \beta$ where
\[ \alpha(x) = x \cdot (f(b) - f(a)) \qquad \beta(t) = a + t(b - a) \]
Now apply the (real) mean value theorem to $\phi$:
\[ \|f(b) - f(a)\|^2 \le \cdots \le \|f(b) - f(a)\| \|Df|_{a + t(b - a)}\| \|b - a\| \]
(and divide by $\|f(b) - f(a)\|$).
}
\end{hiddenflashcard}
\begin{proof}
Define $\phi : [0, 1] \to \RR$ by $\phi(t) = f(a + t(b - a)) \cdot (f(b) - f(a))$. Then $\phi = \alpha \circ f \circ \beta$ where $\beta : [0, 1] \to \RR^n$, $\beta(t) = a + t(b - a)$, and $\alpha(x) = x \cdot (f(b) - f(a))$. Clearly $\phi$ is continuous on $[0, 1]$. Now $\alpha$ is a linear map so is everywhere differentiable with $D\alpha|_x = \alpha$. Next, $\beta([0, 1]) \subset X$ and $f$ is differentiable on $X$. Finally, if $t \in (0, 1)$ then $\beta$ differentiable at $t$ with $\beta'(t) = b - a$, i.e. $D\beta|_t(h) = h(b - a)$.
Hence by the Chain Rule, if $t \in (0, 1)$ then $\phi$ is differentiable at $t$ and
\begin{align*}
D \phi|_t(h) &= D\alpha|_{f(\beta(t))}(Df|_{\beta(t)}(D \beta|_t(h))) \\
&= \alpha(Df|_{a + t(b - a)}(h(b - a))) \\
&= (f(b) - f(a)) \cdot (hDf|_{a + t(b - a)}(b - a)) \\
&= h((f(b) - f(a)) \cdot Df|_{a + t(b - a)} (b - a))
\end{align*}
That is,
\[ \phi'(t) = (f(b) - f(a)) \cdot Df|_{a + t(b - a)} (b - a) \]
So, by the Mean Value Theorem,
\begin{align*}
\|f(b) - f(a)\|^2 &= (f(b) - f(a)) \cdot f(b) - (f(b) - f(a)) \cdot f(a) \\
&= \phi(1) - \phi(0) \\
&= \phi'(t) &&\text{for some $t \in (0, 1)$} \\
&= (f(b) - f(a)) \cdot Df|_{a + t(b - a)}(b - a) \\
&\le \|f(b) - f(a)\| \|Df|_{a + t(b - a)} (b - a) \| &&\text{Cauchy--Schwarz} \\
&\le \|f(b) - f(a)\| \|D f|_{a + t(b - a)} \| \|b - a\|
\end{align*}
Hence
\[ \|f(b) - f(a)\| \le \|b - a\| \|Df|_{a + t(b - a)} \| \qedhere \]
\end{proof}
\begin{corollary}
Let $X \subset \RR^n$ be open and connected, and let $f : X \to \RR^m$ be differentiable with $Df|_x$ the zero map for all $x \in X$. Then $f$ is constant on $X$.
\end{corollary}
\begin{proof}
By Mean Value Inequality, $f$ is `locally constant': for each $x \in X$, there is some $\delta > 0$ such that $B_\delta(x) \subset X$ and so $f$ is constant on $B_\delta(x)$. (Since $B_\delta(x)$ is convex so contains line segments joining each pair of points.) \\
Note that as $X$ is open, if $U \subset X$ then $U$ open in $X$ if and only if $U$ is open in $\RR^n$. If $X = \emptyset$ then done. Suppose not. Fix $a \in X$. Let
\[ U = \{x \in X \mid f(x) = f(a)\} .\]
\begin{itemize}
\item $U \neq \emptyset$ because $a \in U$.
\item $U$ is open: If $b \in U$ then there is some $\delta > 0$ such that $B_\delta(b) \subset X$ and $f$ constant on $B_\delta(b)$ so $B_\delta(b) \subset U$.
\item $U$ is closed in $X$: if $b \in X \setminus U$ then there is some $\delta > 0$ such that $B_\delta(b) \subset X$ and $f$ constant on $B_\delta(b)$ so $B_\delta(b) \subset X \setminus U$.
So $X \setminus U$ open in $\RR^n$, so open in $X$. So $U$ is closed in $X$.
\end{itemize}
But $X$ is connected, so $U = X$.
\end{proof}
\myskip
We've seen if $f$ differentiable at $a$ then partial derivatives exist at $a$ and the matrix ($*$) of $Df|_a$ is given by the partial derivatives. \\
But, on the other hand, we can have all partial derivatives existing at $a$ but $f$ not differentiable at $a$. \\
However, there is a partial converse to ($*$).
\begin{flashcard}[deducing-differentiable-theorem]
\begin{theorem}[Continuous Partial Derivatives Implies Differentiable]
\cloze{
Let $f : \RR^n \to \RR^m$ and let $a \in \RR^n$. Suppose there is some \fcemph{neighbourhood} of $a$ such that the partial derivatives $D_i f$ ($1 \le i \le n$) all exist and are \fcemph{continuous at $a$}. Then $f$ is differentiable at $a$.
}
\end{theorem}
\end{flashcard}
\begin{hiddenflashcard}[deducing-differentiable-theorem-proof]
Proof that if partial derivatives exist and are continuous, then $f$ differentiable? \\
\cloze{
This is a proof for $f : \RR^2 \to \RR$. \\
Let $a = (x, y)$. Using MVT, we have for some $\theta_{h,k}, \phi_h \in (0, 1)$ functions of $h$ and $k$:
\begin{align*}
f(x + h, y + k) - f(x, y) &= (f(x + h, y + k) - f(x + h, y)) + (f(x + h, y) - f(x, y)) \\
&= kD_2 f(x + h, y + \theta_{h,k} k) + hD_1 f(x + \phi_h h, y)
\intertext{Since $D_1, D_2$ continuous, $D_2 f(x + h, y + \theta_{h, k} k) \to D_2 f(x, y)$ as $(h, k) \to 0$. Similarly for $D_1$. So defining appropriate $\eta, \zeta$ that $\to 0$ as $(h, k) \to 0$, we get:}
&= kD_2 f(x, y) + k\eta(h, k) + hD_1 f(x, y) + h\zeta(h, k) \\
&= hD_1 f(x, y) + kD_2 f(x, y) + (k\eta(h, k) + h\zeta(h, k))
\end{align*}
the first part is linear so just want to show that the other part tends to $0$ fast enough. Note:
\[ \left\|\frac{h\zeta(h, k) + k\eta(h, k)}{\sqrt{h^2 + k^2}} \right\| \le |\zeta(h, k)| + |\eta(h, k)| \to 0 \]
as $(h, k) \to 0$. So $f$ differentiable at $a$.
}
\end{hiddenflashcard}
\noindent How can we prove this?
For simplicity, we'll just prove this when $n = 2$, $m = 1$. So $f : \RR^2 \to \RR$. Write $a = (x, y)$. Want to think about $f(x + h, y + k)$ for small $h, k$. Now, by definition of partial derivatives,
\[ f(x + h, y + k) = f(x + h, y) + kD_2 f(x + h, y) + o(k) \tag{$*$} \]
and
\[ f(x + h, y) = f(x, y) + hD_1 f(x, y) + o(h) \]
Hence
\begin{align*}
f(x + h, y + k) &= f(x, y) + hD_1 f(x, y) + k D_2 f(x + h, y) + o(h) + o(k) \\
&= f(x, y) + hD_1 f(x, y) + k(D_2f(x, y) + o(1)) + o(h) + o(k) \\
&= f(x, y) + \ub{hD_1f(x, y) + kD_2 f(x, y)}_{\text{linear in $(h, k)$}} + \ub{o(h) + o(k)}_{o((h, k))}
\end{align*}
Unfortunately, this is nonsense. In particular, the $o(k)$ in ($*$) is actually also dependent on $h$. Call it $\eta(h, k)$. We need $\frac{\eta(h, k)}{k} \to 0$ as $(h, k) \to (0, 0)$. But we only know that for each fixed $h$, $\frac{\eta(h, k)}{k} \to 0$ as $k \to 0$, and this is weaker. \\
In fact, to write a proof that actually works, we need the Mean Value Theorem.
\begin{proof}
For simplicity, $n = 2$, $m = 1$. $a = (x, y)$. Take $(h, k)$ small. Then by MVT,
\[ f(x + h, y + k) - f(x + h, y) = kD_2 f(x + h, y + \theta_{h, k} k) \]
for some $\theta_{h, k} \in (0, 1)$. Again by MVT,
\[ f(x + h, y) - f(x, y) = hD_1 f(x + \phi_h h, y) \]
for some $\phi_h \in (0, 1)$. Hence
\[ f(x + h, y + k) - f(x, y) = k D_2 f(x + h, y + \theta_{h, k} k) + hD_1f(x + \phi_h h, y) \]
As $(h, k) \to (0, 0)$ we have $(x + h, y + \theta_{h, k} k) \to (x, y)$ and $(x + \phi_h h, y) \to (x, y)$, so by continuity of $D_1, D_2$ at $(x, y)$, we have
\[ D_2 f(x + h, y + \theta_{h, k} k) \to D_2 f(x, y) \]
and
\[ D_1 f(x + \phi_h h, y) \to D_1 f(x, y) \]
Write $D_2f(x + h, y + \theta_{h, k} k) = D_2 f(x, y) + \eta(h, k)$ and $D_1 f(x + \phi_h h, y) = D_1 f(x, y) + \zeta(h, k)$ where $\eta(h, k), \zeta(h, k) \to 0$ as $(h, k) \to (0, 0)$.
Then
\[ f(x + h, y + k) = f(x, y) + hD_1 f(x, y) + k D_2 f(x, y) + h\zeta(h, k) + k\eta(h, k) \]
Now $(h, k) \mapsto hD_1 f(x, y) + k D_2 f(x, y)$ is linear, and
\[ \left| \frac{h\zeta(h, k) + k\eta(h, k)}{\sqrt{h^2 + k^2}} \right| \le |\zeta(h, k)| + |\eta(h, k)| \to 0 \]
as $(h, k) \to (0, 0)$. So $f$ is differentiable at $a = (x, y)$.
\end{proof}
\subsubsection*{Remarks}
\begin{enumerate}[(1)]
\item Same proof basically does $f : \RR^n \to \RR$ for general $n$ (with more notation). Then get $f : \RR^n \to \RR^m$ by looking at each $f_i : \RR^n \to \RR$ ($1 \le i \le m$).
\item If you try to prove something like this and don't use MVT it's probably wrong.
\end{enumerate}