% vim: tw=50 % 22/11/2022 11AM \noindent Partial derivatives \emph{can} still be useful for computation: \begin{proposition} Let $f : \RR^n \to \RR^m$ and $a \in \RR^n$. Write \[ f = \begin{pmatrix} f_1 \\ \vdots \\ f_m \end{pmatrix} \] where for each $i$, $f_i : \RR^n \to \RR$. Then \begin{enumerate}[(a)] \item $f$ is differentiable at $a$ if and only if each $f_i$ is differentiable at $a$, in which case \[ Df|_a = \begin{pmatrix} Df_1|_a \\ \vdots \\ Df_m|_a \end{pmatrix} \] and \item if $f$ is differentiable at $a$ and $A$ is the matrix of $Df|_a$ in terms of the standard bases then $A_{ij} = D_j f_i(a)$. \end{enumerate} \end{proposition} \begin{proof} \begin{enumerate}[(a)] \item \begin{enumerate}[$\Rightarrow$] \item[$\Rightarrow$] Write \[ f(a + h) = f(a) + Df|_a(h) + \eps(h) \|h\| \] where $\eps(h) \to 0$ as $h \to 0$. Then \[ f_i(a + h) = f_i(a) + (Df|_a)_i(h) + \eps_i(h) \|h\| \] where $(Df|_a)_i : \RR^n \to \RR$ is linear and $|\eps_i(h)| \le \|\eps(h)\| \to 0$ as $h \to 0$. \item[$\Leftarrow$] For each $i$, write \[ f_i(a + h) = f_i(a) + Df_i|_a(h) + \eps_i(h) \|h\| \] where $\eps_i(h) \to 0$ as $h \to 0$. Then \[ f(a + h) = f(a) + \alpha(h) + \eps(h) \|h\| \] where \[ \alpha = \begin{pmatrix} Df_1|_a \\ \vdots \\ Df_m|_a \end{pmatrix} : \RR^n \to \RR^m \] is linear and \[ \|\eps(h)\| = \left\| \begin{pmatrix} \eps_1(h) \\ \vdots \\ \eps_m(h) \end{pmatrix} \right\| = \sqrt{\sum_{i = 1}^m \eps_i(h)^2} \to 0 \] as $h \to 0$. \end{enumerate} \item Write \[ f(a + h) = f(a) + Df|_a(h) + \eps(h)\|h\| \] where $\eps(h) \to 0$ as $h \to 0$. Let $e_1, \dots, e_n$ be the standard basis of $\RR^n$. Then \[ \frac{f(a + ke_j) - f(a)}{k} = \frac{Df|_a(ke_j) + \eps(ke_j) \|ke_j\|}{k} = Df|_a(e_j) + \frac{|k|}{k} \eps(ke_j) \to Df|_a(e_j) \] as $k \to 0$ (since $\left| \frac{|k|}{k} \right| = 1$). So all partial derivatives of $f$ exist at $a$ and $D_jf(a) = Df|_a(e_j)$. \qedhere \end{enumerate} \end{proof} \begin{definition*} The matrix $A$ in (b) is called the \emph{Jacobian} matrix of $f$ at $a$. 
\end{definition*} \begin{hiddenflashcard}[JacobianMatrix] What is the Jacobian matrix? \[ A_{ij} = \cloze{D_jf_i(a)} \] \end{hiddenflashcard} \begin{theorem}[The Chain Rule] Let $f : \RR^p \to \RR^n$ be differentiable at $a \in \RR^p$, and let $g : \RR^n \to \RR^m$ be differentiable at $f(a) \in \RR^n$. Then $g \circ f$ is differentiable at $a$ with \[ D(g \circ f)|_a = Dg|_{f(a)} \circ Df|_a \] \end{theorem} \begin{remark*} In principle this should be obvious: if $f$ is approximately linear near $a$ and $g$ is approximately linear near $f(a)$, then $g \circ f$ is approximately linear near $a$, and the linear approximation to $g \circ f$ near $a$ is the obvious composite. \\ Proof looks a bit messy - calculation to make sure error terms behave. \end{remark*} \begin{proof} Write \[ f(a + h) = f(a) + \alpha(h) + \eps(h) \|h\| \] and \[ g(f(a) + k) = g(f(a)) + \beta(k) + \eta(k) \|k\| \] where $\alpha = Df|_a$, $\beta = Dg|_{f(a)}$ are linear, $\eps(h) \to 0$ as $h \to 0$ and $\eta(k) \to 0$ as $k \to 0$. Now: \begin{align*} g(f(a + h)) &= g(f(a) + \alpha(h) + \eps(h)\|h\|) \\ &= g(f(a)) + \beta(\alpha(h) + \eps(h)\|h\|) + \eta(\alpha(h) + \eps(h)\|h\|) \|\alpha(h) + \eps(h) \|h\| \| \\ &= g(f(a)) + \ub{\beta(\alpha(h))}_{\text{linear}} + \ub{\zeta(h) \|h\|}_{\text{small}} \end{align*} where \[ \zeta(h) = \beta(\eps(h)) + \eta(\alpha(h) + \eps(h) \|h\|) \left\| \frac{\alpha(h)}{\|h\|} + \eps(h) \right\| \] Now, $\eps(h) \to 0$ as $h \to 0$ and $\beta$ linear, so continuous, so $\beta(\eps(h)) \to \beta(0) = 0$ as $h \to 0$. Next, $\alpha$ linear so continuous so $\alpha(h) \to \alpha(0) = 0$ as $h \to 0$. And $\eps(h) \|h\| \to 0 \times 0 = 0$ as $h \to 0$. So $\alpha(h) + \eps(h) \|h\| \to 0$ as $h \to 0$. WLOG $\eta(0) = 0$ so $\eta$ continuous at $0$. Then $\eta(\alpha(h) + \eps(h) \|h\|) \to 0$ as $h \to 0$. 
Finally, \begin{align*} \left\| \frac{\alpha(h)}{\|h\|} + \eps(h) \right\| &\le \frac{\|\alpha(h)\|}{\|h\|} + \|\eps(h)\| \\ &\le \frac{\|\alpha\|\|h\|}{\|h\|} + \|\eps(h)\| \\ &= \|\alpha\| + \|\eps(h)\| \\ &\to \|\alpha\| \end{align*} as $h \to 0$. Hence $\zeta(h) \to 0$ as $h \to 0$. \end{proof} \subsubsection*{Examples} \begin{enumerate}[(1)] \item Suppose $f$ is constant. Then $f(a + h) = f(a) + 0 + 0\|h\|$, so $f$ is everywhere differentiable with derivative the zero map. \item Suppose $f$ is linear. Then \[ f(a + h) = f(a) + f(h) + 0\|h\| \] so $f$ everywhere differentiable with $Df|_a = f$ for all $a$. \item Suppose $f : \RR \to \RR^m$. As remarked earlier for $a \in \RR$, $f$ is differentiable in old sense at $a$ if and only if it is differentiable in new sense, in which case $Df|_a(h) = hf'(a)$. \item Using the above together with Chain Rule, get lots of differentiable functions, for example \[ f : \RR^2 \to \RR^2, \qquad f\left( \begin{pmatrix} x \\ y \end{pmatrix} \right) = \begin{pmatrix} e^{x + y} \\ \cos(xy) \end{pmatrix} \] is differentiable. Why? The projection maps $\pi_1, \pi_2 : \RR^2 \to \RR$, $\pi_1(x, y) = x, \pi_2(x, y) = y$ are linear so differentiable. So by Chain Rule: \[ f_1(z) = e^{\pi_1(z) + \pi_2(z)}, \qquad f_2(z) = \cos(\pi_1(z)\pi_2(z)) \] are differentiable. So by Proposition 5(a), $f$ is differentiable. \\ What is derivative of $f$ at $z = (x, y)$? It's some linear map $\RR^2 \to \RR^2$. By Proposition 5(b), the matrix of the derivative is given by the partial derivatives: \[ Df|_{(x, y)} = \begin{pmatrix} e^{x + y} & e^{x + y} \\ -y\sin xy & -x \sin xy \end{pmatrix} \] \item Let $\mathcal{M}_n$ be the vector space of $n \times n$ real matrices. So $\mathcal{M}_n \sim \RR^{n^2}$ so can consider differentiability of $f : \mathcal{M}_n \to \mathcal{M}_n$. Recall that the definition still same if we replace the Euclidean norm by the operator norm, so write $\|\bullet\|$ for operator norm on $\mathcal{M}_n$. 
Define $f : \mathcal{M}_n \to \mathcal{M}_n$ by $f(A) = A^2$. Then: \[ f(A + H) = (A + H)^2 = \ub{A^2}_{f(A)} + \ub{AH + HA}_{\text{linear}} + \ub{H^2}_{\text{higher order}} \] where \[ \left\| \frac{H^2}{\|H\|} \right\| \le \frac{\|H\|^2}{\|H\|} = \|H\| \to 0 \] as $H \to 0$. So $f$ everywhere differentiable and \[ Df|_A(H) = AH + HA \] \item We have $\det : \mathcal{M}_n \to \RR$. We have: \begin{align*} \det(I + H) &= \begin{vmatrix} 1 + H_{11} & H_{12} & \cdots & H_{1n} \\ H_{21} & 1 + H_{22} & \cdots & H_{2n} \\ \vdots & \vdots & \ddots & \vdots \\ H_{n1} & H_{n2} & \cdots & 1 + H_{nn} \end{vmatrix} \\ &= \ub{1}_{\det I} + \ub{\Trace(H)}_{\text{linear in $H$}} + \ub{\text{other terms involving two or more $H_{ij}$ multiplied together.}}_{\text{higher order}} \end{align*} Note \[ \left| \frac{H_{ij} H_{kl}}{\|H\|_2} \right| \le |H_{kl}| \to 0 \] as $H \to 0$. ($\|H\|_2$ is the Euclidean norm). So $\det$ is differentiable at $I$ with $D\det|_I(H) = \Trace(H)$. Suppose $A \in \mathcal{M}_n$ invertible. Then \begin{align*} \det(A + H) &= \det(A) \det(I + A^{-1}H) \\ &= \det A (1 + \Trace(A^{-1} H) + \eps(A^{-1}H) \|A^{-1}H\|) \\ &= \det A + (\det A)(\Trace A^{-1} H) + (\det A) \eps(A^{-1} H) \|A^{-1} H\| \end{align*} where $\eps(K) \to 0$ as $K \to 0$. And \begin{align*} \left| \frac{(\det A) \eps(A^{-1}H) \|A^{-1}H\|}{\|H\|} \right| &\le |\det A| \, |\eps(A^{-1}H)| \, \|A^{-1}\| \\ &\to 0 \end{align*} as $H \to 0$. So $\det$ differentiable at $A$ with $D\det|_A(H) = (\Trace A^{-1} H) (\det A)$. \end{enumerate}