% vim: tw=50
% 02/03/2023 11AM

\subsubsection*{Multivariate Normal Theory}
Recall: if $X$ is a random vector, then
\begin{align*}
\EE[AX + b] &= A\EE X + b \\
\Var(AX + b) &= A\Var(X) A^\top
\end{align*}

\begin{flashcard}[multivariate-normal-defn]
\begin{definition*}
We say $X$ has a multivariate normal (MVN) distribution if \cloze{for any $t \in \RR^n$, $t^\top X$ is normal.}
\end{definition*}
\end{flashcard}

\begin{proposition*}
If $X$ is MVN then $AX + b$ is MVN.
\end{proposition*}

\begin{proof}
Say $AX + b$ is in $\RR^m$. Take $t \in \RR^m$. Then
\[ t^\top (AX + b) = (A^\top t)^\top X + t^\top b \]
Since $X$ is MVN, $(A^\top t)^\top X$ is normal, and since $t^\top b$ is a constant, $t^\top (AX + b)$ is normal.
\end{proof}

\begin{proposition*}
An MVN distribution is fully specified by its mean and variance.
\end{proposition*}

\begin{proof}
Take $X_1$, $X_2$ both MVN with mean $\mu$ and variance $\Sigma$. We'll show that their mgf's are equal, hence $X_1$ and $X_2$ have the same distribution. For any $t$,
\begin{align*}
\EE e^{1 \cdot t^\top X_1}
&= M_{t^\top X_1}(1) &&\text{$t^\top X_1$ is Normal} \\
&= \exp \left( 1 \cdot \EE(t^\top X_1) + \half \Var(t^\top X_1) \cdot 1^2 \right) \\
&= \exp \left( t^\top \mu + \half t^\top \Sigma t \right)
\end{align*}
This depends only on $\mu$, $\Sigma$, so it is the same for $X_1$ and $X_2$.
\end{proof}

\subsubsection*{Orthogonal projections}

\begin{flashcard}[orthogonal-projection]
\begin{definition*}
\begin{enumerate}[(1)]
\item We say $P \in \RR^{n \times n}$ is an \emph{orthogonal projection} if it is:
\begin{itemize}
\item \cloze{Idempotent: $PP = P$.}
\item \cloze{Symmetric: $P^\top = P$.}
\end{itemize}
\item Or equivalently, $P \in \RR^{n \times n}$ is an \emph{orthogonal projection} if \cloze{for any $v \in \col(P)$, $Pv = v$, and for any $w \in \col(P)^\perp$, $Pw = 0$.}
\end{enumerate}
\end{definition*}
\end{flashcard}

\begin{proposition*}
(1) and (2) are equivalent.
\end{proposition*}

\begin{proof}
\begin{itemize}
\item[$(1) \implies (2)$] Take $v \in \col(P)$, so $v = Pa$ for some $a \in \RR^n$. Then
\[ Pv = PPa = Pa = v \]
Take $w \in \col(P)^\perp$. Then $P^\top w = 0$, since $w$ is orthogonal to every column of $P$. Hence
\[ Pw = P^\top w = 0 \]
\item[$(2) \implies (1)$] We can write any $a \in \RR^n$ uniquely as $a = v + w$ with $v \in \col(P)$, $w \in \col(P)^\perp$. Then $Pa = Pv + Pw = v$, so
\[ P^2 a = Pv = v = Pa \]
As $a$ was arbitrary, $P^2 = P$. For symmetry, take $u_1, u_2 \in \RR^n$. Then
\[ \ub{(P u_1)}_{\in \col(P)}^\top \ub{((I - P)u_2)}_{\in \col(P)^\perp} = 0 \]
so $u_1^\top (P^\top - P^\top P) u_2 = 0$. Since this holds for all $u_1, u_2 \in \RR^n$, $P^\top = P^\top P$. But $P^\top P$ is symmetric, hence $P^\top$ is symmetric, hence $P$ is symmetric. \qedhere
\end{itemize}
\end{proof}

\begin{corollary*}
If $P$ is an orthogonal projection, then so is $I - P$.
\end{corollary*}

\begin{proof}
\[ (I - P)^\top = I - P^\top = I - P \]
and
\[ (I - P)(I - P) = I - 2P + PP = I - P \qedhere \]
\end{proof}

\begin{proposition*}
If $P \in \RR^{n \times n}$ is an orthogonal projection then
\[ P = U U^\top \]
where the columns of $U$ form an orthonormal basis for $\col(P)$. (If $k = \rank(P)$, then $U \in \RR^{n \times k}$.)
\end{proposition*}

\begin{proof}
$UU^\top$ is clearly symmetric and also idempotent:
\[ U\ub{U^\top U}_{I_k} U^\top = UU^\top \]
So $UU^\top$ is an orthogonal projection. To show it is equal to $P$, note $\col(P) = \col(UU^\top)$ by construction, and by characterisation (2) an orthogonal projection is determined by its column space.
\end{proof}

\begin{corollary*}
\[ k = \rank(P) = \Trace(\ub{U^\top U}_{I_k}) = \Trace(UU^\top) = \Trace(P) \]
\end{corollary*}
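\myskip \noindent \textbf{Example.} (A quick sanity check; this example is not from the lecture.) Take $U = u = \frac{1}{\sqrt{5}} \begin{pmatrix} 1 \\ 2 \end{pmatrix}$, so
\[ P = uu^\top = \frac{1}{5} \begin{pmatrix} 1 & 2 \\ 2 & 4 \end{pmatrix} \]
Then $P^\top = P$ and
\[ P^2 = \frac{1}{25} \begin{pmatrix} 1 & 2 \\ 2 & 4 \end{pmatrix} \begin{pmatrix} 1 & 2 \\ 2 & 4 \end{pmatrix} = \frac{1}{25} \begin{pmatrix} 5 & 10 \\ 10 & 20 \end{pmatrix} = P \]
so $P$ is the orthogonal projection onto $\Span(u)$, and indeed $\Trace(P) = \frac{1}{5}(1 + 4) = 1 = \rank(P)$.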
\begin{theorem*}
If $X$ is MVN, $X \sim \normaldist(0, \sigma^2 I)$, and $P$ is an orthogonal projection, then
\begin{enumerate}[(1)]
\item $PX \sim \normaldist(0, \sigma^2 P)$, $(I - P) X \sim \normaldist(0, \sigma^2 (I - P))$, and $PX$, $(I - P)X$ are independent.
\item $\frac{\|PX\|^2}{\sigma^2} \sim \chi_{\rank(P)}^2$
\end{enumerate}
\end{theorem*}

\begin{proof}
The vector
\[ \begin{pmatrix} P \\ I - P \end{pmatrix} X \]
is MVN, because it is a linear function of $X$. The distribution is specified by the mean and variance:
\[ \EE \begin{pmatrix} PX \\ (I - P)X \end{pmatrix} = \begin{pmatrix} P \\ I - P \end{pmatrix} \EE X = 0 \]
and
\begin{align*}
\Var \begin{pmatrix} PX \\ (I - P)X \end{pmatrix}
&= \begin{pmatrix} P \\ I - P \end{pmatrix} \Var(X) \begin{pmatrix} P \\ I - P \end{pmatrix}^\top \\
&= \begin{pmatrix} P \\ I - P \end{pmatrix} \sigma^2 I \begin{pmatrix} P \\ I - P \end{pmatrix}^\top \\
&= \sigma^2 \begin{bmatrix} P & \cancel{P(I - P)} \\ \cancel{(I - P)P} & I - P \end{bmatrix}
\end{align*}
where the off-diagonal blocks vanish since $P(I - P) = P - P^2 = 0$. Let $Z \sim \normaldist (0, \sigma^2 P)$, $Z' \sim \normaldist(0, \sigma^2 (I - P))$ with $Z$, $Z'$ independent. Then
\[ \begin{pmatrix} Z \\ Z' \end{pmatrix} \sim \normaldist \left( 0, \sigma^2 \begin{bmatrix} P & 0 \\ 0 & I - P \end{bmatrix} \right) \]
So
\[ \begin{pmatrix} PX \\ (I - P)X \end{pmatrix} \stackrel{d}{=} \begin{pmatrix} Z \\ Z' \end{pmatrix} \]
hence $PX$, $(I - P)X$ are independent. This proves (1).

\myskip
For (2), write $P = UU^\top$ where the columns of $U$ form an orthonormal basis for $\col(P)$. Then
\[ \frac{\|PX\|^2}{\sigma^2} = \frac{(PX)^\top PX}{\sigma^2} = \frac{X^\top (UU^\top)^\top UU^\top X}{\sigma^2} = \frac{X^\top UU^\top X}{\sigma^2} \]
using $U^\top U = I_k$. Hence
\[ \frac{\|PX\|^2}{\sigma^2} = \frac{\|U^\top X\|^2}{\sigma^2} = \sum_{i = 1}^{\rank(P)} \frac{(U^\top X)_i^2}{\sigma^2} \]
But $U^\top X$ is MVN with mean $0$ and
\[ \Var(U^\top X) = U^\top \Var(X) U = \sigma^2 U^\top U = \sigma^2 I \]
so $U^\top X \sim \normaldist(0, \sigma^2 I)$. Therefore $(U^\top X)_i$, $i = 1, \ldots, \rank(P)$ are IID $\normaldist(0, \sigma^2)$, i.e.
\[ \frac{(U^\top X)_i}{\sigma} \iidsim \normaldist(0, 1) \]
Hence $\frac{\|PX\|^2}{\sigma^2}$ is the sum of $\rank(P)$ squared independent $\normaldist(0, 1)$ variables, i.e.\ $\chi_{\rank(P)}^2$.
\end{proof}
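\myskip \noindent \textbf{Example.} (Continuing the illustrative example above; not from the lecture.) With $P = \frac{1}{5} \begin{pmatrix} 1 & 2 \\ 2 & 4 \end{pmatrix}$ and $X \sim \normaldist(0, \sigma^2 I_2)$, the theorem gives $PX \ci (I - P)X$ and
\[ \frac{\|PX\|^2}{\sigma^2} = \frac{(X_1 + 2X_2)^2}{5\sigma^2} \sim \chi_1^2 \]
which can also be checked directly: $X_1 + 2X_2 \sim \normaldist(0, 5\sigma^2)$, so $\frac{X_1 + 2X_2}{\sqrt{5}\sigma} \sim \normaldist(0, 1)$.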
\subsubsection*{Application}
$X_1, \ldots, X_n \iidsim \normaldist(\mu, \sigma^2)$, with both $\mu$ and $\sigma^2$ unknown. Recall that the mle for $\mu$ is $\ol{X} = \frac{1}{n} \sum X_i$, and the mle for $\sigma^2$ is $\hat{\sigma}^2 = \frac{S_{XX}}{n}$, where $S_{XX} = \sum_i (X_i - \ol{X})^2$.

\begin{theorem*}
\begin{enumerate}[(i)]
\item $\ol{X} \sim \normaldist(\mu, \sigma^2 / n)$
\item $\frac{S_{XX}}{\sigma^2} \sim \chi_{n - 1}^2$
\item $\ol{X}$, $S_{XX}$ are independent.
\end{enumerate}
\end{theorem*}

\begin{proof}
Let $\mathbf{1} = (1, \ldots, 1)^\top \in \RR^n$ and let $P = \frac{1}{n} \mathbf{1} \mathbf{1}^\top$ be the orthogonal projection onto $\Span(\mathbf{1})$. It is easy to check that $P = P^\top = P^2$. We can write
\[ X = \begin{pmatrix} X_1 \\ X_2 \\ \vdots \\ X_n \end{pmatrix} = \mu \mathbf{1} + \eps \]
where $\eps \sim \normaldist (0, \sigma^2 I)$. Note:
\begin{itemize}
\item $\ol{X}$ is a function of $PX$: since $P\mathbf{1} = \mathbf{1}$,
\[ PX = \mu\mathbf{1} + P\eps \]
and $\ol{X} = (PX)_1$. In particular, $\ol{X}$ is a function of $P\eps$.
\item Since $(I - P)\mathbf{1} = 0$,
\begin{align*}
S_{XX} &= \sum_i (X_i - \ol{X})^2 \\
&= \|X - \mathbf{1}\ol{X}\|^2 \\
&= \|(I - P)X\|^2 \\
&= \|(I - P)\eps\|^2
\end{align*}
so $S_{XX}$ is a function of $(I - P)\eps$.
\end{itemize}
By the previous theorem, $P\eps \ci (I - P)\eps$, hence $\ol{X} \ci S_{XX}$. Part (i) was shown before. Also,
\[ \frac{S_{XX}}{\sigma^2} = \frac{\|(I - P)\eps\|^2}{\sigma^2} \sim \chi_{\ub{\Trace(I - P)}_{n - 1}}^2 \qedhere \]
\end{proof}
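\myskip \noindent \textbf{Example.} (Worked check for $n = 2$; not from the lecture.) Here $P = \frac{1}{2} \begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$, $\ol{X} = \frac{X_1 + X_2}{2}$ and
\[ S_{XX} = \left( \frac{X_1 - X_2}{2} \right)^2 + \left( \frac{X_2 - X_1}{2} \right)^2 = \frac{(X_1 - X_2)^2}{2} \]
Since $X_1 - X_2 \sim \normaldist(0, 2\sigma^2)$, we get $\frac{S_{XX}}{\sigma^2} = \left( \frac{X_1 - X_2}{\sqrt{2}\sigma} \right)^2 \sim \chi_1^2$, matching (ii) with $n - 1 = 1$. Also $X_1 + X_2$ and $X_1 - X_2$ are jointly normal and uncorrelated (as $\Var(X_1) = \Var(X_2)$), hence independent, so $\ol{X} \ci S_{XX}$, matching (iii).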