% vim: tw=50
% 02/03/2023 11AM

\subsubsection*{Multivariate Normal Theory}
Recall: if $X$ is a random vector, then
\begin{align*}
\EE[AX + b] &= A\EE X + b \\
\Var(AX + b) &= A\Var(X) A^\top
\end{align*}

\begin{flashcard}[multivariate-normal-defn]
\begin{definition*}
We say $X$ has a multivariate normal (MVN) distribution if \cloze{for any $t \in \RR^n$, $t^\top X$ is normal.}
\end{definition*}
\end{flashcard}

\begin{proposition*}
If $X$ is MVN then $AX + b$ is MVN.
\end{proposition*}

\begin{proof}
Say $AX + b$ is in $\RR^m$. Take $t \in \RR^m$. Then
\[ t^\top (AX + b) = (A^\top t)^\top X + t^\top b \]
Since $X$ is MVN, $(A^\top t)^\top X$ is normal, and since $t^\top b$ is a constant, $t^\top (AX + b)$ is normal.
\end{proof}

\begin{proposition*}
An MVN distribution is fully specified by its mean and variance.
\end{proposition*}

\begin{proof}
Take $X_1$, $X_2$ both MVN with mean $\mu$ and variance $\Sigma$. We'll show that their mgf's are equal, hence $X_1$ and $X_2$ have the same distribution. For any $t$,
\begin{align*}
\EE e^{1 \cdot t^\top X_1}
&= M_{t^\top X_1}(1) &&\text{$t^\top X_1$ is Normal} \\
&= \exp \left( 1 \cdot \EE(t^\top X_1) + \half \Var(t^\top X_1) \cdot 1^2 \right) \\
&= \exp \left( t^\top \mu + \half t^\top \Sigma t \right)
\end{align*}
This depends only on $\mu$, $\Sigma$, so it is the same for $X_1$ and $X_2$.
\end{proof}

\subsubsection*{Orthogonal projections}

\begin{flashcard}[orthogonal-projection]
\begin{definition*}
\begin{enumerate}[(1)]
\item We say $P \in \RR^{n \times n}$ is an \emph{orthogonal projection} if it is:
\begin{itemize}
\item \cloze{Idempotent: $PP = P$.}
\item \cloze{Symmetric: $P^\top = P$.}
\end{itemize}
\item Or equivalently, $P \in \RR^{n \times n}$ is an \emph{orthogonal projection} if \cloze{for any $v \in \col(P)$, $Pv = v$, and for any $w \in \col(P)^\perp$, $Pw = 0$.}
\end{enumerate}
\end{definition*}
\end{flashcard}

\begin{proposition*}
(1) and (2) are equivalent.
\end{proposition*}

\begin{proof}
\begin{itemize}
\item[$(1) \implies (2)$] Take $v \in \col(P)$, so $v = Pa$ for some $a \in \RR^n$. Then
\[ Pv = PPa = Pa = v \]
Take $w \in \col(P)^\perp$. Then $P^\top w = 0$, since $w$ is orthogonal to every column of $P$. Hence
\[ Pw = P^\top w = 0 \]
\item[$(2) \implies (1)$] We can write any $a \in \RR^n$ uniquely as $a = v + w$ with $v \in \col(P)$, $w \in \col(P)^\perp$. Then $Pa = Pv + Pw = v$, so
\[ P^2 a = Pv = v = Pa \]
As $a$ was arbitrary, $P^2 = P$. For symmetry, take $u_1, u_2 \in \RR^n$. Then
\[ \ub{(P u_1)}_{\in \col(P)}^\top \ub{((I - P)u_2)}_{\in \col(P)^\perp} = 0 \]
so $u_1^\top (P^\top - P^\top P) u_2 = 0$. Since this holds for all $u_1, u_2 \in \RR^n$, $P^\top = P^\top P$. But $P^\top P$ is symmetric, hence $P^\top$ is symmetric, hence $P$ is symmetric. \qedhere
\end{itemize}
\end{proof}

\begin{corollary*}
If $P$ is an orthogonal projection, then so is $I - P$.
\end{corollary*}

\begin{proof}
\[ (I - P)^\top = I - P^\top = I - P \]
and
\[ (I - P)(I - P) = I - 2P + PP = I - P \qedhere \]
\end{proof}

\begin{proposition*}
If $P \in \RR^{n \times n}$ is an orthogonal projection then
\[ P = U U^\top \]
where the columns of $U$ form an orthonormal basis for $\col(P)$. (If $k = \rank(P)$, then $U \in \RR^{n \times k}$.)
\end{proposition*}

\begin{proof}
$UU^\top$ is clearly symmetric and also idempotent:
\[ U\ub{U^\top U}_{I_k} U^\top = UU^\top \]
So $UU^\top$ is an orthogonal projection. To show it is equal to $P$, note $\col(P) = \col(UU^\top)$ by construction, and by characterisation (2) an orthogonal projection is determined by its column space.
\end{proof}

\begin{corollary*}
\[ k = \rank(P) = \Trace(\ub{U^\top U}_{I_k}) = \Trace(UU^\top) = \Trace(P) \]
\end{corollary*}
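\myskip \noindent \textbf{Example.} (A quick sanity check; this example is not from the lecture.) Take $U = u = \frac{1}{\sqrt{5}} \begin{pmatrix} 1 \\ 2 \end{pmatrix}$, so
\[ P = uu^\top = \frac{1}{5} \begin{pmatrix} 1 & 2 \\ 2 & 4 \end{pmatrix} \]
Then $P^\top = P$ and
\[ P^2 = \frac{1}{25} \begin{pmatrix} 1 & 2 \\ 2 & 4 \end{pmatrix} \begin{pmatrix} 1 & 2 \\ 2 & 4 \end{pmatrix} = \frac{1}{25} \begin{pmatrix} 5 & 10 \\ 10 & 20 \end{pmatrix} = P \]
so $P$ is the orthogonal projection onto $\Span(u)$, and indeed $\Trace(P) = \frac{1}{5}(1 + 4) = 1 = \rank(P)$.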
\begin{theorem*}
If $X$ is MVN, $X \sim \normaldist(0, \sigma^2 I)$, and $P$ is an orthogonal projection, then
\begin{enumerate}[(1)]
\item $PX \sim \normaldist(0, \sigma^2 P)$, $(I - P) X \sim \normaldist(0, \sigma^2 (I - P))$, and $PX$, $(I - P)X$ are independent.
\item $\frac{\|PX\|^2}{\sigma^2} \sim \chi_{\rank(P)}^2$
\end{enumerate}
\end{theorem*}

\begin{proof}
The vector
\[ \begin{pmatrix} P \\ I - P \end{pmatrix} X \]
is MVN, because it is a linear function of $X$. The distribution is specified by the mean and variance:
\[ \EE \begin{pmatrix} PX \\ (I - P)X \end{pmatrix} = \begin{pmatrix} P \\ I - P \end{pmatrix} \EE X = 0 \]
and
\begin{align*}
\Var \begin{pmatrix} PX \\ (I - P)X \end{pmatrix}
&= \begin{pmatrix} P \\ I - P \end{pmatrix} \Var(X) \begin{pmatrix} P \\ I - P \end{pmatrix}^\top \\
&= \begin{pmatrix} P \\ I - P \end{pmatrix} \sigma^2 I \begin{pmatrix} P \\ I - P \end{pmatrix}^\top \\
&= \sigma^2 \begin{bmatrix} P & \cancel{P(I - P)} \\ \cancel{(I - P)P} & I - P \end{bmatrix}
\end{align*}
where the off-diagonal blocks vanish since $P(I - P) = P - P^2 = 0$. Let $Z \sim \normaldist (0, \sigma^2 P)$, $Z' \sim \normaldist(0, \sigma^2 (I - P))$ with $Z$, $Z'$ independent. Then
\[ \begin{pmatrix} Z \\ Z' \end{pmatrix} \sim \normaldist \left( 0, \sigma^2 \begin{bmatrix} P & 0 \\ 0 & I - P \end{bmatrix} \right) \]
So
\[ \begin{pmatrix} PX \\ (I - P)X \end{pmatrix} \stackrel{d}{=} \begin{pmatrix} Z \\ Z' \end{pmatrix} \]
hence $PX$, $(I - P)X$ are independent. This proves (1).

\myskip
For (2), write $P = UU^\top$ where the columns of $U$ form an orthonormal basis for $\col(P)$. Then
\[ \frac{\|PX\|^2}{\sigma^2} = \frac{(PX)^\top PX}{\sigma^2} = \frac{X^\top (UU^\top)^\top UU^\top X}{\sigma^2} = \frac{X^\top UU^\top X}{\sigma^2} \]
using $U^\top U = I_k$. Hence
\[ \frac{\|PX\|^2}{\sigma^2} = \frac{\|U^\top X\|^2}{\sigma^2} = \sum_{i = 1}^{\rank(P)} \frac{(U^\top X)_i^2}{\sigma^2} \]
But $U^\top X$ is MVN with mean $0$ and
\[ \Var(U^\top X) = U^\top \Var(X) U = \sigma^2 U^\top U = \sigma^2 I \]
so $U^\top X \sim \normaldist(0, \sigma^2 I)$. Therefore $(U^\top X)_i$, $i = 1, \ldots, \rank(P)$ are IID $\normaldist(0, \sigma^2)$, i.e.
\[ \frac{(U^\top X)_i}{\sigma} \iidsim \normaldist(0, 1) \]
Hence $\frac{\|PX\|^2}{\sigma^2}$ is the sum of $\rank(P)$ squared independent $\normaldist(0, 1)$ variables, i.e.\ $\chi_{\rank(P)}^2$.
\end{proof}
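\myskip \noindent \textbf{Example.} (Continuing the illustrative example above; not from the lecture.) With $P = \frac{1}{5} \begin{pmatrix} 1 & 2 \\ 2 & 4 \end{pmatrix}$ and $X \sim \normaldist(0, \sigma^2 I_2)$, the theorem gives $PX \ci (I - P)X$ and
\[ \frac{\|PX\|^2}{\sigma^2} = \frac{(X_1 + 2X_2)^2}{5\sigma^2} \sim \chi_1^2 \]
which can also be checked directly: $X_1 + 2X_2 \sim \normaldist(0, 5\sigma^2)$, so $\frac{X_1 + 2X_2}{\sqrt{5}\sigma} \sim \normaldist(0, 1)$.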
\subsubsection*{Application}
$X_1, \ldots, X_n \iidsim \normaldist(\mu, \sigma^2)$, with both $\mu$ and $\sigma^2$ unknown. Recall that the mle for $\mu$ is $\ol{X} = \frac{1}{n} \sum X_i$, and the mle for $\sigma^2$ is $\hat{\sigma}^2 = \frac{S_{XX}}{n}$, where $S_{XX} = \sum_i (X_i - \ol{X})^2$.

\begin{theorem*}
\begin{enumerate}[(i)]
\item $\ol{X} \sim \normaldist(\mu, \sigma^2 / n)$
\item $\frac{S_{XX}}{\sigma^2} \sim \chi_{n - 1}^2$
\item $\ol{X}$, $S_{XX}$ are independent.
\end{enumerate}
\end{theorem*}

\begin{proof}
Let $\mathbf{1} = (1, \ldots, 1)^\top \in \RR^n$ and let $P = \frac{1}{n} \mathbf{1} \mathbf{1}^\top$ be the orthogonal projection onto $\Span(\mathbf{1})$. It is easy to check that $P = P^\top = P^2$. We can write
\[ X = \begin{pmatrix} X_1 \\ X_2 \\ \vdots \\ X_n \end{pmatrix} = \mu \mathbf{1} + \eps \]
where $\eps \sim \normaldist (0, \sigma^2 I)$. Note:
\begin{itemize}
\item $\ol{X}$ is a function of $PX$: since $P\mathbf{1} = \mathbf{1}$,
\[ PX = \mu\mathbf{1} + P\eps \]
and $\ol{X} = (PX)_1$. In particular, $\ol{X}$ is a function of $P\eps$.
\item Since $(I - P)\mathbf{1} = 0$,
\begin{align*}
S_{XX} &= \sum_i (X_i - \ol{X})^2 \\
&= \|X - \mathbf{1}\ol{X}\|^2 \\
&= \|(I - P)X\|^2 \\
&= \|(I - P)\eps\|^2
\end{align*}
so $S_{XX}$ is a function of $(I - P)\eps$.
\end{itemize}
By the previous theorem, $P\eps \ci (I - P)\eps$, hence $\ol{X} \ci S_{XX}$. Part (i) was shown before. Also,
\[ \frac{S_{XX}}{\sigma^2} = \frac{\|(I - P)\eps\|^2}{\sigma^2} \sim \chi_{\ub{\Trace(I - P)}_{n - 1}}^2 \qedhere \]
\end{proof}
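\myskip \noindent \textbf{Example.} (Worked check for $n = 2$; not from the lecture.) Here $P = \frac{1}{2} \begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$, $\ol{X} = \frac{X_1 + X_2}{2}$ and
\[ S_{XX} = \left( \frac{X_1 - X_2}{2} \right)^2 + \left( \frac{X_2 - X_1}{2} \right)^2 = \frac{(X_1 - X_2)^2}{2} \]
Since $X_1 - X_2 \sim \normaldist(0, 2\sigma^2)$, we get $\frac{S_{XX}}{\sigma^2} = \left( \frac{X_1 - X_2}{\sqrt{2}\sigma} \right)^2 \sim \chi_1^2$, matching (ii) with $n - 1 = 1$. Also $X_1 + X_2$ and $X_1 - X_2$ are jointly normal and uncorrelated (as $\Var(X_1) = \Var(X_2)$), hence independent, so $\ol{X} \ci S_{XX}$, matching (iii).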