% vim: tw=50 % 18/02/2023 09AM \subsubsection*{Generalised Likelihood Ratio Tests} $X \sim f_X(\bullet \mid \theta)$, $H_0$: $\theta \in \Theta_0$, $H_1$: $\theta \in \Theta_1$. The generalised likelihood ratio statistic: \begin{flashcard}[generalised-likelihood-ratio-statistic] \prompt{Generalised likelihood ratio statistic? \\} \cloze{ \[ \Lambda_x (H_0; H_1) = \frac{\sup_{\theta \in \Theta_1} f_X(x \mid \theta)}{\sup_{\theta \in \Theta_0} f_X(x \mid \theta)} \]} \end{flashcard} Large values of $\Lambda_x$ indicate larger departure from $H_0$. \begin{example*} $X_1, \ldots, X_n \iidsim \normaldist(\mu, \sigma_0^2)$, $\sigma_0$ is known. Wish to test $H_0$: $\mu = \mu_0$, $H_1$: $\mu \neq \mu_0$ for fixed $\mu_0$. Here $\Theta_0 = \{\mu_0\}$, $\Theta_1 = \RR \setminus \{\mu_0\}$. The GLR is \[ \Lambda_x (H_0; H_1) = \frac{(2\pi \sigma_0^2)^{-n/2} \exp \left( -\frac{1}{2\sigma_0^2} \sum_i (x_i - \ol{x})^2 \right)} {(2\pi \sigma_0^2)^{-n/2} \exp \left( -\frac{1}{2\sigma_0^2} \sum_i (x_i - \mu_0)^2 \right)} \] Taking $2 \cdot \log$ of $\Lambda_x$ (monotone increasing transformation) \[ 2\log \Lambda_x = \frac{n}{\sigma_0^2} (\ol{x} - \mu_0)^2 \] The GLR test rejects $H_0$ when $\Lambda_x$ is large (or when $2\log \Lambda_x$ is large), i.e. when \[ \left| \sqrt{n} \frac{(\ol{x} - \mu_0)}{\sigma_0} \right| \] is large. (Under $H_0$, the expression in the modulus has a $\normaldist(0, 1)$ distribution). For a test of size $\alpha$, reject when \[ \left| \sqrt{n} \frac{(\ol{x} - \mu_0)}{\sigma_0} \right| > z_{\alpha/2} = \Phi^{-1} \left( 1 - \frac{\alpha}{2} \right) \] \begin{center} \includegraphics[width=0.3\linewidth] {images/5eca3248af6d11ed.png} \end{center} This is called a 2-sided test. \end{example*} \begin{note*} $2\log \Lambda_x = n\frac{(\ol{x} - \mu_0)^2}{\sigma_0^2} \sim \chi_1^2$ under $H_0$. 
\end{note*} \noindent We can also define the critical region of the GLR test as \[ \left\{ x : n \frac{(\ol{x} - \mu_0)^2}{\sigma_0^2} > \chi_1^2(\alpha) \right\} \] In general, we can approximate the distribution of $2\log \Lambda_x$ with a $\chi_1^2$ distribution when $n$ is large(!) \subsubsection*{Wilks' Theorem} Suppose $\theta$ is $k$-dimensional $\theta = (\theta_1, \ldots, \theta_k)$. The dimension of a hypothesis $H_0$: $\theta \in \Theta_0$ is the number of ``free parameters'' in $\Theta_0$. \begin{enumerate}[(1)] \item $\Theta_0 = \{\theta \in \RR^k : \theta_1 = \theta_2 = \cdots = \theta_p = 0\}$ for some $p < k$. Here $\dim(\Theta_0) = k - p$. \item Let $A \in \RR^{p \times k}$, $b \in \RR^p$, $p < k$. \[ \Theta_0 = \{\theta \in \RR^k : A\theta = b\} \] $\dim(\Theta_0) = k - p$ if rows of $A$ are linearly independent ($\Theta_0$ is a hyperplane). \item $\Theta_0 = \{\theta \in \RR^k : \theta_i = f_i(\phi), \phi \in \RR^p\}$, $p < k$. Here $\phi$ are the free parameters; $f_i$ need not be linear. Under regularity conditions $\dim(\Theta_0) = p$. \end{enumerate} \begin{flashcard}[wilks-theorem] \begin{theorem*}[Wilks' Theorem] \cloze{ Suppose $\Theta_0 \subset \Theta_1$ (``nested hypotheses'') \[ \dim(\Theta_1) - \dim(\Theta_0) = p \] If $X_1, \ldots, X_n$ are iid from $f_X(\bullet \mid \theta)$, then as $n \to \infty$, the limiting distribution of $2\log\Lambda_x$ under $H_0$ is $\chi_p^2$. That is, for any $\theta \in \Theta_0$, any $l > 0$, \[ \PP_\theta(2 \log \Lambda_x \le l) \stackrel{n \to \infty}{\longrightarrow} \PP(Z \le l) \] where $Z \sim \chi_p^2$. } \end{theorem*} \end{flashcard} \noindent How to use this? If we reject $H_0$ when $2\log\Lambda_x \ge \chi_p^2(\alpha)$ then when $n$ is large, the size of the test is $\approx \alpha$. (!!!) \begin{example*} In the two-sided normal mean test \[ \Theta_0 = \{\mu_0\}, \qquad \Theta_1 = \RR \setminus \{\mu_0\} \] we found $2\log \Lambda_x \sim \chi_1^2$. 
If we take $\Theta_1 = \RR$, the GLR statistic doesn't change, so $2\log\Lambda_x \sim \chi_1^2$. \[ \dim(\Theta_1) - \dim(\Theta_0) = 1 - 0 = 1 \] The prediction of Wilks' theorem is exact. \end{example*} \begin{proof} Wait for Part II Principles of Statistics :( \end{proof} \subsubsection*{Tests of goodness of fit} $X_1, \ldots, X_n$ are iid samples from a distribution on $\{1, 2, \ldots, k\}$. Let $p_i = \PP(X_1 = i)$, let $N_i$ be the number of observations equal to $i$. So, \[ \sum_{i = 1}^k p_i = 1, \qquad \sum_{i = 1}^k N_i = n \] Goodness of fit test: $H_0$: $p = \tilde{p}$ for some fixed distribution $\tilde{p}$ on $\{1, \ldots, k\}$. $H_1$: $p$ is \emph{any} distribution with $\sum_{i = 1}^k p_i = 1$, $p_i \ge 0$. \begin{example*} Mendel crossed $n = 556$ smooth yellow peas with wrinkled green peas. Each member of the progeny can have any combination of the 2 features: $SY$, $SG$, $WY$, $WG$. Let $(p_1, p_2, p_3, p_4)$ be the probabilities of each type, and $(N_1, \ldots, N_4)$ are the number of progeny of each type, $\sum N_i = n = 556$. \myskip Mendel's hypothesis: \[ H_0 : p = \left( \frac{9}{16}, \frac{3}{16}, \frac{3}{16}, \frac{1}{16} \right) \defeq \tilde{p} \] Is there any evidence in $N_1, \ldots, N_4$ to reject $H_0$? The model can be written $(N_1, \ldots, N_k) \sim \operatorname{Multinomial}(n; p_1, \ldots, p_k)$. Likelihood: $L(p) \propto p_1^{N_1} \cdots p_k^{N_k}$ \[ \implies l(p) = \text{const} + \sum_i N_i \log p_i \] We can test $H_0$ against $H_1$ using a GLR test: \[ 2\log\Lambda_x = 2 \left(\sup_{p \in \Theta_1} l(p) - \sup_{p \in \Theta_0} l(p)\right) \] Since $\Theta_0 = \{\tilde{p}\}$, $\sup_{p \in \Theta_0} l(p) = l(\tilde{p})$. In the alternative $p$ must satisfy $\sum p_i = 1$. \[ \sup_{p \in \Theta_1} l(p) = \sup_{p : \sum p_i = 1} \sum_i N_i \log p_i \] Use Lagrangian $\mathcal{L}(p, \lambda) = \sum_i N_i \log p_i - \lambda \left( \sum_i p_i - 1 \right)$. 
We find that $\hat{p}_i = \frac{N_i}{n}$ (the observed proportion of samples of type $i$). \begin{align*} 2\log\Lambda_x &= 2(l(\hat{p}) - l(\tilde{p})) \\ &= 2 \sum_i N_i \log \left( \frac{N_i}{n \cdot \tilde{p}_i} \right) \end{align*} Wilks' theorem tells us that $2\log\Lambda_x$ is approximately $\chi_p^2$ with \[ p = \dim(\Theta_1) - \dim(\Theta_0) = (k - 1) - 0 = k - 1 \] So we can reject $H_0$ with size $\approx \alpha$ when \[ 2\log\Lambda_x > \chi_{k - 1}^2 (\alpha) \] \end{example*}