% vim: tw=50
% 04/02/2023 09AM

\subsubsection*{Recipe for Confidence Intervals}
\begin{enumerate}[(1)]
\item Find some quantity $R(X, \theta)$ such that the $\PP_\theta$-distribution of $R(X, \theta)$ does not depend on $\theta$. This is called a \emph{pivot}. For example, if $X_1, \ldots, X_n \iidsim \normaldist(\mu, 1)$, then
\[ z = \sqrt{n}(\ol{X} - \mu) \sim \normaldist(0, 1) \qquad \forall \mu \]
\item Write down a probability statement about the pivot of the form
\[ \PP(c_1 \le R(X, \theta) \le c_2) = \gamma \]
by using the quantiles $c_1, c_2$ of the distribution of $R(X, \theta)$ (typically a $\normaldist(0, 1)$ or $\chi_n^2$ distribution).
\item Rearrange the inequalities to leave $\theta$ in the middle.
\end{enumerate}

\begin{proposition*}
If $T : \RR \to \RR$ is a monotone increasing function, and $(A(X), B(X))$ is a $100\gamma\%$ confidence interval for $\theta$, then $(T(A(X)), T(B(X)))$ is a $100\gamma\%$ confidence interval for $T(\theta)$.
\end{proposition*}

\begin{remark*}
When $\theta$ is a vector, we talk about confidence sets.
\end{remark*}

\begin{example*}
$X_1, \ldots, X_n \iidsim \normaldist(0, \sigma^2)$. Find a $95\%$ confidence interval for $\sigma^2$.
\begin{enumerate}[(1)]
\item Note that $\frac{X_i}{\sigma} \sim \normaldist(0, 1)$
\[ \implies \sum_{i = 1}^n \frac{X_i^2}{\sigma^2} \sim \chi_n^2 \]
Hence $R(X, \sigma^2) = \sum_i \frac{X_i^2}{\sigma^2}$ is a pivot.
\item Let $c_1 = F_{\chi_n^2}^{-1}(0.025)$, $c_2 = F_{\chi_n^2}^{-1}(0.975)$. Then
\[ \PP\left(c_1 \le \frac{1}{\sigma^2} \sum_i X_i^2 \le c_2\right) = 0.95 \]
\item Rearranging:
\[ \PP\left( \frac{\sum X_i^2}{c_2} \le \sigma^2 \le \frac{\sum X_i^2}{c_1} \right) = 0.95 \]
Hence $\left[ \frac{\sum X_i^2}{c_2}, \frac{\sum X_i^2}{c_1} \right]$ is a $95\%$ confidence interval for $\sigma^2$.
\end{enumerate}
Hence, using the proposition above with $T(x) = \sqrt{x}$, $\left[ \sqrt{\frac{\sum X_i^2}{c_2}}, \sqrt{\frac{\sum X_i^2}{c_1}} \right]$ is a $95\%$ confidence interval for $\sigma$.
\end{example*}

\begin{example*}
$X_1, \ldots, X_n \iidsim \Ber(p)$, $n$ is large. Find an approximate $95\%$ confidence interval for $p$.
\begin{enumerate}[(1)]
\item The mle for $p$ is $\hat{p} = \frac{1}{n} \sum_{i = 1}^n X_i$. By the Central Limit Theorem, when $n$ is large, $\hat{p}$ is approximately $\normaldist \left( p, \frac{p(1 - p)}{n} \right)$. Therefore $\frac{\sqrt{n}(\hat{p} - p)}{\sqrt{p(1 - p)}}$ is approximately $\normaldist(0, 1)$.
\item Let $z = \Phi^{-1}(0.975)$. Then
\[ \PP \left( -z \le \frac{\sqrt{n}(\hat{p} - p)}{\sqrt{p(1 - p)}} \le z \right) \approx 0.95 \]
\item Rearranging this is tricky. Instead, argue that as $n \to \infty$, $\hat{p}(1 - \hat{p}) \to p(1 - p)$ (since $\hat{p} \to p$ by the Law of Large Numbers), so we may replace the denominator:
\[ \PP\left( -z \le \frac{\sqrt{n}(\hat{p} - p)}{\sqrt{\hat{p}(1 - \hat{p})}} \le z \right) \approx 0.95 \]
Now it is easier to rearrange:
\[ \PP\left(\hat{p} - z \frac{\sqrt{\hat{p}(1 - \hat{p})}}{\sqrt{n}} \le p \le \hat{p} + z \frac{\sqrt{\hat{p}(1 - \hat{p})}}{\sqrt{n}} \right) \approx 0.95 \]
\end{enumerate}
So $\left[ \hat{p} \pm z \frac{\sqrt{\hat{p}(1 - \hat{p})}}{\sqrt{n}} \right]$ is an approximate $95\%$ confidence interval for $p$.
\end{example*}

\begin{note*}
\begin{itemize}
\item $z \approx 1.96$
\item $\sqrt{\hat{p}(1 - \hat{p})} \le \half$ for all $\hat{p} \in (0, 1)$
\end{itemize}
So a ``conservative'' confidence interval is $\left[ \hat{p} \pm 1.96 \cdot \half \cdot \frac{1}{\sqrt{n}} \right]$.
\end{note*}
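Both recipes above can be checked numerically. The following is a minimal Python sketch (not part of the notes), assuming \texttt{numpy} and \texttt{scipy} are available; the true parameter values and sample sizes are made up for illustration.

\begin{verbatim}
# Minimal sketch of the two confidence-interval recipes above.
# Assumes numpy and scipy; the true parameters (sigma = 2, p = 0.3)
# and sample sizes are illustrative only.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)

# Example 1: 95% CI for sigma^2 via the chi^2_n pivot sum(X_i^2)/sigma^2
n, sigma = 20, 2.0
x = rng.normal(0.0, sigma, size=n)
c1, c2 = stats.chi2.ppf([0.025, 0.975], df=n)  # chi^2_n quantiles
s = np.sum(x**2)
print("CI for sigma^2:", (s / c2, s / c1))

# Example 2: approximate 95% CI for p via the CLT pivot
n, p = 500, 0.3
xb = rng.binomial(1, p, size=n)
p_hat = xb.mean()
z = stats.norm.ppf(0.975)                      # approx 1.96
hw = z * np.sqrt(p_hat * (1 - p_hat) / n)      # half-width
print("CI for p:", (p_hat - hw, p_hat + hw))
\end{verbatim}

The ``conservative'' interval from the note corresponds to replacing the half-width \texttt{hw} above by $1.96 / (2\sqrt{n})$, which never undercovers whatever the value of $\hat{p}$.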
\subsection{Interpreting Confidence Intervals}
Suppose $X_1, X_2 \iidsim \Unif\left[ \theta - \half, \theta + \half \right]$. What is a sensible $50\%$ confidence interval for $\theta$? Consider
\begin{align*}
\PP(\text{$\theta$ is between $X_1, X_2$}) &= \PP(\min(X_1, X_2) \le \theta \le \max(X_1, X_2)) \\
&= \PP(X_1 \le \theta \le X_2) + \PP(X_2 \le \theta \le X_1) \\
&= \half \times \half + \half \times \half \\
&= \half
\end{align*}
We immediately conclude that $(\min(X_1, X_2), \max(X_1, X_2))$ is a $50\%$ confidence interval for $\theta$.

\myskip
\emph{But} suppose we observe $X_1 = x_1$, $X_2 = x_2$ with $|x_1 - x_2| > \half$. In this case we can be \emph{sure} that $\theta$ is in $(\min(x_1, x_2), \max(x_1, x_2))$: each $x_i$ is within $\half$ of $\theta$, so $x_1$ and $x_2$ must lie on opposite sides of $\theta$.

\myskip
The frequentist interpretation of the confidence interval is entirely correct! If we repeat the experiment many times, $\theta \in (\min(X_1, X_2), \max(X_1, X_2))$ \emph{exactly} $50\%$ of the time. However, we cannot say that, \emph{given} a \emph{specific} observation $(x_1, x_2)$, we are ``$50\%$ certain that $\theta$ is in the confidence interval''.

\subsubsection*{Bayesian Inference}
So far, we have assumed that there is some true parameter $\theta$, and that the data $X$ has pdf (or pmf) $f_X(\bullet \mid \theta)$.

\myskip
Bayesian analysis is a different framework, where we treat $\theta$ as a random variable taking values in $\Theta$.

\myskip
We begin by assigning to $\theta$ a \emph{prior distribution} $\pi(\theta)$, which represents the investigator's opinions or information about $\theta$ \emph{before} seeing any data. Conditional on $\theta$, the data $X$ has pdf (or pmf) $f_X(x \mid \theta)$. Having observed a specific value $X = x$, this information is combined with the prior to form the \emph{posterior distribution} $\pi(\theta \mid x)$, which is the conditional distribution of $\theta$ given $X = x$.

\myskip
By Bayes' rule:
\[ \pi(\theta \mid x) = \frac{\pi(\theta) \cdot f_X(x \mid \theta)}{f_X(x)} \]
where $f_X(x)$ is the marginal pdf (or pmf) of $X$:
\[ f_X(x) = \begin{cases}
\int_\Theta f_X(x \mid \theta) \pi(\theta) \dd \theta & \text{if $\theta$ is continuous} \\
\sum_{\theta \in \Theta} f_X(x \mid \theta) \pi(\theta) & \text{if $\theta$ is discrete}
\end{cases} \]
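To illustrate the discrete case of this formula, here is a minimal Python sketch (not part of the notes), assuming \texttt{numpy}; the candidate values of $\theta$, the uniform prior, and the Bernoulli data are all made up for illustration.

\begin{verbatim}
# Minimal sketch of Bayes' rule with a discrete parameter space.
# Assumes numpy; Theta = {0.3, 0.5, 0.7}, the uniform prior and the
# Bernoulli data below are illustrative only.
import numpy as np

theta = np.array([0.3, 0.5, 0.7])   # parameter space Theta
prior = np.array([1, 1, 1]) / 3     # prior pi(theta): uniform

x = np.array([1, 1, 0, 1, 1])       # observed Ber(theta) data

# Likelihood f_X(x | theta) for each candidate theta
like = np.array([np.prod(t**x * (1 - t)**(1 - x)) for t in theta])

# Marginal f_X(x) = sum over theta of f_X(x | theta) pi(theta)
marginal = np.sum(like * prior)

# Posterior pi(theta | x) by Bayes' rule
posterior = like * prior / marginal
print(dict(zip(theta.tolist(), posterior.round(3).tolist())))
\end{verbatim}

Note that $f_X(x)$ does not involve $\theta$: it is simply the normalising constant that makes the posterior sum (or integrate) to one.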