% vim: tw=50
% 04/02/2023 09AM

\subsubsection*{Recipe for Confidence Intervals}
\begin{enumerate}[(1)]
\item Find some quantity $R(X, \theta)$ such that the $\PP_\theta$-distribution of $R(X, \theta)$ does not depend on $\theta$. This is called a \emph{pivot}. For example, if $X_1, \ldots, X_n \iidsim \normaldist(\mu, 1)$, then
\[ z = \sqrt{n}(\ol{X} - \mu) \sim \normaldist(0, 1) \qquad \forall \mu \]
\item Write down a probability statement about the pivot of the form
\[ \PP(c_1 \le R(X, \theta) \le c_2) = \gamma \]
by using the quantiles $c_1, c_2$ of the distribution of $R(X, \theta)$ (typically a $\normaldist(0, 1)$ or $\chi_n^2$ distribution).
\item Rearrange the inequalities to leave $\theta$ in the middle.
\end{enumerate}

\begin{proposition*}
If $T : \RR \to \RR$ is a monotone increasing function, and $(A(X), B(X))$ is a $100\gamma\%$ confidence interval for $\theta$, then $(T(A(X)), T(B(X)))$ is a $100\gamma\%$ confidence interval for $T(\theta)$.
\end{proposition*}

\begin{remark*}
When $\theta$ is a vector, we talk about confidence sets.
\end{remark*}

\begin{example*}
$X_1, \ldots, X_n \iidsim \normaldist(0, \sigma^2)$. Find a $95\%$ confidence interval for $\sigma^2$.
\begin{enumerate}[(1)]
\item Note that $\frac{X_i}{\sigma} \sim \normaldist(0, 1)$
\[ \implies \sum_{i = 1}^n \frac{X_i^2}{\sigma^2} \sim \chi_n^2 \]
Hence $R(X, \sigma^2) = \sum_i \frac{X_i^2}{\sigma^2}$ is a pivot.
\item Let $c_1 = F_{\chi_n^2}^{-1}(0.025)$, $c_2 = F_{\chi_n^2}^{-1}(0.975)$. Then
\[ \PP\left(c_1 \le \frac{1}{\sigma^2} \sum_i X_i^2 \le c_2\right) = 0.95 \]
\item Rearranging:
\[ \PP\left( \frac{\sum X_i^2}{c_2} \le \sigma^2 \le \frac{\sum X_i^2}{c_1} \right) = 0.95 \]
Hence $\left[ \frac{\sum X_i^2}{c_2}, \frac{\sum X_i^2}{c_1} \right]$ is a $95\%$ confidence interval for $\sigma^2$.
\end{enumerate}
Hence, using the proposition above with $T(x) = \sqrt{x}$, $\left[ \sqrt{\frac{\sum X_i^2}{c_2}}, \sqrt{\frac{\sum X_i^2}{c_1}} \right]$ is a $95\%$ confidence interval for $\sigma$.
\end{example*}

\begin{example*}
$X_1, \ldots, X_n \iidsim \Ber(p)$, $n$ is large. Find an approximate $95\%$ confidence interval for $p$.
\begin{enumerate}[(1)]
\item The mle for $p$ is $\hat{p} = \frac{1}{n} \sum_{i = 1}^n X_i$. By the Central Limit Theorem, when $n$ is large, $\hat{p}$ is approximately $\normaldist \left( p, \frac{p(1 - p)}{n} \right)$. Therefore $\frac{\sqrt{n}(\hat{p} - p)}{\sqrt{p(1 - p)}}$ is approximately $\normaldist(0, 1)$.
\item Let $z = \Phi^{-1}(0.975)$. Then
\[ \PP \left( -z \le \frac{\sqrt{n}(\hat{p} - p)}{\sqrt{p(1 - p)}} \le z \right) \approx 0.95 \]
\item Rearranging this is tricky. Instead, argue that as $n \to \infty$, $\hat{p}(1 - \hat{p}) \to p(1 - p)$ (since $\hat{p} \to p$ by the Law of Large Numbers), so we may replace the denominator:
\[ \PP\left( -z \le \frac{\sqrt{n}(\hat{p} - p)}{\sqrt{\hat{p}(1 - \hat{p})}} \le z \right) \approx 0.95 \]
Now it is easier to rearrange:
\[ \PP\left(\hat{p} - z \frac{\sqrt{\hat{p}(1 - \hat{p})}}{\sqrt{n}} \le p \le \hat{p} + z \frac{\sqrt{\hat{p}(1 - \hat{p})}}{\sqrt{n}} \right) \approx 0.95 \]
\end{enumerate}
So $\left[ \hat{p} \pm z \frac{\sqrt{\hat{p}(1 - \hat{p})}}{\sqrt{n}} \right]$ is an approximate $95\%$ confidence interval for $p$.
\end{example*}

\begin{note*}
\begin{itemize}
\item $z \approx 1.96$
\item $\sqrt{\hat{p}(1 - \hat{p})} \le \half$ for all $\hat{p} \in (0, 1)$
\end{itemize}
So a ``conservative'' confidence interval is $\left[ \hat{p} \pm 1.96 \cdot \half \cdot \frac{1}{\sqrt{n}} \right]$.
\end{note*}
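Both recipes above can be checked numerically. The following is a minimal Python sketch (not part of the notes), assuming \texttt{numpy} and \texttt{scipy} are available; the true parameter values and sample sizes are made up for illustration.

\begin{verbatim}
# Minimal sketch of the two confidence-interval recipes above.
# Assumes numpy and scipy; the true parameters (sigma = 2, p = 0.3)
# and sample sizes are illustrative only.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)

# Example 1: 95% CI for sigma^2 via the chi^2_n pivot sum(X_i^2)/sigma^2
n, sigma = 20, 2.0
x = rng.normal(0.0, sigma, size=n)
c1, c2 = stats.chi2.ppf([0.025, 0.975], df=n)  # chi^2_n quantiles
s = np.sum(x**2)
print("CI for sigma^2:", (s / c2, s / c1))

# Example 2: approximate 95% CI for p via the CLT pivot
n, p = 500, 0.3
xb = rng.binomial(1, p, size=n)
p_hat = xb.mean()
z = stats.norm.ppf(0.975)                      # approx 1.96
hw = z * np.sqrt(p_hat * (1 - p_hat) / n)      # half-width
print("CI for p:", (p_hat - hw, p_hat + hw))
\end{verbatim}

The ``conservative'' interval from the note corresponds to replacing the half-width \texttt{hw} above by $1.96 / (2\sqrt{n})$, which never undercovers whatever the value of $\hat{p}$.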
\subsection{Interpreting Confidence Intervals}
Suppose $X_1, X_2 \iidsim \Unif\left[ \theta - \half, \theta + \half \right]$. What is a sensible $50\%$ confidence interval for $\theta$? Consider
\begin{align*}
\PP(\text{$\theta$ is between $X_1, X_2$}) &= \PP(\min(X_1, X_2) \le \theta \le \max(X_1, X_2)) \\
&= \PP(X_1 \le \theta \le X_2) + \PP(X_2 \le \theta \le X_1) \\
&= \half \times \half + \half \times \half \\
&= \half
\end{align*}
We immediately conclude that $(\min(X_1, X_2), \max(X_1, X_2))$ is a $50\%$ confidence interval for $\theta$.

\myskip
\emph{But} suppose we observe $X_1 = x_1$, $X_2 = x_2$ with $|x_1 - x_2| > \half$. In this case we can be \emph{sure} that $\theta$ is in $(\min(x_1, x_2), \max(x_1, x_2))$: each $x_i$ is within $\half$ of $\theta$, so $x_1$ and $x_2$ must lie on opposite sides of $\theta$.

\myskip
The frequentist interpretation of the confidence interval is entirely correct! If we repeat the experiment many times, $\theta \in (\min(X_1, X_2), \max(X_1, X_2))$ \emph{exactly} $50\%$ of the time. However, we cannot say that, \emph{given} a \emph{specific} observation $(x_1, x_2)$, we are ``$50\%$ certain that $\theta$ is in the confidence interval''.

\subsubsection*{Bayesian Inference}
So far, we have assumed that there is some true parameter $\theta$, and that the data $X$ has pdf (or pmf) $f_X(\bullet \mid \theta)$.

\myskip
Bayesian analysis is a different framework, where we treat $\theta$ as a random variable taking values in $\Theta$.

\myskip
We begin by assigning to $\theta$ a \emph{prior distribution} $\pi(\theta)$, which represents the investigator's opinions or information about $\theta$ \emph{before} seeing any data. Conditional on $\theta$, the data $X$ has pdf (or pmf) $f_X(x \mid \theta)$. Having observed a specific value $X = x$, this information is combined with the prior to form the \emph{posterior distribution} $\pi(\theta \mid x)$, which is the conditional distribution of $\theta$ given $X = x$.

\myskip
By Bayes' rule:
\[ \pi(\theta \mid x) = \frac{\pi(\theta) \cdot f_X(x \mid \theta)}{f_X(x)} \]
where $f_X(x)$ is the marginal pdf (or pmf) of $X$:
\[ f_X(x) = \begin{cases}
\int_\Theta f_X(x \mid \theta) \pi(\theta) \dd \theta & \text{if $\theta$ is continuous} \\
\sum_{\theta \in \Theta} f_X(x \mid \theta) \pi(\theta) & \text{if $\theta$ is discrete}
\end{cases} \]
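To illustrate the discrete case of this formula, here is a minimal Python sketch (not part of the notes), assuming \texttt{numpy}; the candidate values of $\theta$, the uniform prior, and the Bernoulli data are all made up for illustration.

\begin{verbatim}
# Minimal sketch of Bayes' rule with a discrete parameter space.
# Assumes numpy; Theta = {0.3, 0.5, 0.7}, the uniform prior and the
# Bernoulli data below are illustrative only.
import numpy as np

theta = np.array([0.3, 0.5, 0.7])   # parameter space Theta
prior = np.array([1, 1, 1]) / 3     # prior pi(theta): uniform

x = np.array([1, 1, 0, 1, 1])       # observed Ber(theta) data

# Likelihood f_X(x | theta) for each candidate theta
like = np.array([np.prod(t**x * (1 - t)**(1 - x)) for t in theta])

# Marginal f_X(x) = sum over theta of f_X(x | theta) pi(theta)
marginal = np.sum(like * prior)

# Posterior pi(theta | x) by Bayes' rule
posterior = like * prior / marginal
print(dict(zip(theta.tolist(), posterior.round(3).tolist())))
\end{verbatim}

Note that $f_X(x)$ does not involve $\theta$: it is simply the normalising constant that makes the posterior sum (or integrate) to one.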