% vim: tw=50
% 09/02/2023 11AM

\subsubsection*{Bayesian Analysis}
Idea: treat $\theta$ as a random variable. \\
Prior distribution: $\pi(\theta)$ (information about $\theta$ before seeing the data). \\
Joint distribution of $X, \theta$:
\[ f_X(x \mid \theta) \cdot \pi(\theta) \]
\begin{flashcard}[posterior-defn]
Posterior distribution:
\cloze{
\begin{align*}
\pi(\theta \mid x)
&= \frac{f_X(x \mid \theta) \pi(\theta)}{\int f_X(x \mid \theta) \pi(\theta) \dd \theta} \\
&\propto f_X(x \mid \theta) \pi(\theta)
\end{align*}
}
\end{flashcard}
(likelihood times prior).

\begin{example*}[Prior choice clear]
Patient gets a COVID test:
\[ \theta = \begin{cases} 0 & \text{patient does not have COVID} \\ 1 & \text{patient does have COVID} \end{cases} \]
Data:
\[ X = \begin{cases} 0 & \text{negative test} \\ 1 & \text{positive test} \end{cases} \]
We know: \\
Sensitivity of the test:
\[ f_X(X = 1 \mid \theta = 1) \]
Specificity of the test:
\[ f_X(X = 0 \mid \theta = 0) \]
What prior? Suppose we don't know anything about the patient, but we know that a proportion $p$ of people in the UK are infected today. Natural choice:
\[ \pi(\theta = 1) = p \]
Chance of infection given a positive test?
\[ \pi(\theta = 1 \mid X = 1) = \frac{\pi(\theta = 1) f_X(X = 1 \mid \theta = 1)}{\pi(\theta = 0) f_X(X = 1 \mid \theta = 0) + \pi(\theta = 1) f_X(X = 1 \mid \theta = 1)} \]
If $\pi(\theta = 0) \gg \pi(\theta = 1)$, this posterior can be small. (A numerical sketch of this calculation follows the conjugacy note below.)
\end{example*}

\begin{example*}
$\theta \in [0, 1]$ is the mortality rate for a new surgery at Addenbrooke's. In the first 10 operations, there were no deaths. Model: $X_i \sim \Ber(\theta)$, where $X_i = 1$ if the $i$-th operation results in a death, and $0$ otherwise.
\[ f_X(x \mid \theta) = \theta^{\sum x_i} (1 - \theta)^{10 - \sum x_i} \]
Prior: we're told that the surgery is performed in other hospitals with a mortality rate ranging from $3\%$ to $20\%$, with an average of $10\%$. We'll say that $\pi(\theta)$ is $\Beta(a, b)$. We choose $a = 3$, $b = 27$, so that the mean of $\pi(\theta)$ is $0.1$ and
\[ \pi(0.03 < \theta < 0.2) \approx 0.9 \]
Posterior:
\begin{align*}
\pi(\theta \mid x) &\propto \pi(\theta) \times f_X(x \mid \theta) \\
&\propto \theta^{a - 1} (1 - \theta)^{b - 1} \theta^{\sum x_i} (1 - \theta)^{10 - \sum x_i} \\
&= \theta^{\sum x_i + a - 1} (1 - \theta)^{b + 10 - \sum x_i - 1}
\end{align*}
(we omitted the normalising constant of $\Beta(a, b)$ because it does not depend on $\theta$). We deduce this is a $\Beta \left( \sum x_i + a, 10 - \sum x_i + b \right)$ distribution. In our case
\[ \sum_{i = 1}^{10} x_i = 0, \quad a = 3, \quad b = 27 \]
$\implies \Beta(3, 37)$.
\begin{center}
\includegraphics[width=0.6\linewidth]{images/4ce8df80a86e11ed.png}
\end{center}
\end{example*}

\begin{note*}
Here the prior and posterior are in the same family of distributions. This is known as conjugacy.
\end{note*}
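\noindent To see how much the prior matters, here is a minimal numerical sketch of the COVID calculation above, in Python. The sensitivity, specificity and prevalence values are illustrative assumptions, not figures from the course.
\begin{verbatim}
# Posterior probability of infection given a
# positive test (COVID example above).
# Illustrative numbers, assumed for this sketch:
sens = 0.80  # f_X(X=1 | theta=1), sensitivity
spec = 0.95  # f_X(X=0 | theta=0), specificity
p = 0.02     # pi(theta=1), assumed prevalence

# Bayes' rule: prior times likelihood, divided by
# the total probability of a positive test.
num = p * sens
den = (1 - p) * (1 - spec) + p * sens
print(num / den)  # ~0.25: small, because
                  # pi(theta=0) >> pi(theta=1)
\end{verbatim}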
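\noindent Similarly, a sketch of the conjugate update in the surgery example; it assumes \texttt{scipy}, though any Beta CDF routine would do.
\begin{verbatim}
# Conjugate Beta-Binomial update for the surgery
# example: prior Beta(3, 27), 0 deaths in n = 10.
from scipy.stats import beta

a, b = 3, 27
# Prior calibration check: should be close to the
# 0.9 quoted above.
print(beta.cdf(0.2, a, b) - beta.cdf(0.03, a, b))

# Posterior is Beta(sum(x) + a, n - sum(x) + b):
s, n = 0, 10
posterior = beta(s + a, n - s + b)  # Beta(3, 37)
\end{verbatim}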
\noindent What to do with the posterior? The information in $\pi(\theta \mid x)$ can be used to make decisions under uncertainty.

\subsubsection*{Formal Process}
\begin{enumerate}[(1)]
\item We must pick a decision $\delta \in D$.
\item The loss function $L(\theta, \delta)$ is the loss incurred when we make decision $\delta$ and the true parameter has value $\theta$. For example $D = \{0, 1\}$, where $\delta = 1$ means we ask the patient to self-isolate. Then, $L(\theta = 0, \delta = 1)$ is the loss incurred when we ask a non-infected patient to self-isolate.
\item We pick the decision which minimises the posterior expected loss:
\[ \delta^* = \arg \min_{\delta \in D} \int_{\Theta} L(\theta, \delta) \pi(\theta \mid x) \dd \theta \]
(Von Neumann-Morgenstern theorem)
\end{enumerate}

Point estimation: \\
The decision is a ``best guess'' for the true parameter, so $\delta \in \Theta$.
\begin{flashcard}[bayes-estimator]
The \emph{Bayes estimator} $\hat{\theta}^{(b)}$ minimises
\[ h(\delta) = \cloze{\int_{\Theta} L(\theta, \delta) \pi(\theta \mid x) \dd\theta} \]
\end{flashcard}

\begin{example*}
Quadratic loss $L(\theta, \delta) = (\theta - \delta)^2$:
\[ h(\delta) = \int (\theta - \delta)^2 \pi(\theta \mid x) \dd \theta \]
Since $h'(\delta) = -2 \int (\theta - \delta) \pi(\theta \mid x) \dd \theta$, we have $h'(\delta) = 0$ if and only if
\[ \int (\theta - \delta) \pi(\theta \mid x) \dd \theta = 0 \]
\[ \iff \int \theta \pi(\theta \mid x) \dd \theta = \delta \ub{\int \pi(\theta \mid x) \dd \theta}_{=1} \]
Hence $\hat{\theta}^{(b)}$ equals the posterior mean of $\theta$.
\begin{hiddenflashcard}[quadratic-loss-bayes-estimator]
Bayes estimator for quadratic loss? \\
\cloze{
Loss function $L(\theta, \delta) = (\theta - \delta)^2$, then $\hat{\theta}^{(b)}$ is the posterior mean of $\theta$.
}
\end{hiddenflashcard}
\end{example*}

\begin{example*}
Absolute error loss $L(\theta, \delta) = |\theta - \delta|$:
\begin{align*}
h(\delta) &= \int |\theta - \delta| \pi(\theta \mid x) \dd \theta \\
&= \int_{-\infty}^\delta -(\theta - \delta) \pi(\theta \mid x) \dd \theta + \int_\delta^\infty (\theta - \delta) \pi(\theta \mid x) \dd \theta \\
&= -\int_{-\infty}^\delta \theta \pi(\theta \mid x) \dd \theta + \int_\delta^\infty \theta \pi(\theta \mid x) \dd \theta + \delta \int_{-\infty}^\delta \pi(\theta \mid x) \dd \theta - \delta \int_\delta^\infty \pi(\theta \mid x) \dd \theta
\end{align*}
Take the derivative with respect to $\delta$. By the fundamental theorem of calculus,
\[ h'(\delta) = \int_{-\infty}^\delta \pi(\theta \mid x) \dd \theta - \int_\delta^\infty \pi(\theta \mid x) \dd \theta \]
So $h'(\delta) = 0$ if and only if
\[ \int_{-\infty}^\delta \pi(\theta \mid x) \dd \theta = \int_\delta^\infty \pi(\theta \mid x) \dd\theta \]
So in this case
\[ \hat{\theta}^{(b)} = \text{median of the posterior} \]
\begin{hiddenflashcard}[absolute-error-loss]
Bayes estimator for absolute error loss? \\
\cloze{
Loss function $L(\theta, \delta) = |\theta - \delta|$, then $\hat{\theta}^{(b)}$ is the median of the posterior.
}
\end{hiddenflashcard}
\end{example*}

\subsubsection*{Credible Interval}
A $100\gamma\%$ \emph{credible interval} $(A(x), B(x))$ is one which satisfies
\[ \pi(A(x) \le \theta \le B(x) \mid x) = \gamma \]
($A$ and $B$ are fixed at the observed data $x$, but $\theta$ is random). Equivalently,
\[ \int_{A(x)}^{B(x)} \pi(\theta \mid x) \dd \theta = \gamma \]
In example sheet 2:
\begin{center}
\includegraphics[width=0.3\linewidth]{images/369f9644a87111ed.png}
\end{center}
\begin{note*}
Unlike confidence intervals, we \emph{can} interpret credible intervals conditionally (``given $x$, we are $100\gamma\%$ sure that $\theta \in [A(x), B(x)]$'').
\end{note*}
\begin{note*}
If $T$ is a sufficient statistic, $\pi(\theta \mid x)$ only depends on $x$ through $T(x)$: by the factorisation criterion,
\begin{align*}
\pi(\theta \mid x) &\propto \pi(\theta) \times f_X(x \mid \theta) \\
&= \pi(\theta) g(T(x), \theta) h(x) \\
&\propto \pi(\theta) g(T(x), \theta)
\end{align*}
\end{note*}
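\noindent To tie these together for the running surgery example, here is a minimal sketch (again assuming \texttt{scipy}) of the two Bayes estimators and an equal-tailed $95\%$ credible interval for the $\Beta(3, 37)$ posterior.
\begin{verbatim}
# Bayes estimators and a credible interval for
# the Beta(3, 37) posterior (surgery example).
from scipy.stats import beta

post = beta(3, 37)

# Quadratic loss -> posterior mean:
print(post.mean())    # 3/40 = 0.075

# Absolute error loss -> posterior median:
print(post.ppf(0.5))  # ~0.068

# Equal-tailed 95% credible interval (A, B):
# probability 0.025 in each tail, so that
# pi(A <= theta <= B | x) = 0.95.
print(post.ppf(0.025), post.ppf(0.975))
\end{verbatim}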