% vim: tw=50 % 26/01/2023 11AM \subsection{Sufficiency} $X_1, \ldots, X_n$ are IID random variables from a distribution with pdf (or pmf) $f_X(\bullet \mid \theta)$. Let $X = (X_1, \ldots, X_n)$. \myskip Question: Is there a statistic $T(X)$ which contains all information in $X$ needed to estimate $\theta$? \begin{flashcard}[sufficiency-defn] \begin{definition*}[Sufficiency] \cloze{ A statistic $T$ is \emph{sufficient} for $\theta$ if the \fcemph{conditional distribution} of $X$ given $T(X)$ \fcemph{does not depend} on $\theta$. } \end{definition*} \end{flashcard} \begin{remark*} $\theta$ and $T(X)$ could be vector-valued. \end{remark*} \begin{example*} $X_1, \ldots, X_n \iidsim \Ber(\theta)$ for $\theta \in [0, 1]$. \begin{align*} f_X(\bullet \mid \theta) &= \prod_{i = 1}^n \theta^{x_i} (1 - \theta)^{1 - x_i} \\ &= \theta^{\sum x_i} (1 - \theta)^{n - \sum x_i} \end{align*} \begin{note*} This only depends on $X$ through $T(X) = \sum_{i = 1}^n X_i$. \end{note*} For $x$ with $\sum x_i = t$, \begin{align*} f_{X \mid T = t}(x \mid T(x) = t) &= \frac{\PP_\theta(X = x, T(X) = t)}{\PP_\theta(T(X) = t)} \\ &= \frac{\PP_\theta(X = x)}{\PP_\theta(T(X) = t)} \\ &= \frac{\theta^{\sum x_i} (1 - \theta)^{n - \sum x_i}}{{n \choose t} \theta^t (1 - \theta)^{n - t}} \\ &= {n \choose t}^{-1} \end{align*} As this doesn't depend on $\theta$, $T(X)$ is sufficient for $\theta$. \end{example*} \begin{flashcard}[factorisation-criterion] \begin{theorem*}[Factorisation criterion] \cloze{ $T$ is sufficient for $\theta$ if and only if \[ f_X(x \mid \theta) = g(T(x), \theta) \cdot h(x) \] for suitable functions $g, h$. } \end{theorem*} \end{flashcard} \begin{proof} (Discrete case) \myskip Suppose $f_X(x \mid \theta) = g(T(x), \theta) h(x)$. 
If $T(x) = t$, then \begin{align*} f_{X \mid T = t}(x \mid T = t) &= \frac{\PP_\theta(X = x, \cancel{T(X) = t})}{\PP_\theta(T(X) = t)} \\ &= \frac{g(T(x), \theta) h(x)}{ \sum_{x' \colon T(x') = t} g(T(x'), \theta) h(x')} \\ &= \frac{\cancel{g(t, \theta)}}{\cancel{g(t, \theta)}} \frac{h(x)}{ \sum_{x' \colon T(x') = t} h(x')} \end{align*} As this doesn't depend on $\theta$, $T(X)$ is sufficient. \myskip Conversely, suppose $T(X)$ is sufficient, then \begin{align*} \PP_\theta(X = x) &= \PP_\theta(X = x, T(X) = t) \\ &= \ub{\PP_\theta(T(X) = t)}_{g(t, \theta)} \cdot \ub{\PP_\theta(X = x \mid T(X) = t)}_{h(x)} \end{align*} Then by sufficiency of $T$, $h(x)$ doesn't depend on $\theta$ (so it is a function of $x$). Thus the pmf of $X$, $f_X(\bullet \mid \theta)$ factorises as in the statement of the theorem. \end{proof} \begin{example*} $X_1, \ldots, X_n \iidsim \Ber(\theta)$. \[ f_X(x \mid \theta) = \theta^{\sum x_i} (1 - \theta)^{n - \sum x_i} \] Take $g(t, \theta) = \theta^t (1 - \theta)^{n - t}$, $h(x) = 1$. This immediately implies $T(X) = \sum x_i$ is sufficient. \end{example*} \begin{example*} $X_1, \ldots, X_n \iidsim \Unif([0, \theta])$, $\theta > 0$. Then \begin{align*} f_X(x \mid \theta) &= \prod_{i = 1}^n \frac{1}{\theta} \mathbbm{1}_{x_i \in [0, \theta]} \\ &= \ub{\frac{1}{\theta^n} \mathbbm{1}_{\{\max_i x_i \le \theta\}}}_{g(T(x), \theta)} \ub{\mathbbm{1}_{\{\min_i x_i \ge 0\}}}_{h(x)} \end{align*} $T(x) = \max_i x_i$. Then by factorisation lemma, $T(x) = \max_i x_i$ is sufficient for $\theta$. \end{example*} \subsubsection*{Minimal Sufficiency} Sufficient stats are \emph{not} unique. Indeed any 1-to-1 function of a sufficient statistic is also sufficient. Also $T(X) = X$ is always sufficient but not very useful. \begin{definition*} A sufficient statistic $T$ is \emph{minimal sufficient} if it is a function of any other sufficient statistic. That is, if $T'$ is also sufficient, then \[ T'(x) = T'(y) \implies T(x) = T(y) \] for all $x, y \in \mathcal{X}^n$. 
\end{definition*} \begin{remark*} Any two minimal sufficient statistics, $T, T'$ are ``in bijection with each other'': \[ T(x) = T(y) \iff T'(x) = T'(y) \] Useful condition to check minimal sufficiency. \end{remark*} \begin{flashcard}[minimal-sufficiency-thm] \begin{theorem*}[Minimal Sufficiency Theorem] \cloze{ Suppose that $T(X)$ is a statistic such that $f_X(x \mid \theta) / f_X(y \mid \theta)$ is constant as a function of $\theta$ if and only if $T(x) = T(y)$. Then $T$ is minimal sufficient. } \end{theorem*} \end{flashcard} \noindent Let $x \stackrel{1}{\sim} y$ if $\frac{f_X(x \mid \theta)}{f_X(y \mid \theta)}$ is constant in $\theta$. It's easy to check that $\stackrel{1}{\sim}$ is an equivalence relation. \myskip Similarly, for a given statistic $T$, $x \stackrel{2}{\sim} y$ if $T(x) = T(y)$ defines another equivalence relation. The condition of the theorem says $\stackrel{1}{\sim}$ and $\stackrel{2}{\sim}$ are the same. \begin{note*} We can always construct a statistic $T$ which is constant on the equivalence classes of $\stackrel{1}{\sim}$, which by the theorem is minimal sufficient. \end{note*} \begin{proof} For any value $t$ of $T$, let $z_t$ be a representative from the equivalence class \[ \{x \mid T(x) = t\} \] Then \begin{align*} f_X(x \mid \theta) &= \ub{f_X(z_{T(x)} \mid \theta)}_{g(T(x), \theta)} \ub{\frac{f_X(x \mid \theta)}{f_X(z_{T(x)} \mid \theta)}}_{h(x)} \end{align*} Where $h(x)$ does not depend on $\theta$ by the hypothesis, as $x \stackrel{1}{\sim} z_{T(x)}$. By factorisation criterion, $T$ is sufficient. \myskip To prove that $T$ is minimal, take any other sufficient statistic $S$. Want to prove that if $S(x) = S(y)$ then $T(x) = T(y)$. By factorisation criterion, there are functions $g_S, h_S$ such that \[ f_X(x \mid \theta) = g_S(S(x), \theta) h_S(x) \] Suppose $S(x) = S(y)$. 
Then \[ \frac{f_X(x \mid \theta)}{f_X(y \mid \theta)} = \frac{\cancel{g_S(S(x), \theta)} h_S(x)}{\cancel{g_S(S(y), \theta)} h_S(y)} \] which doesn't depend on $\theta$. Hence $x \stackrel{1}{\sim} y$. By hypothesis, $x \stackrel{2}{\sim} y$, hence $T(x) = T(y)$. \end{proof} \begin{remark*} Sometimes the range of $X$ depends on $\theta$ (for example $X_1, \ldots, X_n \iidsim \Unif([0, \theta])$). In this case we can interpret \begin{center} ``$\frac{f_X(x \mid \theta)}{f_X(y \mid \theta)}$ is constant in $\theta$'' \end{center} to mean that $f_X(x \mid \theta) = c(x, y) f_X(y \mid \theta)$ for some function $c$ which does not depend on $\theta$. \end{remark*}