% vim: tw=50
% 19/01/2023 11AM

\setcounter{section}{-1}
\section{Introduction}
Statistics: The science of making informed decisions. Can include:
\begin{itemize}
\item Design of experiments
\item Graphical exploration of data
\item \emph{Formal statistical inference} $\in$ Decision theory
\item Communication of results.
\end{itemize}
Let $X_1, X_2, \dots, X_n$ be independent observations from some distribution $f_X(\bullet \mid \theta)$, with parameter $\theta$. We wish to infer the value of $\theta$ from $X_1, \dots, X_n$:
\begin{itemize}
\item Estimating $\theta$
\item Quantifying uncertainty in an estimator
\item Testing a hypothesis about $\theta$.
\end{itemize}

\subsection{Probability Review}
Let $\Omega$ be the \emph{sample space} of outcomes in an experiment. A ``nice'' or measurable subset of $\Omega$ is called an \emph{event}; we denote the set of events by $\mathcal{F}$. A function $\PP : \mathcal{F} \to [0, 1]$ is called a \emph{probability measure} if:
\begin{itemize}
\item $\PP(\emptyset) = 0$
\item $\PP(\Omega) = 1$
\item $\PP\left( \bigcup_{i = 1}^\infty A_i \right) = \sum_{i = 1}^\infty \PP(A_i)$ if $(A_i)$ are disjoint.
\end{itemize}
A \emph{random variable} is a (measurable) function $X \colon \Omega \to \RR$. For example: tossing a coin twice, $\Omega = \{HH, HT, TH, TT\}$, and $X$: number of heads.
\[ X(HH) = 2 \qquad X(TH) = X(HT) = 1 \qquad X(TT) = 0 \]
The \emph{distribution function} of $X$ is
\[ F_X(x) = \PP(X \le x) \]
A \emph{discrete} random variable takes values in a countable set $\mathcal{X} \subseteq \RR$; its \emph{probability mass function} or pmf is $p_X(x) = \PP(X = x)$. We say $X$ has a \emph{continuous} distribution if it has a \emph{probability density function} or pdf satisfying
\[ \PP(X \in A) = \int_A f_X(x) \dd x \]
for any ``nice'' set $A$.

\myskip
The \emph{expectation} of $X$ is
\[ \EE X = \begin{cases}
\sum_{x \in \mathcal{X}} x p_X(x) & \text{if $X$ is discrete} \\
\int x f_X(x) \dd x & \text{if $X$ is continuous}
\end{cases} \]
If $g : \RR \to \RR$,
\[ \EE g(X) = \int g(x) f_X(x) \dd x \]
The \emph{variance} of $X$ is
\[ \Var(X) = \EE((X - \EE X)^2) \]
We say that $X_1, X_2, \dots, X_n$ are independent if for all $x_1, \dots, x_n$
\[ \PP(X_1 \le x_1, \ldots, X_n \le x_n) = \PP(X_1 \le x_1) \cdots \PP(X_n \le x_n) \]
If the variables have pdfs, then
\[ f_X(x) = \prod_{i = 1}^n f_{X_i}(x_i) \]
($x = (x_1, \dots, x_n)$, $X = (X_1, \dots, X_n)$).

\subsubsection*{Linear transformations}
If $a_1, \dots, a_n \in \RR$,
\[ \EE(a_1 X_1 + \cdots + a_n X_n) = a_1 \EE X_1 + \cdots + a_n \EE X_n \]
\[ \Var(a_1 X_1 + \cdots + a_n X_n) = \sum_{i, j} a_i a_j \Cov(X_i, X_j) \]
($\Cov(X_i, X_j) = \EE((X_i - \EE X_i)(X_j - \EE X_j))$). If $X = (X_1, \dots, X_n)^\top$,
\begin{align*}
\EE X &= (\EE X_1, \dots, \EE X_n)^\top \\
\EE (a^\top X) &= a^\top \EE X \\
\Var(a^\top X) &= a^\top \ub{\Var(X)}_{(\Var(X))_{ij} = \Cov(X_i, X_j)} a
\end{align*}

\subsubsection*{Moment generating functions}
\[ M_X(t) = \EE(e^{tX}) \]
This may only exist for $t$ in some neighbourhood of $0$.
\begin{itemize}
\item $\EE(X^n) = \frac{\dd^n}{\dd t^n} M_X(t) \Big|_{t = 0}$
\item $M_X = M_Y \implies F_X = F_Y$
\item Makes it easy to find the distribution of sums of IID variables.
\end{itemize}
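As a quick check of the first property (the exponential distribution is used here just for illustration): if $X$ has pdf $\lambda e^{-\lambda x}$ on $(0, \infty)$, then for $t < \lambda$,
\[ M_X(t) = \int_0^\infty e^{tx} \lambda e^{-\lambda x} \dd x = \frac{\lambda}{\lambda - t}, \]
so $\EE X = M_X'(0) = \frac{1}{\lambda}$, $\EE(X^2) = M_X''(0) = \frac{2}{\lambda^2}$, and hence $\Var(X) = \frac{1}{\lambda^2}$.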
\begin{example*}
Let $X_1, \dots, X_n$ be IID $\Poisson(\mu)$.
\begin{align*}
M_{X_1}(t) &= \EE e^{tX_1} \\
&= \sum_{x = 0}^\infty e^{tx} \cdot \frac{e^{-\mu} \mu^x}{x!} \\
&= e^{-\mu} \sum_{x = 0}^\infty \frac{(e^t \mu)^x}{x!} \\
&= e^{-\mu} e^{\mu e^t} \\
&= e^{-\mu(1 - e^t)}
\end{align*}
Let $S_n = X_1 + \cdots + X_n$. Then
\begin{align*}
M_{S_n}(t) &= \EE e^{t(X_1 + \cdots + X_n)} \\
&= \prod_{i = 1}^n \EE e^{tX_i} && \text{(independence)} \\
&= e^{-n\mu(1 - e^t)}
\end{align*}
Observe that this is the mgf of a $\Poisson(\mu n)$ distribution, so $S_n \sim \Poisson(\mu n)$.
\end{example*}

\subsubsection*{Limit Theorems}
Weak law of large numbers (WLLN): let $X_1, \dots, X_n$ be IID with $\EE X_1 = \mu$, and let
\[ \ol{X}_n = \frac{1}{n} \sum_{i = 1}^n X_i \]
be the ``sample mean''. Then for all $\eps > 0$,
\[ \PP(\ub{|\ol{X}_n - \mu| > \eps}_{\text{event that depends only on $X_1, \dots, X_n$}}) \to 0 \qquad \text{as $n \to \infty$} \]
Strong law of large numbers (SLLN):
\[ \PP(\ol{X}_n \stackrel{n \to \infty}{\longrightarrow} \mu) = 1 \]
(This event depends on the \emph{whole} sequence $X_1, X_2, \dots$, since $\ol{X}_n \to \mu \iff \forall \eps > 0 \, \exists N \, \forall n > N, \, |\ol{X}_n - \mu| < \eps$.)

\subsubsection*{Central Limit Theorem}
Let $Z_n = \frac{\sqrt{n}(\ol{X}_n - \mu)}{\sigma}$ where $\sigma^2 = \Var(X_i)$. Then $Z_n$ is approximately $\normaldist(0, 1)$ for large $n$:
\[ \PP(Z_n \le z) \to \Phi(z) \qquad \text{as $n \to \infty$} \qquad \forall z \in \RR \]
where $\Phi$ is the distribution function of a $\normaldist(0, 1)$ variable.

\subsubsection*{Conditioning}
Let $X$ and $Y$ be discrete random variables. Their joint pmf is
\[ p_{X,Y}(x, y) = \PP(X = x, Y = y) \]
The marginal pmf of $X$ is
\[ p_X(x) = \PP(X = x) = \sum_y p_{X,Y}(x, y) \]
The conditional pmf of $X$ given $Y = y$ is
\begin{align*}
p_{X \mid Y}(x \mid y) &= \PP(X = x \mid Y = y) \\
&= \frac{\PP(X = x, Y = y)}{\PP(Y = y)} \\
&= \frac{p_{X, Y}(x, y)}{p_Y(y)}
\end{align*}
(defined $= 0$ if $p_Y(y) = 0$). If $X, Y$ are continuous, the joint pdf $f_{X,Y}$ satisfies
\[ \PP(X \le x', Y \le y') = \int_{-\infty}^{x'} \int_{-\infty}^{y'} f_{X, Y}(x, y) \dd y \dd x \]
The marginal pdf of $Y$ is
\[ f_Y(y) = \int_{-\infty}^\infty f_{X, Y}(x, y) \dd x \]
The conditional pdf of $X$ given $Y$ is
\[ f_{X \mid Y}(x \mid y) = \frac{f_{X, Y}(x, y)}{f_Y(y)} \]
Conditional expectation:
\[ \EE(X \mid Y = y) = \begin{cases}
\sum_x x p_{X \mid Y}(x \mid y) & \text{if $X$ is discrete} \\
\int_{-\infty}^\infty x f_{X \mid Y}(x \mid y) \dd x & \text{if $X$ is continuous}
\end{cases} \]
($\EE(X \mid Y)$ is treated as a random variable, which is a function of $Y$).

\myskip
Tower property:
\[ \EE(\EE(X \mid Y)) = \EE X \]
Conditional variance formula:
\begin{align*}
\Var(X) &= \EE(X^2) - (\EE X)^2 \\
&= \EE(\EE(X^2 \mid Y)) - (\EE(\EE(X \mid Y)))^2 \\
&= \EE(\EE(X^2 \mid Y) - [\EE(X \mid Y)]^2) + \EE[(\EE(X \mid Y))^2] - (\EE[\EE(X \mid Y)])^2 \\
&= \EE \Var(X \mid Y) + \Var(\EE(X \mid Y))
\end{align*}
\begin{hiddenflashcard}[conditional-variance-formula]
Conditional variance formula? \\
\[ \cloze{\Var(X) = \EE \Var(X \mid Y) + \Var \EE(X \mid Y)} \]
\end{hiddenflashcard}
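\begin{example*}
An illustration of the tower property and the conditional variance formula (the distributions here are chosen just for illustration): suppose $N \sim \Poisson(\mu)$ and, given $N = n$, $X \sim \operatorname{Bin}(n, p)$. Then
\begin{align*}
\EE X &= \EE(\EE(X \mid N)) = \EE(Np) = p\mu \\
\Var(X) &= \EE \Var(X \mid N) + \Var(\EE(X \mid N)) \\
&= \EE(Np(1 - p)) + \Var(Np) \\
&= \mu p (1 - p) + p^2 \mu = \mu p
\end{align*}
\end{example*}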