% vim: tw=50
% 03/03/2022 11AM
\subsubsection*{Studying $\mathrm{N}(\mu, \sigma^2)$ via linear transformations}
Facts about $X \sim \mathrm{N}(\mu, \sigma^2)$:
\begin{enumerate}[(i)]
\item $X$ has the same distribution as $\mu + \sigma Z$ where $Z \sim \mathrm{N}(0, 1)$.
\item $X$ has CDF
\[ F_X(x) = \Phi \left( \frac{x - \mu}{\sigma} \right) \]
\begin{notation*}
$\Phi$ is the CDF of $\mathrm{N}(0, 1)$
\end{notation*}
\item $\EE[X] = \mu$, $\mathrm{Var}(X) = \sigma^2$.
\end{enumerate}
\begin{proof}
\begin{enumerate}[(i)]
\item $g(z) = \mu + \sigma z$ so $g^{-1}(x) = \frac{x - \mu}{\sigma}$. Then $g(Z)$ has density
\begin{align*}
f_{g(Z)}(x) &= \frac{1}{\sigma} f_Z \left( \frac{x - \mu}{\sigma} \right) \\
&= \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{(x - \mu)^2}{2\sigma^2}}
\end{align*}
\item $F_{g(Z)}(x) = \PP(g(Z) \le x) = \PP \left( Z \le \frac{x - \mu}{\sigma} \right) = \Phi \left( \frac{x - \mu}{\sigma} \right)$.
\item Use part (i):
\[ \EE[X] = \EE[\mu + \sigma Z] = \mu + \sigma \EE[Z] = \mu \]
\[ \mathrm{Var}(\mu + \sigma Z) = \sigma^2 \mathrm{Var}(Z) = \sigma^2 \]
\end{enumerate}
\end{proof}
\begin{remark*}
Reduces to $\Phi$: lookup in book / table / Wolfram Alpha.
\end{remark*}
\noindent
\ul{Usage}: $X \sim \mathrm{N}(\mu, \sigma^2)$
\begin{align*}
\PP(a \le X \le b) &= \PP \left( \frac{a - \mu}{\sigma} \le \frac{X - \mu}{\sigma} \le \frac{b - \mu}{\sigma} \right) \\
&= \PP \left( \frac{a - \mu}{\sigma} \le Z \le \frac{b - \mu}{\sigma} \right) \\
&= \Phi \left( \frac{b - \mu}{\sigma} \right) - \Phi \left( \frac{a - \mu}{\sigma} \right)
\end{align*}
\ul{Special case}:
\[ a = \mu - k\sigma, \qquad b = \mu + k\sigma \]
($k \in \{1, 2, \dots\}$). Recall: $\sigma$ is the \emph{standard deviation}.
\[ \PP(a \le X \le b) = \Phi(k) - \Phi(-k) \]
``within $k$ standard deviations of the mean''.
\begin{center}
\includegraphics[width=0.6\linewidth]
{images/019f955ea30211ec.png}
\end{center}
\begin{definition*}
$X$ a continuous random variable.
The \emph{median} of $X$ is the number $m$ such that $\PP(X \le m) = \PP(X \ge m) = \half$, i.e.
\[ \int_{-\infty}^m f_X(x) \dd x = \int_m^\infty f_X(x) \dd x = \half \]
\end{definition*}
\subsubsection*{Comments}
\begin{itemize}
\item For $X \sim \mathrm{N}(\mu, \sigma^2)$ and other distributions symmetric about the mean, we have median $m = \EE[X]$.
\item Sometimes $|X - m|$ is better than $|X - \mu|$ for interpretation.
\end{itemize}
\subsubsection*{More than one continuous random variable}
Allow random variables to take values in $\RR^n$. For example
\[ X = (X_1, \dots, X_n) \in \RR^n \]
is a random variable. Say $X$ has density $f : \RR^n \to [0, \infty)$ if
\[ \PP(X_1 \le x_1, \dots, X_n \le x_n) = \int_{-\infty}^{x_1} \cdots \int_{-\infty}^{x_n} f(u_1, \dots, u_n) \prod_i \dd u_i \]
(integrate over $(-\infty, x_1] \times \cdots \times (-\infty, x_n]$) % ))
\myskip
\ul{Consequence}:
\[ \PP((X_1, \dots, X_n) \in A) = \int_A f(u) \dd u \]
for all ``measurable'' $A \subset \RR^n$.
\begin{definition*}
$f$ is called a \emph{multivariate density function} or (especially $n = 2$) a \emph{joint density}.
\end{definition*}
\begin{definition*}
Random variables $X_1, \dots, X_n$ are \emph{independent} if
\[ \PP(X_1 \le x_1, \dots, X_n \le x_n) = \PP(X_1 \le x_1) \cdots \PP(X_n \le x_n) \tag{$*$} \]
\end{definition*}
\noindent
\ul{Goal}: convert to statement about densities.
\begin{definition*}
$X = (X_1, \dots, X_n)$ has density $f$. The \emph{marginal density} $f_{X_i}$ of $X_i$ is
\[ f_{X_i}(x_i) = \int_{-\infty}^\infty \cdots \int_{-\infty}^\infty f(x_1, \dots, x_n) \prod_{j \neq i} \dd x_j \]
``density of $X_i$ viewed as a random variable by itself''.
\end{definition*}
\begin{theorem}
$X = (X_1, \dots, X_n)$ has density $f$.
\begin{enumerate}[(a)]
\item If $X_1, \dots, X_n$ are independent, with marginals $f_{X_1}, \dots, f_{X_n}$.
Then
\[ f(x_1, \dots, x_n) = f_{X_1}(x_1) \cdots f_{X_n}(x_n) \]
\item Suppose $f$ factorises as
\[ f(x_1, \dots, x_n) = g_1(x_1) \cdots g_n(x_n) \]
for non-negative functions $(g_i)$. Then $X_1, \dots, X_n$ are independent and marginal $f_{X_i} \propto g_i$.
\end{enumerate}
\end{theorem}
\begin{proof}
\begin{enumerate}[(a)]
\item \eqnoskip
\begin{align*}
\PP(X_1 \le x_1, \dots, X_n \le x_n)
&= \PP(X_1 \le x_1) \cdots \PP(X_n \le x_n) \\
&= \left[ \int_{-\infty}^{x_1} f_{X_1}(u_1) \dd u_1 \right] \cdots \left[ \int_{-\infty}^{x_n} f_{X_n}(u_n) \dd u_n \right] \\
&= \int_{-\infty}^{x_1} \cdots \int_{-\infty}^{x_n} \prod_i f_{X_i}(u_i) \prod_i \dd u_i
\end{align*}
which matches with definition of $f$.
\item Idea:
\begin{itemize}
\item Replace $g_i(x)$ with $h_i(x) = \frac{g_i(x)}{\int g_i(u) \dd u}$. $h_i$ \emph{is} a density.
\item compute integral at ($*$)
\end{itemize}
\end{enumerate}
\end{proof}
\subsubsection*{Transformation of Multiple Random Variables}
Key Example 1: $X, Y$ independent with densities $f_X, f_Y$. \\
Goal: density of $Z = X + Y$.
\begin{enumerate}[Step 1:]
\item Declare the joint density
\[ f_{X, Y}(x, y) = f_X(x) f_Y(y) .\]
\item CDF of $Z$:
\begin{align*}
\PP(X + Y \le z)
&= \iint_{\{x + y \le z\}} f_{X, Y}(x, y) \dd x \dd y \\
&= \int_{x = -\infty}^\infty \int_{y = -\infty}^{z - x} f_X(x) f_Y(y) \dd y \dd x \\
&= \int_{x = -\infty}^\infty \int_{y' = -\infty}^z f_Y(y' - x) f_X(x) \dd y' \dd x &\text{substitute $y' = y + x$} \\
&= \int_{y' = -\infty}^z \dd y' \left( \int_{x = -\infty}^\infty f_Y(y' - x) f_X(x) \dd x \right)
\end{align*}
So density of $Z$:
\[ f_Z(z) = \ub{\int_{x = -\infty}^\infty f_Y(z - x) f_X(x) \dd x}_{\text{Convolution of $f_X$ and $f_Y$}} \]
\end{enumerate}