%! TEX root = PM.tex
% vim: tw=50
% 28/11/2023 10AM
Birkhoff: $(E, \mathcal{E}, \mu)$ $\sigma$-finite, $f \in \Lp[1](\mu)$ and $\theta$ \gls{mp}, and $S_n(f) = f + f \circ \theta + \cdots + f \circ \theta^{n - 1}$. Then there exists a \gls{tinv_fn} $\ol{f}$ such that $\frac{S_n(f)}{n} \to \ol{f}$ as $n \to \infty$ $\mu$ \gls{al_ev}.

Von Neumann: if $\mu$ is finite, then $\frac{S_n(f)}{n} \to \ol{f}$ in $\Lp[1]$ as $n \to \infty$.

\begin{remark*}
\phantom{}
\begin{enumerate}[(1)]
\item \refsteplabel[Remark (1)]{lec24_rem1}%
If $\mu$ is a probability measure and $\theta$ is \gls{ergodic}, then $\ol{f}$ is \gls{al_surely} equal to a constant, so $\ol{f} = \int \ol{f} \dd \mu$ \gls{al_surely}. Also, since $\theta$ is \gls{mp} and $\frac{S_n(f)}{n} \to \ol{f}$ in $\Lp[1]$,
\[ \int f \dd \mu = \int \frac{S_n(f)}{n} \dd \mu \to \int \ol{f} \dd \mu .\]
Hence $\int \ol{f} \dd \mu = \int f \dd \mu$, and so $\frac{S_n(f)}{n} \to \EE(f)$ as $n \to \infty$, $\mu$ \gls{al_surely} and in $\Lp[1]$.
\item For $\theta$ \gls{mp} and $f \in \Lp[1]$, $\frac{S_n(f)}{n} \to \EE(f \given \mathcal{E}_0)$ $\mu$ \gls{al_surely} and in $\Lp[1]$ as $n \to \infty$. For $f \in \Lp[2]$, this is just (a version of) the orthogonal projection of $f$ onto $\Lp[2](\mathcal{E}_0)$.
\end{enumerate}
\end{remark*}

\subsubsection*{Bernoulli Shifts and Strong Law of Large Numbers}

On the infinite product space $E = \RR^\NN = \{x = (x_1, x_2, \ldots) : x_i \in \RR \ \forall i\}$ consider the cylinder sets
\[ \mathcal{A} = \left\{ \prod_{n = 1}^\infty A_n : A_n \in \mathcal{B} = \mathcal{B}(\RR) \ \forall n, \text{ and } A_n = \RR \ \forall n \ge N \text{ for some $N \in \NN$}\right\} .\]
For example,
\[ (0, 1) \times \RR \times \RR \times \cdots \in \mathcal{A} \]
whereas
\[ (0, 1) \times (0, 1) \times (0, 1) \times \cdots \notin \mathcal{A} .\]
Then $\mathcal{A}$ is a \pisys{} and we set $\mathcal{E} = \sigma(\mathcal{A})$. Check:
\begin{itemize}
\item $\mathcal{E} = \sigma(\mathcal{A}) = \sigma(f_n : n \in \NN)$ where $f_n : E \to \RR$, $f_n(x) = x_n$ are the coordinate maps.
\item $\mathcal{E}$ is the Borel \sigalg{} generated by the topology of pointwise convergence.
\end{itemize}

Now consider a sequence of IID random variables $(X_n)_{n \in \NN}$ on some probability space $(\Omega, \mathcal{F}, \PP)$ (such a sequence always exists), with common distribution or law $\mu_{X_n} = \PP \circ X_n^{-1} = m$ for all $n$.

The map $X : (\Omega, \mathcal{F}) \to (E, \mathcal{E})$, $X(\omega) = (X_1(\omega), X_2(\omega), \ldots)$, is measurable. The image measure $\PP \circ X^{-1} \eqdef \mu$ is a probability measure on $(E, \mathcal{E})$ which satisfies, for any $A = A_1 \times \cdots \times A_N \times \RR \times \cdots \in \mathcal{A}$,
\begin{align*}
\mu(A) &= \PP \circ X^{-1}(A) \\
&= \PP(X_1 \in A_1, X_2 \in A_2, \ldots, X_N \in A_N) \\
&= \PP(X_1 \in A_1)\PP(X_2 \in A_2) \cdots \PP(X_N \in A_N) &&\text{as the $X_i$ are IID} \\
&= m(A_1) m(A_2) \cdots m(A_N) \\
&= \prod_{n = 1}^\infty m(A_n)
\end{align*}
and $\mu$ is the unique probability measure on $\mathcal{E}$ such that
\[ \mu(A) = \prod_{n = 1}^\infty m(A_n) \qquad \forall A = \prod_n A_n \in \mathcal{A} \]
(uniqueness holds because $\mathcal{A}$ is a \pisys{} generating $\mathcal{E}$).

Under $\mu$, the coordinate maps $f_n$ are IID with law $m$. The probability space $(E, \mathcal{E}, \mu)$ is called the canonical model for an IID sequence of random variables with law $m$.

Define the shift map $\theta : E \to E$ by $\theta(x_1, x_2, \ldots) = (x_2, x_3, \ldots)$ (similar to $x \mapsto 2x \pmod{1}$ on $((0, 1), \lambda)$).
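As a concrete illustration of the canonical model (a worked example; the particular choice of $m$ below is just for illustration, and $\delta_a$ denotes the unit point mass at $a$): take $m = \frac{1}{2}(\delta_0 + \delta_1)$, the law of a fair coin toss. Then $\mu$ is the law of an IID sequence of fair coin tosses, and for the cylinder set $A = \{1\} \times \{1\} \times \RR \times \RR \times \cdots \in \mathcal{A}$,
\[ \mu(A) = m(\{1\})^2 = \frac{1}{4} , \]
the probability that the first two tosses both give $1$. Note also that $\theta^{-1}(A) = \RR \times \{1\} \times \{1\} \times \RR \times \cdots$ has the same $\mu$-measure, consistent with the next result.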
\begin{theorem*}
On $(E, \mathcal{E}, \mu)$, the shift map $\theta$ is measurable, \gls{mp} and \gls{ergodic}.
\end{theorem*}

\begin{proof}
Measurability is obvious. \gls{mp}? It is enough to check on $\mathcal{A}$, i.e.\ for $A = A_1 \times \cdots \times A_N \times \RR \times \cdots$. Indeed:
\begin{align*}
\mu \circ \theta^{-1}(A) &= \mu(\RR \times A_1 \times A_2 \times \cdots) \\
&= \prod_{i = 1}^\infty m(A_i) \\
&= \mu(A) .
\end{align*}
Ergodicity: Recall the \gls{tail_sig}
\[ \tailsig = \bigcap_n \tau_n \]
where $\tau_n = \sigma(x_{n + 1}, x_{n + 2}, \ldots) = \sigma(f_{n + 1}, f_{n + 2}, \ldots)$. For $A = \prod_n A_n \in \mathcal{A}$ and any $n$,
\[ \theta^{-n}(A) = \underbrace{\RR \times \cdots \times \RR}_{n \text{ times}} \times A_1 \times A_2 \times \cdots = \{x_{n + 1} \in A_1, x_{n + 2} \in A_2, \ldots\} \in \tau_n ,\]
and since $\mathcal{A}$ generates $\mathcal{E}$ (the collection of sets $B$ with $\theta^{-n}(B) \in \tau_n$ is a \sigalg{} containing $\mathcal{A}$), in fact $\theta^{-n}(B) \in \tau_n$ for all $B \in \mathcal{E}$ and all $n$. If $A \in \mathcal{E}_0$, then $\theta^{-1}(A) = A$, so $\theta^{-n}(A) = A$ for all $n$. So $A \in \tau_n$ for all $n$, hence $A \in \bigcap_n \tau_n = \tailsig$, i.e. $\mathcal{E}_0 \subseteq \tailsig$. But the coordinates $(f_n)$ are IID under $\mu$ and hence $\tailsig$ is $\mu$-trivial (\nameref{kol_0_1}), so $\mathcal{E}_0$ is $\mu$-trivial.
\end{proof}

\begin{theorem*}
Let $m$ be a probability measure on $\RR$ such that $\int_\RR |x| \dd m(x) < \infty$ and $\int_\RR x \dd m(x) = \nu$. Let $(E, \mathcal{E}, \mu)$ be the canonical model, where the coordinate maps $f_n(x) = x_n$ are IID with law $m$. Then
\[ \mu \left( \left\{ x : \frac{x_1 + \cdots + x_n}{n} \to \nu \text{ as $n \to \infty$} \right\} \right) = 1 .\]
\end{theorem*}

\begin{proof}
Let $\theta : E \to E$ be the shift map $\theta(x_1, x_2, \ldots) = (x_2, x_3, \ldots)$. It is measure preserving and \gls{ergodic}. Consider $f : E \to \RR$ given by $f(x) = x_1$. Then $f \in \Lp[1](\mu)$ as $\int |f| \dd \mu = \int_\RR |x_1| \dd m(x_1) < \infty$. Also,
\[ S_n(f) = f + f \circ \theta + \cdots + f \circ \theta^{n - 1} = x_1 + x_2 + \cdots + x_n .\]
Hence, by the Birkhoff and von Neumann theorems and \nameref{lec24_rem1} from earlier this lecture (since $\theta$ is ergodic),
\[ \frac{S_n(f)}{n} = \frac{x_1 + \cdots + x_n}{n} \to \ol{f} = \int f \dd \mu = \int_\RR x_1 \dd m(x_1) = \nu \]
$\mu$ \gls{al_surely}.
\end{proof}

\begin{theorem*}[Kolmogorov Strong Law of Large Numbers (1930)]
Let $(X_n)$ be a sequence of IID integrable random variables, with $\EE X_1 = \nu$. Set $S_n = \sum_{i = 1}^n X_i$. Then $\frac{S_n}{n} \to \nu$ \gls{al_surely} as $n \to \infty$.
\end{theorem*}

\begin{proof}
Let $m$ be the common law of the $X_n$, and let $\mu = \PP \circ X^{-1}$ where $X : \Omega \to E$ is $X(\omega) = (X_1(\omega), X_2(\omega), \ldots)$. Then, by the previous theorem,
\[ \PP \left( \frac{S_n}{n} \to \nu \right) = \mu \left( \left\{ x : \frac{x_1 + \cdots + x_n}{n} \to \nu \right\} \right) = 1 .\]
\end{proof}

This is the end of the course.

\begin{remark*}
\phantom{}
\begin{enumerate}[(1)]
\item If $(\mu_n)$ is a sequence of probability measures that converges \gls{conv_wly} to $\mu$, then $(\mu_n)$ is ``tight'', i.e. $\forall \eps > 0$, $\exists$ a compact set $K$ such that $\mu_n(K^c) < \eps$ for all $n$.
\item If $(\mu_n)$ is a tight sequence of probability measures, then there exist a subsequence $(n_k)$ and a probability measure $\mu$ such that $\mu_{n_k} \to \mu$ \gls{conv_wly} (Prokhorov's Theorem; compare the Banach--Alaoglu Theorem).
\end{enumerate}
\end{remark*}
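For illustration, a standard example showing that tightness cannot be dropped in (2): the sequence $\mu_n = \delta_n$ of unit point masses at $n \in \NN$ is not tight, since any compact $K \subseteq \RR$ satisfies $\mu_n(K^c) = 1$ for all large $n$; and indeed no subsequence of $(\mu_n)$ converges \gls{conv_wly} to a probability measure, as all the mass escapes to infinity.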