%! TEX root = PM.tex
% vim: tw=50
% 28/11/2023 10AM
Birkhoff: $(E, \mathcal{E}, \mu)$ $\sigma$-finite, $f \in \Lp[1](\mu)$ and $\theta$ \gls{mp}, and $S_n(f) = f + f \circ \theta + \cdots + f \circ \theta^{n - 1}$. Then there exists a \gls{tinv_fn} $\ol{f}$ such that $\frac{S_n(f)}{n} \to \ol{f}$ as $n \to \infty$ $\mu$ \gls{al_ev}.

Von Neumann: if $\mu$ is finite, then $\frac{S_n(f)}{n} \to \ol{f}$ in $\Lp[1]$ as $n \to \infty$.

\begin{remark*}
\phantom{}
\begin{enumerate}[(1)]
\item \refsteplabel[Remark (1)]{lec24_rem1}%
If $\mu$ is a probability measure and $\theta$ is \gls{ergodic}, then $\ol{f}$ is \gls{al_surely} equal to a constant, so $\ol{f} = \int \ol{f} \dd \mu$ \gls{al_surely}. Also, since $\theta$ is \gls{mp} and $\frac{S_n(f)}{n} \to \ol{f}$ in $\Lp[1]$,
\[ \int f \dd \mu = \int \frac{S_n(f)}{n} \dd \mu \to \int \ol{f} \dd \mu .\]
Hence $\int \ol{f} \dd \mu = \int f \dd \mu$, and so $\frac{S_n(f)}{n} \to \EE(f)$ as $n \to \infty$, $\mu$ \gls{al_surely} and in $\Lp[1]$.
\item For $\theta$ \gls{mp} and $f \in \Lp[1]$, $\frac{S_n(f)}{n} \to \EE(f \given \mathcal{E}_0)$ $\mu$ \gls{al_surely} and in $\Lp[1]$ as $n \to \infty$. For $f \in \Lp[2]$, this is just (a version of) the orthogonal projection of $f$ onto $\Lp[2](\mathcal{E}_0)$.
\end{enumerate}
\end{remark*}

\subsubsection*{Bernoulli Shifts and Strong Law of Large Numbers}

On the infinite product space $E = \RR^\NN = \{x = (x_1, x_2, \ldots) : x_i \in \RR \ \forall i\}$ consider the cylinder sets
\[ \mathcal{A} = \left\{ \prod_{n = 1}^\infty A_n : A_n \in \mathcal{B} = \mathcal{B}(\RR) \ \forall n, \text{ and } A_n = \RR \ \forall n \ge N \text{ for some $N \in \NN$}\right\} .\]
For example,
\[ (0, 1) \times \RR \times \RR \times \cdots \in \mathcal{A} \]
whereas
\[ (0, 1) \times (0, 1) \times (0, 1) \times \cdots \notin \mathcal{A} .\]
Then $\mathcal{A}$ is a \pisys{} and we set $\mathcal{E} = \sigma(\mathcal{A})$. Check:
\begin{itemize}
\item $\mathcal{E} = \sigma(\mathcal{A}) = \sigma(f_n : n \in \NN)$ where $f_n : E \to \RR$, $f_n(x) = x_n$ are the coordinate maps.
\item $\mathcal{E}$ is the Borel \sigalg{} generated by the topology of pointwise convergence.
\end{itemize}

Now consider a sequence of IID random variables $(X_n)_{n \in \NN}$ on some probability space $(\Omega, \mathcal{F}, \PP)$ (such a sequence always exists), with common distribution or law $\mu_{X_n} = \PP \circ X_n^{-1} = m$ for all $n$.

The map $X : (\Omega, \mathcal{F}) \to (E, \mathcal{E})$, $X(\omega) = (X_1(\omega), X_2(\omega), \ldots)$, is measurable. The image measure $\PP \circ X^{-1} \eqdef \mu$ is a probability measure on $(E, \mathcal{E})$ which satisfies, for any $A = A_1 \times \cdots \times A_N \times \RR \times \cdots \in \mathcal{A}$,
\begin{align*}
\mu(A) &= \PP \circ X^{-1}(A) \\
&= \PP(X_1 \in A_1, X_2 \in A_2, \ldots, X_N \in A_N) \\
&= \PP(X_1 \in A_1)\PP(X_2 \in A_2) \cdots \PP(X_N \in A_N) &&\text{as the $X_i$ are IID} \\
&= m(A_1) m(A_2) \cdots m(A_N) \\
&= \prod_{n = 1}^\infty m(A_n)
\end{align*}
and $\mu$ is the unique probability measure on $\mathcal{E}$ such that
\[ \mu(A) = \prod_{n = 1}^\infty m(A_n) \qquad \forall A = \prod_n A_n \in \mathcal{A} \]
(uniqueness holds because $\mathcal{A}$ is a \pisys{} generating $\mathcal{E}$).

Under $\mu$, the coordinate maps $f_n$ are IID with law $m$. The probability space $(E, \mathcal{E}, \mu)$ is called the canonical model for an IID sequence of random variables with law $m$.

Define the shift map $\theta : E \to E$ by $\theta(x_1, x_2, \ldots) = (x_2, x_3, \ldots)$ (similar to $x \mapsto 2x \pmod{1}$ on $((0, 1), \lambda)$).
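As a concrete illustration of the canonical model (a worked example; the particular choice of $m$ below is just for illustration, and $\delta_a$ denotes the unit point mass at $a$): take $m = \frac{1}{2}(\delta_0 + \delta_1)$, the law of a fair coin toss. Then $\mu$ is the law of an IID sequence of fair coin tosses, and for the cylinder set $A = \{1\} \times \{1\} \times \RR \times \RR \times \cdots \in \mathcal{A}$,
\[ \mu(A) = m(\{1\})^2 = \frac{1}{4} , \]
the probability that the first two tosses both give $1$. Note also that $\theta^{-1}(A) = \RR \times \{1\} \times \{1\} \times \RR \times \cdots$ has the same $\mu$-measure, consistent with the next result.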
\begin{theorem*}
On $(E, \mathcal{E}, \mu)$, the shift map $\theta$ is measurable, \gls{mp} and \gls{ergodic}.
\end{theorem*}

\begin{proof}
Measurability is obvious. \gls{mp}? It is enough to check on $\mathcal{A}$, i.e.\ for $A = A_1 \times \cdots \times A_N \times \RR \times \cdots$. Indeed:
\begin{align*}
\mu \circ \theta^{-1}(A) &= \mu(\RR \times A_1 \times A_2 \times \cdots) \\
&= \prod_{i = 1}^\infty m(A_i) \\
&= \mu(A) .
\end{align*}
Ergodicity: Recall the \gls{tail_sig}
\[ \tailsig = \bigcap_n \tau_n \]
where $\tau_n = \sigma(x_{n + 1}, x_{n + 2}, \ldots) = \sigma(f_{n + 1}, f_{n + 2}, \ldots)$. For $A = \prod_n A_n \in \mathcal{A}$ and any $n$,
\[ \theta^{-n}(A) = \underbrace{\RR \times \cdots \times \RR}_{n \text{ times}} \times A_1 \times A_2 \times \cdots = \{x_{n + 1} \in A_1, x_{n + 2} \in A_2, \ldots\} \in \tau_n ,\]
and since $\mathcal{A}$ generates $\mathcal{E}$ (the collection of sets $B$ with $\theta^{-n}(B) \in \tau_n$ is a \sigalg{} containing $\mathcal{A}$), in fact $\theta^{-n}(B) \in \tau_n$ for all $B \in \mathcal{E}$ and all $n$. If $A \in \mathcal{E}_0$, then $\theta^{-1}(A) = A$, so $\theta^{-n}(A) = A$ for all $n$. So $A \in \tau_n$ for all $n$, hence $A \in \bigcap_n \tau_n = \tailsig$, i.e. $\mathcal{E}_0 \subseteq \tailsig$. But the coordinates $(f_n)$ are IID under $\mu$ and hence $\tailsig$ is $\mu$-trivial (\nameref{kol_0_1}), so $\mathcal{E}_0$ is $\mu$-trivial.
\end{proof}

\begin{theorem*}
Let $m$ be a probability measure on $\RR$ such that $\int_\RR |x| \dd m(x) < \infty$ and $\int_\RR x \dd m(x) = \nu$. Let $(E, \mathcal{E}, \mu)$ be the canonical model, where the coordinate maps $f_n(x) = x_n$ are IID with law $m$. Then
\[ \mu \left( \left\{ x : \frac{x_1 + \cdots + x_n}{n} \to \nu \text{ as $n \to \infty$} \right\} \right) = 1 .\]
\end{theorem*}

\begin{proof}
Let $\theta : E \to E$ be the shift map $\theta(x_1, x_2, \ldots) = (x_2, x_3, \ldots)$. It is measure preserving and \gls{ergodic}. Consider $f : E \to \RR$ given by $f(x) = x_1$. Then $f \in \Lp[1](\mu)$ as $\int |f| \dd \mu = \int_\RR |x_1| \dd m(x_1) < \infty$. Also,
\[ S_n(f) = f + f \circ \theta + \cdots + f \circ \theta^{n - 1} = x_1 + x_2 + \cdots + x_n .\]
Hence, by the Birkhoff and von Neumann theorems and \nameref{lec24_rem1} from earlier this lecture (since $\theta$ is ergodic),
\[ \frac{S_n(f)}{n} = \frac{x_1 + \cdots + x_n}{n} \to \ol{f} = \int f \dd \mu = \int_\RR x_1 \dd m(x_1) = \nu \]
$\mu$ \gls{al_surely}.
\end{proof}

\begin{theorem*}[Kolmogorov Strong Law of Large Numbers (1930)]
Let $(X_n)$ be a sequence of IID integrable random variables, with $\EE X_1 = \nu$. Set $S_n = \sum_{i = 1}^n X_i$. Then $\frac{S_n}{n} \to \nu$ \gls{al_surely} as $n \to \infty$.
\end{theorem*}

\begin{proof}
Let $m$ be the common law of the $X_n$, and let $\mu = \PP \circ X^{-1}$ where $X : \Omega \to E$ is $X(\omega) = (X_1(\omega), X_2(\omega), \ldots)$. Then, by the previous theorem,
\[ \PP \left( \frac{S_n}{n} \to \nu \right) = \mu \left( \left\{ x : \frac{x_1 + \cdots + x_n}{n} \to \nu \right\} \right) = 1 .\]
\end{proof}

This is the end of the course.

\begin{remark*}
\phantom{}
\begin{enumerate}[(1)]
\item If $(\mu_n)$ is a sequence of probability measures that converges \gls{conv_wly} to $\mu$, then $(\mu_n)$ is ``tight'', i.e. $\forall \eps > 0$, $\exists$ a compact set $K$ such that $\mu_n(K^c) < \eps$ for all $n$.
\item If $(\mu_n)$ is a tight sequence of probability measures, then there exist a subsequence $(n_k)$ and a probability measure $\mu$ such that $\mu_{n_k} \to \mu$ \gls{conv_wly} (Prokhorov's Theorem; compare the Banach--Alaoglu Theorem).
\end{enumerate}
\end{remark*}
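For illustration, a standard example showing that tightness cannot be dropped in (2): the sequence $\mu_n = \delta_n$ of unit point masses at $n \in \NN$ is not tight, since any compact $K \subseteq \RR$ satisfies $\mu_n(K^c) = 1$ for all large $n$; and indeed no subsequence of $(\mu_n)$ converges \gls{conv_wly} to a probability measure, as all the mass escapes to infinity.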