%! TEX root = PM.tex
% vim: tw=50
% 17/10/2023 10AM

\begin{example*}
  \[ \{\text{$A_n$ infinitely often}\}
  = \{\text{infinitely many of the $\{A_n\}$ occur}\}
  = \bigcap_{n = 1}^\infty \bigcup_{m \ge n} A_m \]
  So if $A_n = \{\text{$H$ in the $n$-th toss}\}$, then
  \[ \{\text{$A_n$ infinitely often}\}
  = \{\text{infinitely many heads}\} \]
\end{example*}

\begin{remark*}
  The lemma holds for any measure $\mu$ (not just
  probability measures).
\end{remark*}

\begin{flashcard}[borel-cantelli-lemma-2]
\begin{lemma*}[Borel Cantelli Lemma 2]
  \refstepcounter{customlemma}
  \label{borel_cantelli_2}
  \cloze{
  Assume the events $(A_n)$ are independent. Then
  if $\sum_n \PP(A_n) = \infty$, then
  $\PP(\text{$A_n$ infinitely often}) = 1$.
  }
\end{lemma*}

\begin{proof}
  \cloze{
  We will use the inequality $1 - a \le e^{-a}$
  for all $a \ge 0$. Now, $(A_n)_{n \in \NN}$ are
  independent so $(A_n^c)_{n \in \NN}$ are
  independent. So, for all $n$ and $N \ge n$,
  \[ 0
  \le \PP \left( \bigcap_{m = n}^N A_m^c \right)
  = \prod_{m = n}^N \PP (A_m^c)
  = \prod_{m = n}^N (1 - \PP(A_m))
  \le e^{-\sum_{m = n}^N \PP(A_m)} \]
  Taking $N \to \infty$,
  \[ 0
  \le \PP \left( \bigcap_{m = n}^\infty A_m^c \right)
  \le \lim_{N \to \infty} \PP \left( \bigcap_{m =
  n}^N A_m^c \right)
  \le \lim_{N \to \infty} e^{-\sum_{m = n}^N
  \PP(A_m)}
  = e^{-\sum_{m = n}^\infty \PP(A_m)}
  = 0 \]
  So,
  \[ \PP \left( \bigcap_{m = n}^\infty A_m^c
  \right) = 0 \]
  i.e.
  \[ \label{6_53_star}
  \PP \left( \bigcup_{m = n}^\infty A_m \right)
  = 1 \qquad \forall n \tag{$*$} \]
  Let $\bigcup_{m = n}^\infty A_m \eqdef B_n$. Then
  \[ B_n \downarrow \bigcap_n B_n = \bigcap_n
  \bigcup_{m \ge n} A_m = \{\text{$A_n$ infinitely
  often}\} .\]
  Since $\PP(B_n) = 1$ for all $n$ (by
  \eqref{6_53_star}) and $B_n$ is decreasing,
  continuity of $\PP$ gives $\PP(\text{$A_n$
  infinitely often}) = \lim_{n \to \infty}
  \PP(B_n) = 1$.
  }
\end{proof}
\end{flashcard}

\begin{remark*}
  If $(A_n)_{n \in \NN}$ are independent, then
  $\{\text{$A_n$ infinitely often}\}$ is a $0/1$
  event. For all ``tail events'', the probability
  is $0/1$ (Kolmogorov $0$--$1$ law, will prove
  later).
\end{remark*}

\newpage
\section{Measurable Functions}

\begin{flashcard}[measurable-func-defn]
\begin{definition*}[measurable function]
  \cloze{
  Let $(E, \mathcal{E})$ and $(G, \mathcal{G})$ be
  two measurable spaces. A map $f : E \to G$ is
  called \emph{measurable} if $f^{-1}(A) \in
  \mathcal{E} ~\forall A \in \mathcal{G}$, where
  $f^{-1}(A)$ is the pre-image of $A$ under $f$,
  i.e.
  \[ f^{-1}(A) = \{x \in E : f(x) \in A\} .\]
  When $(G, \mathcal{G}) = (\RR,
  \mathcal{B}(\RR))$, we simply say $f$ is
  measurable. If $E$ is a topological space and
  $\mathcal{E} = \mathcal{B}(E)$, then $f$ is
  called Borel.
  }
\end{definition*}
\end{flashcard}

\begin{remark*}
  Preimages preserve set operations:
  \[ f^{-1} \left( \bigcup_i A_i \right) =
  \bigcup_i f^{-1}(A_i) \qquad \text{and} \qquad
  f^{-1} (G \setminus A) = E \setminus f^{-1}(A) .\]
  (Checking these is an exercise).

  So, $\{f^{-1}(A) : A \in \mathcal{G}\}$ is a
  $\sigma$-algebra on $E$ and $\{A \subset G :
  f^{-1}(A) \in \mathcal{E}\}$ is a
  $\sigma$-algebra on $G$. If $\mathcal{G} =
  \sigma(\mathcal{A})$ and $f^{-1}(A) \in
  \mathcal{E} ~\forall A \in \mathcal{A}$, then
  $\{A \subset G : f^{-1}(A) \in \mathcal{E}\}$ is
  a $\sigma$-algebra containing $\mathcal{A}$,
  hence it contains $\sigma(\mathcal{A}) =
  \mathcal{G}$. So $f$ is measurable.

  In particular, when $G = \RR$, $\mathcal{G} =
  \mathcal{B}$, then $\mathcal{B} =
  \sigma(\mathcal{A})$ where $\mathcal{A} =
  \{(-\infty, y] : y \in \RR\}$, so $f$ is Borel
  measurable if and only if $\{x \in E : f(x) \le
  y\} \in \mathcal{E} ~\forall y \in \RR$. If $E$
  is a topological space with $\mathcal{E} =
  \mathcal{B}(E)$ and $f : E \to \RR$ is
  continuous, then for $\mathcal{A} = \{U
  \subseteq \RR : \text{$U$ open}\}$, $f^{-1}(U)
  \in \mathcal{E}$ for all $U \in \mathcal{A}$
  (as $f^{-1}(U)$ is open). So $f$ is
  Borel-measurable.
\end{remark*}

\begin{example*}
  For $A \subseteq E$, the indicator function
  \[ \mathbbm{1}_A(x) = \begin{cases}
    1 & x \in A \\
    0 & x \not\in A
  \end{cases} \]
  is measurable if and only if $A \in
  \mathcal{E}$.

  Composition of measurable functions is
  measurable (easy exercise).

  For a family of functions $f_i : E \to G$, $i
  \in I$, we can make all $(f_i)$ measurable with
  respect to the $\sigma$-algebra
  \[ \mathcal{E} = \sigma(f_i^{-1}(A) : A \in
  \mathcal{G}, i \in I) .\]
  $\mathcal{E}$ is called the $\sigma$-algebra
  generated by $\{f_i\}_{i \in I}$.
\end{example*}

\begin{proposition*}
  If $f_1, f_2, \ldots$ are measurable
  $\RR$-valued functions, then
  \[ f_1 + f_2, \quad f_1 f_2, \quad \inf_n f_n,
  \quad \sup_n f_n, \quad \liminf_n f_n, \quad
  \limsup_n f_n \]
  are all measurable.
\end{proposition*}

\begin{proof}
  See \es{1}.
\end{proof}

\begin{flashcard}[]
\begin{theorem*}[Monotone Class Theorem]
  \refstepcounter{customtheorem}
  \label{monotone_class_thm}
  \cloze{
  Let $(E, \mathcal{E})$ be a measurable space and
  $\mathcal{A}$ a $\pi$-system generating
  $\mathcal{E}$. Let $\mathcal{V}$ be a vector
  space of bounded functions $f : E \to \RR$ such
  that
  \begin{enumerate}[(1)]
    \item $1 \in \mathcal{V}$ and $\mathbbm{1}_A
    \in \mathcal{V} ~\forall A \in \mathcal{A}$
    \item If $f_n \in \mathcal{V} ~\forall n$ and
      $f$ bounded with $0 \le f_n \uparrow f$,
      then $f \in \mathcal{V}$.
  \end{enumerate}
  Then $\mathcal{V}$ contains all bounded
  measurable functions.
  }
\end{theorem*}

\begin{proof}
  \cloze{Let $\mathcal{D} = \{A \in \mathcal{E} :
  \mathbbm{1}_A \in \mathcal{V}\}$. Then
  $\mathcal{D}$ is a $d$-system. This is because $1 =
  \mathbbm{1}_E \in \mathcal{V}$, so $E \in
  \mathcal{D}$, and $\mathbbm{1}_{B \setminus
  A} = \mathbbm{1}_B - \mathbbm{1}_A \in
  \mathcal{V}$ if $A, B \in \mathcal{D}$ with
  $A \subseteq B$, as
  $\mathcal{V}$ is a vector space. If $A_n \in
  \mathcal{D}$, i.e. $\mathbbm{1}_{A_n} \in
  \mathcal{V}$, $A_n \uparrow A$, then
  $\mathbbm{1}_{A_n} \uparrow \mathbbm{1}_A$ so
  by (2), $\mathbbm{1}_A \in \mathcal{V}$, so $A
  \in \mathcal{D}$.

  It contains
  the $\pi$-system $\mathcal{A}$ so by
  \nameref{dynkins_lemma}, contains
  $\sigma(\mathcal{A}) = \mathcal{E}$, so
  $\mathcal{D} = \mathcal{E}$, i.e. $\mathbbm{1}_A
  \in \mathcal{V} ~\forall A \in \mathcal{E}$.
  Since $\mathcal{V}$ is a vector space, it
  contains all finite linear combinations of
  indicators of measurable sets. So, for $f$
  bounded, non-negative and measurable,
  \[ f_n = 2^{-n} \left\lfloor 2^n f \right\rfloor
  \in \mathcal{V} .\]
  Indeed,
  \begin{align*}
    f_n(x)
    &= 2^{-n} \left\lfloor 2^n f(x) \right\rfloor
    \\
    &= 2^{-n} \sum_{j = 0}^{K_n} j \mathbbm{1}_{\{2^n
    f(x) \in [j, j + 1)\}} \\
    &= 2^{-n} \sum_{j = 0}^{K_n} j
    \mathbbm{1}_{f^{-1}([j / 2^n, (j + 1)/2^n))}(x)
  \end{align*}
  for some finite $K_n$ since $f$ is bounded. Then
  $f_n \le f \le f_n + 2^{-n}$. So $|f_n - f| \to
  0$ as $n \to \infty$ and $f_n \uparrow f$.

  So $0 \le f_n \uparrow f, f_n \in \mathcal{V}$,
  and $f$ is bounded non-negative. So $f \in
  \mathcal{V}$ by (2). Finally, for any $f$
  bounded measurable, $f = f^+ - f^-$, where $f^+
  = \max(f, 0)$, $f^- = \max(-f, 0)$. $f^+$, $f^-$
  are bounded non-negative measurable and $\in
  \mathcal{V}$. So since $\mathcal{V}$ is a vector
  space, $f \in \mathcal{V}$.}
\end{proof}
\end{flashcard}