% vim: tw=50
% 06/10/2022 10AM
\setcounter{section}{-1}
\section{Introduction}
\begin{definition*}[Markov Chains]
\emph{Markov chains} are random processes (sequences of random variables) that retain no memory of the past.
\[ \text{past} \,\, \underset{\text{present}}{\perp} \,\, \text{future} \]
\end{definition*}
\subsubsection*{History}
\begin{itemize}
\item Markov in 1906
\item Poisson process, branching processes existed before. \\ \emph{Motivation}: Extend the law of large numbers to the non-IID setting.
\item Kolmogorov in 1930: continuous time Markov processes.
\item Brownian motion: fundamental object in modern probability theory.
\end{itemize}
\subsubsection*{Why Study Markov Chains?}
They are the simplest mathematical models for random phenomena evolving in time.
\begin{itemize}
\item Simple: amenable to analysis -- tools from probability, analysis, combinatorics.
\item Applications: population growth, mathematical genetics, queuing networks, Monte Carlo simulation, \dots
\end{itemize}
\subsection{PageRank algorithm}
This is an example of a simple algorithm which was previously used by search engines such as Google.
\myskip
Model the web as a directed graph $G = (V, E)$. $V$ is the set of websites (the vertices), and $(i, j) \in E$ if and only if page $i$ contains a link to page $j$. Let $L(i)$ be the number of outgoing edges from $i$. Define
\[ \hat{p}_{ij} = \begin{cases} \frac{1}{L(i)} & \text{if $L(i) > 0$ and $(i, j) \in E$} \\ \frac{1}{n} & \text{if $L(i) = 0$} \\ 0 & \text{otherwise} \end{cases} \qquad (n = |V|) \]
Now also define, for $\alpha \in (0, 1)$,
\[ p_{ij} = \alpha \hat{p}_{ij} + (1 - \alpha) \frac{1}{n} \]
At each step, a random surfer tosses a coin: with probability $\alpha$ they follow a link according to $\hat{p}$, and with probability $1 - \alpha$ they jump to a page chosen uniformly at random. We want to find the invariant distribution:
\[ \pi = \pi p \]
where
\[ \pi_i = \text{proportion of time spent at state $i$ by the surfer} \]
Once we solve for this, if $\pi_i > \pi_j$ then $i$ is more important than $j$ and Google ranks it higher.
\newpage
\section{Markov Chains}
We will always denote the state space by $I$, and it will always be finite or countable. The probability space will always be $(\Omega, \mathcal{F}, \PP)$. We will now more formally define a Markov chain:
\begin{flashcard}
\begin{definition*}[Markov Chain]
A stochastic process $(X_n)_{n \ge 0}$ is called a \emph{Markov chain} (with values in $I$) if \cloze{$\forall n \ge 0, \forall x_0, \dots, x_{n + 1} \in I$},
\[ \cloze{\PP(\ub{X_{n + 1} = x_{n + 1}}_{\text{future}} \mid \ub{X_n = x_n}_{\text{present}}, \ub{\dots, X_0 = x_0}_{\text{past}}) = \PP(X_{n + 1} = x_{n + 1} \mid X_n = x_n)} \]
\end{definition*}
\end{flashcard}
\noindent If $\PP(X_{n + 1} = y \mid X_n = x)$ is independent of $n$ $\forall x, y$, then $X$ is called \emph{time-homogeneous} (this is what we will focus on in this course). Otherwise it is called \emph{time-inhomogeneous}.
\myskip
Define $P(x, y) = \PP(X_1 = y \mid X_0 = x)$ for $x, y \in I$. $P$ is called the transition matrix of the Markov chain. Since
\[ \sum_{y \in I} P(x, y) = \sum_{y \in I} \PP(X_1 = y \mid X_0 = x) = 1, \]
$P$ is called a \emph{stochastic matrix}.
\begin{flashcard}
\begin{definition*}
$(X_n)_{n \ge 0}$ with values in $I$ is called $\Markov(\lambda, P)$ if \cloze{$X_0 \sim \lambda$} and $(X_n)_{n \ge 0}$ is a Markov chain with transition matrix $P$, i.e.
\begin{enumerate}[(1)]
\item \cloze{$\PP(X_0 = x) = \lambda(x)$ for all $x \in I$}
\item \cloze{$\PP(X_{n + 1} = x_{n + 1} \mid X_n = x_n, \dots, X_0 = x_0) = P(x_n, x_{n + 1})$ for all $n, x_0, \dots, x_{n + 1}$}
\end{enumerate}
\end{definition*}
\end{flashcard}
\begin{notation*}
$P(x, y) = p_{xy} = p(x, y)$
\end{notation*}
\noindent Draw a diagram (directed graph), and put a directed edge between $x$ and $y$ ($x \to y$) if $P(x, y) > 0$, and write the probability on top of these arrows.
\begin{itemize}
\item \[ P = \begin{bmatrix} \alpha & 1 - \alpha \\ 1 - \beta & \beta \end{bmatrix} \qquad \alpha, \beta \in (0, 1) \]
\begin{center}
\includegraphics[width=0.6\linewidth] {images/9ace69ca455b11ed.png}
\end{center}
\item \[ P = \begin{bmatrix} \half & \half & 0 \\ 0 & \frac{1}{3} & \frac{2}{3} \\ 1 & 0 & 0 \end{bmatrix} \]
\begin{center}
\includegraphics[width=0.6\linewidth] {images/bddb4c6c455b11ed.png}
\end{center}
\end{itemize}
\begin{theorem*}
$X$ is $\Markov(\lambda, P)$ if and only if for all $n \ge 0$ and $x_0, \dots, x_n \in I$,
\[ \PP(X_0 = x_0, \dots, X_n = x_n) = \lambda(x_0) P(x_0, x_1) \cdots P(x_{n - 1}, x_n) \]
\end{theorem*}
\begin{proof}
\begin{enumerate}
\item[$\Rightarrow$]
\begin{align*}
\PP(X_n = x_n, \dots, X_0 = x_0) &= \PP(X_n = x_n \mid X_{n - 1} = x_{n - 1}, \dots, X_0 = x_0) \\
&\qquad \times \PP(X_{n - 1} = x_{n - 1}, \dots, X_0 = x_0) \\
&= P(x_{n - 1}, x_n) \PP(X_{n - 1} = x_{n - 1}, \dots, X_0 = x_0) \\
&= \cdots \\
&= \lambda(x_0) P(x_0, x_1) \cdots P(x_{n - 1}, x_n)
\end{align*}
\item[$\Leftarrow$] For $n = 0$, $\PP(X_0 = x_0) = \lambda(x_0)$. For $n \ge 1$,
\begin{align*}
\PP(X_n = x_n \mid X_{n - 1} = x_{n - 1}, \dots, X_0 = x_0) &= \frac{\PP(X_n = x_n, X_{n - 1} = x_{n - 1}, \dots, X_0 = x_0)}{\PP(X_{n - 1} = x_{n - 1}, \dots, X_0 = x_0)} \\
&= P(x_{n - 1}, x_n)
\end{align*}
\end{enumerate}
\end{proof}
\begin{definition*}
Let $i \in I$. The \emph{$\delta$-mass at $i$}, written $\delta_i$, is defined as
\[ \delta_{ij} = 1(i = j) = \begin{cases} 1 & \text{if $i = j$} \\ 0 & \text{otherwise} \end{cases} \]
\end{definition*}
\begin{flashcard}
\begin{definition*}
Let $X_1, \dots, X_n$ be discrete random variables with values in $I$. They are independent if \cloze{for all $x_1, \dots, x_n \in I$}
\[ \cloze{ \PP(X_1 = x_1, \dots, X_n = x_n) = \prod_{i = 1}^n \PP(X_i = x_i)} \]
\end{definition*}
\end{flashcard}
\noindent Let $(X_n)_{n \ge 0}$ be a sequence of random variables in $I$.
They are independent if for all $k$, for all $i_1 < i_2 < \cdots < i_k$ and for all $x_1, \dots, x_k$,
\[ \PP(X_{i_1} = x_1, \dots, X_{i_k} = x_k) = \prod_{j = 1}^k \PP(X_{i_j} = x_j) \]
Let $(X_n)_{n \ge 0}$ and $(Y_n)_{n \ge 0}$ be 2 sequences. $X \perp Y$ if for all $k, m \in \NN$, and for all $i_1 < \cdots < i_k$, $j_1 < \cdots < j_m$, $x_1, \dots, x_k, y_1, \dots, y_m$,
\begin{align*}
&\PP(X_{i_1} = x_1, \dots, X_{i_k} = x_k, Y_{j_1} = y_1, \dots, Y_{j_m} = y_m) \\
&\qquad = \PP(X_{i_1} = x_1, \dots, X_{i_k} = x_k) \times \PP(Y_{j_1} = y_1, \dots, Y_{j_m} = y_m)
\end{align*}