\documentclass[11pt]{scrartcl} % vim: tw=50 \usepackage{notesheader} \begin{document} \title{Vectors and Matrices} \author{} \date{\today} \maketitle \tableofcontents \newpage \setcounter{section}{-1} \section{Introduction} This course covers ``linear algebra'', topics in \emph{algebra} \& \emph{geometry} \noindent It involves approaches that are \begin{center} concrete \& abstract computational \& conceptual \end{center} \noindent The key ideas to develop / build on are: \begin{itemize} \item Elementary geometry (Euclidean): points, lines, planes in 2d or 3d; length, angles \item Points described by coordinates \item Points described by vectors; what is a vector? \item Simple transformations e.g. rotations \& reflections $\to$ linear maps. \end{itemize} \subsection{Plan} \begin{enumerate} \item Complex Numbers \item Vectors in 3 dimensions \item Vectors in General, $\RR^n$ \& $\CC^n$ \item Matrices \& Linear Maps \item Determinants \& Inverses \item Eigenvalues \& Eigenvectors \item Changing Bases, Canonical Forms \& Symmetries \end{enumerate} \newpage \section{Complex Numbers} \subsection{Basic Definitions} The following terms will not be defined here but assumed to be understood: \begin{itemize} \item $\CC$, $+$, $\times$ \item conjugate, modulus, argument \item complex plane / Argand diagram \end{itemize} \noindent Construct $\CC$ by adding an element $i$ to real numbers $\RR$, with \[ i^2 = -1 .\] Any complex number $z \in \CC$ has the form \[ z = x + iy \qquad \text{with $x, y \in \RR$} ;\] $x = \mathrm{Re}(z)$ is the \emph{real part}; $y = \mathrm{Im}(z)$ is the \emph{imaginary part}. \noindent $\RR \subset \CC$ consisting of elements $x = i0 = x$. \noindent In following, use notation above \& \[ z_1 = x_1 + iy_1, \quad z_2 = x_2 + iy_2 \quad \text{etc} .\] \begin{enumerate} \item \underline{Addition} (\& subtraction). Define \[ z_1 \pm z_2 = (x_1 \pm x_2) + i(y_1 \pm y_2) \] \item \underline{Multiplication}. Define \[ z_1z_2 = (x_1x_2 - y_1y_2) + i(x_1y_2 + x_2y_1) \] If $z \neq 0$, note that \[ z^{-1} = \frac{x}{x^2 + y^2} - \frac{y}{x^2 + y^2} \] satisfies $zz^{-1} = 1$. \item \underline{Complex conjugate} Define \[ \ol{z} = z^* = x - iy \] Then: \[ \mathrm{Re}(z) = \half(z + \ol{z}) \] and \[ \mathrm{Im}(z) = \frac{1}{2i} (z - \ol{z}) \] $\ol{(\ol{z})} = z$ \& further \[ \ol{z_1 + z_2} = \ol{z_1} + \ol{z_2} \] \[ \ol{z_1z_2} = \ol{z_1}\ol{z_2} \] \item \underline{Modulus} is defined by $r =|z|$, real \& $\ge 0$, with $r^2 = |z|^2 = z\ol{z} = x^2 + y^2$ \item \underline{Argument} $\theta = \arg(z)$ real, defined for $z \neq 0$ by \[ z = r(\cos \theta + i \sin \theta) \] for some real $\theta$ (this is known as \emph{polar form}) \[ \cos \theta = \frac{x}{\sqrt{x^2 + y^2}}, \quad \sin\theta = \frac{y}{\sqrt{x^2 + y^2}} \] \[ \implies \tan \theta = \frac{y}{x} \] $\arg(z)$ is determined only \emph{$\bmod\ 2\pi$} i.e. can change $\theta \to \theta + 2n\pi$ for $n \in \ZZ$. To make it unique we can restrict the range, e.g. 
the \emph{principal value} defined by \[ -\pi < \theta \le \pi \] \item \underline{Argand diagram \& Complex Plane} Plot $\mathrm{Re}(z)$ \& $\mathrm{Im}(z)$ on orthogonal axes, then $r = |z|$ \& $\theta = \arg(z)$ are length \& angle shown \begin{center} \begin{tsqx} (-0.2,0)--(1.5,0) EndArrow (0,-0.2)--(0,1.5) EndArrow label Re($z$) @ (1.5,-0.15) label Im($z$) @ (0.15,1.5) z = (1.3,1) origin--z label $x$ @ (0.65,-0.1) label $y$ @ (-0.1,0.5) label $r$ @ 0.5*z+(-0.05,0.05) z--(0,1) dashed z--(1.3,0) dashed anglemark4 (1,0) origin z 10 label $\alpha$ @ 0.4*dir(17) \end{tsqx} \end{center} \end{enumerate} \begin{example*} Consider \[ z = -1 + i\sqrt{3} = 2\left( -\half + i\frac{\sqrt{3}}{2} \right) \] here $z = 2$ \& $\arg(z) = \frac{2\pi}{3} + 2n\pi$. Note $\tan \theta = -\sqrt{3} \implies \theta = \frac{2\pi}{3} + 2n\pi = \arg(z)$ \emph{or} $\theta = -\frac{\pi}{3} + 2n\pi = \arg(-z)$. \begin{center} \begin{tsqx} (-1.1,0)--(1.1,0) EndArrow (0,-1.1)--(0,1.1) EndArrow label Re @ (1.1,-0.15) label Im @ (0.15,1.1) z = dir(120) label $-z$ @ 0.1*dir(-z)-z origin--z EndArrow origin--origin-z EndArrow anglemark4 (1,0) origin z 5 label $\frac{2\pi}{3}$ @ 0.3*dir(45) \end{tsqx} \end{center} \end{example*} \subsection{Basic Properties \& Consequences} \noindent\textbf{Aside (motivating the definitions leading to $\CC$)} \\ Note that $\ZZ$ can be seen as a way to solve some equations involving $\ZZ$, for example $x + 3 = 0$. Rational numbers can then be used to solve other equations such as $5x + 1 = 0$, and real numbers are used to solve some quadratics and other higher degree polynomials, such as $x^2 - 2$. Finally, the complex numbers are used to allow us to solve more equations that we couldn't before, such as $x^2 + 4 = 0$. This leads to the fundamental theorem of algebra. \begin{enumerate}[(i)] \item $\CC$ with operations $+$, $\times$ is a \emph{field}. \noindent i.e. $\CC$ with $+$ is an abelian group \& distributive laws hold, i.e. \[ z_1(z_2 + z_3) = z_1z_2 + z_1z_3 .\] \item \underline{Fundamental Theorem of Algebra} A polynomial of degree $n$ with coefficients in $\CC$ can be written as a product of $n$ linear factors \[ P(z) = c_nz^n + \cdots + c_1z + c_0 \qquad c_i \in \CC, c_n \neq 0 \] \[ = c_n(z - \alpha_1) \cdots (z - \alpha_n) \qquad \alpha_i \in \CC .\] Hence $P(z) = 0$ has at least one root \& $n$ roots counted with multiplicity. \item Addition \& Subtraction as parallelogram constructions: \begin{center} \begin{tsqx} (-1.5,0)--(1.5,0) EndArrow (0,-0.3)--(0,1.5) EndArrow label Re @ (1.5,-0.15) label Im @ (0.15,1.5) z_2 = (0.3,0.8) z_1 = (1.1,0.3) origin--z_1 EndArrow origin--z_2 EndArrow origin--z_1+z_2 EndArrow origin--z_2-z_1 EndArrow z_1--z_2--z_1+z_2--z_1 dashed z_2-z_1--z_2 dashed labelled z_1 + z_2 labelled z_2 - z_1 \end{tsqx} \end{center} Complex conjugation is reflection in real axis \begin{center} \begin{tsqx} (-0.3,0)--(1.5,0) EndArrow (0,-1.5)--(0,1.5) EndArrow label Re @ (1.5,-0.15) label Im @ (0.15,1.5) z := (1,0.8) label $z = x + iy$ @ z+0.1*dir(z) label $\overline{z} = z - iy$ @ (1,-0.8)+0.1*dir((1,-0.8)) origin--z EndArrow origin--(1,-0.8) EndArrow z--(1,-0.8) dashed \end{tsqx} \end{center} \item \phantom{.}\\[-3\baselineskip]\noindent \begin{proposition*}[Composition Property] Modulus / length obeys \[ |z_1z_2| = |z_1||z_2| \] \end{proposition*} \begin{proof} This result follows immediately by just expanding. 
\end{proof} \begin{proposition*}[Triangle Inquality] \[ |z_1 + z_2| \le |z_1| + |z_2| \] \end{proposition*} \begin{proof} Compare \[ LHS^2 = (z_1 + z_2)\ol{(z_1 + z_2)} \] \[ RHS^2 = |z_1|^2 + 2|z_1||z_2| + |z_2|^2 \] Compare ``cross terms'': \begin{align*} z_1\ol{z_2} + z_2\ol{z_1} &\le 2|z_1||z_2| \\ \iff \half (z_1\ol{z_2} + \ol{(z_1\ol{z_2})}) &\le |z_1||\ol{z_2}| \\ \iff \mathrm{Re}(z_1\ol{z_2}) &\le |z_1\ol{z_2}| \end{align*} as desired. \end{proof} \begin{proposition*}[Alternative form of triangle inequality] Replace $z_1$ by $z_2 - z_1$ and rearrange to get \begin{align*} |z_2 - z_1| &\ge |z_2| - |z_1| \\ \text{or} &\ge |z_1| - |z_2 \end{align*} so \[ |z_2 - z_1| \ge \left| |z_2| - |z_1| \right| \] \end{proposition*} \item \phantom{.}\\[-3\baselineskip]\noindent \begin{proposition*} $z_1 = r_1(\cos\theta_1 + i\sin\theta_1)$ and $z_2 = r_2(\cos\theta_2 + i\sin\theta_2)$ implies that \[ z_1z_2 = r_1r_2(\cos(\theta_1 + \theta_2) + i\sin(\theta_1 + \theta_2)) \] \end{proposition*} \begin{proof} Just expand and apply trig formulae. \end{proof} \begin{theorem*}[De Moivre's Theorem] \[ (\cos\theta + i\sin\theta)^n = \cos n\theta + i \sin n \theta \qquad \forall n \in \ZZ \] (for $z \neq 0$, $z^0 = 1$ \& $z^{-n} = (z^{-1})^n$ for $n > 0$.) \end{theorem*} \begin{proof} Use the proposition above and induct. \end{proof} \end{enumerate} \subsection{Exponential \& Trigonometric Functions} Define $\exp$, $\cos$, $\sin$ as functions on $\CC$ by \[ \exp(z) = e^z = \sum_{n = 0}^\infty \frac{z^n}{n!} \] \begin{align*} \cos(z) &= \half (e^{iz} + e^{-iz}) \\ &= 1 - \frac{1}{2!} z^2 + \frac{1}{4!} z^4 + \cdots \end{align*} \begin{align*} \sin(z) &= \frac{1}{2i} (e^{iz} - e^{-iz}) \\ &= z - \frac{1}{3!} z^3 + \frac{1}{5!} z^5 + \cdots \end{align*} \noindent These series converge $\forall z \in \CC$ and such series can be multiplied, rearranged, and differentiated. \\ Furthermore \[ e^z e^w = e^{z + w} \] From above \[ e^0 = 1 \qquad \text{and} \qquad (e^z)^n = e^{nz} \qquad n \in \ZZ \] \begin{proof} Induction for positive integers, and for negative integers use \[ e^z e^{-z} = 1 \implies e^{-z} = (e^z)^{-1} \] \end{proof} \begin{lemma*} For $z = x + iy$ \begin{enumerate}[(i)] \item $e^z = e^x(\cos y + i\sin y)$ \item $\exp$ on $\CC$ takes all complex values except 0. \item $e^z = 1 \iff z = 2n\pi i$, $n \in \ZZ$. \end{enumerate} \end{lemma*} \begin{proof} \begin{enumerate}[(i)] \item $e^{x + iy} = e^x e^{iy}$ but $e^{iy} = \cos y + i\sin y$. \item $|e^z| = e^x$ takes all real values $> 0$. $\arg(e^z) = y$ taking all possible values. \item \begin{align*} e^z = 1 &\iff e^x = 1, \cos y = 1, \sin y = 0 \\ &\iff x = 0 \text{ and } y = 2\pi n \end{align*} as required. \end{enumerate} \end{proof} \noindent Returning to polar form or $\bmod$ / $\arg$ form (Subsection 1.1 (v)), this can be written \[ z = r(\cos\theta + i\sin\theta) = re^{i\theta} \] for $r = |z|$ and $\theta = \arg(z)$. \\ De Moivre's Theorem now follows from \[ (e^{i\theta})^n = e^{in\theta} .\] \subsubsection*{Roots of unity} $z$ is an $N$-th root of unity if $z^N = 1$. To find all solutions: \begin{align*} z = re^{i\theta} \text{ satisfies }& z^N = 1 \\ \iff r^N e^{iN\theta} &= 1 \\ \iff r^N = 1 \text{ and }& N\theta = 2n\pi \qquad n \in \ZZ \end{align*} \noindent This gives $N$ distinct solutions: \begin{align*} z &= e^{2\pi n / N} \qquad n = 0, 1, \dots, N - 1 \\ &= \cos \frac{2\pi n}{N} + i \sin \frac{2 \pi n}{N} \\ &= \omega^n \end{align*} where $\omega = e^{2\pi / N}$. 
\begin{center} \begin{tsqx} \omega^0 = dir 0 R-45 \omega^1 = dir 60 \omega^2 = dir 120 \omega^3 = dir 180 R-135 \omega^4 = dir 240 \omega^5 = dir 300 unitcircle (-1.5,0)--(1.5,0) EndArrow (0,-1.5)--(0,1.5) EndArrow label Re @ (1.5,-0.15) label Im @ (0.15,1.5) \end{tsqx} \end{center} \subsection{Transformations; lines \& circles} Consider the following transformations on $\CC$ (maps $\CC \to \CC$). \begin{align*} z &\mapsto z + a &&\text{translation by $a \in \CC$} \\ z &\mapsto \lambda z &&\text{scaling by $\lambda \in \RR$} \\ z &\mapsto e^{i\alpha}z &&\text{rotation by $\alpha \in \RR$} \\ z &\mapsto \ol{z} &&\text{reflection in real axis} \\ z &\mapsto \frac{1}{z} &&\text{inversion} \end{align*} \noindent Consider general point on a \emph{line} in $\CC$ through $z_0$ and parallel to $w \neq 0$ (fixed $z_0$, $w \in \CC$): \begin{center} \begin{tsqx} (-1.5,0)--(1.5,0) EndArrow (0,-0.5)--(0,1.5) EndArrow label Re @ (1.5,-0.15) label Im @ (0.15,1.5) z_0 = (-0.5,1) w = (0.75,0.3) z_0+w--z_0-w origin--z_0 EndArrow origin--w EndArrow \end{tsqx} \end{center} \[ z = z_0 + \lambda w \] for any real parameter $\lambda$. \\ To eliminate $\lambda$, take conjugate \[ \ol{z} = \ol{z_0} + \lambda \ol{w} \] and then combine \[ \ol{w}z - w \ol{z} = \ol{w} z_0 - w \ol{z_0} \] Consider general point on a \emph{circle} with centre $c \in \CC$ and radius $\rho$: \begin{center} \begin{tsqx} (-1.5,0)--(1.5,0) EndArrow (0,-0.5)--(0,1.5) EndArrow label Re @ (1.5,-0.15) label Im @ (0.15,1.5) c ;= (1,0.5) R-90 CR c 0.3 P := c+0.3*dir(70) c--P EndArrow c-(0.4,0)--c+(0.4,0) anglemark4 c+(1,0) c c+dir(70) 3 label $\alpha$ @ c+0.15*dir(35) label $\rho$ @ c+0.15*dir(90) \end{tsqx} \end{center} % \[ z = c + \rho e^{i\alpha} \qquad \text{for any real $\alpha$} \] Equivalently \[ |z - c| = \rho \] or $|z^2| - \ol{c}z - c \ol{z} = \rho^2 - |c|^2$. (squaring sides above). \smallskip\noindent M\"obius transformations are generated by translations, scalings, rotations and inversion. They can be viewed as acting on \[ \CC_{\infty} = \CC \cup \{\infty\} \] which is geometrically a sphere (see IA Groups). \subsection{Logarithms \& Complex Powers} Define \[ w = \log z \qquad z \in \CC, z \neq 0 \] by \[ e^w = \exp w = z \] i.e. $\log$ is inverse of $\exp$ but $\exp$ is many-to-one ($e^z = e^{z + 2n\pi i}$) and so $\log$ is \emph{multi-valued}. \[ z = re^{i\theta} = e^{\log r} e^{i\theta} = e^{\log r + i\theta} \implies \log z = \log(r + i\theta) = \log|z| + i\arg|z| \] Multiple values of $\arg$ and $\log$ are related: \[ \theta \to \theta + 2n\pi \] \[ \log z \to \log z + 2n\pi i \] where $n \in \ZZ$. To make them single valued we can restrict e.g. $0 \le \theta < 2\pi$ or $-\pi < \theta \le \pi$ (called the \emph{principal value}). \begin{example*} \[ z = -3i = 3(-i) = e^{\log 3} e^{-i\pi/2 + 2n\pi i} = e^{\log 3 - i\pi/2 + 2n\pi i} \] Hence \[ \log z = \log 3 - \frac{i\pi}{2} + 2n\pi i \] \[ \arg z = \begin{cases} 3 \pi/2 & \text{ if we use $0 \le \theta < 2\pi$} \\ -\pi/2 & \text{ if we use $-\pi < \theta \le \pi$} \end{cases} \] \end{example*} We define \emph{complex powers} by \[ z^\alpha = e^{\alpha \log z} \qquad z \in \CC, z \neq 0 \& \alpha \in \CC \] This is multi-valued in general under the change $\arg z \to \arg z + 2n \pi$ \[ z^\alpha \to z^\alpha e^{2\pi in\alpha} \] \begin{enumerate}[(i)] \item If $\alpha = P \in \ZZ$ then $z^\alpha = z^p$ unique. \item If $\alpha = \frac{p}{q} \in \QQ$, then $z^\alpha = z^{p/q}$ takes finitely many values. \end{enumerate} but in general we have \emph{infinitely} many values. 
\subsubsection*{Examples} \begin{itemize} \item $(1 + i)^{1/2}$: $1 + i = \sqrt{2} e^{i\pi/4} = e^{\half\log 2 + i\pi/4}$ Hence \begin{align*} \log(1 + i) &= \half \log 2 + \frac{i\pi}{4} + 2n\pi i \\ \implies (1 + i)^{1/2} &= e^{\half\log(1 + i)} \\ &= e^{\frac{1}{4}\log 2 + i\pi/8 + n\pi i} \\ &= 2^{1/4}e^{i\pi/8}(-1)^n \end{align*} \begin{center} \begin{tsqx} (-1.5,0)--(1.5,0) EndArrow (0,-1.5)--(0,1.5) EndArrow label Re @ (1.5,-0.15) label Im @ (0.15,1.5) origin--(1,1) EndArrow label $1 + i$ @ (1.1,1.1) X := 1.189*dir(22.5) origin--X EndArrow origin--origin-X EndArrow anglemark4 (1,0) origin X 5 anglemark4 (-1,0) origin -X 5 label $(1 + i)^{1/2}$ @ (0.1,0.1)+X label $(1 + i)^{1/2}$ @ (-0.1,-0.1)-X \end{tsqx} \end{center} \eqitem \begin{align*} (-3i)^i &= e^{i \log(-3i)} \\ &= e^{i(\log 3 - i\pi/2 + 2n\pi i)} \\ &= e^{i\log 3} e^{\pi/2 - 2n\pi} \qquad n \in \ZZ \end{align*} \end{itemize} \section{Vectors in 3 Dimensions} A vector is a quantity with magnitude and direction (e.g. force, electric and magnetic fields) - all examples modelled on \emph{position}. \\\\ Take geometrical approach to position vectors in 3D space based on standard (Euclidean) notions of points, lines, planes, length, angle etc. Choose point $O$ as the origin, then points $A$, $B$ have position vectors \[ \ul{a} = \vec{OA}, \qquad \ul{b} = \vec{OB} \] \begin{center} \begin{tsqx} ! size(5cm); O = origin R-90 B = (1,1) A = (2,0.3) O--A EndArrow O--B EndArrow label $\underline{a}$ @ 0.5*A-(0.02,0.1) label $\underline{b}$ @ 0.5*B+(-0.1,0.1) \end{tsqx} \end{center} lengths denoted by $|\ul{a}| = |\vec{OA}|$. Also, $\ul{o}$ is the position vector for $O$. \subsection{Vector Addition and Scalar Multiplication} \begin{enumerate}[(i)] \item \ul{Scalar Multiplication} Given $\ul{a}$, position vector for $A$, and a \emph{scalar} $\lambda \in \RR$, $\lambda \ul{a}$ is position vector of point $A'$ on $OA$ with \[ |\lambda \ul{a}| = |\vec{OA'}| = |\lambda| |ul{a}| \] as shown \begin{center} \begin{tsqx} O .= origin A = dir 30 R130 A' = 2.5*A label $A'$ @ O-A'+0.15*dir(210) label $\underline{a}$ @ 0.5*A+(0.1,-0.1) label $\lambda\underline{a}, \lambda > 0$ @ A'+(0.1,-0.3) label $\lambda\underline{a}, \lambda < 0$ @ O-A'+(0.5,-0.1) O--A EndArrow O--O-A' EndArrow A--A' EndArrow \end{tsqx} \end{center} Say $\ul{a}$ and $\ul{b}$ are \emph{parallel}, $\ul{a} \parallel \ul{b}$ iff $\ul{a} = \lambda \ul{b}$ or $\ul{b} = \lambda \ul{a}$. This definition allows $\lambda < 0$, and $\lambda = 0$ so $\ul{a} \parallel \ul{o}$ for any $\ul{a}$. \item Given $\ul{a}$, $\ul{b}$ position vectors of $A$, $B$, construct a parallelogram $OACB$ \begin{center} \begin{tsqx} O = origin R225 B = (0.5,1) A = (2,0) C = A+B O->B O->C O->A B--C--A dashed label $\underline{b}$ @ 0.5*B+0.1*dir(170) label $\underline{c}$ @ 0.5*C+0.1*dir(120) label $\underline{a}$ @ 0.5*A+0.1*dir(-90) \end{tsqx} \end{center} and define $\ul{a} + \ul{b} = \ul{c}$, position vector of point $C$ provided $\ul{a} \not\,\parallel \ul{b}$; if $\ul{a} \parallel \ul{b}$ then we can write $\ul{a} = \alpha \ul{u}$, $\ul{b} = \beta \ul{u}$ for some $\ul{u}$, and then \[ \ul{a} + \ul{b} = (\alpha + \beta) \ul{u} \] \item \ul{Properties} For any vectors $\ul{a}, \ul{b}, \ul{c}$ \[ \ul{a} + \ul{o} = \ul{o} + \ul{a} = \ul{a} \] so $\ul{o}$ is the identity for $+$. We also have that there exists some $-\ul{a}$ such that \[ \ul{a} = (-\ul{a}) = (-\ul{a}) + \ul{a} = \ul{o} \] so there exists an inverse of every vector. We also have \[ \ul{a} + \ul{b} = \ul{b}+ \ul{a} \] so $+$ is commutative. 
It is also associative, i.e. \[ \ul{a} + (\ul{b} + \ul{c}) = (\ul{a} + \ul{b}) + \ul{c} \] We also have the following properties \[ \lambda(\ul{a} + \ul{b}) = \lambda\ul{a} + \lambda\ul{b} \] \[ (\lambda + \mu)\ul{a} = \lambda\ul{a} + \mu\ul{a} \] \[ \lambda(\mu\ul{a}) = (\lambda\mu)\ul{a} \] All can be checked geometrically i.e. associativity from parallelepiped. \item \ul{Linear Combinations and Span} A \emph{linear combination} of vectors $\ul{a}, \ul{b}, \dots, \ul{c}$ is an expression \[ \alpha\ul{a} + \beta\ul{b} + \cdots + \gamma\ul{c} \] for some $\alpha, \beta, \dots, \gamma \in \RR$. The \emph{span} of a set of vectors is \begin{align*} \mathrm{span}\{\ul{a}, \ul{b}, \dots, \ul{c}\} \\ = \{\alpha \ul{a} + \beta\ul{b} + \cdots + \gamma\ul{c} : \alpha, \beta, \dots, \gamma \in \RR \} \end{align*} If $\ul{a} \neq \ul{a}$ then $\mathrm{span}\{\ul{a}\} = \{\lambda\ul{a}\}$, i.e. the \emph{line} through $O$ and $A$. If $\ul{a} \not\,\parallel \ul{b}$ then \[ \mathrm{span}\{\ul{a}, \ul{b}\} = \{\alpha \ul{a} + \beta\ul{b} : \alpha, \beta \in \RR\} \] i.e. the \emph{plane} through $O$, $A$ and $B$. \end{enumerate} \subsection{Scalar or Dot Product} \begin{enumerate}[(i)] \item \ul{Definition}: Given $\ul{a}$ and $\ul{b}$ let $\theta$ be the angle between them; then \[ \ul{a} \cdot \ul{b} = |\ul{a}||\ul{b}| \cos\theta \] \begin{center} \begin{tsqx} ! size(5cm); O = origin R180 A := dir -20 B := 0.8*dir(30) label $\underline{a}$ @ A+0.05*dir(-20) label $\underline{b}$ @ B+0.05*dir(30) O->B O->A anglemark4 A O B 5 label $\theta$ @ 0.2*dir(5) \end{tsqx} \end{center} \emph{scalar} or \emph{dot product} or \emph{inner product} ($\theta$ defined unless $|\ul{a}|$ or $|\ul{b}| = 0$ and then $\ul{a} \cdot \ul{b} = 0$.) \item \ul{Properties} \[ \ul{a} \cdot \ul{b} = \ul{b} \cdot \ul{a} \] \[ \ul{a} \cdot \ul{a} = |\ul{a}|^2 \ge 0 \& = 0 \text{ iff } \ul{a} = 0 \] \[ (\lambda \ul{a})\cdot\ul{b} = \lambda(\ul{a}\cdot\ul{b}) = \ul{a}\cdot(\lambda\ul{b}) \] \[ \ul{a} \cdot(\ul{b} + \ul{c}) = \ul{a}\cdot\ul{b} + \ul{a}\cdot\ul{c} \] \item \ul{Interpretation} For $\ul{a} \neq 0$, consider $\ul{u} = \frac{\ul{a}}{|\ul{a}|}$ \[ \ul{u} \cdot \ul{b} = \frac{1}{\ul{a}}\ul{a}\cdot\ul{b} = |\ul{b}| \cos\theta \] is \emph{component} of $\ul{b}$ along $\ul{a}$. \begin{center} \begin{tsqx} O := origin (-0.2,0)--(0,0) (0,-0.2)--(0,0) B := (1.3,1.3) label $\underline{b}$ @ B+0.1*dir(45) A := (0,0.7) label $\underline{a}$ @ A+(-0.1,0) O->A A->(0,1.3) O->(1.3,0) (1.3,0)->(1.5,0) (0,1.3)->(0,1.5) label $\underline{b}_\perp$ @ (1.3,-0.1) label $\underline{b}_\parallel$ @ (-0.1,1.3) O->B (0,1.3)--B--(1.3,0) dashed anglemark4 B O A 5 label $\theta$ @ 0.2*dir(67) \end{tsqx} \end{center} We can \emph{resolve} $\ul{b} = \underbrace{\ul{b}_\parallel}_{\parallel \ul{a}} + \underbrace{\ul{b}_\perp}_{ \perp \ul{a}}$ where $\ul{a} \perp \ul{b}$ iff $\ul{a} \cdot \ul{b} = 0$. Note $\ul{a} \cdot \ul{b} = \ul{a} \cdot \ul{b}_\parallel$. (The expressions can be computed as $\ul{b}_\parallel = (\ul{b} \cdot \ul{u}) \ul{u}$, $\ul{b}_\perp = \ul{b} - (\ul{b} \cdot \ul{u}) \ul{u}$. \end{enumerate} In general, vectors $\ul{a}$ and $\ul{b}$ are \emph{orthogonal} or \emph{perpendicular}, written \[ \ul{a} \perp \ul{b} \iff \ul{a} \cdot \ul{b} = 0 \] definition allows $\ul{a}$ or $\ul{b}$ = $\ul{o}$; $\ul{o} \perp \text{any vector}$. \subsection{Orthonormal Bases and Components} Choose vectors $\ul{e_1}$, $\ul{e_2}$, $\ul{e_3}$ that are \emph{orthonormal} i.e. each of unit length and mutually perpendicular. 
\[ \ul{e_j} \cdot \ul{j} = \begin{cases} 1 & \text{if $i = j$} \\ 0 & \text{if $i \neq j$} \end{cases} \] Equivalent to choosing Cartesian axes along these directions, $\{\ul{e_i}\}$ is a \emph{basis}: any vector can be expressed \[ \ul{a} = \sum_i a_i\ul{e_i} = a_1\ul{e_1} + a_2\ul{e_2} = a_3\ul{e_3} \] and each component $a_i$ is \emph{uniquely} determined. \[ a_i = \ul{e_i} \cdot \ul{a} \] \begin{center} \begin{tsqx} ! size(5cm); O := origin O->>(0,1) O->>(1,0) O->>dir(-30) (1,0)--(2.5,0) (0,1)--(0,2.5) dir(-30)--2.5*dir(-30) label $\underline{e_3}$ @ (-0.2,1) label $\underline{e_2}$ @ (1,0.2) label $\underline{e_1}$ @ rotate(-30)*(1,-0.2) (-0.5,0)--O dashed A := (1.1,2.2) label $\underline{a}$ @ A+(0.2,0) O->A A--(-0.5,2.2) dashed label $a_3$ @ (-1,1.1) brace (-0.5,0) (-0.5,2.2) \end{tsqx} \end{center} Each $\ul{a}$ can now be identified with set of components in \[ \ub{(a_1, a_2, a_3)}_{\text{row vector}} \qquad \text{or} \qquad \ub{\begin{pmatrix} a_1 \\ a_2 \\ a_3 \end{pmatrix}}_{\text{column vector}} \] Note \begin{align*} \ul{a} \cdot \ul{b} &= asdf \left( \sum_i a_i\ul{e_i} \right) \cdot \left( \sum_j b_j \ul{e_j} \right) \\ &= a_1b_1 + a_2b_2 + a_3b_3 \\ \text{and} |\ul{a}|^2 &= a_1^2 + a_2^2 + a_3^2 \qquad \text{Pythagoras} \end{align*} $\ul{e_1}$, $\ul{e_2}$, $\ul{e_3}$ are also often written $\ul{i}$, $\ul{j}$, $\ul{k}$. \subsection{Vector or Cross Product} \begin{definition*} Given $\ul{a}$ and $\ul{b}$, let $\theta$ be angle between them measured in sense shown relative to a unit normal $\ul{n}$ to the plan they span \begin{center} \begin{tsqx} ! size(5cm); B := 0.9*dir(15) A := dir(-20) N := 0.3*dir(90) O := origin O->>N O->>A O->>B label $\underline{b}$ @ B+0.05*dir(B) label $\underline{a}$ @ A+0.05*dir(A) label $\underline{n}$ @ N+0.05*dir(N) arc3 O 0.15*dir(-20) 0.15*dir(15) EndArcArrow(SimpleHead) label $\theta$ @ 0.2*dir(-2.5) \end{tsqx} \end{center} ``right-handed sense''. (unit normal $\equiv$ unit vector $\perp$ plane); \\ then \[ \ul{a} \times \ul{b} = |\ul{a}||\ul{b}| \sin\theta \ul{n} \] (sometimes $\wedge$ is used instead of $\times$) is \emph{vector} or \emph{cross} product. \\ Note $\emph{n}$ is defined up to a choice of sign if $\ul{a} \not\,\parallel \ul{b}$, but changing sign of $\ul{n}$ means changing $\theta$ to $2\pi - \theta$ so definition is undefined; $\ul{n}$ is not defined it $\ul{a} \parallel \ul{b}$, and $\theta$ is not defined it $|\ul{a}|$ or $|\ul{b}| = 0$, but $\ul{a} \times \ul{b} = \ul{o}$ in these cases. 
\end{definition*} \subsubsection*{Properties} \[ \ul{a} \times \ul{b} = -\ul{b} \times \ul{a} \] \[ (\lambda \ul{a}) \times \ul{b} = \lambda (\ul{a} \times \ul{b}) = \ul{a} \times (\lambda \ul{b}) \] \[ \ul{a} \times (\ul{b} + \ul{c}) = \ul{a} \times \ul{b} + \ul{a} \times \ul{c} \] \[ \ul{a} \times \ul{b} = \ul{o} \iff \ul{a} \parallel \ul{b} \] \[ \ul{a} \times \ul{b} \perp \ul{a} \& \ul{b} \] \[ \ul{a} \cdot (\ul{a} \times \ul{b}) = \ul{b} \cdot (\ul{a} \times \ul{b}) = 0 .\] \subsubsection*{Interpretations} \begin{itemize} \item $\ul{a} \times \ul{b}$ is the \emph{vector} area of the parallelogram shown \begin{center} \begin{tsqx} O := origin A := (2,0) B := (0.5,0.8) label $\underline{a}$ @ A-(0,0.1) label $\underline{b}$ @ B+(-0.1,0.1) anglemark4 A O B 4 O->>B O->>A X := foot B O A B--A+B--A dashed B--X dashed rightanglemark4 A X B 4 label $\theta$ @ 0.2*dir(27) \end{tsqx} \end{center} \[ \ul{a} \times \ul{b} = |\ul{a}||\ul{b}|\sin\theta \qquad\text{for $\sin\theta \ge 0$} = \text{``base''} \times \text{``$\perp$ height''} \] \emph{scalar} area \\ Direction of normal $\ul{n}$ gives orientation of parallelogram in space. \item Fix $\ul{a}$ and consider $\ul{x} \perp \ul{a}$; then $\ul{x} \mapsto \ul{a} \times \ul{x}$ scales $|\ul{x}|$ by a factor of $|\ul{a}|$ and rotates $\ul{x}$ by $\pi/2$ in plane $\perp \ul{a}$ as shown. \begin{center} \begin{tsqx} ! size(5cm); A := (-0.8,0.8) B := (-1.2,-0.8) C := (0.8,-0.8) D := (1.2,0.8) A--B--C--D--A X := (-0.6,-0.48) AX := (0.8,-0.324) A' := (0,0.96) O := origin O->>X O->>AX O->>A' label $\underline{x}$ @ X+0.1*dir(X) label $\underline{a}$ @ A'+0.1*dir(A') label $\underline{a} \times \underline{x}$ @ AX+(0,0.1) arc3 O 0.2*dir(X) 0.2*dir(AX) EndArcArrow(SimpleHead) label $\frac{\pi}{2}$ @ 0.3*dir(-85) \end{tsqx} \end{center} \end{itemize} \subsubsection*{Component Expressions} Consider $\ul{e_1}$, $\ul{e_2}$, $\ul{e_3}$ orthonormal basis as in section 2.3 but assume in addition that \[ \ul{e_1} \times \ul{e_2} = \ul{e_3} = -\ul{e_2} \times \ul{e_1} \] \[ \ul{e_1} \times \ul{e_3} = \ul{e_1} = -\ul{e_3} \times \ul{e_2} \] \[ \ul{e_3} \times \ul{e_1} = \ul{e_2} = -\ul{e_1} \times \ul{e_3} \] (all equalities from any one) This is called a \emph{right-handed} orthonormal basis. 
Now for \[ \ul{a} = \sum_i a_i\ul{e_i} = (a_1 \ul{e_1} + a_2\ul{e_2} + a_3\ul{e_3}) \] and \[ \ul{b} = \sum_j b_j\ul{e_j} = (b_1\ul{e_1} + b_2\ul{e_2} + b_3\ul{e_3}) \] we get \begin{align*} \ul{a} \times \ul{b} &= (a_2b_3 - a_3b_2)\ul{e_1} \\ &\,\,\,\, + (a_3b_1 - a_1b_3) \ul{e_2} \\ &\,\,\,\, + (a_1b_2 - a_2b_1) \ul{e_3} \end{align*} \subsection{Triple Products} \subsubsection*{Scalar Triple Product} \begin{notation*} Define \begin{align*} &\,\,\,\,\,\,\,\,\ul{a} \cdot (\ul{b} \times \ul{c}) = \ul{b} \cdot (\ul{c} \times \ul{a}) = \ul{c} \cdot (\ul{a} \times \ul{b}) \\ &= -\ul{a} \cdot (\ul{c} \times \ul{b}) = -\ul{b} \cdot (\ul{a} \times \ul{c}) = -\ul{c} \cdot (\ul{b} \times \ul{a}) \\ &= [\ul{a}, \ul{b}, \ul{c}] \end{align*} \end{notation*} Interpretation: $|\ul{c} \cdot (\ul{a} \times \ul{b})|$ is volume of parallelepiped shown $ = (\text{area of parallelogram base}) \times (\perp \text{ height}) = |\ul{a} \times \ul{b}||c||\cos\phi|$ \begin{center} \begin{tsqx} A := (1.5,0) B := (0.5,0.5) C := (0.35,0.8) AB := (0,1) label $\underline{a}$ @ A+(0,-0.1) label $\underline{b}$ @ B+(0.1,-0.1) label $\underline{c}$ @ C+(0.1,-0.1) label $\underline{a} \times \underline{b}$ @ AB+(-0.25,0) O := origin O->>AB O->>A O->>B O->>C A--A+B--B--B+C--C--A+C--A dashed A+C--A+B+C--B+C dashed A+B--A+B+C dashed \end{tsqx} \end{center} $\ul{c} \cdot \ul{a} \times \ul{b}$ ``signed volume''; if $\ul{c} \cdot \ul{a} \times \ul{b} > 0$ say $\ul{a}$, $\ul{b}$, $\ul{c}$ right-handed set. \begin{remark*} $\ul{a} \cdot \ul{b} \times \ul{c} = 0$ if and only if $\ul{a}$, $\ul{b}$ and $\ul{c}$ are \emph{co-planar} meaning one of them lies in plane spanned by other two. For example $\ul{c} = \alpha \ul{a} + \beta \ul{b}$ belonging to $\mathrm{span}\{\ul{a}, \ul{b}\}$. \end{remark*} \begin{example*} \[ \ul{a} = (2,0,-1) \qquad \ul{b} = (7,-3, 5) \] \begin{align*} \implies \ul{a} \times \ul{b} &= (0.5-(-1)(-3))\ul{e_1} \\ &\,\,\,\, + ((-1)\cdot 7 - 2.5) \ul{e_2} \\ &\,\,\,\, + (2 \cdot (-3) - 0.7) \ul{e_3} \\ &= (-3,-17, -6) \end{align*} Test whether $\ul{a}$, $\ul{b}$, $\ul{c}$ coplanar with $\ul{c} = (3, -3, 7)$ \[ \ul{c} \cdot \ul{a} \times \ul{b} = 3(-3) + (-3)(-17) + 7(-6) = 0 ;\] consistent with $\ul{c} = \ul{b} - 2\ul{a}$. \end{example*} \subsubsection*{Vector Triple Product} \[ \ul{a} \times (\ul{b} \times \ul{c}) = (\ul{a} \cdot \ul{c})\ul{b} - (\ul{a} \cdot \ul{b})\ul{c} \] \[ (\ul{a} \times \ul{b}) \times \ul{c} = (\ul{a} \cdot \ul{c})\ul{b} - (\ul{b} \cdot \ul{c})\ul{a} \] Form of RHS is constrained by definitions above, or could check explicitly. Return to these formulas using index notation and summation convention. 
\subsection{Lines, Planes and Other Vector Equations} \begin{enumerate}[(a)] \item \ul{Lines} \\ General point on a line through $\ul{a}$ with direction $\ul{u} (\neq \ul{o})$ has position vector \[ \ul{r} = \ul{a} = \lambda \ul{u} \qquad \lambda \in \RR \] parametric form \begin{center} \begin{tsqx} O := origin label $\underline{o}$ @ O+0.1*dir(-100) A := (-0.5,1) R := (1,1.4) O->>A O->>R A->>0.7*A+0.3*R A--1.3*A-0.3*R 0.7*A+0.3*R->>R R--1.3*R-0.3*A label $\underline{a}$ @ A+0.1*dir(-135) label $\underline{r}$ @ R+0.1*dir(-50) label $\underline{u}$ @ 0.7*A+0.3*R+0.1dir(100) label $\lambda\underline{u}$ @ R+0.1*dir(100) \end{tsqx} \end{center} Alternative form without parameter $\lambda$ obtained by crossing with $\ul{u}$: \[ \ul{u} \times \ul{r} = \ul{u} \times \ul{a} \] Conversely \[ \ul{u} \times (\ul{r} - \ul{a}) = \ul{o} \] and this holds if and only if \[ \ul{r} - \ul{a} = \lambda \ul{u} \] for some real $\lambda$. Now consider \[ \ul{u} \times \ul{r} = \ul{C} \] where $\ul{u}$, $\ul{c}$ are given vectors with $\ul{u} \neq \ul{o}$. Note that \[ \ul{u} \cdot (\ul{u} \times \ul{r}) = \ul{u} \cdot \ul{c} = 0 \] If $\ul{u} \cdot \ul{c} \neq 0$ then we hae a contradiction i.e. no solutions. If $\ul{u} \cdot \ul{c} = 0$, try a particular solution by considering \[ \ul{u} \times (\ul{u} \times \ul{c}) = (\ul{u} \cdot \ul{c})\ul{u} - (\ul{u} \cdot \ul{u})\ul{c} = -|\ul{u}|^2\ul{c} \] Hence \[ \ul{a} = -\frac{1}{|\ul{u}|^2}(\ul{u} \times \ul{c}) \] is a solution. General solution (arguing as before) is \[ \ul{r} = \ul{a} + \lambda \ul{u} \] \item \ul{Planes} \\ General point on a plane through $\ul{a}$ with directions $\ul{u}$, $\ul{v}$ in plane ($\ul{u} \not\,\parallel \ul{v}$) has position vector \[ \ul{r} = \ul{a} + \lambda \ul{u} + \mu\ul{v} \qquad \lambda, \mu \in \RR \] parametric form \begin{center} \begin{tsqx} W := (-1.6,1) X := (0.6,1) Y := (1.6,2.5) Z := (-0.6,2.5) W--X--Y--Z--W O .= origin A := (-0.5,1.5) U := A+(0.3,0) V := A+(0.1,0.35) O->>A R := A+(1,0)+(0.2,0.7) O->>R A->>U U->>A+(1,0) A->>V V->>A+(0.2,0.7) A+(0.2,0.7)--R--A+(1,0) dashed label $\underline{a}$ @ A+(-0.1,0.1) label $\underline{v}$ @ V+(-0.05,0.1) label $\underline{u}$ @ U+(0,-0.1) \end{tsqx} \end{center} \begin{center} \includegraphics[width=0.6\linewidth] {images/c1df80022e6a11ec.png} \end{center} Alternative form without parameters obtained by dotting with normal \[ \ul{n} = \ul{u} \times \ul{v} \neq (\ul{o} \text{ since $\ul{u} \not\,\parallel \ul{v}$ but not necessarily a unit vector}) \] This gives \[ \ul{n} \cdot \ul{r} = \ul{n} \cdot \ul{a} = k \] where $k$ is a constant. Note component of $\ul{r}$ along $\ul{n}$ is \[ \frac{\ul{n} \cdot \ul{r}}{|\ul{n}|} = \frac{k}{|\ul{n}|} \qquad \text{(constant)} \] is clearly a plane and moreover $\frac{|k|}{|\ul{n}|}$ is perpendicular distance of plane from $\ul{o}$. \begin{center} \includegraphics[width=0.6\linewidth] {images/51981eca2e6b11ec.png} \end{center} \item \ul{Other Vector Equations} \\ Consider equations for $\ul{r}$ (unknown) written in vector notation with given (constant) vectors. Possible approaches: \begin{itemize} \item Can re-write and convert to some standard form, e.g. \[ |\ul{r}|^2 + \ul{r} \cdot \ul{a} = k, \qquad \text{constant} \] Then we can complete the square: \[ |\ul{r} + \half\ul{a}|^2 = (\ul{r} + \half\ul{a}) \cdot (\ul{r} + \half \ul{a}) = k + \frac{1}{4} |\ul{a}|^2 \] Equation of a sphere, centre $-\half \ul{a}$ and radius $\sqrt{k + \frac{1}{4} |\ul{a}|^2}$, provided $k + \frac{1}{4} |\ul{a}|^2 > 0$. 
For equations linear in $\ul{r}$. \item Try dotting and crossing with constant vectors to learn more (see examples). \item Can try expressing \[ \ul{r} = \alpha\ul{a} + \beta\ul{b} + \gamma\ul{c} \] for some non-$\omega$-planar $\ul{a}$, $\ul{b}$, $\ul{c}$ and solve for $\alpha$, $\beta$, $\gamma$. \item Can choose basis and use index / matrix notation. \end{itemize} \end{enumerate} \subsection{Index (suffix) Notation and the Summation Convention} \begin{enumerate}[(a)] \item \ul{Components; $\delta$ and $\varepsilon$} \\ Write vectors $\ul{a}$, $\ul{b}$, \dots in terms of components. $a_i$, $b_i$, \dots, with respect to an orthonormal, right-handed basis \[ \ul{e_1}, \ul{e_2}, \ul{e_3} \] Indices or suffices $i, j, k, l, p, q, \dots$ take values 1, 2, 3. Then \[ \ul{c} = \alpha \alpha \ul{a} + \beta\ul{b} \] \[ \iff c_i = [\alpha \ul{a} + \beta\ul{b}] = \alpha a_i \beta b_i \] for $i = 1, 2, 3$ (\emph{free index}) \[ \ul{a} \cdot \ul{b} = \sum_i a_ib_i = \sum_j a_jb_j \] \[ \ul{x} = \ul{a} = (\ul{b} \cdot \ul{c}) \ul{d}\] for $j = 1, 2, 3$ free index. \[ \iff x_j = a_j + \left(\sum_k b_k c_k\right)d_j \] \begin{definition*}[Kronecker Delta] \[ \delta_{ij} = \begin{cases} 1 & \text{if $i = j$} \\ 0 & \text{if $i \neq j$} \end{cases} \] \[ \delta{ij} = \delta{ji} \qquad \text{(symmetric)} \] As an asdfasdf matrix \[ \begin{pmatrix} \delta_{11} & \delta{12} & \delta_{13} \\ \delta_{21} & \delta_{22} & \delta_{23} \\ \delta_{31} & \delta_{32} & \delta_{33} \end{pmatrix} = \begin{pmatrix} 1 & 0 & 0 \\ 0 & 1 & 0 \\ 0 & 0 & 1 \end{pmatrix} \] Then \[ \ul{e_i} \cdot \ul{j} = \delta_{ij} \] and \begin{align*} \ul{a} \cdot \ul{b} &= \left(\sum_i a_i \ul{e_i} \right) \cdot \left( \sum_j b_j \ul{e_j} \right) \\ &= \sum_{ij} a_ib_j \ul{e_i} \cdot \ul{e_j} \\ &= \sum_{ij} a_ib_j \delta_{ij} \\ &= \sum_i a_ib_i \end{align*} \end{definition*} \begin{definition*}[Levi-Civita Epsilon] \[ \varepsilon_{ijk} = \begin{cases} +1 & \text{if $(i, j, k)$ even permutation of $(1, 2, 3)$} \\ -1 & \text{if $(i, j, k)$ odd permutation of $(1, 2, 3)$} \\ 0 & \text{else} \end{cases} \] i.e. \[ \varepsilon_{123} = \varepsilon{231} = \varepsilon_{312} = +1 \] \[ \varepsilon_{321} = \varepsilon_{213} = \varepsilon_{132} = -1 \] \[ \varepsilon_{ijk} = 0 \qquad \text{if any two index values match} \] Note that $\varepsilon_{ijk}$ is \emph{totally} antisymmetric: exchanging any pair of indices produces a change in sign. Then \[ \ul{e_i} \times \ul{e_j} = \sum_k \varepsilon_{ijk} \ul{e_k} \] e.g. \[ \ul{e_2} \times \ul{e_1} = \sum_k \varepsilon_{21k} \ul{e_k} = \varepsilon_{213} \ul{e_3} \] And \begin{align*} \ul{a} \times \ul{b} = \left( \sum_i a_i\ul{e_i} \right) \times \left( \sum_j b_j \ul{e_j} \right) \\ &= \sum_{ij} a_ib_j \ul{e_i} \times \ul{e_j} \\ &= \sum_{ij} a_ib_j \sum_k \varepsilon_{ijk} \ul{e_k} \\ &= \sum_k \left( \sum_{ij} \varepsilon_{ijk} a_ib_j \right) \ul{e_k} \end{align*} Hence \[ (\ul{a} \times \ul{b})_k = \sum_{ij} \varepsilon_{ijk} a_ib_j \] e.g. \begin{align*} (\ul{a} \times \ul{b})_3 &= \sum_{ij} \varepsilon_{ij3} a_ib_j \\ &= \varepsilon_{123} a_1b_2 + \varepsilon_{213} a_2b_1 \\ &= a_1b_2 - a_2b_1 \end{align*} \end{definition*} \item \ul{Summation Convention} \\ With component / index notation, we observe that indices that appear \emph{twice} in a given term are (usually) summed over. In the summation convention we \emph{omit} $\sum$ signs for repeated indices: the sum is understood. 
\\ \ul{Examples} \begin{enumerate}[(i)] \eqitem \begin{align*} &a_i \delta_{ij} \qquad \text{$\sum_i$ understood} \\ =&a_1\delta_{1j} + a_2\delta_{2j} + a_3\delta_{3j} \\ =&\begin{cases} a_1 &\text{if $j = 1$} \\ a_2 &\text{if $j = 2$} \\ a_3 &\text{if $j = 3$} \end{cases} \end{align*} \emph{or} \[ a_i\delta_{ij} = a_j \] true for $j = 1, 2, 3$. \item Here on the first line we have $\sum_{i, j}$ is understood, and on the second line we have the $\sum_i$ is understood \begin{align*} \ul{a} \cdot \ul{b} &= \delta_{ij} a_ib_j \\ &= a_ib_i \end{align*} \item Here $\sum_{j, k}$ is understood \[ (\ul{a} \times \ul{b})_i = \varepsilon_{ijk} a_j b_k \] \item Here $\sum_{ijk}$ is understood \[ \ul{a} \cdot \ul{b} \times \ul{c} = \varepsilon_{ijk} a_i b_j c_k \] \item Here $\sum_i$ is understood \[ \delta_{ii} = \delta_{11} + \delta{22} + \delta_{33} = 3 \] \item On the last line we have that $\sum_j$ is understood \begin{align*} [(\ul{a} \cdot \ul{c}) \ul{b} - (\ul{a} \cdot \ul{b})\ul{c}]_i &= (\ul{a} \cdot \ul{c}) b_i - (\ul{a} \cdot \ul{b})c_i \\ &= a_j c_j b_i - a_j b_j c_i \end{align*} \end{enumerate} \ul{Summation Convention Rules} \begin{enumerate}[(i)] \item An index occurring exactly \emph{once} in any term must appear once in \emph{every} term and it can take any value - a \emph{free} index. \item An index occurring exactly \emph{twice} in a given term is summed over - a \emph{repeated} or \emph{contracted} or \emph{dummy} index. \item No index can occur more than twice. \end{enumerate} \ul{Application}: proof of the vector triple product identity. Consider \begin{align*} [\ul{a} \times (\ul{b} \times \ul{c})]_i &= \varepsilon_{ijk} a_j (\ul{b} \times \ul{c})_k \\ &= \varepsilon_{ijk} a_j \varepsilon_{kpq} b_p c_q \\ &= \varepsilon_{ijk} \varepsilon_{pqk} a_j b_p c_q \end{align*} Now \[ \varepsilon_{ijk} \varepsilon_{pqk} = \delta_{ip} \delta_{jq} - \delta_{iq} \delta_{jp} \] (see section (c) velow). Then \begin{align*} [\ul{a} \times (\ul{b} \times \ul{c})]_i &= (\delta_{ip} \delta_{jq} - \delta_{iq} \delta_{jp}) a_j b_p c_q \\ &= a_j \delta_{ip} b_p \delta_{jq} c_q - a_j \delta_{jp} b_p \delta_{iq} c_q \\ &= a_j b_i c_j - a_j b_j c_i \\ &= (a_j c_j) b_i - (a_j b_j) c_i \\ &= (\ul{a} \cdot \ul{c}) b_i - (\ul{a} \cdot \ul{b}) c_i \\ &= [(\ul{a} \cdot \ul{c}) \ul{b} - (\ul{a} \cdot \ul{b}) \ul{c}]_i \end{align*} True for $i = 1, 2, 3$ hence \[ \ul{a} \times (\ul{b} \times \ul{c}) = (\ul{a} \cdot \ul{c}) \ul{b} - (\ul{a} \cdot \ul{b}) \ul{c} \] \item \ul{$\varepsilon$ $\varepsilon$ identities} \\ \begin{itemize} \item Expected to know this and quote it: \[ \varepsilon_{ijk} \varepsilon_{pqk} = \delta_{ip} \delta_{jq} - \delta_{iq} \delta_{jp} = \varepsilon_{kij} \varepsilon_{kpq} \] Check: RHs and LHS are both antisymmetric (change sign) under \[ i \leftrightarrow j \qquad \text{or} \qquad p \leftrightarrow q \] SO both sides vanish if $i$ and $j$ or $p$ and $q$ take same values. Now suffices to check \[ i = p = 1 \qquad\text{and}\qquad j = q = 2 \] \[ LHS = \varepsilon_{123} \varepsilon_{123} = +1 \] \[ RHS = \delta_{11} \delta_{22} - \delta_{12} \delta_{21} = +1 \] or $i = q = 1$ and $j = p = 2$ \[ LHS = \varepsilon_{123} \varepsilon_{213} = (+1)(-1) = -1 \] \[ RHS = \delta_{12} \delta{21} - \delta_{11} \delta_{22} = -1 \] All other index choices work similarly. 
\item $\varepsilon_{ijk} \varepsilon_{pjk} = 2\delta_{ip}$ \ul{contract} result aove \begin{align*} \varepsilon_{ijk} \varepsilon_{pjk} &= \delta_{ip} \delta_{jj} - \delta_{ij} \delta_{jp} \\ &= 3 \delta_{ip} - \delta_{ip} \\ &= 2 \delta_{ip} \end{align*} \item $\varepsilon_{ijk} \varepsilon_{ijk} = 6$. \end{itemize} \end{enumerate} \newpage \section{Vectors in General; $\RR^n$ and $\CC^n$} \subsection{Vectors in $\RR^n$} \subsubsection*{(a) Definitions} If we regard vectors as sets of components, it is easy to generalise from 3 to $n$ dimensions. \begin{itemize} \item Let $\RR^n = \{ \ul{x} = (x_1, \dots, x_n : x_i \in \RR\}$ and define \begin{enumerate}[(i)] \item \ul{addition} \\ \[ \ul{x} + \ul{y} = (x_1 + y_1, \dots, x_n + y_n) \] \item \ul{scalar multiplication} \[ \lambda \ul{x} = (\lambda x_1, \dots, \lambda x_n) \] \end{enumerate} for any $\ul{x}, \ul{y} \in \RR^n$ and $\lambda \in \RR$. \item \emph{Inner product} or \emph{scalar product} on $\RR^n$ is defined by \[ \ul{x} \cdot \ul{y} = \sum_i x_i y_i = x_1 y_1 + \cdots x_n y_n \] \ul{Properties} \begin{enumerate}[(i)] \item Symmetric $\ul{c} \cdot \ul{y} = \ul{y} \cdot \ul{x}$ \item Bilinear (linear in each vector) \[ (\lambda \ul{X} + \lambda' \ul{x}') \cdot \ul{y} = \lambda (\ul{x} \cdot \ul{y}) + \lambda' (\ul{x}' \cdot \ul{y}) \] and \[ \ul{x} \cdot (\mu \ul{y} + \mu' \ul{y}' = \mu(\ul{x} \cdot \ul{y}) + \mu' (\ul{x} \cdot \ul{y}') \] \item \ul{Positive definite} \[ \ul{x} \cdot \ul{x} = \sum_{i} x_i^2 \ge 0 \] and is equal to 0 if and only if $\ul{x} = \ul{o}$. The \emph{length} or \emph{norm} of vector $\ul{x}$ is $|\ul{x}| (\ge 0)$ defined by $|\ul{z}|^2 = \ul{x} \cdot \ul{x}$. \item For $\ul{x} \in \RR^n$ we can write \[ \ul{x} = \sum_i x_i \ul{e_i} \] where \begin{align*} \ul{e_1} &= (1, 0, \dots, 0) \\ \ul{e_1} &= (0, 1, \dots, 0) \\ &\vdots \\ \ul{e_n} &= (0, 0, \dots, 1) \end{align*} call $\{\ul{e_i}\}$ the \emph{standard basis} for $\RR^n$. Note that it is orthonormal: \[ \ul{e_i} \cdot \ul{e_j} = \delta_{ij} = \begin{cases} 1 & \text{if $i = j$} \\ 0 & \text{if $i \neq j$} \end{cases} \] \end{enumerate} \end{itemize} \subsubsection*{(b) Cauchy-Schwarz and $\triangle$ Inequalities Proposition} \begin{proposition*}[Cauchy-Schwarz] \[ |\ul{x} \cdot \ul{y}| \le |\ul{x}||\ul{y}| \] for $\ul{x}, \ul{y} \in \RR^n$ and equality holds if and only if $\ul{x} = \lambda \ul{y}$ or $\ul{y} = \lambda \ul{x}$ ($\ul{x} \parallel \ul{y}$) for some $\lambda \in \RR$. \end{proposition*} Deductions reveal geometrical aspects of inner product: \begin{enumerate}[(i)] \item Set \[ \ul{x} \cdot \ul{y} = |\ul{x}||\ul{y}| \cos\theta \] to define angle $\theta$ between $\ul{x}$ and $\ul{y}$ \item $\triangle$ inequality holds \[ |\ul{x} + \ul{y}| \le |\ul{x}| + |\ul{y}| \] \end{enumerate} Now we present a proof of the Cauchy-Schwarz inequality \begin{proof} If $\ul{y} = \ul{o}$, result is immediate. \\ If $\ul{y} \neq \ul{o}$, consider \begin{align*} |\ul{x} - \lambda \ul{y}|^2 &= (\ul{x} - \lambda\ul{y}) \cdot (\ul{x} - \lambda \ul{y}) \\ &= |\ul{x}|^2 - 2\lambda \ul{x} \cdot \ul{y} + \lambda^2|\ul{y}|^2 \ge 0 \end{align*} This is a quadratic in real $\lambda$ with at most one real root, so discriminant satisfies \[ (-2\ul{x} \cdot \ul{y})^2 - 4|\ul{x}|^2|\ul{y}|^2 \le 0 \] Equality holds if and only if $disc = 0$ which holds if and only if $\lambda \ul{y} = \ul{x}$ for some $\lambda \in \RR$. \end{proof} \noindent Now we present a proof of the $\triangle$ inequality. 
\begin{proof} \[ LHS^2 = |\ul{x} + \ul{y}|^2 = |\ul{x}|^2 + 2 \ul{x} \cdot \ul{y} + |\ul{y}|^2 \] \[ RHS^2 = (|\ul{x}| + |\ul{y}|)^2 = |\ul{x}|^2 + 2|\ul{x}||\ul{y}| + |\ul{y}|^2 \] and compare using Cauchy-Schwarz. \end{proof} \subsubsection*{(c) Comments} Inner product on $\RR^n$. \[ \ul{a} \cdot \ul{b} = \delta_{ij} a_i b_i \] Component definition matches geometrical definition for $n = 3$ (section 2.2). \\ In $\RR^3$ also have a cross product with component definition \[ (\ul{a} \times \ul{b})_i = \varepsilon_{ijk} a_j b_k \] (geometrical definition given in section 2.4) \\ In $\RR^n$ we have $\varepsilon_{ij\dots l}$ totally antisymmetric. (see chapter 5). Cannot use this to define vector-valued product except in $n = 3$. But in $\RR^2$ have $\varepsilon_{ij}$ with \[ \varepsilon_{12} = -\varepsilon_{21} = 1 \] and can use this to define an additional scalar cross product in 2D. \begin{align*} [\ul{a}, \ul{b}] &= \varepsilon_{ij} a_i b_j \\ &= a_1 b_2 - a_2 b_1 \qquad \text{for $\ul{a}, \ul{b} \in \RR^2$} \end{align*} Geometrically, this gives (signed) area of parallelogram \[ [\ul{a}, \ul{b}] = |\ul{a}||\ul{b}| \sin\theta \] \begin{center} \begin{tsqx} ! size(5cm); O := origin A := 1.5*dir(-5) B := dir(45) O->A O->B A--A+B--B dashed label $\underline{a}$ @ A+(0,-0.1) label $\underline{b}$ @ B+(0,0.1) anglemark4 A O B 4 label $\theta$ @ 0.3*dir(20) \end{tsqx} \end{center} Compare with \[ [\ul{a}, \ul{b}, \ul{c}] = \ul{a} \times \ul{b} \times \ul{c} = \varepsilon_{ijk} a_i b_j c_k \] (signed) volume of parallelepiped. \subsection{Vector Spaces} \subsubsection*{(a) Axioms; span; subspaces} Let $V$ be a set of objects called \emph{vectors} with operations \begin{enumerate}[(i)] \item $\ul{v} + \ul{w} \in V$ \item $\lambda \ul{v} \in V$ \end{enumerate} (the above expressions are defined $\forall \ul{v}, \ul{w} \in V$ and $\forall \lambda \in \RR$) \\ Then $V$ is a \emph{real vector space} if $V$ is an abelian group under $+$ and \begin{align*} \lambda (\ul{v} + \ul{w}) &= \lambda \ul{v} + \lambda \ul{w} \\ (\lambda + \mu) \ul{v} &= \lambda \ul{v} + \mu \ul{v} \\ \lambda (\mu \ul{v}) &= (\lambda\mu) \ul{v} \\ 1 \ul{v} &= \ul{v} \end{align*} These axioms or key properties apply to geometrical vectors with $V$ 3D space or to vectors in $V = \RR^n$, as above, as well as other examples. \\ For vectors $\ul{v_1}, \ul{v_2}, \dots, \ul{v_r} \in V$ we can form a \emph{linear combination} \[ \lambda_1 \ul{v_1} + \lambda_2 \ul{v_2} + \cdots + \lambda_r \ul{v_r} \in V \] for any $\lambda_i \in \RR$; the span is defined \[ \mathrm{span}\{\ul{v_1}, \ul{v_2}, \dots, \ul{v_r}\} = \{\sum_i \lambda_i \ul{v_i} : \lambda_i \in \RR\} \] A \emph{subspace} of $V$ is a subset that is itself a vector space. \\ Note $V$ and $\{\ul{o}\}$ are subspaces. \[ \mathrm{span}\{\ul{v_1}, \ul{v_2}, \dots, \ul{v_r}\} \] is a subspace for any vectors $\ul{v_1}, \dots, \ul{v_r}$. Note: a non-empty subset $U \subseteq V$ is a subspace if and only if \[ \ul{v}, \ul{w} \in U \implies \lambda \ul{v} + \mu\ul{w} \in U \,\,\,\forall \lambda, \mu \in \RR \] \begin{example*} In 3D or $\RR^3$ a line or plane through $\ul{o}$ is a subspace but a line or plane that doesn't contain $\ul{o}$ is not a subspace. For example \[ \ul{v_1} = \begin{pmatrix} 1 \\ 0 \\ -1 \end{pmatrix} , \ul{v_2} = \begin{pmatrix} 1 \\ 1 \\ -2 \end{pmatrix} , \ul{n} = \begin{pmatrix} 1 \\ 1 \\ 1 \end{pmatrix} \] \[ \mathrm{span}\{\ul{v_1}, \ul{v_2}\} = \{\ul{r} : \ul{n} \cdot \ul{r} = 0\} \] which is a plane and subspace. 
But \[ \{ul{r} : \ul{n} \cdot \ul{r} = 1\} \] is a plane but not a subspace ($\ul{r}$, $\ul{r'}$ on plane then $(\ul{r} + \ul{r'}) \cdot \ul{n} = 2$) \end{example*} \subsubsection*{(b) Linear Dependence and Independence} For vectors $\ul{v_1}, \ul{v_2}, \dots, \ul{v_r} \in V$, with $V$ a real vector space, consider the \emph{linear relation} \[ \lambda_1 \ul{v_1} + \lambda_2 \ul{v_2} + \cdots + \lambda_r \ul{v_r} = \ul{o} \tag{$*$} \] If $(*) \implies \lambda_i = 0$ for every $i$ then the vectors form a \emph{linearly independent} set (they obey only the \emph{trivial} linear relation with $\lambda_i = 0$). \\ If $(*)$ holds with at least one $\lambda_i \neq 0$ then the vectors form a \emph{linearly dependent} set (they obey a \emph{non-trivial} linear relation.) \textbf{Examples} \begin{itemize} \item \[ \begin{pmatrix} 1 \\ 0 \end{pmatrix} , \begin{pmatrix} 0 \\ 1 \end{pmatrix} , \begin{pmatrix} 0 \\ 2 \end{pmatrix} \] is linearly dependent because \[ 0 \begin{pmatrix} 1 \\ 0 \end{pmatrix} + 2 \begin{pmatrix} 0 \\ 1 \end{pmatrix} + (-1) \begin{pmatrix} 0 \\ 2 \end{pmatrix} = 0 \] Note that we cannot express $\begin{pmatrix} 1 \\ 0 \end{pmatrix}$ in terms of the others, but it is still linearly dependent. \item Any set containing $\ul{o}$ is linearly dependent. For example \[ \{ \begin{pmatrix} 1 \\ 0 \end{pmatrix} , \begin{pmatrix} 0 \\ 0 \end{pmatrix} \} \] we have \[ 0 \begin{pmatrix} 1 \\ 0 \end{pmatrix} + 412 \begin{pmatrix} 0 \\ 0 \end{pmatrix} = \ul{o} \] non-trivial linear relation. \item $\{\ul{a}, \ul{b}, \ul{c}\}$ in $\RR^3$ linearly independent if $\ul{a}\cdot \ul{b} \times \ul{c} \neq 0$. Consider \[ \alpha \ul{a} + \beta\ul{b} + \gamma \ul{c} = \ul{o} \] Take dot with $\ul{b} \times \ul{c}$ to get \[ \alpha \ul{a} \cdot \ul{b} \times \ul{c} = 0 \implies \alpha = 0 \] and we can get $\beta = \gamma = 0$ with a similar argument. \end{itemize} \subsubsection*{(c) Inner Product} This is an additional structure on a real vector space $V$, also characterised by axioms. For $\ul{v}, \ul{w} \in V$ write inner product $\ul{v} \cdot \ul{w}$ or $(\ul{v}, \ul{w}) \in \RR$. This satisfies axioms corresponding to the properties in section 3.1(a) \begin{enumerate}[(i)] \item symmetric \item bilinear \item positive definite \end{enumerate} \begin{lemma*} In a real vector space $V$ with inner product, if $\bf{v}_1, \dots, \bf{v}_r$ are non-zero and orthogonal: \[ \ub{(\bf{v}_i, \bf{v}_i) \neq 0}_{\text{fixed $i$}} \qquad \text{and} \qquad \ub{(\bf{v}_i, \bf{v}_j) = 0}_{i \neq j} \] then $\bf{v}_1, \dots, \bf{v}_r$ are linearly independent. \end{lemma*} \begin{proof} \begin{align*} \sum_i \alpha_i \bf{v}_i &= \bf{0} \\ (\bf{v}_j, \sum_i \alpha_i \bf{v}_i) &= \sum_i \alpha_i (\bf{v}_j, \bf{v}_i) \\ &= \alpha_j (\bf{v}_j, \bf{v}_j) \\ &= 0 \\ \implies \alpha_j &= 0 \end{align*} as claimed. \end{proof} \subsection{Bases and Dimension} For a vector space $V$, a basis is a set \[ \mathfrak{B} = \{\bf{e}_1, \dots, \bf{e}_n\} \] such that \begin{enumerate}[(i)] \item $\mathfrak{B}$ spans $V$, i.e. any $\bf{v} \in V$ can be written \[ \bf{v} = \sum_{i = 1}^n v_i \bf{e}_i \] \item $\mathfrak{B}$ is linearly independent. \end{enumerate} \noindent Given (ii), the coefficients $v_i$ in (i) are unique since \[ \sum_i v_i \bf{e}_i = \sum_i v_i' \bf{e}_i \] \[ \implies \sum_i (v_i - v_i') \bf{e}_i = \bf{0} \] \[ \implies v_i = v_i' \] $v_i$ are \emph{components} of $\bf{v}$ with respect to $\mathfrak{B}$. 
\noindent \ul{Examples} Standard basis for $\RR^n$ consisting of \[ \bf{e}_1 = \begin{pmatrix} 1 \\ 0 \\ \vdots \\ 0 \end{pmatrix} , \bf{e}_2 = \begin{pmatrix} 0 \\ 1 \\ \vdots \\ 0 \end{pmatrix} , \dots, \bf{e}_n = \begin{pmatrix} 0 \\ \vdots \\ 0 \\ 1 \end{pmatrix} \] is a basis according to general definition. \begin{enumerate}[(i)] \eqitem \begin{align*} \bf{x} = \begin{pmatrix} x_1 \\ \vdots \\ x_n \end{pmatrix} = x_1 \bf{e}_1 + \cdots + x_n\bf{e}_n \end{align*} \item $\bf{x} = \bf{0}$ if and only if $x_1 = x_2 = \cdots = x_n = 0$. \end{enumerate} Many other bases can be chosen, for example in $\RR^2$ we have bases \[ \left\{ \begin{pmatrix} 1 \\ 0 \end{pmatrix} , \begin{pmatrix} 1 \\ 1 \end{pmatrix} \right\} , \left\{ \begin{pmatrix} 1 \\ 1 \end{pmatrix} , \begin{pmatrix} 1 \\ -1 \end{pmatrix} \right\} ,\] or $\{\bf{a}, \bf{b}\}$ for any $\bf{a}, \bf{b} \in \RR^2$ with $\bf{a} \not\,\,\parallel \bf{b}$. In $\RR^3$, $\{\bf{a}, \bf{b}, \bf{c}\}$ is a basis if and only if \[ \bf{a} \cdot \bf{b} \times \bf{c} \neq 0 .\] Consider previous example of plane through $\bf{0}$, subspace in $\RR^3$ \[ \bf{n} \cdot \bf{r} = 0 \qquad \text{with} \qquad \bf{n} = \begin{pmatrix} 1 \\ 1 \\ 1 \end{pmatrix} ;\] we have $\{\bf{v}_1, \bf{v}_2\}$ basis with \[ \bf{v}_1 = \begin{pmatrix} 1 \\ 0 \\ -1 \end{pmatrix} , \bf{v}_2 = \begin{pmatrix} 1 \\ 1 \\ -2 \end{pmatrix} \] not normalised or $\perp$ but could choose orthonormal basis \[ \{\bf{u}_1, \bf{u}_2\} \qquad \text{with} \qquad \bf{u}_1 = \frac{1}{\sqrt{2}} \begin{pmatrix} 1 \\ -1 \\ 0 \end{pmatrix} , \qquad \text{and}\qquad \bf{u}_2 = \frac{1}{\sqrt{6}} \begin{pmatrix} 1 \\ 1 \\ -2 \end{pmatrix} .\] \begin{theorem*} If $\{\bf{e}_1, \dots, \bf{e}_n\}$ and $\{\bf{f}_1, \dots, \bf{f}_m\}$ are bases for a real vector space $V$, then \[ n = m .\] \end{theorem*} \begin{definition*} The number of vectors in any basis is the \emph{dimension} of $V$, $\mathrm{dim}\,\,V$. \end{definition*} \begin{note*} $\RR^n$ has dimension $n$ (!) \end{note*} \begin{proof} \[ \bf{f}_a = \sum_i A_{ai} \bf{e}_i \] and \[ \bf{e}_i = \sum_a B_{ia} \bf{f}_a \] for constants $A_{ai}$ and $B_{ia}$ and we use ranges of indices $i, j = 1, \dots, n$ and $a, b = 1, \dots, m$ [since $\{\bf{e}_i\}$ and $\{\bf{f}_a\}$ are bases] \begin{align*} \implies \bf{f}_a &= \sum_i A_{ai} \left( \sum_b B_{ib} \bf{f}_b \right) \\ &= \sum_b \left( \sum_i A_{ai} B_{ib} \bf{f}_b \right) \end{align*} But coefficients with respect to a basis are unique so \[ \sum_i A_{ai} B_{ib} = \delta_{ab} \] Similarly \[ \bf{e}_i = \sum_j \left( \sum_a B_{ia} A_{aj} \right) \bf{e}_j \] and hence \[ \sum_a B_{ia} A_{aj} = \delta_{ij} \] Now \begin{align*} \sum_{ia} A_{ai} B_{ia} &= \sum_a \delta_{aa} = m \\ = \sum_{ia} B_{ia} A_{ai} &= \sum_i \delta_{ii} = n \\ \implies m = n, &\text{ as required.} \end{align*} \end{proof} The steps in the proof above are within the scope of the course; but the proof without prompts is \emph{non-examinable}. \begin{note*} By convention the vector space $\{\bf{0}\}$ has dimension $\bf{0}$. Not every vector space is finite dimensional! \end{note*} \begin{proposition*} Let $V$ be a vector space of dimension $n$ (for example $\RR^n$). \begin{enumerate}[(i)] \item If $Y = \{\bf{w}_1, \dots, \bf{w}_m\}$ spans $V$, then $m \ge n$ and in the case where $m > n$, we can remove vectors can be removed from $Y$ to get a basis. \item If $X = \{\bf{u}_1, \dots, \bf{u}_k\}$ are linearly independent then $k \le n$ and in the case $k < n$ we can add vectors to $X$ to get a basis. 
\end{enumerate} \end{proposition*} \subsection{Vectors in $\CC^n$} \subsubsection*{(a) Definitions} Let $\CC^n = \{\bf{z} = (z_1, \dots, z_n) : z_j \in \ZZ\}$ and define: \begin{itemize} \item \ul{addition} $\bf{z} + \bf{w} = (z_1 + w_1, \dots, z_n + w_n)$ \item \ul{scalar multiplication} $\lambda \bf{z} = (\lambda z_1, \dots, \lambda z_n)$ for any $\bf{z}, \bf{w} \in \CC^n$. \end{itemize} Taking \emph{real} scalars $\lambda, \mu \in \RR$, $\CC^n$ is a real vector space obeying axioms or key properties in section 3.2(a). \\ Taking \emph{complex} scalars $\lambda, \mu \in \CC$, $\CC^n$ is a \emph{complex} vector space - same axioms or key properties hold, and definitions of linear combinations, linear (in)dependence, span, bases, dimension all generalise to complex scalars. \bigskip \noindent The distinction matters, for example \[ \bf{z} = (z_1, \dots, z_n) \in \CC^n \] with $z_j = x_j + iy_j$, $x_j, y_j \in \RR$ then \[ \bf{z} = \sum_j x_j \bf{e}_j + \sum_j y_j \bf{f}_j \] (real linear combination) where \[ \bf{e}_j = \ub{(0, \dots, 1, \dots, 0)}_{\text{position $j$}} \] \[ \bf{f}_j = \ub{(0, \dots, i, \dots, 0)}_{\text{position $j$}} \] therefore $\{\bf{e}_1, \dots, \bf{e}_n, \bf{f}_1, \dots, \bf{f}_n\}$ basis for $\CC^n$ as a \emph{real} vector space. So real dimension is $2n$. But \[ \bf{z} = \sum_j = z_j \bf{e}_j \qquad \text{and} \qquad \{\bf{e}_1, \dots, \bf{e}_n\} \] is a basis for $\CC^n$ as a complex vector space, dimension $n$ (over $\CC$). \subsubsection*{(b) Inner Product} Inner product or scalar product on $\CC^n$ is defined by \[ (\bf{z}, \bf{w}) = \sum_j \ol{z_j} w_j = \ol{z_1} w_1 + \cdots + \ol{z_n} w_n \] \ul{Properties} \begin{enumerate}[(i)] \item \ul{hermitian} $(\bf{w}, \bf{z}) = \ol{(\bf{z}, \bf{w})}$. \item \ul{Linear / anti-linear} \[ (\bf{z}, \lambda \bf{w} + \lambda' \bf{w}') = \lambda (\bf{z}, \bf{w}) + \lambda' (\bf{z}, \bf{w}') \] \[ (\mu\bf{z} + \mu' \bf{z}', \bf{w}) = \ol{\mu} (\bf{z}, \bf{w}) + \ol{\mu'} (\bf{z}', \bf{w}) \] \item \ul{positive definite} \\ $(\bf{z}, \bf{z}) = \sum_i |z_i|^2$ is real and $\ge 0$, and $0$ if and only if $\bf{z} = \bf{0}$. \end{enumerate} Defined \emph{length} or \emph{norm} of $\bf{z}$ to be $|\bf{z}| \ge 0$ with $|\bf{z}|^2 = (\bf{z}, \bf{z})$. \\ Define $\bf{z}, \bf{w} \in \CC^n$ to be \emph{orthogonal} if $(\bf{z}, \bf{w}) = 0$. \\ Note: the standard basis $\{\bf{e_j}\}$ for $\CC^n$ (see part (a)) is orthonormal \[ (\bf{e_i}, \bf{e_j}) = \delta_{ij} \] Also, if $\bf{z_1}, \bf{z_2}, \dots, \bf{z_k}$ are non-zero and orthogonal in sense above, then they are linearly independent over $\CC$ (same argument as in real case). \begin{example*} Complex inner product on $\CC$ $(n = 1)$ is \[ (z, w) = \ol{z}w \] Let $z = a_1 + ia_2$ (real and imaginary part) and $w = b_1 + ib_2$. Then \[ \bf{a} = (a_1, a_2), \bf{b} = (b_1, b_2) \in \RR^2 \qquad \text{corresponding vectors.} \] \begin{align*} \ol{z}w &= (a_1 b_1 + a_2 b_2) + i(a_1 b_2 - a_2 b_1) \\ &= \bf{a} \cdot \bf{b} + i [\bf{a}, \bf{b}] \end{align*} recover scalar dot and cross product in $\RR^2$. \end{example*} \newpage \section{Matrices and Linear Maps} \subsection{Introduction} \subsubsection*{(a) Definitions} A \emph{linear map} or \emph{linear transformation} is a function \[ T : V \to W \] between vector spaces $V$ (dim $n$) and $W$ (dim $m$) such that \[ T(\lambda \bf{x} + \mu \bf{y}) = \lambda T( \bf{x}) + \mu T(\bf{y}) \] for all $\bf{x}, \bf{y} \in V$ and $\lambda, \mu \in \RR$ or $\CC$ for $V$, $W$ both real or complex vector spaces. 
[mostly concerned with $V = \RR^n$ or $\CC^n$, $W = \RR^m$ or $\CC^m$] \begin{note*} A linear map is completely determined by its action on a basis $\{\bf{e}_n, \dots, \bf{e}_n\}$ for $V$, since \[ T\left(\sum_i x_i \bf{e}_i\right) = \sum_i x_i T(\bf{e}_i) \] --- \[ \bf{x}' = T(\bf{x}) \in W \] is the \emph{image} of $\bf{x} \in V$ \[ \mathrm{Im}(T) = \{\bf{x}' \in W : \bf{x}' = T(\bf{x}) \text{ for some $\bf{x} \in V$}\} \] is the \emph{image} of $T$. \end{note*} \begin{lemma*} $\mathrm{Ker}(T)$ is a subspace of $V$ and $\mathrm{Im}(T)$ is a subspace of $W$. \end{lemma*} \begin{customproof}{Check.} $\bf{x}, \bf{y} \in \mathrm{Ker}(T) \implies T(\lambda \bf{x} + \mu \bf{y}) = \lambda T(\bf{x}) + \mu T(\bf{y}) = \bf{0}$ and $\bf{0} \in \mathrm{Ker}(T)$ so the result follows. \\ Also $\bf{0} \in \mathrm{Im}(T)$ and $\bf{x}', \bf{y}' \in \mathrm{Im}(T)$ then \[ T(\lambda \bf{x} + \mu \bf{y}) = \lambda T(\bf{x}) + \mu T(\bf{y}) = \lambda \bf{x}' + \mu \bf{y}' \in \mathrm{Im}(T) \] for some $\bf{x}, \bf{y} \in V$. \end{customproof} \bigskip \noindent \ul{Examples} \begin{enumerate}[(i)] \item zero linear map $T : V \to W$ is given by $T(\bf{x}) = \bf{0}\,\,\forall \bf{x} \in V$. Then $\mathrm{Im}\,\,T = \{\bf{0}\}$, $\mathrm{Ker}\,\,T = V$. \item For $V = W$, the identity linear map $T : V \to V$ is given by \[ T(\bf{x}) = \bf{x} \qquad \forall \bf{x} \in V \] then $\mathrm{Im}\,\,T = V$, $\mathrm{Ker}\,\,T = \{\bf{0}\}$. \item $V = W = \RR^3$, $\bf{x}' = T(\bf{x})$ given by \[ \bf{x}_1' = 3x_1 + x_2 + 5x_3 \] \[ \bf{x}_2' = -x_1 - 2x_3 \] \[ \bf{x}_3' = 2x_1 + x_2 + 3x_3 \] \[ \mathrm{Ker}(T) = \left\{\lambda \begin{pmatrix} 2 \\ -1 \\ -1 \end{pmatrix} \right\} \qquad \text{(dim 1)} \] \[ \mathrm{Im}(T) = \left\{\lambda \begin{pmatrix} 3 \\ -1 \\ 2 \end{pmatrix} + \mu \begin{pmatrix} 1 \\ 0 \\ 1 \end{pmatrix} \right\} \qquad \text{(dim 2)} \] \end{enumerate} \subsubsection*{(b) Rank and Nullity} $\mathrm{dim}\,\,\mathrm{Im}(T)$ is the \emph{rank} of $T$ ($\le m$) and $\mathrm{dim}\,\,\mathrm{Ker}(T)$ is the \emph{nullity} of $T$ ($\le n$) \begin{theorem*}[Rank-nullity] For \[ T : V \to W \] a linear map (as in (a) above) \[ \mathrm{rank}(T) + \mathrm{null}(T) = n = \mathrm{dim}\,\,V \] \end{theorem*} \bigskip \noindent \ul{Examples} - refer to part (a) above \begin{enumerate}[(i)] \item $\mathrm{null}(T) + \mathrm{rank}(T) = n + 0 = n$ \item $\mathrm{null}(T) + \mathrm{rank}(T) = 0 + n = n$ \item $\mathrm{null}(T) + \mathrm{rank}(T) = 1 + 2 = 3$ \end{enumerate} \noindent Note that the following proof is \emph{non-examinable}. \begin{proof} Let $\bf{e}_1, \dots, \bf{e}_k$ be a basis for $\mathrm{Ker}(T)$ so $T(\bf{e}_i) = \bf{0}$ for $i = 1, \dots, k$. Extend by $\bf{e}_{k + 1}, \dots, \bf{e}_n$ to get a basis for $V$. Claim \[ \mathfrak{B} = \{T(\bf{e}_{k + 1}, \dots, T(\bf{e}_n)\} \] is a basis for $\mathrm{Im}(T)$. The result then follows since $\mathrm{null}(T) = k$ and $\mathrm{rank}(T) = n - k$, implying $\mathrm{null}(T) + \mathrm{rank}(T) = n$. 
\\ To check the claim: \begin{itemize} \item $\mathfrak{B}$ spans $\mathrm{Im}(T)$ since \begin{align*} \bf{x} &= \sum_{i = 1}^n x_i \bf{e}_i \\ \implies T(\bf{x}) &= \sum_{i = k + 1}^n x_i T(\bf{e}_i) \end{align*} \item $\mathfrak{B}$ is linearly independent since \begin{align*} \sum_{i = k + 1}^n \lambda_i T(\bf{e}_i) &= \bf{0} \\ \implies T\left( \sum_{i = k + 1}^n \lambda_i \bf{e}_i \right) &= \bf{0} \\ \implies \sum_{i = k + 1}^n \lambda_i \bf{e}_i &\in \mathrm{Ker}(T) \\ \implies \sum_{i = k + 1}^n \lambda_i \bf{e}_i &= \sum_{i = 1}^k \mu_i \bf{e}_i \end{align*} But $\bf{e}_1, \dots, \bf{e}_n$ are linearly independent in $V$ \[ \implies \lambda_i = 0 \qquad (i = k + 1, \dots, n) \] \[ \implies \mu_i = 0 \qquad (i = 1, \dots, k) \] hence $\mathfrak{B}$ is linearly independent. \end{itemize} Therefore $\mathfrak{B}$ is a basis. \end{proof} \subsection{Geometrical Examples} \subsubsection*{(a) Rotations} In $\RR^2$, rotation about $\bf{0}$ through angle $\theta$ is defined by \[ \bf{e}_1 \mapsto \bf{e}_1' = (\cos\theta) \bf{e}_1 + (\sin\theta)\bf{e}_2 \] \[ \bf{e}_2 \mapsto \bf{e}_2' = -(\sin\theta) \bf{e}_1 + (\cos\theta) \bf{e}_2 \] \begin{center} \begin{tsqx} ! size(5cm); (0.6,0)->>(1,0) (0,0.6)->>(0,1) O := origin e_1 := (0.6,0) e_2 := (0,0.6) e_1' := rotate(30)*e_1 e_2' := rotate(30)*e_2 anglemark4 e_1 O e_1' 5 anglemark4 e_2 O e_2' 5 label $\mathbf{e}_1$ @ e_1+(0,-0.1) label $\mathbf{e}_2$ @ e_2+(0.1,0) label $\mathbf{e}_1'$ @ e_1'+0.1*dir(30) label $\mathbf{e}_2'$ @ e_2'+0.1*dir(120) label $\theta$ @ 0.2*dir(15) label $\theta$ @ 0.2*dir(105) (-0.5,0)->>e_1 (0,-0.2)->>e_2 O->>e_1' O->>e_2' \end{tsqx} \end{center} In $\RR^3$, rotation about the axis given by $\bf{e}_3$ is defined as above, with \[ \bf{e}_3 \mapsto \bf{e}_3' = \bf{e}_3 \] Now consider rotation about an axis $\bf{n}$ (unit vector). \\ Given $\bf{x}$, resolve $\parallel$ and $\perp$ to $\bf{n}$: \[ \bf{x} = \bf{x}_\parallel + \bf{x}_\perp \] with $\bf{x}_\parallel = (\bf{x} \cdot \bf{n}) \bf{n}$, and hence $\bf{n} \cdot \bf{x}_\perp = 0$. Under rotation \begin{align*} \bf{x}_\parallel &\mapsto \bf{x}_\parallel' = \bf{x}_\parallel \\ \bf{x}_\perp &\mapsto \bf{x}_\perp' = (\cos\theta) \bf{x}_\perp + (\sin\theta) \bf{n} \times \bf{x} \end{align*} by considering the plane $\perp \bf{n}$, comparing to rotation in $\RR^2$ and noting that \[ |\bf{x}_\perp| = |\bf{n} \times \bf{x}| \] \begin{center} \begin{tsqx} ! size(5cm); (-0.2,0)->>(1.5,0) (0,-0.2)->>(0,1.5) label $\mathbf{x}_\perp$ @ (1.6,0) label $\mathbf{n} \times \mathbf{x}$ @ (0,1.6) X := (1.2,0.8) label $\mathbf{x}_\perp'$ @ X+0.1*dir(X) anglemark4 (1,0) origin X 5 label $\theta$ @ 0.2*dir(18) origin->>X \end{tsqx} \end{center} Re-assemble: \[ \bf{x} \mapsto \bf{x}' = \bf{x}_\parallel' + \bf{x}_\perp' = (\cos\theta) \bf{x} + (1 - \cos\theta)(\bf{n} \cdot \bf{x}) \bf{n} + \sin\theta\, \bf{n} \times \bf{x} .\] \subsubsection*{(b) Reflections} Consider \emph{reflection} in a plane in $\RR^3$ (or a line in $\RR^2$) through $\bf{0}$ with unit normal $\bf{n}$.
\\ Given $\bf{x}$, resolve $\parallel$ and $\perp$ to $\bf{n}$: \begin{align*} \bf{x}_\parallel &\mapsto \bf{x}_\parallel' = -\bf{x}_\parallel \\ \bf{x}_\perp &\mapsto \bf{x}_\perp' = \bf{x}_\perp \end{align*} \begin{center} \includegraphics[width=0.6\linewidth] {images/60a1704437d811ec.png} \end{center} \[ \bf{x} \mapsto \bf{x}' = \bf{x} - 2(\bf{x} \cdot \bf{n})\bf{n} \] \subsubsection*{(c) Dilations} A dilation by scale factors $\alpha$, $\beta$, $\gamma$ (real, $> 0$) along axes $\bf{e}_1$, $\bf{e}_2$, $\bf{e}_3$ in $\RR^3$ is defined by \[ \bf{x} = x_1 \bf{e}_1 + x_2 \bf{e}_2 + x_3 \bf{e}_3 \mapsto \bf{x}' = \alpha x_1 \bf{e}_1 + \beta x_2 \bf{e}_2 + \gamma x_3 \bf{e}_3 \] [unit cube $\to$ cuboid] \subsubsection*{(d) Shears} Given $\bf{a}$, $\bf{b}$ orthogonal unit vectors ($|\bf{a}| = |\bf{b}| = 1$ and $\bf{a} \cdot \bf{b} = 0$) define a shear with parameter $\lambda$ by \[ \bf{x} \mapsto \bf{x}' = \bf{x} + \lambda (\bf{x} \cdot \bf{b}) \bf{a} \] The definition applies in $\RR^n$, and $\bf{u}' = \bf{u}$ for any vector $\bf{u} \perp \bf{b}$. \begin{center} \includegraphics[width=0.6\linewidth] {images/3397cec637d911ec.png} \end{center} \subsection{Matrices as Linear Maps $\RR^n \to \RR^m$} \subsubsection*{(a) Definitions} Consider a linear map $T : \RR^n \to \RR^m$ and standard bases $\{\bf{e}_i\}$ and $\{\bf{f}_a\}$. Let $\bf{x}' = T(\bf{x})$ with \[ \bf{x} = \sum_i x_i \bf{e}_i = \begin{pmatrix} x_1 \\ \vdots \\ x_n \end{pmatrix} , \bf{x}' = \sum_a x_a' \bf{f}_a = \begin{pmatrix} x_1' \\ \vdots \\ x_m' \end{pmatrix} \] Linearity implies $T$ is determined by \[ T(\bf{e}_i) = \bf{e}_i' = \bf{C}_i \in \RR^m (i = 1, \dots, n) ;\] take these as \emph{columns} of an $m \times n$ \emph{array} or \emph{matrix} with \emph{rows} \[ \bf{R}_a \in \RR^n (a = 1, \dots, m) .\] $M$ has entries $M_{ai} \in \RR$ where $a$ labels rows and $i$ labels columns. \[ \begin{pmatrix} \uparrow & & \uparrow \\ \bf{C}_1 & \cdots & \bf{C}_n \\ \downarrow & & \downarrow \end{pmatrix} = M = \begin{pmatrix} \leftarrow & \bf{R}_1 & \rightarrow \\ & \vdots & \\ \leftarrow & \bf{R}_m & \rightarrow \end{pmatrix} \] \[ (\bf{C}_i)_a = M_{ai} = (\bf{R}_a)_i \] The action of $T$ is given by the matrix $M$ multiplying the vector $\bf{x}$: \begin{center} \fbox{$\bf{x}' = M\bf{x}$ defined by $x_a' = M_{ai} x_i$ ($\sum$ convention)} \end{center} This follows from the definitions above since \[ \bf{x}' = T\left( \sum_i x_i \bf{e}_i \right) = \sum_i x_i \bf{C}_i \] \begin{align*} \implies (\bf{x}')_a &= \sum_i x_i (\bf{C}_i)_a \\ &= \sum_i M_{ai} x_i \\ &= \sum_i (\bf{R}_a)_i x_i \\ &= \bf{R}_a \cdot \bf{x} \end{align*} Now regard properties of $T$ as properties of $M$. \[ \mathrm{Im}(T) = \mathrm{Im}(M) = \mathrm{span}\{\bf{C}_1, \dots, \bf{C}_n\} \] the \emph{image of $M$} (or $T$) is the span of the columns. \[ \mathrm{Ker}(T) = \mathrm{Ker}(M) = \{\bf{x} : \bf{R}_a \cdot \bf{x} = 0 \,\,\forall a\} \] the \emph{kernel of $M$} is the subspace $\perp$ to all the rows. \subsubsection*{(b) Examples} (Refer to sections 4.1 and 4.2) \begin{enumerate}[(i)] \item Zero map $\RR^n \to \RR^m$ corresponds to the \emph{zero matrix} $M = 0$ with $M_{ai} = 0$. \item Identity map $\RR^n \to \RR^n$ corresponds to the \emph{identity matrix} \begin{center} \includegraphics[width=0.6\linewidth] {images/9c9d4c0a37db11ec.png} \end{center} with $I_{ij} = \delta_{ij}$.
\item $\RR^3 \to \RR^3$, $\bf{x}' = T(\bf{x}) = M \bf{x}$ with \[ M = \begin{pmatrix} 3 & 1 & 5 \\ -1 & 0 & -2 \\ 2 & 1 & 3 \end{pmatrix} , \bf{C}_1 = \begin{pmatrix} 3 \\ -1 \\ 2 \end{pmatrix} , \bf{C}_2 = \begin{pmatrix} 1 \\ 0 \\ 1 \end{pmatrix} , \bf{C}_3 = \begin{pmatrix} 5 \\ -2 \\ 3 \end{pmatrix} \] \begin{align*} \mathrm{Im}(T) &= \mathrm{Im}(M) \\ &= \mathrm{span}\{\bf{C}_1, \bf{C}_2, \bf{C}_3\} \\ &= \mathrm{span}\{\bf{C}_1, \bf{C}_2\} \qquad \text{since $\bf{C}_3 = 2\bf{C}_1 - \bf{C}_2$} \end{align*} \begin{align*} \bf{R}_1 &= \begin{pmatrix} 3 & 1 & 5 \end{pmatrix} \\ \bf{R}_2 &= \begin{pmatrix} -1 & 0 & -2 \end{pmatrix} \\ \bf{R}_3 &= \begin{pmatrix} 2 & 1 & 3 \end{pmatrix} \\ \bf{R}_2 \times \bf{R}_3 &= \begin{pmatrix} 2 & -1 & -1 \end{pmatrix} = \bf{u} \end{align*} and we can notice that $\bf{u}$ is perpendicular to all rows. In fact \[ \mathrm{Ker}(T) = \mathrm{Ker}(M) = \{\lambda\bf{u}\} .\] \item Rotation through $\theta$ about $\bf{0}$ in $\RR^2$ \begin{align*} \bf{e}_1 = \begin{pmatrix} 1 \\ 0 \end{pmatrix} &\mapsto \begin{pmatrix} \cos\theta \\ \sin\theta \end{pmatrix} = \bf{C}_1 \\ \bf{e}_2 = \begin{pmatrix} 0 \\ 1 \end{pmatrix} &\mapsto \begin{pmatrix} -\sin\theta \\ \cos\theta \end{pmatrix} = \bf{C}_2 \\ \implies M &= \begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix} . \end{align*} \item Dilation $\bf{x}' = M\bf{x}$ with scale factors $\alpha$, $\beta$, $\gamma$ along axes in $\RR^3$: \[ M = \begin{pmatrix} \alpha & 0 & 0 \\ 0 & \beta & 0 \\ 0 & 0 & \gamma \end{pmatrix} \] \item Reflection in plane $\perp \bf{n}$ (unit vector) matrix $H$: \[ \bf{x}' = H\bf{x} = \bf{x} - 2(\bf{x} \cdot \bf{n}) \bf{n} \] \begin{align*} \bf{x}_i' &= x_i - 2x_j n_j n_i \\ &= (\delta_{ij} - 2n_i n_j) x_j \\ H_{ij} &= \delta_{ij} - 2n_i n_j \end{align*} For example \[ \bf{n} = \frac{1}{\sqrt{3}} \begin{pmatrix} 1 \\ 1 \\ 1 \end{pmatrix} \qquad\qquad n_i n_j = \frac{1}{3} \,\,\forall i, j \] \[ H = \frac{1}{3} \begin{pmatrix} 1 & -2 & -2 \\ -2 & 1 & -2 \\ -2 & -2 & 1 \end{pmatrix} \] \item Shear $\bf{x}' = S \bf{x} = \bf{x} + \lambda (\bf{b} \cdot \bf{x}) \bf{a}$ \[ \bf{x}_i' = S_{ij} x_j \] with \[ S_{ij} = \delta_{ij} + \lambda a_i b_j \] for example in $\RR^2$ with $\bf{a} = \begin{pmatrix} 1 \\ 0 \end{pmatrix} $ and $\bf{b} = \begin{pmatrix} 0 \\ 1 \end{pmatrix} $ \[ S = \begin{pmatrix} 1 & \lambda \\ 0 & 1 \end{pmatrix} \] \item Rotation win $\RR^3$ with axis $\bf{n}$ and angle $\theta$, \[ \bf{x}' = R\bf{x} \qquad x_i' = R_{ij} x_j \] where $R_{ij} = \delta_{ij} \cos\theta + (1 - \cos\theta) n_i n_j - (\sin\theta) \eps{ijk} n_k$ (see Example Sheet 2). \end{enumerate} \subsubsection*{(c) Isometries, area and determinant in $\RR^2$} Consider linear map $\RR^2 \to \RR^2$ given by a $2 \times 2$ matrix $M$: \[ \bf{x} \mapsto \bf{x}' = M\bf{x} \] \begin{enumerate}[(i)] \item When is $M$ an \emph{isometry} preserving lengths $|\bf{x}'| = |\bf{x}|$. This is equivalent to preserving inner products \[ \bf{x}' \cdot \bf{y}' = \bf{x} \cdot \bf{y} \] (since $\bf{x} \cdot \bf{y} = \half(|\bf{x} + \bf{y}|^2 - |\bf{x}|^2 - |\bf{y}|^2)$). 
Necessary conditions are \[ M \begin{pmatrix} 1 \\ 0 \end{pmatrix} = \begin{pmatrix} \cos\theta \\ \sin\theta \end{pmatrix} \qquad \text{for some $\theta$; most general unit vector in $\RR^2$} \] \[ M \begin{pmatrix} 0 \\ 1 \end{pmatrix} = \pm \begin{pmatrix} -\sin\theta \\ \cos\theta \end{pmatrix} \qquad \text{general unit vector perpendicular to other} \] Simple to check that these conditions are also sufficient and have two cases: \[ M = R = \begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix} \qquad \text{\emph{rotation}} \] or \[ M = H = \begin{pmatrix} \cos\theta & \sin\theta \\ \sin\theta & -\cos\theta \end{pmatrix} \qquad \text{\emph{reflection}} \] Compare with expression for reflection in Section 4.3(b)(vi) \[ H_{ij} = \delta_{ij} - 2n_i n_j \] and note for \[ \bf{n} = \begin{pmatrix} n_1 \\ n_2 \end{pmatrix} = \begin{pmatrix} -\sin\theta/2 \\ \cos\theta/2 \end{pmatrix} \] we get \[ H = \begin{pmatrix} 1 - 2\sin^2 \theta/2 & 2\sin\theta/2 \cos\theta/2 \\ 2\sin\theta/2 \cos\theta/2 & 1 - 2\cos^2\theta/2 \end{pmatrix} \] agreeing with $H$ above. This is reflection in a line in $\RR^2$ as shown \begin{center} \begin{tsqx} ! size(5cm); (-1.5,0)->>(1.5,0) (0,-1)->>(0,1) label $x_1$ @ (1.6,0) label $x_2$ @ (0,1.1) -1.5*dir(30)--1.5*dir(30) origin->>0.5*dir(120) label $\mathbf{n}$ @ 0.6*dir(120) anglemark4 (1.5,0) origin dir(20) 5 label $\theta/2$ @ 0.5*dir(15) \end{tsqx} \end{center} \item How does $M$ change \emph{areas} in $\RR^2$ (in general)? Consider unit square in $\RR^2$, mapped to parallelogram as shown, with area \[ [M\bf{e}_1, M\bf{e}_2] \] \begin{center} \begin{tsqx} ! size(5cm); (0,0)->>(1,0) (0,0)->>(0,1) label $\mathbf{e}_1$ @ (1.1,0) label $\mathbf{e}_2$ @ (0,1.1) (1,0)--(1,1)--(0,1) dashed Me_1 := (2,1) Me_2 := (0.7,1.5) label $M\mathbf{e}_1$ @ 1.1*Me_1 label $M\mathbf{e}_2$ @ 1.1*Me_2 origin->>Me_1 origin->>Me_2 Me_1--Me_1+Me_2--Me_2 dashed \end{tsqx} \end{center} ``scalar cross product'' \[ \left[ \begin{pmatrix} M_{11} \\ M_{21} \end{pmatrix} , \begin{pmatrix} M_{12} \\ M_{22} \end{pmatrix} \right] = M_{11}M_{22} - M_{12}M_{21} = \det M \] where $\det M$ is the \emph{determinant} of $2 \times 2$ matrix \[ M = \begin{pmatrix} M_{11} & M_{12} \\ M_{21} & M_{22} \end{pmatrix} \] This is factor (with sign) by areas are scaled under $M$. \\ Now compare with (i): \[ \det R = +1, \qquad \det H = -1 \] In either case $|\det M| = +1$. Consider shear \[ S = \begin{pmatrix} 1 & \lambda \\ 0 & 1 \end{pmatrix} ; \] this has $\det S = +1$ but it does not preserve lengths. \end{enumerate} \subsection{Matrices for Linear Maps in General} Consider a linear map \[ T : V \to W \] between real or complex vector spaces of dimension $n$, $m$, respectively and choose bases $\{\bf{e}_i\}$ with $i = 1, \dots, n$ for $V$ and $\{\bf{f}_a\}$ with $a = 1, \dots, m$ for $W$. The matrix $M$ for $T$ with respect to these bases is an $m \times n$ array with entries $M_{ai} \in \RR \text{ or } \CC$ . It is defined by \[ T(\bf{e}_i) = \sum_a \bf{f}_a M_{ai} \] note index positions. This is chosen to ensure that $T(\bf{x}) = \bf{x}'$ where \[ \bf{x} = \sum_i x_i \bf{e}_i \] and \[ \bf{x}' = \sum_a x_a' \bf{f}_a \] if and only if \[ x_a' = \sum_i M_{ai} x_i \] i.e. 
\[ \begin{pmatrix} x_1' \\ \vdots \\ x_m' \end{pmatrix} = \begin{pmatrix} M_{11} & \cdots & M_{1n} \\ \vdots & \ddots & \vdots \\ M_{m1} & \cdots & M_{mn} \end{pmatrix} \begin{pmatrix} x_1 \\ \vdots \\ x_n \end{pmatrix} \] \begin{moral*} Given a choice of bases $\{\bf{e}_i\}$ and $\{\bf{f}_a\}$ \begin{itemize} \item $V$ is identified with $\RR^n$ (or $\CC^n$) \item $W$ is identified with $\RR^m$ (or $\CC^m$) \item $T$ is identified with $m \times n$ matrix $M$ \end{itemize} \end{moral*} \begin{note*} There are natural ways to combine linear maps. \\ If $S : V \to W$ is also linear, then so is \[ \alpha T + \beta S : V \to W \] defined by \[ (\alpha T + \beta S)(\bf{x}) = \alpha T(\bf{x}) + \beta S(\bf{x}) \] Or if $S : U \to V$ is also linear, then so is \[ T \circ S : U \to W \] the composition of maps. \end{note*} \subsection{Matrix Algebra} \subsubsection*{(a) Linear Combinations} If $M$ and $N$ are $m \times n$ matrices, then $\alpha M + \beta N$ is an $m \times n$ matrix defined by \[ (\alpha M + \beta N)_{ai} = \alpha M_{ai} + \beta N_{ai} \] ($a = 1, \dots, m$; $i = 1, \dots, n$) [If $M$, $N$ represent linear maps $T, S : V \to W$, then $\alpha M + \beta N$ represents $\alpha T + \beta S$, all with respect to the same choice of bases.] \subsubsection*{(b) Matrix Multiplication} If $A$ is an $m \times n$ matrix, entries $A_{ai}$ ($\in \RR \text{ or } \CC$) and $B$ is an $n \times p$ matrix, entries $B_{ir}$, then $AB$ is an $m \times p$ matrix defined by \[ (AB)_{ar} = A_{ai} B_{ir} \] The product $AB$ is not defined unless \[ \text{\# cols of $A$} = \text{\# rows of $B$} \] \[ a = 1, \dots, m \] \[ i = 1, \dots, n \] \[ r = 1, \dots, p .\] Matrix multiplication corresponds to composition of linear maps \[ [(AB)\bf{x}]_a = (AB)_{ar}x_r \] and compare \begin{align*} [A(B\bf{x})]_a &= A_{ai} (B\bf{x})_i \\ &= A_{ai}(B_{ir}x_r) \\ &= (A_{ai}B_{ir}) x_r \end{align*} \begin{example*} \[ A = \begin{pmatrix} 1 & 3 \\ -5 & 0 \\ 2 & 1 \end{pmatrix} ,\qquad B = \begin{pmatrix} 1 & 0 & -1 \\ 2 & -1 & 3 \end{pmatrix} \] \[ AB = \begin{pmatrix} 7 & -3 & 8 \\ -5 & 0 & 5 \\ 4 & -1 & 1 \end{pmatrix} \] \[ BA = \begin{pmatrix} -1 & 2 \\ 13 & 9 \end{pmatrix} \] \end{example*} \noindent \textbf{Helpful points of view} \begin{enumerate}[(i)] \item Regarding $\bf{x} \in \RR^n$ as a column vector, i.e. an $n \times 1$ matrix, the definition of a matrix multiplying a vector agrees with matrix multiplication. \item For product $AB$ ($A$ is $m \times n$, $B$ is $n \times p$) have columns \[ \bf{C}_r(B) \in \RR^n \] \[ \bf{C}_r(AB) \in \RR^m \] related by \[ \bf{C}_r(AB) = A\bf{C}_r(B) \] \item \eqnoskip \[ AB = \begin{pmatrix} & \vdots & \\ \leftarrow & \bf{R}_a(A) & \rightarrow \\ & \vdots & \end{pmatrix} \begin{pmatrix} & \uparrow & \\ \cdots & \bf{C}_r(B) & \cdots \\ & \downarrow & \end{pmatrix} \] \begin{align*} (AB)_{ar} &= [\bf{R}_a(A)]_i [\bf{C}_r(B)]_i \\ &= \bf{R}_a(A) \cdot \bf{C}_r(B) \end{align*} dot product in $\RR^n$ for real matrices. \end{enumerate} \bigskip \noindent \ul{Properties} of matrix products \begin{align*} (\lambda M + \mu N)P &= \lambda (MP) + \mu (NP) \\ P(\lambda M + \mu N) &= \lambda (PM) + \mu(PN) \\ (MN)P &= M(NP) \end{align*} \subsubsection*{(c) Matrix Inverses} Consider an $m \times n$ matrix $A$ and $n \times m$ matrices $B$, $C$: $B$ is a \emph{left} inverse for $A$ if \[ BA = I \qquad (n \times n) ;\] $C$ is a \emph{right} inverse for $A$ if \[ AC = I \qquad (m \times m) .\] If $m = n$, so that $A$ is \emph{square}, either of these implies the other and $B = C = A^{-1}$, \emph{the} inverse.
\[ AA^{-1} = A^{-1}A = I .\] Not every matrix has an inverse; if it does it is called \emph{invertible} or \emph{non-singular}. \\ Consider the map $\RR^n \to \RR^n$ given by a real matrix $M$. If $\bf{x}' = M\bf{x}$ and $M^{-1}$ exists then $\bf{x} = M^{-1} \bf{x}'$. \\ For $n = 2$, \begin{align*} x_1' &= M_{11}x_1 + M_{12}x_2 \\ x_2' &= M_{21}x_1 + M_{22}x_2 \\ \implies M_{22} x_1' - M_{12} x_2' &= (\det M) x_1 \\ \text{and }-M_{21} x_1' + M_{11} x_2' &= (\det M) x_2 \end{align*} So, if $\det M = M_{11} M_{22} - M_{12}M_{21} \neq 0$ then \[ M^{-1} = \frac{1}{\det M} \begin{pmatrix} M_{22} & -M_{12} \\ -M_{21} & M_{11} \end{pmatrix} \] \ul{Examples} \begin{align*} R(\theta) &= \begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix} \\ R(\theta)^{-1} &= R(-\theta) \\ H(\theta) &= \begin{pmatrix} \cos\theta & \sin\theta \\ \sin\theta & -\cos\theta \end{pmatrix} \\ H(\theta)^{-1} &= H(\theta) \\ S(\lambda) &= \begin{pmatrix} 1 & \lambda \\ 0 & 1 \end{pmatrix} \\ S(\lambda)^{-1} &= S(-\lambda) \end{align*} \subsubsection*{(d) Transpose and Hermitian Conjugate} \begin{enumerate}[(i)] \item If $M$ is an $m \times n$ matrix, then the transpose $M^\top$ is an $n \times m$ matrix defined by \[ (M^\top)_{ia} = M_{ai} \] ``exchange rows and columns'' \[ a = 1, \dots, m; i = 1, \dots, n \] \ul{Properties} \[ (\alpha A + \beta B)^\top = \alpha A^\top + \beta B^\top \qquad (A, B\,\,\,m \times n) \] \[ (AB)^\top = B^\top A^\top \] Check: \begin{align*} [(AB)^\top]_{ra} &= (AB)_{ar} \\ &= A_{ai} B_{ir} \\ &= (A^\top)_{ia} (B^\top)_{ri} \\ &= (B^\top)_{ri} (A^\top)_{ia} \\ &= (B^\top A^\top)_{ra} \qquad \text{as required.} \end{align*} \begin{note*} \[ \bf{x} = \begin{pmatrix} x_1 \\ \vdots \\ x_n \end{pmatrix} \qquad\text{column vector, $n \times 1$ matrix} \] \[ \implies \bf{x}^\top = (x_1, \dots, x_n) \qquad \text{row vector, $1 \times n$ matrix} \] Inner product on $\RR^n$ is \[ \bf{x} \cdot \bf{y} = \bf{x}^\top \bf{y} \qquad\text{scalar $1 \times 1$ matrix} \] but $\bf{y} \bf{x}^\top = M$, $n \times n$ matrix with $M_{ij} = y_i x_j$. \end{note*} \item If $M$ is square, $n \times n$, then $M$ is \emph{symmetric} if and only if $M^\top = M$ or $M_{ij} = M_{ji}$ and \emph{antisymmetric} if and only if $M^\top = -M$ or $M_{ij} = -M_{ji}$. Any square matrix can be written as a sum of symmetric and antisymmetric parts: \[ M = S + A \] where $S = \half (M + M^\top)$ and $A = \half(M - M^\top)$.
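\\ For example, as a quick check of this decomposition with a simple $2 \times 2$ matrix: \[ M = \begin{pmatrix} 1 & 2 \\ 0 & 3 \end{pmatrix} = \ub{\begin{pmatrix} 1 & 1 \\ 1 & 3 \end{pmatrix}}_{S = \half(M + M^\top)} + \ub{\begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix}}_{A = \half(M - M^\top)} \]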
\begin{example*} If $A$ is $3 \times 3$ antisymmetric, then it can be re-written in terms of a vector $\bf{a}$: \[ A = \begin{pmatrix} 0 & a_3 & -a_2 \\ -a_3 & 0 & a_1 \\ a_2 & -a_1 & 0 \end{pmatrix} \] \[ A_{ij} = \varepsilon_{ijk} a_k \qquad \text{and}\qquad a_k = \half \varepsilon_{kij} A_{ij} \] Then \begin{align*} (A\bf{x})_i &= A_{ij} x_j \\ &= \varepsilon_{ijk} a_k x_j \\ &= (\bf{x} \times \bf{a})_i \end{align*} \end{example*} \item If $M$ is an $m \times n$ matrix, the \emph{hermitian conjugate} $M^\dag$ is defined by \[ (M^\dag)_{ia} = \ol{M}_{ai} \] or \[ M^\dag = \ol{M}^\top = \ol{(M^\top)} \] \ul{Properties} \[ (\alpha A + \beta B)^\dag = \ol{\alpha} A^\dag + \ol{\beta} B^\dag \] \[ (AB)^\dag = B^\dag A^\dag \] \begin{note*} \[ \bf{z} = \begin{pmatrix} z_1 \\ \vdots \\ z_n \end{pmatrix} \qquad\text{column vector, $n \times 1$ matrix} \] \[ \implies \bf{z}^\dag = (\ol{z_1}, \dots, \ol{z_n}) \qquad \text{row vector, $1 \times n$ matrix} \] Inner product on $\CC^n$ is \[ (\bf{z}, \bf{w}) = \bf{z}^\dag \bf{w} \qquad \text{scalar $1\times 1$ matrix} \] \end{note*} \item If $M$ is \emph{square} $n \times n$ then $M$ is \emph{hermitian} if $M^\dag = M$ or $M_{ij} = \ol{M}_{ji}$ and \emph{anti-hermitian} if $M^\dag = -M$ or $M_{ij} = -\ol{M}_{ji}$. \end{enumerate} \subsubsection*{(e) Trace} For any square $n \times n$ matrix $M$, the \emph{trace} is defined by \[ \mathrm{tr}(M) = M_{ii} = M_{11} + \cdots + M_{nn} \qquad \text{(sum of diagonal entries)} \] \ul{Properties} \[ \mathrm{tr}(\alpha M + \beta N) = \alpha \mathrm{tr}(M) + \beta \mathrm{tr}(N) \] \[ \mathrm{tr}(MN) = \mathrm{tr}(NM) \] check: \begin{align*} (MN)_{ii} &= M_{ia}N_{ai} \\ &= N_{ai} M_{ia} \\ &= (NM)_{aa} \end{align*} \[ \mathrm{tr}(M) = \mathrm{tr}(M^\top) \] \[ \mathrm{tr}(I) = n \qquad\text{for $I$ $n \times n$.} \] \[ I_{ij} = \delta_{ij} \qquad\text{and}\qquad I_{ii} = \delta_{ii} = n \] Previously we decomposed \[ M = S + A \qquad\text{symmetric / antisymmetric parts} \] Let $T = S - \frac{1}{n} (\mathrm{tr}(S)) I$ or $T_{ij} = S_{ij} - \frac{1}{n} \mathrm{tr}(S)\, \delta_{ij}$, then $T_{ii} = \mathrm{tr}(T) = 0$; and note $\mathrm{tr}(M) = \mathrm{tr}(S)$ and $\mathrm{tr}(A) = 0$. So \[ M = \ub{T}_{\text{symm and traceless}} + \ub{A}_{\text{antisymm part}} + \frac{1}{n} \ub{\mathrm{tr}(M) I}_{\text{pure trace}} \] \begin{example*} \[ M = \begin{pmatrix} 1 & 2 & 3 \\ 4 & 5 & 6 \\ 1 & 2 & 3 \end{pmatrix} , \qquad S = \begin{pmatrix} 1 & 3 & 2 \\ 3 & 5 & 4 \\ 2 & 4 & 3 \end{pmatrix} , \qquad A = \begin{pmatrix} 0 & -1 & 1 \\ 1 & 0 & 2 \\ -1 & -2 & 0 \end{pmatrix} \] \[ \mathrm{tr}(S) = \mathrm{tr}(M) = 9 \] \[ T = \begin{pmatrix} -2 & 3 & 2 \\ 3 & 2 & 4 \\ 2 & 4 & 0 \end{pmatrix} \] \[ M = T + A + 3I \] Furthermore $A\bf{x} = \bf{x} \times \bf{a}$ where $\bf{a} = (2, -1, -1)$. \end{example*} \subsubsection*{(f) Orthogonal and Unitary Matrices} A real $n \times n$ matrix $U$ is \emph{orthogonal} if and only if \[ U^\top U = UU^\top = I \] i.e. \[ U^\top = U^{-1} \] These conditions can be written \[ U_{ki}U_{kj} = U_{ik}U_{jk} = \delta_{ij} \] (the left implies the columns are orthonormal, and the middle implies that the rows are orthonormal).
[recall $[\bf{C}_i(U)]_k = U_{ki} = [R_k(U)]_i$] \[ \ub{ \begin{pmatrix} & \vdots & \\ \leftarrow & \bf{C}_i & \rightarrow \\ & \vdots & \end{pmatrix} }_{U^\top} \ub{ \begin{pmatrix} & \uparrow & \\ \cdots & \bf{C}_j & \cdots \\ & \downarrow & \end{pmatrix} }_{U} = I \] \[ \bf{C}_i \cdot \bf{C}_j = \delta_{ij} \] \emph{Equivalent definition} $U$ is orthogonal if and only if it preserves the inner product on $\RR^n$ \[ (U\bf{x}) \cdot (U\bf{y}) = \bf{x} \cdot \bf{y} \,\,\forall \bf{x}, \bf{y} \in \RR^n \] To check equivalence, write this as \[ (U\bf{x})^\top(U\bf{y}) = \bf{x}^\top \bf{y} \] \begin{align*} LHS &= (\bf{x}^\top U^\top)(U\bf{y}) \\ &= \bf{x}^\top (U^\top U) \bf{y} \\ &= RHS \,\,\forall \bf{x}, \bf{y} \end{align*} if and only if $U^\top U = I$. Note, since $\bf{C}_i = U\bf{e}_i$, columns are orthonormal is equivalent to \[ (U\bf{e}_i) \cdot (\bf{U} \bf{e}_j) = \bf{e}_i \cdot \bf{e}_j = \delta_{ij} \] \bigskip \noindent \textbf{Examples} \\ In $\RR^2$ we found all orthogonal matrices (section 4.3(c)): \[ \text{rotations } R(\theta) = \begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix} \] \[ \text{and reflections } H(\theta) = \begin{pmatrix} \cos\theta & \sin\theta \\ \sin\theta & -\cos\theta \end{pmatrix} \] Clearly \[ R(\theta)^\top = R(-\theta) = R(\theta)^{-1} \] \[ H(\theta)^\top = H(\theta) = H(\theta)^{-1} \] In $\RR^3$ found matrix $R(\theta)$ for rotation through $\theta$ about axis $\bf{n}$ \[ R(\theta)^\top = R(-\theta) \] since \[ R(\theta)_{ij} = R(-\theta)_{ji} \] and can check explicitly \[ R(\theta)^\top R(\theta) = R(-\theta) R(\theta) = I \] or \[ R(\theta)_{ki} R(\theta)_{kj} = \delta_{ij} \] A complex $n \times n$ matrix $U$ is \emph{unitary} if and only if \[ U^\dag U = UU^\dag = I \] i.e. \[ U^\dag = U^{-1} \] \emph{Equivalent definition}: $U$ is unitary if and only if it preserves the inner product on $\CC^n$ \[ (U\bf{z}, U\bf{w}) = (\bf{z}, \bf{w}) \,\,\forall \bf{z}, \bf{w} \in \CC^n \] To check equivalence write this as \[ (U\bf{z})^\dag (U\bf{w}) = \bf{z}^\dag \bf{w} \] \begin{align*} LHS &= (\bf{z}^\dag U^\dag) (U \bf{w}) \\ &= \bf{z}^\dag (U^\dag U)\bf{w} \\ &= RHS \,\,\forall \bf{z}, \bf{w} \end{align*} if and only if $U^\dag U = I$. \newpage \section{Determinants and Inverses} \subsection{Introduction} Consider a linear map \[ T : \RR^n \to \RR^n \] If $T$ is invertible then \[ \ub{\mathrm{Ker} \,\,T = \{\bf{0}\}}_{\substack{\text{because $T$} \\\text{one-to-one}}} \qquad \text{and} \qquad \ub{\mathrm{Im}\,\,T = \RR^n}_{\text{$T$ is onto}} \] These conditions are equivalent by rank-nullity. Conversely, if these conditions hold, then \[ \bf{e}_1' = T(\bf{e}_1), \dots, \bf{e}_n' = T(\bf{e}_n) \] is a basis (where $\{\bf{e}_i\}$ standard basis) and we can define a linear map $T^{-1}$ by \[ T^{-1}(\bf{e}_1') = \bf{e}_1, \dots, T^{-1}(\bf{e}_n') = \bf{e}_n \] How can we test whether the conditions hold from matrix $M$ representing $T$: \[ T(\bf{x}) = M\bf{x} \] and how can we find $M^{-1}$ when they do hold? 
\\ For any $M$ ($n \times n$) we will define a related matrix $\widetilde{M}$ ($n \times n$) and a scalar, the \emph{determinant} $\det(M)$ or $|M|$, such that \[ \widetilde{M}M = (\det M) I \tag{$*$} \] Then if $\det M \neq 0$, $M$ is invertible with \[ M^{-1} = \frac{1}{\det M} \widetilde{M} \] \ul{For $n = 2$} we found in section 4.5(c) that ($*$) holds with \[ M = \begin{pmatrix} M_{11} & M_{12} \\ M_{21} & M_{22} \end{pmatrix} \qquad \text{ and } \qquad \widetilde{M} = \begin{pmatrix} M_{22} & -M_{12} \\ -M_{21} & M_{11} \end{pmatrix} \] and \begin{align*} \det M &= \left| \begin{matrix} M_{11} & M_{12} \\ M_{21} & M_{22} \end{matrix} \right| \\ &= M_{11}M_{22} - M_{12}M_{21} \\ &= [M\bf{e}_1, M\bf{e}_2] \\ &= [\bf{C}_1(M), \bf{C}_2(M)] \\ &= \eps_{ij} M_{i1}M_{j2} \end{align*} This is the factor by which areas are scaled under $M$, and \[ \det M \neq 0 \iff \{M\bf{e}_1, M\bf{e}_2\} \text{ linearly independent } \iff \mathrm{Im}(M) = \RR^2 \] \ul{For $n = 3$} consider similarly \begin{align*} [M\bf{e}_1, M\bf{e}_2, M\bf{e}_3] &= [\bf{C}_1(M), \bf{C}_2(M), \bf{C}_3(M)] &\text{scalar triple product} \\ &= \eps_{ijk} M_{i1} M_{j2} M_{k3} \\ &= \det M, \qquad \text{definition for $n = 3$} \end{align*} This is the factor by which volumes are scaled under $M$, and \[ \det M \neq 0 \iff \{M\bf{e}_1, M\bf{e}_2, M\bf{e}_3\} \text{ linearly independent } \iff \mathrm{Im}(M) = \RR^3 \] Now define $\widetilde{M}$ from $M$ using row / column notation: \[ \bf{R}_1(\widetilde{M}) = \bf{C}_2(M) \times \bf{C}_3(M) \] \[ \bf{R}_2(\widetilde{M}) = \bf{C}_3(M) \times \bf{C}_1(M) \] \[ \bf{R}_3(\widetilde{M}) = \bf{C}_1(M) \times \bf{C}_2(M) \] and note that \begin{align*} (\widetilde{M}M)_{ij} &= \bf{R}_i(\widetilde{M}) \cdot \bf{C}_j(M) \\ &= \ub{(\bf{C}_1(M) \cdot \bf{C}_2(M) \times \bf{C}_3(M))}_{\det M} \delta_{ij} \end{align*} as required. \begin{example*} \[ M = \begin{pmatrix} 1 & 3 & 0 \\ 0 & -1 & 2 \\ 4 & 1 & -1 \end{pmatrix} \] \[ \bf{C}_2 \times \bf{C}_3 = \begin{pmatrix} 3 \\ -1 \\ 1 \end{pmatrix} \times \begin{pmatrix} 0 \\ 2 \\ -1 \end{pmatrix} = \begin{pmatrix} -1 \\ 3 \\ 6 \end{pmatrix} \] \[ \bf{C}_3 \times \bf{C}_1 = \begin{pmatrix} 0 \\ 2 \\ -1 \end{pmatrix} \times \begin{pmatrix} 1 \\ 0 \\ 4 \end{pmatrix} = \begin{pmatrix} 8 \\ -1 \\ -2 \end{pmatrix} \] \[ \bf{C}_1 \times \bf{C}_2 = \begin{pmatrix} 1 \\ 0 \\ 4 \end{pmatrix} \times \begin{pmatrix} 3 \\ -1 \\ 1 \end{pmatrix} = \begin{pmatrix} 4 \\ 11 \\ -1 \end{pmatrix} \] \[ \widetilde{M} = \begin{pmatrix} -1 & 3 & 6 \\ 8 & -1 & -2 \\ 4 & 11 & -1 \end{pmatrix} \] and $\widetilde{M}M = (\det M)I$ where \[ \det M = \bf{C}_1 \cdot \bf{C}_2 \times \bf{C}_3 = 23 .\] \end{example*} \subsection{$\eps$ and Alternating Forms} \subsubsection*{(a) $\eps$ and Permutations} Recall: a permutation $\sigma$ on the set $\{1, 2, \dots, n\}$ is a bijection from this set to itself, specified by the list \[ \sigma(1), \sigma(2), \dots, \sigma(n) \] Permutations $\sigma$ form a group, the symmetric group $S_n$, of order $n!$. The \emph{sign} or \emph{signature} is $\eps(\sigma) = (-1)^k$ where $k$ is the number of transpositions used to build $\sigma$ (this is well-defined).
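\\ For example (a simple illustrative case): the permutation $\sigma$ of $\{1, 2, 3\}$ with $\sigma(1) = 2$, $\sigma(2) = 3$, $\sigma(3) = 1$ can be built from two successive swaps, so $\eps(\sigma) = (-1)^2 = +1$, whereas a single transposition such as $\sigma(1) = 2$, $\sigma(2) = 1$, $\sigma(3) = 3$ has $\eps(\sigma) = -1$.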
The \emph{alternating} or $\eps$ symbol in $\RR^n$ or $\CC^n$ is defined by \[ \eps_{\ub{ij\cdots l}_{\text{$n$ indices}}} = \begin{cases} +1 & \text{if $i, j, \dots, l$ is an even permutation of $1, 2, \dots, n$} \\ -1 & \text{if $i, j, \dots, l$ is an odd permutation of $1, 2, \dots, n$} \\ 0 & \text{else} \end{cases} \] If $\sigma$ is any permutation of $1, 2, \dots, n$ then \[ \eps_{\sigma(1)\sigma(2)\cdots\sigma(n)} = \eps(\sigma) \] \begin{lemma} \[ \eps_{\sigma(i)\sigma(j)\cdots\sigma(l)} = \eps(\sigma) \eps_{ij\cdots l}\] (that $\eps$ is totally antisymmetric is a corollary) \end{lemma} \begin{proof} If $i, j, \dots, l$ is \emph{not} a permutation of $1, 2, \dots, n$ then $RHS = LHS = 0$. If $i = \rho(1)$, $j = \rho(2)$, \dots, $l = \rho(n)$ for some permutation $\rho$ then \[ RHS = \eps(\sigma) \eps(\rho) = \eps(\sigma\rho) = LHS \] as required. \end{proof} \subsubsection*{(b) Alternating Forms and Linear (In)dependence} Given $\bf{v}_1, \dots, \bf{v}_n \in \RR^n \text{ or }\CC^n$ the \emph{alternating form} combines them to produce a scalar, defined by \begin{align*} [\bf{v}_1, \bf{v}_2, \dots, \bf{v}_n] &= \eps_{ij\cdots l} (\bf{v}_1)_i (\bf{v}_2)_j \cdots (\bf{v}_n)_l \\ &= \sum_\sigma \eps(\sigma) (\bf{v}_1)_{\sigma(1)} (\bf{v}_2)_{\sigma(2)} \cdots (\bf{v}_n)_{\sigma(n)} \end{align*} ($\sum_\sigma$ means sum over all $\sigma \in S_n$) \bigskip \noindent \textbf{Properties} \\ \begin{enumerate}[(i)] \item \emph{Multilinear} \begin{align*} [\bf{v}_1, \dots, \bf{v}_{p - 1}, \alpha \bf{u} + \beta \bf{w}, \bf{v}_{p + 1}, \dots, \bf{v}_n] = &\alpha [\bf{v}_1, \dots, \bf{v}_{p - 1}, \bf{u}, \bf{v}_{p + 1}, \dots, \bf{v}_n] \\ &+ \beta [\bf{v}_1, \dots, \bf{v}_{p - 1}, \bf{w}, \bf{v}_{p + 1}, \dots, \bf{v}_n] \end{align*} \item \emph{Totally antisymmetric} \[ [\bf{v}_{\sigma(1)}, \dots, \bf{v}_{\sigma(n)}] = \eps(\sigma) [\bf{v}_1, \dots, \bf{v}_n] \] \item $[\bf{e}_1, \bf{e}_2, \dots, \bf{e}_n] = 1$ for $\bf{e}_i$ standard basis vectors. Properties (i), (ii), (iii) fix the alternating form, and they also imply \item If $\bf{v}_p = \bf{v}_q$ for some $p \neq q$ then \[ [\bf{v}_1, \dots, \bf{v}_p, \dots, \bf{v}_q, \dots, \bf{v}_n] = 0 \] (from (ii): exchanging $\bf{v}_p \leftrightarrow \bf{v}_q$ changes the sign of the alternating form but leaves it unchanged, so it must vanish). \item If $\bf{v}_p = \sum_{i \neq p} \lambda_i \bf{v}_i$ then \[ [\bf{v}_1, \dots, \bf{v}_p, \dots, \bf{v}_n] = 0 \] (sub in and use (i) and (iv)). \end{enumerate} \begin{example*} In $\CC^4$, \[ \bf{v}_1 = \begin{pmatrix} i \\ 0 \\ 0 \\ 2 \end{pmatrix} , \qquad \bf{v}_2 = \begin{pmatrix} 0 \\ 0 \\ 5i \\ 0 \end{pmatrix} ,\] \[ \bf{v}_3 = \begin{pmatrix} 3 \\ 2i \\ 0 \\ 0 \end{pmatrix} , \qquad \bf{v}_4 = \begin{pmatrix} 0 \\ 0 \\ -i \\ 1 \end{pmatrix} \] \begin{align*} \implies [\bf{v}_1, \bf{v}_2, \bf{v}_3, \bf{v}_4] &= 5i [\bf{v}_1, \bf{e}_3, \bf{v}_3, \bf{v}_4] \\ &= 5i[i\bf{e}_1 + 2\bf{e}_4, \bf{e}_3, 3\bf{e}_1 + 2i\bf{e}_2, \cancel{-i\bf{e}_3} + \bf{e}_4] \\ &= 5i[i\bf{e}_1 + \cancel{2\bf{e}_4}, \bf{e}_3, 3\bf{e}_1 + 2i\bf{e}_2, \bf{e}_4] \\ &= 5i[i\bf{e}_1, \bf{e}_3, \cancel{3\bf{e}_1} + 2i\bf{e}_2, \bf{e}_4] \\ &= (5i \cdot i \cdot 2i) [\bf{e}_1, \bf{e}_3, \bf{e}_2, \bf{e}_4] \\ &= -10i(-1) \\ &= 10i \end{align*} \end{example*} \begin{note*} Properties (i) and (iii) are immediate from the definition.
\end{note*} \begin{proof}[of property (ii)] \begin{align*} [\bf{v}_{\sigma(1)}, \dots, \bf{v}_{\sigma(n)}] &= \sum_\rho \eps(\rho) \ub{[\bf{v}_{\sigma(1)}]_{\rho(1)} \cdots [\bf{v}_{\sigma(n)}]_{\rho(n)}}_{\text{each term can be rewritten}} \\ &= \sum_\rho \eps(\rho) [\bf{v}_1]_{\rho\sigma^{-1}(1)} \cdots [\bf{v}_n]_{\rho\sigma^{-1}(n)} \\ &= \sum_\rho \eps(\sigma) \eps(\rho') [\bf{v}_1]_{\rho'(1)} \cdots [\bf{v}_n]_{\rho'(n)} \\ &= \eps(\sigma) \sum_{\rho'} \eps(\rho') [\bf{v}_1]_{\rho'(1)} \cdots [\bf{v}_n]_{\rho'(n)} \\ &= \eps(\sigma) [\bf{v}_1, \dots, \bf{v}_n] \end{align*} (here $\rho' = \rho\sigma^{-1}$, so $\eps(\rho) = \eps(\rho')\eps(\sigma)$ and summing over $\rho$ is the same as summing over $\rho'$) as claimed. \end{proof} \begin{proposition*} \[ [\bf{v}_1, \dots, \bf{v}_n] \neq 0 \iff \bf{v}_1, \dots, \bf{v}_n \text{ linearly independent} \] \end{proposition*} \begin{proof} To show ``$\Rightarrow$'' use property (v). If $\bf{v}_1, \dots, \bf{v}_n$ are linearly dependent then $\sum \alpha_i \bf{v}_i = \bf{0}$ where not all coefficients are zero. Suppose without loss of generality that $\alpha_p \neq 0$, then express $\bf{v}_p$ as a linear combination of $\bf{v}_i$ ($i \neq p$) and \[ [\bf{v}_1, \dots, \bf{v}_n] = 0 .\] To show ``$\Leftarrow$'' note that $\bf{v}_1, \dots, \bf{v}_n$ linearly independent means they also span (in $\RR^n$ or $\CC^n$) so we can write the standard basis vectors as \[ \bf{e}_i = A_{ai}\bf{v}_a \] for some $A_{ai} \in \RR \text{ or } \CC$. But then \begin{align*} [\bf{e}_1, \dots, \bf{e}_n] &= [A_{a1} \bf{v}_a, A_{b2} \bf{v}_b, \dots, A_{cn} \bf{v}_c] \\ &= A_{a1}A_{b2} \cdots A_{cn} [\bf{v}_a, \bf{v}_b, \dots, \bf{v}_c] \\ &= A_{a1} A_{b2} \cdots A_{cn} \eps_{ab \cdots c} [\bf{v}_1, \bf{v}_2, \dots, \bf{v}_n] \end{align*} and $LHS = 1$, so $[\bf{v}_1, \bf{v}_2, \dots, \bf{v}_n] \neq 0$. Example in $\CC^4$ above: $\bf{v}_1$, $\bf{v}_2$, $\bf{v}_3$, $\bf{v}_4$ linearly independent.
\end{proof} \subsection{Determinants in $\RR^n$ and $\CC^n$} \subsubsection*{(a) Definition} For an $n \times n$ matrix $M$ with columns \[ \bf{C}_a = M \bf{e}_a \] the \emph{determinant} $\det M \text{ or } |M| \in \RR \text{ or } \CC$ is defined by \begin{align*} \det M &= [\bf{C}_1, \bf{C}_2, \dots, \bf{C}_n] \\ &= [M\bf{e}_1, M\bf{e}_2, \dots, M\bf{e}_n] \\ &= \eps_{ij \cdots l} M_{i1} M_{j2} \cdots M_{ln} \\ &= \sum_\sigma \eps(\sigma) M_{\sigma(1)1} M_{\sigma(2)2} \cdots M_{\sigma(n)n} \end{align*} \begin{proposition*}[Transpose Property] \[ \det M = \det M^\top \] \end{proposition*} So \begin{align*} \det(M) &= [\bf{R}_1, \bf{R}_2, \dots, \bf{R}_n] \\ &= \eps_{ij \cdots l} M_{1i} M_{2j} \cdots M_{nl} \\ &= \sum_\sigma \eps(\sigma) M_{1\sigma(1)} M_{2\sigma(2)} \cdots M_{n \sigma(n)} \end{align*} \begin{example*} In $\RR^3$ or $\CC^3$ \begin{align*} \det M &= \eps_{ijk} M_{i1} M_{j2} M_{k3} \\ &= M_{11} \left| \begin{matrix} M_{22} & M_{23} \\ M_{32} & M_{33} \end{matrix} \right| - M_{21} \left| \begin{matrix} M_{12} & M_{13} \\ M_{32} & M_{33} \end{matrix} \right| + M_{31} \left| \begin{matrix} M_{12} & M_{13} \\ M_{22} & M_{23} \end{matrix} \right| \end{align*} \end{example*} \subsubsection*{Properties} $\det M$ is a function of the rows or columns of $M$ that is \begin{enumerate}[(i)] \item multilinear \item totally antisymmetric (or alternating) \item $\det I = 1$ \end{enumerate} \begin{theorem*} \begin{align*} \det M \neq 0 &\iff \text{cols of $M$ are linearly independent} \\ &\iff \text{rows of $M$ are linearly independent} \\ &\iff \mathrm{rank}\,\,M = n \qquad (M \,\,\,n \times n) \\ &\iff \mathrm{Ker}\,\,M = \{\bf{0}\} \\ &\iff M^{-1} \text{ exists} \end{align*} \end{theorem*} \begin{proof} All equivalences follow immediately from earlier results including the discussion in section 5.1. \end{proof} \bigskip \begin{proof}[of Transpose Property] Suffices to show \[ \sum_\sigma \eps(\sigma) M_{\sigma(1)1} \cdots M_{\sigma(n)n} = \sum_\sigma \eps(\sigma) M_{1\sigma(1)} \cdots M_{n\sigma(n)} \] But in a given term on the left hand side, \[ M_{\sigma(1)1} \cdots M_{\sigma(n)n} = M_{1\rho(1)} \cdots M_{n\rho(n)} \] by re-ordering factors, where $\rho = \sigma^{-1}$. Then $\eps(\sigma) = \eps(\rho)$ and $\sum_\sigma$ is equivalent to $\sum_\rho$, so the result follows. \end{proof} \subsubsection*{(b) Evaluating Determinants: Expanding by Rows or Columns} For $M$ $n \times n$, for each entry $M_{ia}$ define the \emph{minor} $M^{ia}$ to be the determinant of the $(n - 1) \times (n - 1)$ matrix obtained by deleting row $i$ and column $a$ from $M$. \begin{proposition*} \begin{align*} \det M &= \sum_i (-1)^{i + a} M_{ia} M^{ia} &\text{$a$ fixed} \\ &= \sum_a (-1)^{i + a} M_{ia} M^{ia} &\text{$i$ fixed} \end{align*} called expanding by (or about) column $a$ or row $i$ respectively. \end{proposition*} \begin{proof} See section 5.4.
\end{proof} \begin{example*} \[ M = \begin{pmatrix} i & 0 & 3 & 0 \\ 0 & 0 & 2i & 0 \\ 0 & 5i & 0 & -i \\ 2 & 0 & 0 & 1 \end{pmatrix} \] Expand by row 3 to find \[ \det M = \sum_a (-1)^{3 + a} M_{3a} M^{3a} \] \[ M_{31} = M_{33} = 0 ;\] \[ M_{32} = 5i, \quad M^{32} = \left| \begin{matrix} i & 3 & 0 \\ 0 & 2i & 0 \\ 2 & 0 & 1 \end{matrix} \right| \] \[ M_{34} = -i, \quad M^{34} = \left| \begin{matrix} i & 0 & 3 \\ 0 & 0 & 2i \\ 2 & 0 & 0 \end{matrix} \right| \] \[ M^{32} = i \left| \begin{matrix} 2i & 0 \\ 0 & 1 \end{matrix} \right| - 3 \left| \begin{matrix} 0 & 0 \\ 2 & 1 \end{matrix} \right| = i(2i) = -2 \tag{row 1} \] \[ M^{34} = i \left| \begin{matrix} 0 & 2i \\ 0 & 0 \end{matrix} \right| + 3 \left| \begin{matrix} 0 & 0 \\ 2 & 0 \end{matrix} \right| = 0 \tag{row 1} \] \[ \det M = (-1)^{3 + 2} 5i(-2) = 10i \] Alternatively we can expand by column 2: \begin{align*} \det M &= \sum_i (-1)^{2 + i} M_{i2} M^{i2} \\ &= (-1)^{2 + 3} M_{32} M^{32} \\ &= 10i \end{align*} (Calculated this previously as example of alternating form in $\CC^n$) \end{example*} \begin{lemma*} If \[ M = \left( \begin{tabular}{c|c} $A$ & $O$ \\ \hline $O$ & $I$ \end{tabular} \right) \] block form with $A$ an $r \times r$ matrix; $I$ an $(n - r) \times (n - r)$ identity, then $\det M = \det A$. \end{lemma*} \begin{proof} For $r = n - 1$, result follows by expanding about column $n$ or row $n$, and for $r < n - 1$, continue process. \end{proof} \subsubsection*{(c) Simplifying Determinants: Rows and Column Operations} From the definitions of $\det M$ in terms of columns ($a$) or rows ($i$) and the properties above (including section 5.2(b)) we note the following \begin{itemize} \item \ul{Row or Column Scalings} \\ If $\bf{R}_i \mapsto \lambda \bf{R}_i$ for some (fixed) $i$ or $\bf{C}_a \mapsto \lambda \bf{C}_i$ for some (fixed) $a$ then $\det M \mapsto \lambda \det M$. If \emph{all} rows or columns are scaled, so $M \mapsto \lambda M$, then $\det M \mapsto \lambda^n \det M$. \item \ul{Row or Column Operations} \\ If $\bf{R}_i \mapsto \bf{R}_i + \lambda \bf{R}_j$ for $i \neq j$ or $\bf{C}_a \mapsto \bf{C}_a + \lambda \bf{C}_b$ for $a \neq b$, then $\det M \mapsto \det M$. \item \ul{Row or Column Exchanges} \\ If $\bf{R}_i \leftrightarrow \bf{R}_j$ for $i \neq j$ or $\bf{C}_a \leftrightarrow \bf{C}_b$ for $a \neq b$ then $\det M \mapsto -\det M$. \end{itemize} \begin{example*} \[ A = \begin{pmatrix} 1 & 1 & a \\ a & 1 & 1 \\ 1 & a & 1 \end{pmatrix} \qquad a \in \CC \] Considering $\bf{C}_1 \mapsto \bf{C}_1 - \bf{C}_3$, which keeps the determinant invariant, we get: \begin{align*} \det A &= \det \begin{pmatrix} 1 - a & 1 & a \\ a - 1 & 1 & 1 \\ 0 & a & 1 \end{pmatrix} \\ &= (1 - a) \det \begin{pmatrix} 1 & 1 & a \\ -1 & 1 & 1 \\ 0 & a & 1 \end{pmatrix} \end{align*} Now we consider $\bf{C}_2 \to \bf{C}_2 - \bf{C}_3$: \begin{align*} \det A &= (1 - a) \det \begin{pmatrix} 1 & 1 - a & a \\ -1 & 0 & 1 \\ 0 & a - 1 & 1 \end{pmatrix} \\ &= (1 - a)^2 \det \begin{pmatrix} 1 & 1 & a \\ -1 & 0 & 1 \\ 0 & -1 & 1 \end{pmatrix} \end{align*} And finally $\bf{R}_1 \to \bf{R}_1 + \bf{R}_2 + \bf{R}_3$: \begin{align*} \det A &= (1 - a)^2 \det \begin{pmatrix} 0 & 0 & a + 2 \\ -1 & 0 & 1 \\ 0 & -1 & 1 \end{pmatrix} \\ &= (1 - a)^2(a + 2) \left| \begin{matrix} -1 & 0 \\ 0 & -1 \end{matrix} \right| \\ &= (1 - a)^2(a + 2) \end{align*} \end{example*} \subsubsection*{(d) Multiplicative Property} \begin{theorem*} For $n \times n$ matrices $M$ and $N$, \[ \det (MN) = \det(M) \det(N) .\] \end{theorem*} \noindent This is based on the following lemma. 
\begin{lemma*} \[ \eps_{i_1\dots i_n} M_{i_1a_1} \cdots M_{i_na_n} = (\det M) \eps_{a_1\dots a_n} \] \end{lemma*} \begin{proof}[of Theorem] \begin{align*} \det(MN) &= \eps_{i_1\dots i_n} (MN)_{i_11} \cdots (MN)_{i_nn} \\ &= \eps_{i_1\dots i_n} M_{i_1k_1} N_{k_11} \cdots M_{i_nk_n} N_{k_nn} \\ &= \eps_{i_1\dots i_n} M_{i_1k_1} \cdots M_{i_nk_n} N_{k_11} \cdots N_{k_nn} \\ &= (\det M) \eps_{k_1\dots k_n} N_{k_11} \cdots N_{k_nn} \\ &= (\det M)(\det N) \end{align*} as required. \end{proof} \bigskip \begin{proof}[of Lemma] Use total antisymmetry of the left hand side and right hand side, and then check by taking $a_1 = 1, \dots, a_n = n$. \end{proof} \subsubsection*{Examples} \begin{enumerate}[(i)] \item If \[ M = \left( \begin{tabular}{c|c} $A$ & $O$ \\ \hline $O$ & $B$ \end{tabular} \right) \] (block form) with $A$ an $r \times r$ matrix and $B$ an $(n - r) \times (n - r)$ matrix, then \[ \det M = \det A \cdot \det B \] Since \[ \left( \begin{tabular}{c|c} $A$ & $O$ \\ \hline $O$ & $B$ \end{tabular} \right) = \left( \begin{tabular}{c|c} $A$ & $O$ \\ \hline $O$ & $I$ \end{tabular} \right) \left( \begin{tabular}{c|c} $I$ & $O$ \\ \hline $O$ & $B$ \end{tabular} \right) \] and we can use the Lemma above. \item $M^{-1}M = I \implies \det(M^{-1}) \det(M) = \det(I) = 1$ so $\det(M^{-1}) = (\det M)^{-1}$. \item For $R$ real and orthogonal, \[ R^\top R = I \implies \det(R^\top) \det(R) = (\det R)^2 = 1 \] \[ \implies \det R = \pm 1 \] \item For $U$ complex and unitary \[ U^\dag U = I \implies \det (U^\dag) \det(U) = \ol{\det(U)}\det(U) = |\det(U)|^2 = 1 \] \[ \implies |\det U| = 1 \] \end{enumerate} \subsection{Minors, Cofactors and Inverses} \subsubsection*{(a) Cofactors and Determinants} Consider column $\bf{C}_a$ of matrix $M$ ($a$ fixed) and write $\bf{C}_a = \sum_i M_{ia} \bf{e}_i$ in the definition of the determinant: \begin{align*} \det M &= [\bf{C}_1, \dots, \bf{C}_{a - 1}, \bf{C}_a, \bf{C}_{a + 1}, \dots, \bf{C}_n] \\ &= [\bf{C}_1, \dots, \bf{C}_{a - 1}, \sum_i M_{ia} \bf{e}_i, \bf{C}_{a + 1}, \dots, \bf{C}_n] \\ &= \sum_i M_{ia} \Delta_{ia} \qquad \text{no sum over $a$} \end{align*} where the \emph{cofactor} $\Delta_{ia}$ is defined by \begin{center} \includegraphics[width=0.6\linewidth] {images/05b7a15a4faa11ec.png} \end{center} introduced earlier. We have deduced \begin{align*} \det M &= \sum_i M_{ia} \Delta_{ia} \\ &= \sum_i M_{ia}(-1)^{i + a} M^{ia} \end{align*} proving the proposition in section 5.3(b). [Similarly, considering row $i$, we find the other expression]. \subsubsection*{(b) Adjugates and Inverses} Reasoning as in (a) with $\bf{C}_b = \sum_i M_{ib} \bf{e}_i$ \begin{align*} [\bf{C}_1, \dots, \bf{C}_{a - 1}, \bf{C}_b, \bf{C}_{a + 1}, \dots, \bf{C}_n] &= \sum_i M_{ib} \Delta_{ia} \\ &= \begin{cases} \det M & \text{if $a = b$} \\ 0 & \text{if $a \neq b$} \end{cases} \end{align*} Hence \[ \sum_i M_{ib} \Delta_{ia} = (\det M) \delta_{ab} \] And similarly \[ \sum_a M_{ja} \Delta_{ia} = (\det M) \delta_{ij} \] Let $\Delta$ be the \emph{matrix} of cofactors with entries $\Delta_{ia}$, and define the \emph{adjugate} $\widetilde{M} = \mathrm{adj}(M) = \Delta^\top$.
Then relations above because \begin{align*} \Delta_{ia} M_{ib} &= (\Delta^\top)_{ai} M_{ib} \\ &= (\Delta^\top M)_{ab} \\ &= (\widetilde{M} M)_{ab} \\ &= (\det M) \delta_{ab} \end{align*} and \[ M_{ja} \Delta_{ia} = (M\widetilde{M})_{ji} = (\det M) \delta_{ij} \] This justifies ($*$) in section 5.1 with \[ \widetilde(M) = \Delta^\top \] and \[ \Delta_{ia} = (-1)^{i + a} M^{ia} \] we have \[ \widetilde{M}M = M\widetilde{M} = (\det M) I \] Hence if $\det M \neq 0$ then it is invertible and \[ M^{-1} = \frac{1}{\det M} \widetilde{M} \] \begin{example*} Consider \[ A = \begin{pmatrix} 1 & 1 & a \\ a & 1 & 1 \\ 1 & a & 1 \end{pmatrix} \] previously found $\det A = (a - 1)^2(a + 2)$. Hence $A^{-1}$ exists if $a \neq 1$, $a \neq -2$. Matrix of cofactors is \[ \Delta = \begin{pmatrix} 1 - a & 1 - a & a^2 - 1 \\ a^2 - 1 & 1 - a & 1 - a \\ 1 - a & a^2 - 1 & 1 - a \end{pmatrix} \] e.g. \[ A^{12} = \left| \begin{matrix} a & 1 \\ 1 & 1 \end{matrix} \right| = a - 1 \] \[ \Delta_{12} = (-1)^{1 + 2} A^{12} = 1 - a \] Adjugate $\widetilde{A} = \Delta^\top$ and \begin{align*} A^{-1} &= \frac{1}{\det A} \widetilde{A} \\ &= \frac{1}{(1 - a)(a + 2)} \begin{pmatrix} 1 & -(1 + a) & 1 \\ 1 & 1 & -(1 + a) \\ -(1 + a) & 1 & 1 \end{pmatrix} \end{align*} if $a \neq 1$, $a \neq -2$. \end{example*} \subsection{Systems of Linear Equations} \subsubsection*{(a) Introduction and Nature of Solutions} Consider a system of $n$ linear equations in $n$ unknowns $x_i$ written in vector / matrix form \[ A\bf{x} = \bf{b} \qquad \bf{x}, \bf{b} \in \RR^n \] and $A$ an $n \times n$ matrix, i.e. \[ A_{11}x_1 + \cdots + A_{1n} x_n = b_1 \] \[ \vdots \] \[ A_{n1}x_1 + \cdots + A_{nn} x_n = b_n \] There are three possibilities: \begin{enumerate}[(i)] \item $\det A \neq 0 \implies \text{$A^{-1}$ exists} \implies \text{unique solution } \bf{x} = A^{-1}\bf{b}$ \item $\det A = 0$ and $b \not\in \mathrm{Im}\,\,A \implies$ no solution. \item $\det A = 0$ and $b \in \mathrm{Im}\,\,A \implies$ infinitely many solutions. \end{enumerate} Elaboration: a solution exists if and only if \[ A\bf{x}_0 = \bf{b} \text{ for some $\bf{x}_0$} \iff \bf{b} \in \mathrm{Im}\,\,A \] Then $\bf{x}$ is also a solution if and only if $\bf{u} = \bf{x} - \bf{x}_0$ satisfies \[ A\bf{u} = \bf{0} \] \emph{homogeneous problem}. Now \begin{align*} \det A \neq 0 &\iff \mathrm{Im}\,\,A = \RR^n \\ &\iff \mathrm{Ker}\,\,A = \{\bf{0}\} \end{align*} So in (i) there is a unique solution and it can be found using $A^{-1}$. But \begin{align*} \det A = 0 &\iff \mathrm{rank}(A) < n \\ &\iff \mathrm{null}(A) > 0 \end{align*} and then either $\bf{b} \not\in \mathrm{Im}\,\,A$ as in case (ii) or $\bf{b} \in \mathrm{Im}\,\,A$ as in case (iii). If $\bf{u}_1, \dots, \bf{u}_k$ is a basis for $\mathrm{Ker}\,\,A$ then general solution of homogeneous problem is \[ \bf{u} = \sum_{i = 1}^k \lambda_i \bf{u}_i \] \bigskip \noindent \textbf{Example} \\ $A\bf{x} = \bf{b}$ with $A$ as in section 5.4 and \[ \bf{b} = \begin{pmatrix} 1 \\ c \\ 1 \end{pmatrix} \] with $a, c \in \RR$. 
\begin{itemize} \item \ul{$a \neq 1, -2$} \\ Then $A^{-1}$ exists and we have a unique solution for any $c$ (case (i)): \[ \bf{x} = A^{-1}\bf{b} = \frac{1}{(1 - a)(a + 2)} \begin{pmatrix} 2 - c - ca \\ c - a \\ c - a \end{pmatrix} \] \item \ul{$a = 1$} \\ \[ A = \begin{pmatrix} 1 & 1 & 1 \\ 1 & 1 & 1 \\ 1 & 1 & 1 \end{pmatrix} \] \[ \mathrm{Im}\,\,A = \left\{\lambda \begin{pmatrix} 1 \\ 1 \\ 1 \end{pmatrix} \right\} \qquad \mathrm{Ker}\,\,A = \mathrm{span} \left\{ \begin{pmatrix} -1 \\ 1 \\ 0 \end{pmatrix} , \begin{pmatrix} -1 \\ 0 \\ 1 \end{pmatrix} \right\} \] $\bf{b} \in \mathrm{Im}\,\,A$ if and only if $c = 1$, particular solution \[ \bf{x}_0 = \begin{pmatrix} 1 \\ 0 \\ 0 \end{pmatrix} \] general solution \[ \bf{x} = \bf{x}_0 + \bf{u} = \begin{pmatrix} 1 - \lambda - \mu \\ \lambda \\ \mu \end{pmatrix} \] case (iii). For $a = 1$ and $c \neq 1$ there are no solutions: case (ii). \item \ul{$a = -2$} \[ A = \begin{pmatrix} 1 & 1 & -2 \\ -2 & 1 & 1 \\ 1 & -2 & 1 \end{pmatrix} \] \[ \mathrm{Im}\,\,A = \mathrm{span} \left\{ \begin{pmatrix} 1 \\ -2 \\ 1 \end{pmatrix} , \begin{pmatrix} 1 \\ 1 \\ -2 \end{pmatrix} \right\} \qquad \mathrm{Ker}\,\,A = \left\{\lambda \begin{pmatrix} 1 \\ 1 \\ 1 \end{pmatrix} \right\} \] $\bf{b} \in \mathrm{Im}\,\,A$ if and only if $c = -2$, particular solution \[ \bf{x}_0 = \begin{pmatrix} 1 \\ 0 \\ 0 \end{pmatrix} \] general solution \[ \bf{x} = \bf{x}_0 + \bf{u} = \begin{pmatrix} 1 + \lambda \\ \lambda \\ \lambda \end{pmatrix} \] For $c \neq -2$ no solutions. \end{itemize} \subsubsection*{(b) Geometrical Interpretation in $\RR^3$} Let $\bf{R}_1, \bf{R}_2, \bf{R}_3$ be the rows of $A$ ($3 \times 3$). \[ A\bf{u} = \bf{0} \iff \begin{cases} \bf{R}_1 \cdot \bf{u} = 0 \\ \bf{R}_2 \cdot \bf{u} = 0 \\ \bf{R}_3 \cdot \bf{u} = 0 \end{cases} \] (these are 3 equations of planes through $\bf{0}$, normals $\bf{R}_i$, assuming $\bf{R}_i \neq \bf{0}$). So solutions of the homogeneous problem (finding $\mathrm{Ker}\,\,A$) are given by the intersection of these planes. \[ \mathrm{rank}(A) = 3 \implies \text{normals linearly independent and planes intersect in $\bf{0}$} \] \[ \mathrm{rank}(A) = 2 \implies \text{normals span a plane and planes intersect in a line} \] \begin{center} \includegraphics[width=0.6\linewidth] {images/22f689ca4faf11ec.png} \end{center} \[ \mathrm{dim}\,\,\mathrm{Ker}\,\,A = 1 .\] \[ \mathrm{rank}(A) = 1 \implies \text{normals are parallel and planes coincide} \] \begin{center} \includegraphics[width=0.6\linewidth] {images/48be0eb24faf11ec.png} \end{center} Now consider instead \[ A\bf{x} = \bf{b} \iff \begin{cases} \bf{R}_1 \cdot \bf{x} = b_1 \\ \bf{R}_2 \cdot \bf{x} = b_2 \\ \bf{R}_3 \cdot \bf{x} = b_3 \end{cases} \] planes with normals $\bf{R}_i$ but not passing through $\bf{0}$ unless $b_i = 0$. \[ \mathrm{rank}(A) = 3 \iff \det A \neq 0 ,\] normals linearly independent; planes intersect in a point and we get a unique solution for any $\bf{b}$. \[ \mathrm{rank}(A) = 2 \implies \text{planes may intersect in a line (as in homogeneous case)} \] \emph{but} they may not, e.g. \begin{center} \includegraphics[width=0.6\linewidth] {images/7139791a4fd911ec.png} \end{center} \[ \mathrm{rank}(A) = 1 \implies \text{planes may coincide (as in homogeneous case)} \] \emph{but} they may not, e.g. \begin{center} \includegraphics[width=0.6\linewidth] {images/85e8a0a24fd911ec.png} \end{center} \subsubsection*{(c) Gaussian Elimination and Echelon Form} Consider $A\bf{x} = \bf{b}$ with $\bf{x} \in \RR^n$ and $\bf{b} \in \RR^m$ and $A$ an $m \times n$ matrix.
Gaussian elimination is a direct approach to solving system of equations: \[ A_{11}x_1 + \cdots + A_{1n} x_n = b_1 \] \[ \vdots \] \[ A_{m1}x_1 + \cdots + A_{mn}x_n = b_m \] \begin{example*} \[ 3x_1 + 2x_2 + x_3 = b_1 \tag{$1$} \] \[ 6x_1 + 3x_2 + 3x_3 = b_2 \tag{$2$} \] \[ 6x_1 + 2x_2 + 4x_3 = b_3 \tag{$3$} \] Step (1): subtract multiples of ($1$) from ($2$) and ($3$) to eliminate $x_1$: \[ 0 - x_2 + x_3 = b_2 - 2b_1 \tag{$2'$} \] \[ 0 - 2x_2 + 2x_3 = b_3 - 2b_1 \tag{$3'$} \] Step (2): repeat this using ($2'$) to eliminate $x_2$: \[ 0 + 0 + 0 = b_3 - 2b_2 + 2b_1 \tag{$3''$} \] Now consider new system ($1$), ($2'$), ($3'$) \[ b_3 - 2b_2 + 2b_1 \neq 0 \implies \text{no solution} \] \[ b_3 - 2b_2 + 2b_1 = 0 \text{ then infinitely many solutions} \] $x_3$ is arbitrary and then $x_2$ and $x_1$ determined from ($2'$) and ($1$). In general case we aim to carry out steps as in example until we obtain equivalent system \[ M\bf{x} = \bf{d} \text{ with } M = \left( \begin{tabular}{c|c} $\hat{M}$ & numbers \\ 0 & 0 \end{tabular} \right) \] with $M$ an $m \times n$ (block form), with \[ \hat{M} = \begin{pmatrix} M_{11} & & \text{numbers} \\ & \cdots & \\ 0 & & M_{rr} \end{pmatrix} \] $M_{jj} \neq 0$ for each $j$. $M$ obtained from $A$ by row operations including row exchanges and column exchanges which relabel variables $x_i$. Note $x_{r + 1}, \dots, x_n$ undetermined, $d_{r + 1}, \dots, d_m = 0$ else no solution. And if this is satisfied then $x_1, \dots, x_r$ determined successively. \[ r = \mathrm{rank}\,\,M = \mathrm{rank}\,\,A \] If $n = m$ then $\det A = \pm \det M$ and if $r = n = m$ then \[ \det M = M_{11} \cdots M_{rr} \neq 0 \] \[ \implies A \text{ and } M \text{ invertible} \] $M$ as above is an example of \emph{echelon form}. \end{example*} \newpage \section{Eigenvalues and Eigenvectors} \subsection{Introduction} \subsubsection*{(a) Definitions} For a linear map $T : V \to V$ ($V$ a real or complex vector space) a vector $\bf{v} \in V$ with $\bf{v} \neq \bf{0}$ is an \emph{eigenvector} of $T$ with \emph{eigenvalue} $\lambda$ if \[ T(\bf{v}) = \lambda \bf{v} \] If $V = \RR^n \text{ or } \CC^n$ and $T$ given by an $n \times n$ matrix $A$, then \[ A\bf{v} = \lambda \bf{v} \iff (A - \lambda I)\bf{v} = \bf{0} \] and for given $\lambda$ this holds for some $\bf{v} \neq \bf{0}$ if and only if $\det(A - \lambda I) = 0$ \emph{characteristic equation} i.e. $\lambda$ is an eigenvalue if and only if it is a root of $\chi_A(t) = \det(A - tI)$ \emph{characteristic polynomial}. $\chi_A(t)$ polynomial of degree $n$ for $A$ $n \times n$. We find eigenvalues as roots of characteristic equation and then find corresponding eigenvectors. \subsubsection*{(b) Examples} \begin{enumerate}[(i)] \item $V = \CC^2$ and \[ A = \begin{pmatrix} 2 & i \\ -i & 2 \end{pmatrix} \] then \[ \det(A - \lambda I) = \left| \begin{matrix} 2 - \lambda & i \\ -i & 2 - \lambda \end{matrix} \right| = (2 - \lambda)^2 - 1 = 0 \] if and only if $\lambda = 1 \text{ or } 3$. 
To find eigenvectors $\bf{v} = \begin{pmatrix}v_1\\v_2\end{pmatrix}$: \ul{$\lambda = 1$}: \[ (A - I)\bf{v} = \begin{pmatrix} 1 & i \\ -i & 1 \end{pmatrix} \begin{pmatrix} v_1 \\ v_2 \end{pmatrix} = \bf{0} \] \[ \implies \bf{v} = \alpha \begin{pmatrix} 1 \\ i \end{pmatrix} \qquad \text{any $\alpha \neq 0$.} \] \ul{$\lambda = 3$}: \[ (A - 3I)\bf{v} = \begin{pmatrix} -1 & i \\ -i & -1 \end{pmatrix} \begin{pmatrix} v_1 \\ v_2 \end{pmatrix} = \bf{0} \] \[ \implies \bf{v} = \beta \begin{pmatrix} 1 \\ -i \end{pmatrix} \qquad \text{ any $\beta \neq 0$.} \] \item $V = \RR^2$ \[ A = \begin{pmatrix} 1 & 1 \\ 0 & 1 \end{pmatrix} \] \[ \det(A - \lambda I) = \left| \begin{matrix} 1 - \lambda & 1 \\ 0 & 1 - \lambda \end{matrix} \right| = (1 - \lambda)^2 = 0 \] \[ \implies \lambda = 1 \] Eigenvector: \[ (A - I)\bf{v} = \begin{pmatrix} 0 & 1 \\ 0 & 0 \end{pmatrix} \begin{pmatrix} v_1 \\ v_2 \end{pmatrix} = \bf{0} \] \[ \implies \begin{pmatrix} v_1 \\ v_2 \end{pmatrix} = \alpha \begin{pmatrix} 1 \\ 0 \end{pmatrix} \qquad \text{for any $\alpha \neq 0$.} \] \item $V = \RR^2 \text{ or } \CC^2$ \[ U = \begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix} \] \[ \chi_U(t) = \det(U - tI) = t^2 - 2t\cos\theta + 1 \] Eigenvalues $\lambda = e^{\pm i\theta}$ and eigenvectors \[ \bf{v} = \alpha \begin{pmatrix} 1 \\ \mp i \end{pmatrix} \qquad (\alpha \neq 0) \] \end{enumerate} \subsubsection*{(c) Deductions involving $\chi_A(t)$} For $A$ an $n \times n$ matrix, the characteristic polynomial has degree $n$: \begin{align*} \chi_A(t) &= \det \begin{pmatrix} A_{11} - t & A_{12} & \cdots & A_{1n} \\ A_{21} & A_{22} - t & \cdots & A_{2n} \\ \vdots & \vdots & \ddots & \vdots \\ A_{n1} & A_{n2} & \cdots & A_{nn} - t \end{pmatrix} \\ &= \sum_{j = 0}^n c_j t^j \\ &= (-1)^n(t - \lambda_1) \cdots (t - \lambda_n) \end{align*} \begin{enumerate}[(i)] \item There exists at least one eigenvalue (a root of $\chi_A$); in fact there are $n$ roots counted with multiplicity (Fundamental Theorem of Algebra) \item $\mathrm{tr}(A) = A_{ii} = \sum_i \lambda_i$, the sum of the roots, by comparing terms of order $n - 1$ in $t$. \item $\det(A) = \chi_A(0) = \prod_i \lambda_i$ (product of eigenvalues) \item If $A$ is diagonal: \[ A = \begin{pmatrix} \lambda_1 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \lambda_n \end{pmatrix} \] the diagonal entries are the eigenvalues; (ii) and (iii) are then immediate. \item If $A$ is real, the coefficients $c_i$ are real and $\chi_A(\lambda) = 0 \iff \chi_A(\ol{\lambda}) = 0$: non-real roots occur in conjugate pairs. \end{enumerate} \subsection{Eigenspaces and Multiplicities} \subsubsection*{(a) Definitions} For an eigenvalue $\lambda$ of a matrix $A$, define the \emph{eigenspace} \[ E_\lambda = \{\bf{v} : A\bf{v} = \lambda\bf{v}\} = \mathrm{Ker}(A - \lambda I); \] the \emph{geometric multiplicity} \[ m_\lambda = \dim E_\lambda = \mathrm{null} (A - \lambda I) .\] (the number of linearly independent eigenvectors with eigenvalue $\lambda$); \\ the \emph{algebraic multiplicity} \[ M_\lambda, \text{ multiplicity of $\lambda$ as a root of $\chi_A$} \] i.e. $\chi_A(t) = (t - \lambda)^{M_\lambda} f(t)$ with $f(\lambda) \neq 0$. \begin{proposition*} \eqnoskip \[ M_\lambda \ge m_\lambda \] \end{proposition*} \noindent [Further discussion in section 6.3] \subsubsection*{(b) Examples} \begin{enumerate}[(i)] \item Define: \[ A = \begin{pmatrix} -2 & 2 & -3 \\ 2 & 1 & -6 \\ -1 & -2 & 0 \end{pmatrix} \] \[ \chi_A(t) = \det(A - tI) = (5 - t)(t + 3)^2 \] so we have roots $5$ and $-3$, with $M_5 = 1$ and $M_{-3} = 2$.
\begin{itemize} \item For $\lambda = 5$ we have: \[ (A - 5I)\bf{x} = \begin{pmatrix} -7 & 2 & -3 \\ 2 & -4 & -6 \\ -1 & -2 & -5 \end{pmatrix} \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix} = \bf{0} \] \[ \implies E_5 = \left\{\alpha \begin{pmatrix} 1 \\ 2 \\ -1 \end{pmatrix} \right\} \] \item For $\lambda = -3$ we have \[ (A + 3I)\bf{x} = \begin{pmatrix} 1 & 2 & -3 \\ 2 & 4 & -6 \\ -1 & -2 & 3 \end{pmatrix} \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix} = \bf{0} \] Solve to find: \[ \bf{x} = \begin{pmatrix} -2x_2 + 3x_3 \\ x_2 \\ x_3 \end{pmatrix} \] or \[ E_{-3} = \left\{\alpha \begin{pmatrix} -2 \\ 1 \\ 0 \end{pmatrix} + \beta \begin{pmatrix} 3 \\ 0 \\ 1 \end{pmatrix} \right\} \] \end{itemize} So \[ \dim E_5 = m_5 = 1 = M_5 \] \[ \dim E_{-3} = m_{-3} = 2 = M_{-3} \] \item Consider \[ A = \begin{pmatrix} -3 & -1 & 1 \\ -1 & -3 & 1 \\ -2 & -2 & 0 \end{pmatrix} \] Then \[ \chi_A(t) = \det(A - tI) = -(t + 2)^3 \] roots are $\lambda = -2$, with $M_{-2} = 3$. To find eigenvectors: \[ (A + 2I)\bf{x} = \begin{pmatrix} -1 & -1 & 1 \\ -1 & -1 & 1 \\ -2 & -2 & 2 \end{pmatrix} \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix} = \bf{0} \] \[ \implies \bf{x} = \begin{pmatrix} -x_2 + x_3 \\ x_2 \\ x_3 \end{pmatrix} \] \[ \implies E_{-2} = \left\{\alpha \begin{pmatrix} -1 \\ 1 \\ 0 \end{pmatrix} + \beta \begin{pmatrix} 1 \\ 0 \\ 1 \end{pmatrix} \right\} \] so $\dim E_{-2} = m_{-2} = 2$ but $M_{-2} = 3$. (So we do have $M_{-2} \ge m_{-2}$.) \end{enumerate} \subsubsection*{(c) Linear Independence of Eigenvectors} \begin{proposition*} \begin{enumerate}[(i)] \item Let $\bf{v}_1, \dots, \bf{v}_r$ be eigenvectors of matrix $A$ ($n \times n$) with eigenvalues $\lambda_1, \dots, \lambda_r$. If the eigenvalues are distinct, $\lambda_i \neq \lambda_j$ for $i \neq j$, then the eigenvectors are linearly independent. \item With conditions as in (i), let $\mathcal{B}_{\lambda_i}$ be a basis for $E_{\lambda_i}$, then \[ \mathcal{B}_{\lambda_1} \cup \mathcal{B}_{\lambda_2} \cup \cdots \cup \mathcal{B}_{\lambda_r} \] is linearly independent. \end{enumerate} \end{proposition*} \begin{proof} \begin{enumerate}[(i)] \item Note \begin{align*} \bf{w} &= \sum_{j = 1}^r \alpha_j \bf{v}_j \\ \implies (A - \lambda I)\bf{w} &= \sum_{j = 1}^r \alpha_j (\lambda_j - \lambda) \bf{v}_j \end{align*} First, suppose eigenvectors are linearly dependent, so there exists linear relations $\bf{w} = \bf{0}$ with number of non-zero coefficients $p \ge 2$. Pick a $\bf{w}$ for which $p$ is least and assume (without loss of generality) that $\alpha_1 \neq 0$. Then \[ (A - \lambda_1 I)\bf{w} = \sum_{j > 1} \alpha_j (\lambda_j - \lambda_1) \bf{v}_j = \bf{0} ,\] a linear relation with $p - 1$ non-zero coefficients, $\contradiction$ ($p$ was least). \\ Alternative second proof, \[ \bf{w} = \bf{0} \] \[ \implies \prod{_j \neq k} (A - \lambda_j I) \bf{w} = \alpha_k \left(\prod_{j \neq k} (\lambda_k - \lambda_j)\right) \bf{v}_k = \bf{0} \] (for some chosen $k$). \[ \implies \alpha_k = 0 \] so the eigenvectors are linearly independent. \item It suffices to show that if \[ \bf{w} = \bf{w}_1 + \bf{w}_2 + \cdots + \bf{w}_r = \bf{0} \] with $\bf{w}_i \in E_{\lambda_i}$ then \[ \implies \bf{w}_i = \bf{0} .\] This follows by same arguments as in (i). 
\end{enumerate} \end{proof} \subsection{Diagonalisability and Similarity} \subsubsection*{(a) Introduction} \begin{proposition*} For an $n \times n$ matrix $A$ acting on $V = \RR^n$ or $\CC^n$, the following conditions are equivalent \begin{enumerate}[(i)] \item There exists a basis of eigenvectors for $V$, $\bf{v}_1, \bf{v}_2, \dots, \bf{v}_n$ with \[ A \bf{v}_i = \lambda_i \bf{v}_i \] (no summation convention here!) \item There exists an $n \times n$ invertible matrix $P$ with \[ P^{-1}AP = D = \begin{pmatrix} \lambda_1 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \lambda_n \end{pmatrix} \] \end{enumerate} If either of these conditions holds, $A$ is \emph{diagonalisable}. \end{proposition*} \begin{proof} Note that for any matrix $P$, $AP$ has columns $A\bf{C}_i(P)$ and $PD$ has columns $\lambda_i\bf{C}_i(P)$ for each $i$. Then (i) and (ii) are related by taking $\bf{v}_i = \bf{C}_i(P)$: \[ P^{-1}AP = D \iff AP = PD \iff A\bf{v}_i = \lambda_i \bf{v}_i .\] \end{proof} \bigskip \noindent \textbf{Example} \\ Refer to section 6.1(b): \[ U = \begin{pmatrix} \cos \theta & -\sin \theta \\ \sin \theta & \cos \theta \end{pmatrix} \] eigenvalues $e^{\pm i \theta}$ and eigenvectors $\begin{pmatrix}1\\\mp i\end{pmatrix}$. Linearly independent over $\CC$ so \[ P = \begin{pmatrix} 1 & 1 \\ -i & i \end{pmatrix} \implies P^{-1} = \half \begin{pmatrix} 1 & i \\ 1 & -i \end{pmatrix} \] and \[ P^{-1}UP = \begin{pmatrix} e^{i\theta} & 0 \\ 0 & e^{-i\theta} \end{pmatrix} \] $U$ is diagonalisable over $\CC$ but \emph{not} over $\RR$. \subsubsection*{(b) Criteria for Diagonalisability} \begin{theorem*} Let $A$ be an $n \times n$ matrix and $\lambda_1, \dots, \lambda_r$ all its distinct eigenvalues. \begin{enumerate}[(i)] \item A necessary and sufficient condition: $A$ is diagonalisable if and only if \[ M_{\lambda_i} = m_{\lambda_i} \qquad \text{for $i = 1, \dots, r$} \] \item A sufficient condition: $A$ is diagonalisable if there are $n$ distinct eigenvalues, i.e. $r = n$. \end{enumerate} \end{theorem*} \begin{proof} Use the Proposition in section 6.2(c). \\ For (ii): if $r = n$ we have $n$ distinct eigenvalues and hence $n$ linearly independent eigenvectors, which form a basis (for $\RR^n$ or $\CC^n$). \\ For (i), choosing bases $\mathcal{B}_{\lambda_i}$ for each eigenspace, \[ \mathcal{B}_{\lambda_1} \cup \mathcal{B}_{\lambda_2} \cup \cdots \cup \mathcal{B}_{\lambda_r} \] is a linearly independent set of \[ m_{\lambda_1} + m_{\lambda_2} + \cdots + m_{\lambda_r} \] vectors. It is a basis (for $\RR^n$ or $\CC^n$) if and only if we have $n$ vectors. But \[ m_{\lambda_i} \le M_{\lambda_i} \] and \[ M_{\lambda_1} + M_{\lambda_2} + \cdots + M_{\lambda_r} = n .\] Hence we have a basis if and only if \[ M_{\lambda_i} = m_{\lambda_i} \qquad \text{for each $i$} \] \end{proof} \bigskip \noindent \textbf{Examples} \\ Refer to section 6.2(b) \begin{enumerate}[(i)] \item \eqnoskip \[ A = \begin{pmatrix} -2 & 2 & -3 \\ 2 & 1 & -6 \\ -1 & -2 & 0 \end{pmatrix} \] \[ \lambda = 5, -3, -3 \qquad M_5 = m_5 = 1 \qquad M_{-3} = m_{-3} = 2 \] hence $A$ is diagonalisable. \[ P = \begin{pmatrix} 1 & -2 & 3 \\ 2 & 1 & 0 \\ -1 & 0 & 1 \end{pmatrix} , \qquad P^{-1} = \frac{1}{8} \begin{pmatrix} 1 & 2 & -3 \\ -2 & 4 & 6 \\ 1 & 2 & 5 \end{pmatrix} \] \[ P^{-1}AP = \begin{pmatrix} 5 & 0 & 0 \\ 0 & -3 & 0 \\ 0 & 0 & -3 \end{pmatrix} \] as expected. \item \eqnoskip \[ A = \begin{pmatrix} -3 & -1 & 1 \\ -1 & -3 & 1 \\ -2 & -2 & 0 \end{pmatrix} \] \[ \lambda = -2,-2,-2 \qquad M_{-2} = 3 > m_{-2} = 2 \] hence $A$ is not diagonalisable.
Check: if it were, then \[ P^{-1}AP = -2I \] \[ \implies A = P(-2I)P^{-1} = -2I \contradiction .\] \end{enumerate} \subsubsection*{(c) Similarity} Matrices $A$ and $B$ ($n \times n$) are \emph{similar} if \[ B = P^{-1}AP \] for some invertible $P$ ($n \times n$). This is an equivalence relation. \begin{proposition*} If $A$ and $B$ are similar, then \begin{enumerate}[(i)] \item $B^r = P^{-1}A^rP$ for $r \ge 0$. \item $B^{-1} = P^{-1}A^{-1}P$ (if either $A$ or $B$ is invertible, so is the other). \item $\mathrm{tr}(B) = \mathrm{tr}(A)$. \item $\det(B) = \det(A)$. \item $\chi_B(t) = \chi_A(t)$. \end{enumerate} \end{proposition*} \begin{proof} (i) and (ii) are immediate. For (iii): \begin{align*} \mathrm{tr}(B) &= \mathrm{tr}(P^{-1}AP) \\ &= \mathrm{tr}(APP^{-1}) \\ &= \mathrm{tr}(A) \end{align*} For (iv): \begin{align*} \det(B) &= \det(P^{-1}AP) \\ &= \det(P^{-1})\det(A)\det(P) \\ &= \det(A) \end{align*} For (v): \begin{align*} \det(B - tI) &= \det(P^{-1}AP - tI) \\ &= \det(P^{-1}(A - tI)P) \\ &= \det(A - tI) \end{align*} as in (iv). \end{proof} \subsection{Hermitian and Symmetric Matrices} \subsubsection*{(a) Real Eigenvalues and Orthogonal Eigenvectors} Recall: matrix $A$ ($n \times n$) is hermitian if \[ A^\dag = \ol{A}^\top = A \qquad \text{or} \qquad A_{ij} = \ol{A_{ji}} \] Special case: $A$ is real and symmetric \[ \ol{A} = A \qquad A^\top = A \qquad \text{or} \qquad \begin{cases} A_{ij} = \ol{A_{ij}} \\ A_{ij} = A_{ji} \end{cases} \] Recall: the complex inner-product for $\bf{v}, \bf{w} \in \CC^n$ is \[ \bf{v}^\dag \bf{w} = \sum_i \ol{v_i} w_i \] and for $\bf{v}, \bf{w} \in \RR^n$ this reduces to \[ \bf{v}^\top \bf{w} = \bf{v} \cdot \bf{w} = \sum_i v_i w_i \] Observation: if $A$ is hermitian then \[ (A\bf{v})^\dag \bf{w} = \bf{v}^\dag (A\bf{w}) \,\,\,\forall \bf{v}, \bf{w} \in \CC^n \] [since $LHS = (\bf{v}^\dag A^\dag)\bf{w} = \bf{v}^\dag A^\dag \bf{w} = \bf{v}^\dag A \bf{w} = RHS$] \begin{theorem*} For a matrix $A$ ($n \times n$) that is hermitian \begin{enumerate}[(i)] \item Every eigenvalue $\lambda$ is real \item Eigenvectors $\bf{v}$, $\bf{w}$ with distinct eigenvalues $\lambda$, $\mu$ respectively ($\lambda \neq \mu$) are orthogonal \[ \bf{v}^\dag \bf{w} = 0 \] \item If $A$ is real and symmetric then for each $\lambda$ in (i) we can choose a real eigenvector $\bf{v}$ and (ii) becomes \[ \bf{v}^\top \bf{w} = \bf{v} \cdot \bf{w} = 0 \] \end{enumerate} \end{theorem*} \begin{proof} \begin{enumerate}[(i)] \eqitem \begin{align*} \bf{v}^\dag (A\bf{v}) &= (A\bf{v})^\dag \bf{v} \\ \implies \bf{v}^\dag (\lambda \bf{v}) &= (\lambda \bf{v})^\dag \bf{v} \\ \implies \lambda \bf{v}^\dag \bf{v} &= \ol{\lambda} \bf{v}^\dag \bf{v} \end{align*} for $\bf{v}$ an eigenvector with eigenvalue $\lambda$. But $\bf{v} \neq \bf{0}$ so $\bf{v}^\dag \bf{v} \neq 0$ and $\lambda = \ol{\lambda}$. \eqitem \begin{align*} \bf{v}^\dag (A\bf{w}) &= (A\bf{v})^\dag \bf{w} \\ \implies \bf{v}^\dag (\mu \bf{w}) &= (\lambda \bf{v})^\dag \bf{w} \\ \implies \mu \bf{v}^\dag \bf{w} &= \ol{\lambda} \bf{v}^\dag \bf{w} \\ &= \lambda \bf{v}^\dag \bf{w} \end{align*} from (i). But $\lambda \neq \mu$ so $\bf{v}^\dag \bf{w} = 0$. \item Given $A\bf{v} = \lambda \bf{v}$ with $\bf{v} \in \CC^n$ and $A$, $\lambda$ real, let \[ \bf{v} = \bf{u} + i\bf{u}' \] with $\bf{u}, \bf{u}' \in \RR^n$. Then $A\bf{u} = \lambda \bf{u}$ and $A\bf{u}' = \lambda \bf{u}'$, but $\bf{v} \neq 0$ implies one of $\bf{u}$ or $\bf{u}'$ is nonzero, so there is at least one real eigenvector.
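For real eigenvectors $\bf{v}$, $\bf{w}$ we have $\bf{v}^\dag = \bf{v}^\top$, so (ii) reduces to $\bf{v}^\top \bf{w} = \bf{v} \cdot \bf{w} = 0$, as claimed.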
\end{enumerate} \end{proof} \subsubsection*{(b) Unitary and Orthogonal Diagonalisation} \begin{theorem*} Any $n \times n$ hermitian matrix $A$ is diagonalisable (as in section 6.3(a)): \begin{enumerate}[(i)] \item There exists a basis of eigenvectors \[ \bf{u}_1, \dots, \bf{u}_n \in \CC^n \] with \[ A\bf{u}_i = \lambda_i \bf{u}_i ;\] or equivalently \item There exists an $n \times n$ invertible matrix $P$ with \[ P^{-1}AP = D = \begin{pmatrix} \lambda_1 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \lambda_n \end{pmatrix} ;\] the columns of $P$ are the eigenvectors $\bf{u}_i$. \end{enumerate} In addition: the eigenvectors $\bf{u}_i$ can be chosen to be orthonormal \[ \bf{u}_i^\dag \bf{u}_j = \delta_{ij} ,\] or equivalently the matrix $P$ can be chosen to be unitary \[ P^\dag = P^{-1} \implies P^\dag AP = D \] Special case: for $n \times n$ real symmetric $A$, we can choose eigenvectors $\bf{u}_1, \dots, \bf{u}_n \in \RR^n$ with \[ \bf{u}_i^\top \bf{u}_j = \bf{u}_i \cdot \bf{u}_j = \delta_{ij} ;\] equivalently, the matrix $P$ can be chosen to be orthogonal \[ P^\top = P^{-1} \implies P^\top AP = D \] \end{theorem*} \noindent The proof of diagonalisability is \emph{not examinable}; the remaining statements follow by combining the results of sections 6.2, 6.3 and choosing an \emph{orthonormal} basis for each eigenspace. \bigskip \noindent \textbf{Examples} \\ \begin{enumerate}[(i)] \item Consider the hermitian matrix ($A^\dag = A$) from section 6.1(b): \[ A = \begin{pmatrix} 2 & i \\ -i & 2 \end{pmatrix} \] then $\lambda_1 = 1$ and $\lambda_2 = 3$ and choose \[ \bf{u}_1 = \frac{1}{\sqrt{2}} \begin{pmatrix} 1 \\ i \end{pmatrix} , \qquad \bf{u}_2 = \frac{1}{\sqrt{2}} \begin{pmatrix} 1 \\ -i \end{pmatrix} \] to ensure $\bf{u}_1^\dag \bf{u}_1 = \bf{u}_2^\dag \bf{u}_2 = 1$ and note \[ \bf{u}_1^\dag \bf{u}_2 = \half \begin{pmatrix} 1 & -i \end{pmatrix} \begin{pmatrix} 1 \\ -i \end{pmatrix} = 0 .\] Let \[ P = \frac{1}{\sqrt{2}} \begin{pmatrix} 1 & 1 \\ i & -i \end{pmatrix} \] then $P^\dag = P^{-1}$ (unitary) and \[ P^\dag AP = \begin{pmatrix} 1 & 0 \\ 0 & 3 \end{pmatrix} \] \item Consider the symmetric matrix \[ A = \begin{pmatrix} 0 & 1 & 1 \\ 1 & 0 & 1 \\ 1 & 1 & 0 \end{pmatrix} \] then $\lambda_1 = \lambda_2 = -1$ and $\lambda_3 = 2$ and we can choose \[ \bf{u}_1 = \frac{1}{\sqrt{2}} \begin{pmatrix} 1 \\ -1 \\ 0 \end{pmatrix} , \qquad \bf{u}_2 = \frac{1}{\sqrt{6}} \begin{pmatrix} 1 \\ 1 \\ -2 \end{pmatrix} , \qquad \bf{u}_3 = \frac{1}{\sqrt{3}} \begin{pmatrix} 1 \\ 1 \\ 1 \end{pmatrix} \] Let $P$ be the matrix with columns $\bf{u}_1$, $\bf{u}_2$, $\bf{u}_3$; then $P^\top = P^{-1}$ (orthogonal) and \[ P^\top AP = \begin{pmatrix} -1 & 0 & 0 \\ 0 & -1 & 0 \\ 0 & 0 & 2 \end{pmatrix} \] \end{enumerate} \subsection{Quadratic Forms} Consider $\mathcal{F} : \RR^2 \to \RR$ defined by \[ \mathcal{F}(\bf{x}) = 2x_1^2 - 4x_1x_2 + 5x_2^2 \] This can be expressed as \[ \mathcal{F}(\bf{x}) = x_1'^2 + 6x_2'^2 \] where \[ x_1' = \frac{1}{\sqrt{5}} (2x_1 + x_2) \] \[ x_2' = \frac{1}{\sqrt{5}}(-x_1 + 2x_2) \] with $x_1'^2 + x_2'^2 = x_1^2 + x_2^2$. To understand this better, note \[ \mathcal{F}(\bf{x}) = \bf{x}^\top A\bf{x} \] where \[ A = \begin{pmatrix} 2 & -2 \\ -2 & 5 \end{pmatrix} \] and we can diagonalise $A$: its eigenvalues are $\lambda_1 = 1$, $\lambda_2 = 6$, with orthonormal eigenvectors \[ \bf{u}_1 = \frac{1}{\sqrt{5}} \begin{pmatrix} 2 \\ 1 \end{pmatrix} , \qquad \bf{u}_2 = \frac{1}{\sqrt{5}} \begin{pmatrix} -1 \\ 2 \end{pmatrix} \] Then \[ x_1' = \bf{u}_1 \cdot \bf{x} \] \[ x_2' = \bf{u}_2 \cdot \bf{x} \] give the simplified form for $\mathcal{F}$.
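As a check, substituting the expressions for $x_1'$ and $x_2'$ recovers the original form: \[ x_1'^2 + 6x_2'^2 = \frac{1}{5}\left[ (2x_1 + x_2)^2 + 6(-x_1 + 2x_2)^2 \right] = \frac{1}{5}\left( 10x_1^2 - 20x_1x_2 + 25x_2^2 \right) = 2x_1^2 - 4x_1x_2 + 5x_2^2 .\]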
\subsubsection*{(a) Quadratic Forms in General} In general, a \emph{quadratic form} is a function $\mathcal{F} : \RR^n \to \RR$ given by \[ \mathcal{F}(\bf{x}) = \bf{x}^\top A \bf{x} = x_i A_{ij} x_j \] where $A$ is an $n \times n$ real symmetric matrix. From section 6.4, \[ P^\top AP = D = \begin{pmatrix} \lambda_1 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \lambda_n \end{pmatrix} \] where $\lambda_i$ are the eigenvalues of $A$ and $P$ is orthogonal with columns $\bf{u}_i$, orthonormal eigenvectors. Let $\bf{x}' = P^\top \bf{x}$ or $\bf{x} = P\bf{x}'$. Then \begin{align*} \mathcal{F}(\bf{x}) &= \bf{x}^\top A\bf{x} \\ &= (P\bf{x}')^\top A (P\bf{x}') \\ &= (\bf{x}')^\top (P^\top AP) \bf{x}' \\ &= (\bf{x}')^\top D\bf{x}' \end{align*} $\mathcal{F}$ has been \emph{diagonalised}. Now \[ \bf{x}' = x_1' \bf{e}_1 + \cdots + x_n' \bf{e}_n \] and \begin{align*} \bf{x} &= x_1 \bf{e}_1 + \cdots + x_n \bf{e}_n \\ &= x_1' \bf{u}_1 + \cdots + x_n' \bf{u}_n \end{align*} since $x_i' = \bf{u}_i \cdot \bf{x} \iff \bf{x}' = P^\top \bf{x}$. Thus, the $x_i'$ are coordinates with respect to new axes given by the orthonormal basis vectors $\bf{u}_i$, and these are called the \emph{principal axes} of $\mathcal{F}$. The relation to the original axes along standard basis vectors $\bf{e}_i$ and coordinates $x_i$ is given by an orthogonal transformation, which preserves lengths: \[ |\bf{x}|^2 = x_ix_i = x_i'x_i' \] \subsubsection*{(b) Examples in $\RR^2$ and $\RR^3$} \textbf{In $\RR^2$} \\ \[ \mathcal{F}(\bf{x}) = \bf{x}^\top A\bf{x} \] with \[ A = \begin{pmatrix} \alpha & \beta \\ \beta & \alpha \end{pmatrix} \] eigenvalues $\lambda_1 = \alpha + \beta$, $\lambda_2 = \alpha - \beta$. Eigenvectors: \[ \bf{u}_1 = \frac{1}{\sqrt{2}} \begin{pmatrix} 1 \\ 1 \end{pmatrix} , \qquad \bf{u}_2 = \frac{1}{\sqrt{2}} \begin{pmatrix} -1 \\ 1 \end{pmatrix} \] \begin{align*} \mathcal{F}(\bf{x}) &= \alpha x_1^2 + 2 \beta x_1 x_2 + \alpha x_2^2 \\ &= (\alpha + \beta) x_1'^2 + (\alpha - \beta)x_2'^2 \end{align*} with \[ x_1' = \frac{1}{\sqrt{2}}(x_1 + x_2) \] \[ x_2' = \frac{1}{\sqrt{2}}(-x_1 + x_2) \] \begin{enumerate}[(i)] \item $\alpha = \frac{3}{2}$, $\beta = -\half$. Then $\lambda_1 = 1$, $\lambda_2 = 2$. \[ \mathcal{F}(\bf{x}) = x_1'^2 + 2x_2'^2 = 1 \] defines an ellipse. \begin{center} \includegraphics[width=0.6\linewidth] {images/5b27d5d851f811ec.png} \end{center} \item $\alpha = -\half$, $\beta = \frac{3}{2}$. Then $\lambda_1 = 1$ and $\lambda_2 = -2$. \[ \mathcal{F}(\bf{x}) = x_1'^2 - 2x_2'^2 = 1 \] defines a hyperbola. \begin{center} \includegraphics[width=0.6\linewidth] {images/91182fee51f811ec.png} \end{center} \end{enumerate} \bigskip \noindent \textbf{In $\RR^3$} \[ \mathcal{F}(\bf{x}) = \bf{x}^\top A\bf{x} = \lambda_1 x_1'^2 + \lambda_2 x_2'^2 + \lambda_3 x_3'^2 \] after diagonalisation. \begin{enumerate}[(i)] \item If $A$ has eigenvalues $\lambda_1, \lambda_2, \lambda_3 > 0$ then $\mathcal{F} = 1$ defines an ellipsoid. \item From section 6.4, \[ A = \begin{pmatrix} 0 & 1 & 1 \\ 1 & 0 & 1 \\ 1 & 1 & 0 \end{pmatrix} \] has eigenvalues $\lambda_1 = \lambda_2 = -1$, $\lambda_3 = 2$.
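Indeed \[ \chi_A(t) = \det(A - tI) = -t^3 + 3t + 2 = -(t + 1)^2(t - 2) .\]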
Hence \begin{align*} \mathcal{F} &= 2x_1x_2 + 2x_2x_3 + 2x_3x_1 \\ &= -x_1'^2 - x_2'^2 + 2x_3'^2 \end{align*} \[ \mathcal{F} = 1 \iff 2x_3'^2 = 1 + x_1'^2 + x_2'^2 \] a hyperboloid of two sheets: \begin{center} \includegraphics[width=0.6\linewidth] {images/3b44f2b851f911ec.png} \end{center} \[ \mathcal{F} = -1 \iff x_1'^2 + x_2'^2 = 1 + 2x_3'^2 \] a hyperboloid of one sheet: \begin{center} \includegraphics[width=0.6\linewidth] {images/5724349e51f911ec.png} \end{center} \end{enumerate} \subsection{Cayley-Hamilton Theorem} If $A$ is an $n \times n$ complex matrix and \[ f(t) = c_0 + c_1t + \cdots + c_k t^k \] is a polynomial of degree $k$, then \[ f(A) = c_0I + c_1A + \cdots + c_k A^k \] We can also define power series of matrices subject to convergence, for example \[ \exp A = I + A + \cdots + \frac{1}{r!} A^r + \cdots \] converges for any $A$. Note \begin{enumerate}[(i)] \item If \[ D = \begin{pmatrix} \lambda_1 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \lambda_n \end{pmatrix} \] is a diagonal matrix, then \[ D^r = \begin{pmatrix} \lambda_1^r & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \lambda_n^r \end{pmatrix} \] and \[ f(D) = \begin{pmatrix} f(\lambda_1) & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & f(\lambda_n) \end{pmatrix} \] \item If $B = P^{-1}AP$ for invertible $P$, i.e. $A$ and $B$ are similar, then \[ B^r = P^{-1}A^rP \qquad \text{and} \qquad f(B) = f(P^{-1}AP) = P^{-1}f(A)P \] Recall, the characteristic polynomial is \[ \chi_A(t) = \det(A - tI) = c_0 + c_1t + \cdots + c_n t^n \] where $c_0 = \det A$ and $c_n = (-1)^n$. \end{enumerate} \begin{theorem*}[Cayley-Hamilton] \[ \chi_A(A) = c_0 I + c_1 A + \cdots + c_n A^n = 0 \] ``a matrix satisfies its own characteristic equation'' \end{theorem*} \begin{note*} Cayley-Hamilton implies \[ c_0 I = -A(c_1 I + \cdots + c_n A^{n - 1}) \] and if $c_0 = \det A \neq 0$ then \[ A^{-1} = - \frac{1}{c_0} (c_1 I + \cdots + c_n A^{n - 1}) .\] \end{note*} \begin{proof} \begin{enumerate}[(i)] \item General $2 \times 2$ matrix: \[ A = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \implies \chi_A(t) = t^2 - (a + d)t + (ad - bc) \] then check by substitution that $\chi_A(A) = 0$ (on example sheet 4). \item Diagonalisable $n \times n$ matrix: \\ consider $A$ with eigenvalues $\lambda_i$ and invertible $P$ such that \[ P^{-1}AP = D = \begin{pmatrix} \lambda_1 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \lambda_n \end{pmatrix} \] and hence \[ \chi_A(D) = \begin{pmatrix} \chi_A(\lambda_1) & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \chi_A(\lambda_n) \end{pmatrix} = 0 \] since the $\lambda_i$ are eigenvalues. Then \begin{align*} \chi_A(A) &= \chi_A(PDP^{-1}) \\ &= P\chi_A(D)P^{-1} \\ &= 0 \end{align*} as required. \item The non-diagonalisable case is beyond the scope of this course, but one can use an analytical argument to extend the diagonalisable case.
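For instance, for the matrix \[ A = \begin{pmatrix} 1 & 4 \\ -1 & 5 \end{pmatrix} ,\] which is not diagonalisable ($M_3 = 2$, $m_3 = 1$; see also section 7.2), we have $\chi_A(t) = (t - 3)^2 = t^2 - 6t + 9$ and indeed \[ A^2 - 6A + 9I = \begin{pmatrix} -3 & 24 \\ -6 & 21 \end{pmatrix} - \begin{pmatrix} 6 & 24 \\ -6 & 30 \end{pmatrix} + \begin{pmatrix} 9 & 0 \\ 0 & 9 \end{pmatrix} = 0 .\]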
\end{enumerate} \end{proof} \newpage \section{Changing Bases, Canonical Forms and Symmetries} \subsection{Changing Bases in General} \subsubsection*{(a) Definitions and Proposition} Recall Section 4.4: given a linear map $T : V \to W$ (real or complex vector spaces) and a choice of bases \[ \{\bf{e}_i\} \quad i = 1, \dots, n \quad \text{for $V$} \] \[ \{\bf{f}_a\} \quad a = 1, \dots, m \quad \text{for $W$} \] the matrix $A$ ($m \times n$) with respect to these bases is defined by \[ T(\bf{e}_i) = \sum_a \bf{f}_a A_{ai} \] This definition is chosen to ensure \[ \bf{y} = T(\bf{x}) \iff y_a = \sum_i A_{ai} x_i = A_{ai} x_i \] where \[ \bf{x} = \sum_i x_i \bf{e}_i, \qquad \bf{y} = \sum_a y_a \bf{f}_a ,\] which holds since \begin{align*} T(\sum_i x_i \bf{e}_i) &= \sum_i x_i T(\bf{e}_i) \\ &= \sum_i x_i(\sum_a \bf{f}_a A_{ai}) \\ &= \sum_a\ub{\left( \sum_i A_{ai}x_i \right)} _{\text{$= y_a$ as required}} \bf{f}_a \end{align*} The same linear map $T$ has matrix $A'$ with respect to bases \[ \{\bf{e}_i'\} \qquad i = 1, \dots, n \quad \text{for $V$} \] \[ \{\bf{f}_a'\} \qquad a = 1, \dots, m \quad \text{for $W$} \] defined by \[ T(\bf{e}_i') = \sum_a \bf{f}_a' A_{ai}' \] To relate $A$ and $A'$ we need to say how the bases are related, and \emph{change of basis} matrices $P$ ($n \times n$) and $Q$ ($m \times m$) are defined by \[ \bf{e}_i' = \sum_j \bf{e}_j P_{ji}, \qquad \bf{f}_a' = \sum_b \bf{f}_b Q_{ba} \] \begin{note*} $P$ and $Q$ are invertible; in the relations above we can exchange $\{\bf{e}_i\}$ and $\{\bf{e}_i'\}$ with $P \to P^{-1}$, and similarly for $Q$. \end{note*} \begin{proposition*} With definitions as above \[ A' = Q^{-1}AP ,\] the change of basis formula for the matrix of a linear map. \end{proposition*} \begin{example*} $n = 2$, $m = 3$ \[ T(\bf{e}_1) = \bf{f}_1 + 2 \bf{f}_2 - \bf{f}_3 = \sum_a \bf{f}_a A_{a1} \] \[ T(\bf{e}_2) = - \bf{f}_1 + 2 \bf{f}_2 + \bf{f}_3 = \sum_a \bf{f}_a A_{a2} \] \[ \implies A = \begin{pmatrix} 1 & -1 \\ 2 & 2 \\ -1 & 1 \end{pmatrix} \] New basis for $V$ \[ \bf{e}_1' = \bf{e}_1 - \bf{e}_2 = \sum_i \bf{e}_i P_{i1} \qquad \bf{e}_2' = \bf{e}_1 + \bf{e}_2 = \sum_i \bf{e}_i P_{i2} \] \[ \implies P = \begin{pmatrix} 1 & 1 \\ -1 & 1 \end{pmatrix} \] New basis for $W$ \[ \bf{f}_1' = \bf{f}_1 - \bf{f}_3 \qquad \bf{f}_2' = \bf{f}_2 \qquad \bf{f}_3' = \bf{f}_1 + \bf{f}_3 \] \[ \implies Q = \begin{pmatrix} 1 & 0 & 1 \\ 0 & 1 & 0 \\ -1 & 0 & 1 \end{pmatrix} \] Change of basis formula: \[ A' = Q^{-1}AP = \begin{pmatrix} \half & 0 & -\half \\ 0 & 1 & 0 \\ \half & 0 & \half \end{pmatrix} \begin{pmatrix} 1 & -1 \\ 2 & 2 \\ -1 & 1 \end{pmatrix} \begin{pmatrix} 1 & 1 \\ -1 & 1 \end{pmatrix} = \begin{pmatrix} 2 & 0 \\ 0 & 4 \\ 0 & 0 \end{pmatrix} \] Direct check: \[ T(\bf{e}_1') = 2\bf{f}_1' \qquad T(\bf{e}_2') = 4 \bf{f}_2' \] which agrees. \end{example*} \subsubsection*{(b) Proof of Proposition} \begin{align*} T(\bf{e}_i') &= T(\sum_j \bf{e}_j P_{ji}) &&\text{definition of $P$} \\ &= \sum_j T(\bf{e}_j) P_{ji} &&\text{$T$ linear} \\ &= \sum_j \sum_a \bf{f}_a A_{aj} P_{ji} &&\text{definition of $A$} \end{align*} \begin{align*} T(\bf{e}_i') &= \sum_b \bf{f}_b' A_{bi}' &&\text{definition of $A'$} \\ &= \sum_b \sum_a \bf{f}_a Q_{ab} A_{bi}' &&\text{definition of $Q$} \end{align*} Comparing coefficients of $\bf{f}_a$ (since it's a basis): \[ \sum_j A_{aj} P_{ji} = \sum_b Q_{ab} A_{bi}' \] or \[ AP = QA' ,\] hence $A' = Q^{-1}AP$ ($Q$ is invertible), as required.
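In the example above one can also verify the intermediate relation $AP = QA'$ directly: \[ AP = \begin{pmatrix} 1 & -1 \\ 2 & 2 \\ -1 & 1 \end{pmatrix} \begin{pmatrix} 1 & 1 \\ -1 & 1 \end{pmatrix} = \begin{pmatrix} 2 & 0 \\ 0 & 4 \\ -2 & 0 \end{pmatrix} = \begin{pmatrix} 1 & 0 & 1 \\ 0 & 1 & 0 \\ -1 & 0 & 1 \end{pmatrix} \begin{pmatrix} 2 & 0 \\ 0 & 4 \\ 0 & 0 \end{pmatrix} = QA' .\]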
\subsubsection*{(c) Approach using vector components} Consider \begin{align*} \bf{x} &= \sum_j x_j \bf{e}_j \\ &= \sum_i x_i' \bf{e}_i' \\ &= \sum_j \left( \sum_i P_{ji} x_i' \right) \bf{e}_j \\ \implies x_j &= \sum_i P_{ji} x_i' \end{align*} Write \[ X = \begin{pmatrix} x_1 \\ \vdots \\ x_n \end{pmatrix} \qquad \text{and} \qquad X' = \begin{pmatrix} x_1' \\ \vdots \\ x_n' \end{pmatrix} \] then \[ X = PX' \qquad \text{or} \qquad X' = P^{-1}X \] Note: some care is needed if $V = \RR^n$, e.g. $n = 2$ with \[ \bf{e}_1 = \begin{pmatrix} 1 \\ 1 \end{pmatrix} , \qquad \bf{e}_2 = \begin{pmatrix} 1 \\ -1 \end{pmatrix} \] \[ \bf{x} = \begin{pmatrix} 5 \\ 1 \end{pmatrix} \in \RR^2 \] has $\bf{x} = 3\bf{e}_1 + 2\bf{e}_2$ so \[ X = \begin{pmatrix} 3 \\ 2 \end{pmatrix} \] Similarly \[ \bf{y} = \sum_b y_b \bf{f}_b = \sum_a y_a' \bf{f}_a' \] \[ \implies y_b = \sum_a Q_{ba} y_a' \] Then \[ Y = QY' \qquad \text{or} \qquad Y' = Q^{-1}Y \] where \[ Y = \begin{pmatrix} y_1 \\ \vdots \\ y_m \end{pmatrix} \qquad \text{and} \qquad Y' = \begin{pmatrix} y_1' \\ \vdots \\ y_m' \end{pmatrix} \] Now, the matrices $A$, $A'$ are defined to ensure \[ Y = AX \qquad \text{and} \qquad Y' = A'X' \] But \begin{align*} Y' &= Q^{-1}Y \\ &= Q^{-1}AX \\ &= (Q^{-1}AP)X' \\ &= A'X' \end{align*} and this holds $\forall \bf{x}$, so \[ A' = Q^{-1}AP .\] \subsubsection*{Comments} \begin{enumerate}[(i)] \item The definition of the matrix $A$ for $T : V \to W$ with respect to bases $\{\bf{e}_i\}$ and $\{\bf{f}_a\}$ can be expressed as: column $i$ of $A$ consists of the components of $T(\bf{e}_i)$ with respect to the basis $\{\bf{f}_a\}$. [For $T : \RR^n \to \RR^m$ with standard bases, the columns of $A$ are the images of the standard basis vectors.] Similarly, the definitions of $P$ and $Q$ say: the columns consist of the components of the new basis vectors with respect to the old. \item With $V = W$, the same bases, $\bf{e}_i = \bf{f}_i$ and $\bf{e}_i' = \bf{f}_i'$, we have \[ P = Q \qquad \text{and} \qquad A' = P^{-1}AP \] Matrices representing the same linear map with respect to different bases are similar; conversely, if $A$ and $A'$ are similar then we can regard them as representing the same linear map, with $P$ defining the change of basis. In section 6.3, we observed \[ \mathrm{tr}(A') = \mathrm{tr}(A) ,\] \[ \det(A') = \det(A) ,\] \[ \chi_{A'}(t) = \chi_A(t) \] so these are properties of the linear map. \item $V = W = \RR^n \text{ or } \CC^n$, with $\{\bf{e}_i\}$ the standard basis: matrix $A$ is diagonalisable if and only if there exists a basis of eigenvectors \[ \bf{e}_i' = \bf{v}_i \] with \[ A\bf{v}_i = \lambda_i \bf{v}_i \qquad \text{no summation convention!} \] and then \[ A' = P^{-1}AP = D = \begin{pmatrix} \lambda_1 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & \lambda_n \end{pmatrix} \] and \[ \bf{v}_i = \sum_j \bf{e}_j P_{ji} \] i.e. the eigenvectors are the columns of $P$. Specialising further, $A^\dag = A$ implies there exists a basis of orthonormal eigenvectors \[ \bf{e}_i' = \bf{u}_i \qquad \text{and} \qquad P^\dag = P^{-1} \] \end{enumerate} \subsection{Jordan Canonical / Normal Form} This result classifies $n \times n$ complex matrices up to similarity.
\begin{proposition*} Any $2 \times 2$ complex matrix $A$ is similar to one of the following: \begin{enumerate}[(i)] \item For some $\lambda_1 \neq \lambda_2$ \[ A' = \begin{pmatrix} \lambda_1 & 0 \\ 0 & \lambda_2 \end{pmatrix} \] so \[ \chi_A(t) = (t - \lambda_1)(t - \lambda_2) \] \item For some $\lambda$, \[ A' = \begin{pmatrix} \lambda & 0 \\ 0 & \lambda \end{pmatrix} \] so \[ \chi_A(t) = (t - \lambda)^2 \] \item For some $\lambda$, \[ A' = \begin{pmatrix} \lambda & 1 \\ 0 & \lambda \end{pmatrix} \] so \[ \chi_A(t) = (t - \lambda)^2 \] \end{enumerate} \end{proposition*} \begin{proof} $\chi_A(t)$ has 2 roots over $\CC$. \begin{enumerate}[(i)] \item For distinct roots or eigenvalues $\lambda_1$, $\lambda_2$, we have $M_{\lambda_1} = m_{\lambda_1} = M_{\lambda_2} = m_{\lambda_2} = 1$ and eigenvectors $\bf{v}_1$, $\bf{v}_2$ provide a basis. \item For a repeated root / eigenvalue $\lambda$, if $M_\lambda = m_\lambda = 2$, then the same argument applies. \item For a repeated root / eigenvalue $\lambda$ with $M_\lambda = 2$ and $m_\lambda = 1$, let $\bf{v}$ be an eigenvector for $\lambda$ and $\bf{w}$ any linearly independent vector. Then \[ A\bf{v} = \lambda \bf{v} \] \[ A\bf{w} = \alpha \bf{v} + \beta \bf{w} \] say. The matrix of the map with respect to the basis $\{\bf{v}, \bf{w}\}$ is \[ \begin{pmatrix} \lambda & \alpha \\ 0 & \beta \end{pmatrix} \] But $\beta = \lambda$ (otherwise case (i)) and $\alpha \neq 0$ (otherwise case (ii)). Now set $\bf{u} = \alpha \bf{v}$ and note \[ A\bf{u} = \lambda \bf{u} \] \[ A\bf{w} = \bf{u} + \lambda \bf{w} \] so with respect to the basis $\{\bf{u}, \bf{w}\}$ the matrix is \[ A' = \begin{pmatrix} \lambda & 1 \\ 0 & \lambda \end{pmatrix} \] as claimed. \end{enumerate} \end{proof} \begin{example*}[using a slightly different approach] \[ A = \begin{pmatrix} 1 & 4 \\ -1 & 5 \end{pmatrix} \] \[ \implies \chi_A(t) = (t - 3)^2 \] and \[ A - 3I = \begin{pmatrix} -2 & 4 \\ -1 & 2 \end{pmatrix} \] Choose \[ \bf{w} = \begin{pmatrix} 1 \\ 0 \end{pmatrix} \] not an eigenvector, and then \[ \bf{u} = (A - 3I) \bf{w} = \begin{pmatrix} -2 \\ -1 \end{pmatrix} \] Now $(A - 3I)^2 = 0$, so \[ A\bf{u} = 3\bf{u} \] \[ A\bf{w} = \bf{u} + 3\bf{w} \] and the basis $\{\bf{u}, \bf{w}\}$ gives the JCF. Check: \[ P = \begin{pmatrix} -2 & 1 \\ -1 & 0 \end{pmatrix} \implies P^{-1} = \begin{pmatrix} 0 & -1 \\ 1 & -2 \end{pmatrix} \] and \[ P^{-1}AP = \begin{pmatrix} 3 & 1 \\ 0 & 3 \end{pmatrix} \] The generalisation to larger matrices can be considered, starting with the $n \times n$ matrix \[ N = \begin{pmatrix} 0 & 1 & 0 & \cdots & 0 \\ 0 & 0 & 1 & \cdots & 0 \\ 0 & 0 & 0 & \cdots & 0 \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & \cdots & 0 \end{pmatrix} \] Applied to the standard basis vectors this gives \[ \bf{e}_n \mapsto \bf{e}_{n - 1} \mapsto \cdots \mapsto \bf{e}_1 \mapsto \bf{0} \] Note that if \[ J = \lambda I + N \] then \[ \chi_J(t) = (\lambda - t)^n \] but $m_\lambda = 1$ ($M_\lambda = n$). \end{example*} \begin{theorem*} Any $n \times n$ complex matrix $A$ is similar to a matrix $A'$ with block form \begin{center} \includegraphics[width=0.6\linewidth] {images/66db862251fd11ec.png} \end{center} where each diagonal block is a \emph{Jordan block} of the form \[ J_p(\lambda) = \ub{ \begin{pmatrix} \lambda & 1 & 0 & \cdots & 0 \\ 0 & \lambda & 1 & \cdots & 0 \\ 0 & 0 & \lambda & \cdots & 0 \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & \cdots & \lambda \end{pmatrix} }_{p \times p} \] with block sizes $n_1 + \cdots + n_r = n$, and $\lambda_1, \dots, \lambda_r$ are eigenvalues of $A$ and $A'$ (the same eigenvalue may appear in more than one block).
$A$ is diagonalisable if and only if $A'$ consists of $1 \times 1$ Jordan blocks only. \end{theorem*} \begin{proof} See Linear Algebra and GRM in Part IB. \end{proof} \subsection{Quadrics and Conics} \subsubsection*{(a) Quadrics in General} A quadric in $\RR^n$ is a hypersurface defined by \[ Q(\bf{x}) = \bf{x}^\top A\bf{x} + \bf{b}^\top \bf{x} + c = 0 \] for some $A$, an $n \times n$ real symmetric, non-zero matrix, $\bf{b} \in \RR^n$, $c \in \RR$. So \[ Q(\bf{x}) = A_{ij} x_i x_j + b_ix_i + c = 0 \] Consider classifying solutions up to geometrical equivalence: no distinction is made between solutions related by \emph{isometries} (length preserving maps) in $\RR^n$, i.e. related by \begin{enumerate}[(i)] \item translation (change in origin) \item orthogonal transformation about the origin (change in axes). \end{enumerate} If $A$ is invertible (no zero eigenvalues) then by setting $\bf{y} = \bf{x} + \half A^{-1}\bf{b}$ we have \begin{align*} \bf{y}^\top A\bf{y} &= (\bf{x} + \half A^{-1}\bf{b})^\top A (\bf{x} + \half A^{-1} \bf{b}) \\ &= \bf{x}^\top A\bf{x} + \bf{b}^\top \bf{x} + \frac{1}{4} \bf{b}^\top A^{-1} \bf{b} \end{align*} [since $(A^{-1}\bf{b})^\top = \bf{b}^\top (A^{-1})^\top$ and $(A^{-1})^\top = (A^\top)^{-1} = A^{-1}$ in this case.] Then $Q(\bf{x}) = 0 \iff \mathcal{F}(\bf{y}) = k$ with $\mathcal{F}(\bf{y}) = \bf{y}^\top A\bf{y}$ (a quadratic form with respect to the new origin $\bf{y} = \bf{0}$) and $k = \frac{1}{4} \bf{b}^\top A^{-1}\bf{b} - c$. Diagonalise $\mathcal{F}$ as in section 6.5: the orthonormal eigenvectors give principal axes, and the eigenvalues of $A$ and the value of $k$ determine the nature of the quadric. Examples in $\RR^3$ were given in section 6.5(b): \begin{enumerate}[(i)] \item eigenvalues $> 0$ and $k > 0$ give an ellipsoid \item eigenvalues of different sign and $k \neq 0$ give a hyperboloid \end{enumerate} If $A$ has one or more zero eigenvalues then the analysis changes and the simplest standard form may have both linear and quadratic terms. \subsubsection*{(b) Conics} Quadrics in $\RR^2$ are curves, \emph{conics}. \bigskip \noindent \ul{$\det A \neq 0$.} \\ By completing the square and diagonalising we get a standard form \[ \lambda_1 x_1'^2 + \lambda_2 x_2'^2 = k \] \[ \lambda_1, \lambda_2 > 0 \implies \begin{cases} \text{ellipse for $k > 0$} \\ \text{point for $k = 0$} \\ \text{no solution for $k < 0$} \end{cases} \] \[ \lambda_1 > 0, \lambda_2 < 0 \implies \begin{cases} \text{hyperbola for $k > 0$ or $k < 0$} \\ \text{pair of lines for $k = 0$} \end{cases} \] e.g. \[ x_1'^2 - x_2'^2 = (x_1' - x_2')(x_1' + x_2') = 0 \] \bigskip \noindent \ul{$\det A = 0$.} \\ Suppose $\lambda_1 > 0$ and $\lambda_2 = 0$; diagonalise $A$ in the original formula to get \[ \lambda_1 x_1'^2 + b_1'x_1' + b_2'x_2' + c = 0 \] \[ \iff \lambda_1 x_1''^2 + b_2'x_2' + c' = 0 \] where \[ x_1'' = x_1' + \frac{1}{2\lambda_1} b_1' \qquad \text{and} \qquad c' = c - \frac{b_1'^2}{4\lambda_1} \] If $b_2' = 0$ then we get a pair of lines for $c' < 0$, a single line for $c' = 0$ and no solutions for $c' > 0$. If $b_2' \neq 0$ then the equation becomes \[ \lambda_1 x_1''^2 + b_2'x_2'' = 0 ,\] a parabola, where \[ x_2'' = x_2' + \frac{1}{b_2'}c' \] \subsection{Symmetries and Transformation Groups} \subsubsection*{(a) Orthogonal Transformations and Rotations in $\RR^n$} \begin{align*} \text{$R$ orthogonal} &\iff R^\top R = RR^\top = I \\ &\iff (R\bf{x}) \cdot (R\bf{y}) = \bf{x} \cdot \bf{y} \,\,\forall \bf{x}, \bf{y} \\ &\iff \text{columns (or rows) of $R$ are orthonormal vectors} \end{align*} The set of such matrices forms the \emph{orthogonal group} $\mathrm{O}(n)$.
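For example, the rotation matrix $U$ of section 6.1(b) satisfies \[ U^\top U = \begin{pmatrix} \cos\theta & \sin\theta \\ -\sin\theta & \cos\theta \end{pmatrix} \begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix} = I ,\] so $U \in \mathrm{O}(2)$.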
\[ R \in \mathrm{O}(n) \implies \det R = \pm 1 \] [$\det(R^\top)\det(R) = [\det(R)]^2 = 1$] \[ \mathrm{SO}(n) = \{R \in \mathrm{O}(n) : \det R = +1\} \] is a subgroup, the \emph{special orthogonal group}. \[ R \in \mathrm{O}(n) \implies \text{$R$ preserves lengths and $|n\text{-dim vol}|$} \] \[ R \in \mathrm{SO}(n) \implies \text{$R$ also preserves orientation} \] $\mathrm{SO}(n)$ consists of all rotations in $\RR^n$. \bigskip \noindent Reflections belong to $\mathrm{O}(n) \setminus \mathrm{SO}(n)$; any element of $\mathrm{O}(n)$ is of the form \[ R \text{ or } RH \text{ with } R \in \mathrm{SO}(n) ,\] where $H$ is any fixed element with $\det H = -1$, e.g. if $n$ is odd we can choose $H = -I$. \noindent\textbf{Active and Passive Points of View} \\ For a rotation $R$ (matrix), the transformation \[ x_i' = R_{ij}x_j \] can be viewed in two ways. \bigskip \noindent \emph{Active view point}: the rotation transforms vectors; \[ x_i' \text{ components of the new vector} \] \[ \bf{x}' = R\bf{x} \text{ with respect to the standard basis $\{\bf{e}_i\}$} \] e.g. $\RR^2$ \begin{center} \begin{tsqx} (-1.5,0)->>(0.5,0) (0,-1.5)->>(0,0.5) (0.5,0)->>(1.5,0) (0,0.5)->>(0,1.5) label $\mathbf{e}_2$ @ (-0.1,0.5) label $\mathbf{e}_1$ @ (0.5,-0.1) unitcircle X := dir(30) X' := dir(50) label $\mathbf{x}$ @ 1.1*X label $\mathbf{x}' = R\mathbf{x}$ @ 1.2*X' origin->>X origin->>X' \end{tsqx} \end{center} \[ |\bf{x}'|^2 = |\bf{x}|^2 \] \bigskip \noindent \emph{Passive view point}: the rotation changes the basis; \[ x_i' \text{ components of the same vector $\bf{x}$ but with respect to a new basis $\{\bf{u}_i\}$} \] e.g. $\RR^2$ \begin{center} \begin{tsqx} (-1.5,0)->>(0.5,0) (0,-1.5)->>(0,0.5) (0.5,0)->>(1.5,0) (0,0.5)->>(0,1.5) label $x_1$ @ (1.6,0) label $x_2$ @ (0,1.6) -1.5*dir(70)->>0.5*dir(70) -1.5*dir(-20)->>0.5*dir(-20) 0.5*dir(70)->>1.5*dir(70) 0.5*dir(-20)->>1.5*dir(-20) label $x_2'$ @ 1.6*dir(70) label $x_1'$ @ 1.6*dir(-20) label $\mathbf{u}_2$ @ 0.6*dir(78) label $\mathbf{u}_1$ @ 0.6*dir(-28) label $\mathbf{e}_2$ @ (-0.1,0.5) label $\mathbf{e}_1$ @ (0.5,-0.1) X := dir(30) label $\mathbf{x}$ @ 1.1*X origin->>X \end{tsqx} \end{center} \begin{align*} \bf{u}_i &= \sum_j R_{ij} \bf{e}_j \\ &= \sum_j \bf{e}_j(R^{-1})_{ji} \end{align*} (compare to section 6.5: $P = R^{-1}$) \subsubsection*{(b) 2D Minkowski Space and Lorentz Transformations} Define a new ``inner product'' on $\RR^2$ by \[ (\bf{x}, \bf{y}) = \bf{x}^\top J \bf{y} = x_0y_0 - x_1y_1 \] where \[ J = \begin{pmatrix} 1 & 0 \\ 0 & -1 \end{pmatrix} \] and where we now label components \[ \bf{x} = \begin{pmatrix} x_0 \\ x_1 \end{pmatrix} \qquad \text{and} \qquad \bf{y} = \begin{pmatrix} y_0 \\ y_1 \end{pmatrix} \] This is not positive definite, since \[ (\bf{x}, \bf{x}) = \bf{x}^\top J \bf{x} = x_0^2 - x_1^2 ,\] but it is still bilinear and symmetric. The standard basis vectors are ``orthonormal'' in a generalised sense: \[ \bf{e}_0 = \begin{pmatrix} 1 \\ 0 \end{pmatrix} \qquad \text{and} \qquad \bf{e}_1 = \begin{pmatrix} 0 \\ 1 \end{pmatrix} \] obey \begin{align*} (\bf{e}_0, \bf{e}_0) &= 1 \\ (\bf{e}_1, \bf{e}_1) &= -1 \\ (\bf{e}_0, \bf{e}_1) &= 0 \end{align*} The new inner product is called the \emph{Minkowski} metric and $\RR^2$ equipped with it is called \emph{Minkowski space}. Consider \[ M = \begin{pmatrix} M_{00} & M_{01} \\ M_{10} & M_{11} \end{pmatrix} \] giving a linear map $\RR^2 \to \RR^2$.
This preserves the Minkowski metric if and only if \begin{align*} (M\bf{x}, M\bf{y}) &= (\bf{x}, \bf{y}) &&\forall \bf{x}, \bf{y} \in \RR^2 \\ \iff (M\bf{x})^\top J(M\bf{y}) &= \bf{x}^\top(M^\top JM) \bf{y} \\ &= \bf{x}^\top J \bf{y} &&\forall \bf{x}, \bf{y} \in \RR^2 \\ \iff M^\top JM &= J \end{align*} The set of such matrices forms a group. Now \begin{align*} \det(M^\top JM) &= \det M^\top \det J \det M = \det J \\ \implies (\det M)^2 &= 1 \implies \det M = \pm 1 \end{align*} Furthermore, the $(0,0)$ entry of $M^\top JM = J$ gives $M_{00}^2 - M_{10}^2 = 1$, so $|M_{00}| \ge 1$, i.e. \[ M_{00} \ge 1 \qquad \text{or} \qquad M_{00} \le -1 .\] The subgroup with \[ \det M = +1 \qquad \text{and} \qquad M_{00} \ge 1 \] is the \emph{Lorentz group} in 2D. \\ \emph{General form for $M$}: require the columns $M\bf{e}_0$ and $M\bf{e}_1$ to be orthonormal, like $\bf{e}_0$, $\bf{e}_1$ (with respect to the new inner product). This implies \[ M(\theta) = \begin{pmatrix} \cosh \theta & \sinh \theta \\ \sinh \theta & \cosh \theta \end{pmatrix} \] The first column is fixed by requiring $(M\bf{e}_0, M\bf{e}_0) = 1$, or $M_{00}^2 - M_{10}^2 = 1$, and $M_{00} \ge 1$. The second column is then fixed by $(M\bf{e}_0, M\bf{e}_1) = 0$, $(M\bf{e}_1, M\bf{e}_1) = -1$ and $\det M = +1$ (which fixes the overall sign). For such matrices \[ M(\theta_1)M(\theta_2) = M(\theta_1 + \theta_2) \] [by the addition formulae for $\cosh$ and $\sinh$]. \begin{center} \begin{tsqx} (-1.5,0)->>(1.5,0) (0,-1.5)->>(0,1.5) label $x_1$ @ (1.6,0) label $x_0$ @ (0,1.6) ! real f(real x) {return sqrt(x**2 - 1);} ! draw(graph(f,1,1.5,operator ..)); ! real g(real x) {return -sqrt(x**2 - 1);} ! draw(graph(g,1,1.5,operator ..)); ! real h(real x) {return sqrt(x**2 + 1);} ! draw(graph(h,-f(1.5),f(1.5),operator ..)); ! real j(real x) {return -sqrt(x**2 + 1);} ! draw(graph(j,-f(1.5),f(1.5),operator ..), dashed); x := (0.5,h(0.5)) Mx := (-0.5,h(-0.5)) origin->>x origin->>Mx label $\mathbf{x}$ @ 1.1*x label $\mathbf{x}' = M\mathbf{x}$ @ (-0.25,1.3) (-1.3,-1.3)--(1.3,1.3) (-1.3,1.3)--(1.3,-1.3) label $k < 0$ @ (1.2,0.1) label $k > 0$ @ (0.9,1.5) label $k = 0$ @ (1.4,1.4) \end{tsqx} \end{center} The figure shows curves with $(\bf{x},\bf{x}) = k$, constant. \bigskip \noindent \textbf{Physical application} \\ Set \[ M(\theta) = \gamma(v) \begin{pmatrix} 1 & v \\ v & 1 \end{pmatrix} \] \[ v = \tanh \theta \] \[ \gamma(v) = (1 - v^2)^{-1/2}, \qquad |v| < 1 .\] Rename $x_0 \to t$ (time coordinate) and $x_1 \to x$ (space coordinate). Then \[ \bf{x}' = M\bf{x} \iff \begin{cases} t' = \gamma(t + vx) \\ x' = \gamma(x + vt) \end{cases} \] This is a \emph{Lorentz transformation} or \emph{boost}, relating observers moving with relative velocity $v$ according to Special Relativity (units with $c = 1$). The factor $\gamma(v) = (1 - v^2)^{-1/2}$ gives rise to effects such as time dilation and length contraction. \end{document}