hw2.tex

\documentclass[10pt,oneside]{article}
\usepackage{amsmath} 
\usepackage{graphicx} 
\usepackage{subcaption} 
\usepackage{amsfonts}
\usepackage{amssymb} 

\newcommand{\tr}{\operatorname{trace}}
\newcommand{\vecm}{\operatorname{vec}}
\newcommand{\diagm}{\operatorname{diagm}}
\newcommand{\dotstar}{\mathbin{.*}}

\usepackage{polyglossia}


\usepackage[
  backend=biber
]{biblatex}
\addbibresource{biblio.bib}


\usepackage[
  letterpaper,
  left=1cm,
  right=1cm,
  top=1.5cm,
  bottom=1.5cm
]{geometry}


\usepackage[
  final,
  unicode,
  colorlinks=true,
  citecolor=blue,
  linkcolor=blue,
  plainpages=false,
  urlcolor=blue,
  pdfpagelabels=true,
  pdfsubject={Cálculo},
  pdfauthor={José Doroteo Arango Arámbula},
  pdftitle={Tarea 1},
  pdfkeywords={UNAM, FES Acatlán, 2021-I}
]{hyperref}

\usepackage{booktabs}


\usepackage{algpseudocode}
\usepackage{algorithm} 
\floatname{algorithm}{Algoritmo}

\usepackage{enumitem}

\usepackage{lastpage}
\usepackage{fancyhdr}
\fancyhf{}
\pagestyle{fancy}
\fancyhf{}
\fancyhead[L]{MIT IAP Winter 2022} 
\fancyhead[C]{Matrix Calculus} 
\fancyhead[R]{Profs. Edelman and Johnson} 
\fancyfoot[R]{}
% https://tex.stackexchange.com/questions/227/how-can-i-add-page-of-on-my-document
\fancyfoot[C]{\thepage\ of \pageref*{LastPage}}
%\fancyfoot[C]{ of }
\fancyfoot[L]{} 
\renewcommand{\headrulewidth}{2pt} 
\renewcommand{\footrulewidth}{2pt}

\usepackage[newfloat=true]{minted} 

\author{} 
\title{Homework 2}

\date{\today}


\DeclareCaptionFormat{mitedFormat}{%
    \textbf{#1#2}#3}
\DeclareCaptionStyle{minetdStyle}{skip=0cm,width=.85\textwidth,justification=centering,
  font=footnotesize,singlelinecheck=off,format=mitedFormat,labelsep=space}
\newenvironment{mintedCode}{\captionsetup{type=listing,style=minetdStyle}}{}

\usepackage{dirtytalk} 

\SetupFloatingEnvironment{listing}{}

\usepackage[parfill]{parskip}

\usepackage{csquotes} 

\begin{document}
\maketitle
\thispagestyle{fancy} 

{\bf Please submit your HW on Canvas; include a PDF printout of any code and results, clearly labeled, e.g. from a Jupyter notebook.  It is due Wednesday January 26th by 11:59pm EST.  }

\subsection*{Problem 1}

Let $f(x): \mathbb{R}^n \to \mathbb{R}^n$ be an invertible map from inputs $x \in \mathbb{R}^n$ ($n$-component column vectors) to outputs in $\mathbb{R}^n$.   If $g(x)$ is the inverse function, so that $g(f(x)) = x = f(g(x))$, use the chain rule to show how the Jacobians $f'(x)$ and $g'(x)$ are related.

\subsection*{Problem 2}

Consider $f(A) = \sqrt{A}$ where $A$ is an $n\times n$ matrix.   (Recall that the matrix square root is a matrix such that $(\sqrt{A})^2 = A$.   In 18.06, we might compute this from a diagonalization of $A$ by taking the square roots of all the eigenvalues.)

\begin{enumerate}[label=\alph*)]
    \item  Give a formula for the Jacobian of $f$, acting on $\vecm(A)$, in terms of Kronecker products and matrix powers only (no eigenvalues required!).  (Hint: see problem 1.)
    
    \item Check your formula numerically against a finite-difference approximation.  (To ensure that no complex numbers show up in the matrix square root, pick a random positive-definite $A = B^T B$ for some random square $B$.)
\end{enumerate}

\subsection*{Problem 3}

Suppose that $A(p) = A\_0 + \diagm(p)$ constructs an $n\times n$ matrix $A$ from $p \in \mathbb{R}^n$, where $\diagm(p) = \begin{pmatrix} p_1 & & \\ & p_2 & \\ & & \ddots \end{pmatrix}$ is the diagonal matrix with the entries of $p$ along its diagonal, and $A_0$ is some constant matrix.   We now perform the following sequence of steps:

\begin{enumerate}
    \item solve $A(p) x = a$ for $x \in \mathbb{R}^n$, where $a$ is some vector.
    \item form $B(x) = B_0 + \diagm(x \dotstar x)$ where $\dotstar$ is the elementwise product and $B_0$ is some $n\times n$ matrix.
    \item solve $By = b$ for $y \in \mathbb{R}^n$, where $b$ is some vector.
    \item compute $f = y^T F y$ where $F = F^T$ is some symmetric $n \times n$ matrix.
\end{enumerate}

Now, suppose that we want to optimize $f(p)$ as a function of the parameters $p$ that went into the first step.  We need to compute the gradient $\nabla f$ for any kind of large-scale problem.   If $n$ is huge, though, we must be careful to do this efficiently, using ``reverse-mode'' or ``adjoint'' calculations in which we apply the chain rule from left to right.

\begin{enumerate}[label=\alph*)]

\item Explain the sequence of steps to compute $\nabla f$.   Your answer should show that $\nabla f$ can \emph{also} be computed by solving only \emph{two} $n \times n$ linear systems, similar to $f$ itself.

\item Check your answer numerically against finite differences (for some randomly chosen $A_0, B_0, a, b, F$).

\item Check your answer against the result of a reverse-mode AD software (e.g. Zygote in Julia).

\end{enumerate}

\subsection*{Problem 4}

\begin{enumerate}[label=\alph*)]

\item Write down some $4 \times 4$ matrix that is not the
Kronecker product of two $2 \times 2$ matrices.  Convince
us this is true.

\item Prove that if $A$ ($m \times m$) and $B$ ($n \times n$) are orthogonal (i.e.,
$A^TA=I_m$ and $B^TB=I_n$) then $A \otimes B$ ($mn \times mn$) is orthogonal.

\item If $f(A) = e^A= \sum_{k=0}^\infty A^k/k!$, write down
a power series involving Kronecker products for the
Jacobian $f'(A)$.  Check your answer with a numerical example, e.g.~against finite differences.

\end{enumerate}


\end{document}