forked from mitmath/matrixcalc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hw2.tex
157 lines (107 loc) · 5.29 KB
/
hw2.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
\documentclass[10pt,oneside]{article}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{amsfonts}
\usepackage{amssymb}
\newcommand{\tr}{\operatorname{trace}}
\newcommand{\vecm}{\operatorname{vec}}
\newcommand{\diagm}{\operatorname{diagm}}
\newcommand{\dotstar}{\mathbin{.*}}
\usepackage{polyglossia}
\usepackage[
backend=biber
]{biblatex}
\addbibresource{biblio.bib}
\usepackage[
letterpaper,
left=1cm,
right=1cm,
top=1.5cm,
bottom=1.5cm
]{geometry}
\usepackage[
final,
unicode,
colorlinks=true,
citecolor=blue,
linkcolor=blue,
plainpages=false,
urlcolor=blue,
pdfpagelabels=true,
pdfsubject={Cálculo},
pdfauthor={José Doroteo Arango Arámbula},
pdftitle={Tarea 1},
pdfkeywords={UNAM, FES Acatlán, 2021-I}
]{hyperref}
\usepackage{booktabs}
\usepackage{algpseudocode}
\usepackage{algorithm}
\floatname{algorithm}{Algoritmo}
\usepackage{enumitem}
\usepackage{lastpage}
\usepackage{fancyhdr}
\fancyhf{}
\pagestyle{fancy}
\fancyhf{}
\fancyhead[L]{MIT IAP Winter 2022}
\fancyhead[C]{Matrix Calculus}
\fancyhead[R]{Profs. Edelman and Johnson}
\fancyfoot[R]{}
% https://tex.stackexchange.com/questions/227/how-can-i-add-page-of-on-my-document
\fancyfoot[C]{\thepage\ of \pageref*{LastPage}}
%\fancyfoot[C]{ of }
\fancyfoot[L]{}
\renewcommand{\headrulewidth}{2pt}
\renewcommand{\footrulewidth}{2pt}
\usepackage[newfloat=true]{minted}
\author{}
\title{Homework 2}
\date{\today}
\DeclareCaptionFormat{mitedFormat}{%
\textbf{#1#2}#3}
\DeclareCaptionStyle{minetdStyle}{skip=0cm,width=.85\textwidth,justification=centering,
font=footnotesize,singlelinecheck=off,format=mitedFormat,labelsep=space}
\newenvironment{mintedCode}{\captionsetup{type=listing,style=minetdStyle}}{}
\usepackage{dirtytalk}
\SetupFloatingEnvironment{listing}{}
\usepackage[parfill]{parskip}
\usepackage{csquotes}
\begin{document}
\maketitle
\thispagestyle{fancy}
{\bf Please submit your HW on Canvas; include a PDF printout of any code and results, clearly labeled, e.g. from a Jupyter notebook. It is due Wednesday January 26th by 11:59pm EST. }
\subsection*{Problem 1}
Let $f(x): \mathbb{R}^n \to \mathbb{R}^n$ be an invertible map from inputs $x \in \mathbb{R}^n$ ($n$-component column vectors) to outputs in $\mathbb{R}^n$. If $g(x)$ is the inverse function, so that $g(f(x)) = x = f(g(x))$, use the chain rule to show how the Jacobians $f'(x)$ and $g'(x)$ are related.
\subsection*{Problem 2}
Consider $f(A) = \sqrt{A}$ where $A$ is an $n\times n$ matrix. (Recall that the matrix square root is a matrix such that $(\sqrt{A})^2 = A$. In 18.06, we might compute this from a diagonalization of $A$ by taking the square roots of all the eigenvalues.)
\begin{enumerate}[label=\alph*)]
\item Give a formula for the Jacobian of $f$, acting on $\vecm(A)$, in terms of Kronecker products and matrix powers only (no eigenvalues required!). (Hint: see problem 1.)
\item Check your formula numerically against a finite-difference approximation. (To ensure that no complex numbers show up in the matrix square root, pick a random positive-definite $A = B^T B$ for some random square $B$.)
\end{enumerate}
\subsection*{Problem 3}
Suppose that $A(p) = A\_0 + \diagm(p)$ constructs an $n\times n$ matrix $A$ from $p \in \mathbb{R}^n$, where $\diagm(p) = \begin{pmatrix} p_1 & & \\ & p_2 & \\ & & \ddots \end{pmatrix}$ is the diagonal matrix with the entries of $p$ along its diagonal, and $A_0$ is some constant matrix. We now perform the following sequence of steps:
\begin{enumerate}
\item solve $A(p) x = a$ for $x \in \mathbb{R}^n$, where $a$ is some vector.
\item form $B(x) = B_0 + \diagm(x \dotstar x)$ where $\dotstar$ is the elementwise product and $B_0$ is some $n\times n$ matrix.
\item solve $By = b$ for $y \in \mathbb{R}^n$, where $b$ is some vector.
\item compute $f = y^T F y$ where $F = F^T$ is some symmetric $n \times n$ matrix.
\end{enumerate}
Now, suppose that we want to optimize $f(p)$ as a function of the parameters $p$ that went into the first step. We need to compute the gradient $\nabla f$ for any kind of large-scale problem. If $n$ is huge, though, we must be careful to do this efficiently, using ``reverse-mode'' or ``adjoint'' calculations in which we apply the chain rule from left to right.
\begin{enumerate}[label=\alph*)]
\item Explain the sequence of steps to compute $\nabla f$. Your answer should show that $\nabla f$ can \emph{also} be computed by solving only \emph{two} $n \times n$ linear systems, similar to $f$ itself.
\item Check your answer numerically against finite differences (for some randomly chosen $A_0, B_0, a, b, F$).
\item Check your answer against the result of a reverse-mode AD software (e.g. Zygote in Julia).
\end{enumerate}
\subsection*{Problem 4}
\begin{enumerate}[label=\alph*)]
\item Write down some $4 \times 4$ matrix that is not the
Kronecker product of two $2 \times 2$ matrices. Convince
us this is true.
\item Prove that if $A$ ($m \times m$) and $B$ ($n \times n$) are orthogonal (i.e.,
$A^TA=I_m$ and $B^TB=I_n$) then $A \otimes B$ ($mn \times mn$) is orthogonal.
\item If $f(A) = e^A= \sum_{k=0}^\infty A^k/k!$, write down
a power series involving Kronecker products for the
Jacobian $f'(A)$. Check your answer with a numerical example, e.g.~against finite differences.
\end{enumerate}
\end{document}