% 20141110-HPGMG.tex
% Beamer slide deck.  The handout variant of the class line is kept for reference:
%   \documentclass[handout]{beamer}
\documentclass{beamer}
% Theme configuration, applied only in presentation modes.
\mode<presentation>{%
  \usetheme{ANLBlue}
  % Alternative theme choices, currently disabled:
  %   \usefonttheme[onlymath]{serif}
  %   \usetheme{Singapore}
  %   \usetheme{Warsaw}
  %   \usetheme{Malmoe}
  %   \useinnertheme{circles}
  %   \useoutertheme{infolines}
  %   \useinnertheme{rounded}
  % Show not-yet-uncovered overlay content faintly instead of hiding it.
  \setbeamercovered{transparent=20}
}
% Language and input encoding.
\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
% General-purpose packages loaded in one list (this is where ulem and siunitx come from).
\usepackage{alltt,listings,multirow,ulem,siunitx}
% textpos with absolute, overlaid placement; \TPGrid{1}{1} sets a 1-by-1 grid,
% so positions are presumably given as fractions of the page -- TODO confirm.
\usepackage[absolute,overlay]{textpos}
\TPGrid{1}{1}
\usepackage{pdfpages}
% NOTE(review): ulem is already loaded in the package list above; this second
% load looks redundant.
\usepackage{ulem}
\usepackage{multimedia}
\usepackage{multicol}
% \hmmax and \bmmax are defined as 0 before bm is loaded; bm reads them to decide
% how many heavy/bold math alphabets to allocate (presumably set to avoid
% "too many math alphabets" errors -- confirm against the bm documentation).
\newcommand\hmmax{0}
\newcommand\bmmax{0}
\usepackage{bm}
\usepackage{comment}
\usepackage{subcaption}
% font definitions, try \usepackage{ae} instead of the following
% three lines if you don't like this look
\usepackage{mathptmx}
\usepackage[scaled=.90]{helvet}
% \usepackage{courier}
\usepackage[T1]{fontenc}
% TikZ and the libraries used for drawings.
\usepackage{tikz}
\usetikzlibrary{decorations.pathreplacing}
% NOTE(review): `arrows` and `shadows` each appear twice in the list below.
\usetikzlibrary{shadows,arrows,shapes.misc,shapes.arrows,shapes.multipart,arrows,decorations.pathmorphing,backgrounds,positioning,fit,petri,calc,shadows,chains,matrix}
% Bold-symbol shorthands for v and b, plus two operator abbreviations:
% \bxk expands to b_0 x kappa_0 . nabla, and \delp to nabla with a \perp subscript.
% All four are math-mode only (no \ensuremath wrapper).
\newcommand*{\vvec}{\bm{v}}
\newcommand*{\bvec}{\bm{b}}
\newcommand*{\bxk}{\bvec_0 \times \kappa_0 \cdot \nabla}
\newcommand*{\delp}{\nabla_{\perp}}
% \usepackage{pgfpages}
% \pgfpagesuselayout{4 on 1}[a4paper,landscape,border shrink=5mm]
\usepackage{JedMacros}
% Timing symbols t_R and t_W with upright subscripts; math-mode only.
\newcommand*{\timeR}{t_{\mathrm R}}
\newcommand*{\timeW}{t_{\mathrm W}}
% Level symbol \ell and its subscripted variants (cp, coarse, fine).
% These are wrapped in \ensuremath, so they work in text as well as in math.
\newcommand*{\mglevel}{\ensuremath{\ell}}
\newcommand*{\mglevelcp}{\ensuremath{\mglevel_{\mathrm{cp}}}}
\newcommand*{\mglevelcoarse}{\ensuremath{\mglevel_{\mathrm{coarse}}}}
\newcommand*{\mglevelfine}{\ensuremath{\mglevel_{\mathrm{fine}}}}
% Solution and residual symbols (usable in text or math via \ensuremath):
%   \vx -> x,  \vc -> x with a hat,  \vr -> r,  \vb -> b
\newcommand*{\vx}{\ensuremath{x}}
\newcommand*{\vc}{\ensuremath{\hat x}}
\newcommand*{\vr}{\ensuremath{r}}
\newcommand*{\vb}{\ensuremath{b}}
% Operator symbols (usable in text or math via \ensuremath).
\newcommand{\vA}{\ensuremath{A}}
% \vP and \vR are I with swapped H/h indices; \vI is the hatted form of \vR.
\newcommand{\vP}{\ensuremath{I_H^h}}
\newcommand{\vS}{\ensuremath{S}}
\newcommand{\vR}{\ensuremath{I_h^H}}
\newcommand{\vI}{\ensuremath{\hat I_h^H}}
\newcommand{\vV}{\ensuremath{\mathbf{V}}}
\newcommand{\vF}{\ensuremath{F}}
% \mathbf has no effect on lowercase Greek letters, so \mathbf{\tau} printed an
% ordinary tau.  \bm (from the bm package loaded in the preamble) gives a bold one.
\newcommand{\vtau}{\ensuremath{\bm{\tau}}}
% Title-page metadata: title, author, date line with a link to the slides, PDF subject.
\title{HPGMG: High-Performance Geometric Multigrid}
% NOTE(review): "[email protected]" looks like a web e-mail-obfuscation placeholder
% rather than a real address -- restore the intended address before publishing.
% \textbf replaces the obsolete two-letter font switch {\bf ...}.
\author{\textbf{Jed Brown} \texttt{[email protected]} (ANL and CU Boulder)
}
% - Use the \inst command only if there are several affiliations.
% - Keep it simple, no one is interested in your street address.
% \institute
% {
% Mathematics and Computer Science Division \\ Argonne National Laboratory
% }
\date{$[HPC]^3$, KAUST, 2014-11-10 \\[1em]
{\small This talk: \url{http://59A2.org/files/20141110-HPGMG.pdf}}}
% This is only inserted into the PDF information catalog. Can be left
% out.
\subject{Talks}
% If you have a file called "university-logo-filename.xxx", where xxx
% is a graphic format that can be processed by latex or pdflatex,
% resp., then you can add a logo as follows:
% \pgfdeclareimage[height=0.5cm]{university-logo}{university-logo-filename}
% \logo{\pgfuseimage{university-logo}}
% Delete this, if you do not want the table of contents to pop up at
% the beginning of each subsection:
% \AtBeginSubsection[]
% {
% \begin{frame}<beamer>
% \frametitle{Outline}
% \tableofcontents[currentsection,currentsubsection]
% \end{frame}
% }
% \AtBeginSection[]
% {
% \begin{frame}<beamer>
% \frametitle{Outline}
% \tableofcontents[currentsection]
% \end{frame}
% }
% If you wish to uncover everything in a step-wise fashion, uncomment
% the following command:
% \beamerdefaultoverlayspecification{<+->}
\begin{document}
% Default language for code typeset with the listings package.
\lstset{language=C}
% ulem makes \emph underline by default; \normalem restores the usual italic emphasis.
\normalem
% Overview slide: project links, contributors, the two implementations, and the
% "necessary and sufficient" design goal.
% NOTE(review): "[email protected]" below looks like a web e-mail-obfuscation
% placeholder -- confirm the intended mailing-list address.
\begin{frame}{HPGMG: a new benchmarking proposal}
  \begin{itemize}
    \item \url{https://hpgmg.org}, [email protected] mailing list
    \item SC14 BoF: Wednesday, Nov 19, 12:15pm to 1:15pm
    \item Mark Adams, Sam Williams (finite-volume), myself (finite-element), John Shalf, Brian Van Straalen, Erich Strohmeier, Rich Vuduc
    \item Implementations
      \begin{description}
        \item[Finite Volume] memory bandwidth intensive, simple data dependencies
        \item[Finite Element] compute- and cache-intensive, vectorizes, overlapping writes
      \end{description}
    \item Full multigrid, well-defined, scale-free problem
    \item Goal: necessary and sufficient
      \begin{itemize}
        \item Every feature stressed by benchmark should be necessary for an important application
        \item Good performance on the benchmark should be sufficient for good performance on most applications
      \end{itemize}
  \end{itemize}
\end{frame}
% Full-width Kiviat diagram image with an attribution bullet underneath.
\begin{frame}{Kiviat diagrams}
  \begin{center}
    \includegraphics[width=\textwidth]{figures/hpgmg-kiviat-20140606.png}
  \end{center}
  \begin{itemize}
    \item c/o Ian Karlin and Bert Still (LLNL)
  \end{itemize}
\end{frame}
% Image at 60% of the text width, followed by two bullets contrasting the
% Peregrine and Edison machines.
\begin{frame}{HPGMG distinguishes networks at 1M dofs/core}
  \begin{center}
    \includegraphics[width=0.6\textwidth]{figures/hpgmg-fv-20140515-dof.png}
  \end{center}
  \begin{itemize}
    \item Peregrine and Edison have identical node architecture
    \item Peregrine has 5:1 tapered IB, Edison has Aries dragonfly topology
  \end{itemize}
\end{frame}
% Untitled slide: a large image followed by bullets on turn-around time and
% problem-size range.  The negative \vspace before and after the image pulls the
% surrounding content closer to make room.
\begin{frame}
  \vspace{-1em}
  \begin{center}
    \includegraphics[width=0.9\textwidth]{figures/MG/titan-edison-supermuc-range.png}
  \end{center}
  \vspace{-1em}
  \begin{itemize}
    \item Turn-around time often not negotiable
      \begin{itemize}
        \item policy, manufacturing, forecasting
      \end{itemize}
    \item Users like predictable performance across a range of problem sizes
    \item Transient problems do not weak scale even if each step does
  \end{itemize}
\end{frame}
% QR-on-MIC slide: a full-width figure, then the discussion bullets.
% On the O(n^{3/2}) bullet: n counts matrix entries, so an N-by-N matrix has
% n = N^2 data and an O(N^3) factorization is O(n^{3/2}).
\begin{frame}{Where we are now: $QR$ factorization with MKL on MIC}
  \begin{figure}
    \centering
    \includegraphics[width=\textwidth]{figures/hardware/MKL-dgeqrf-MIC-201411.png}
  \end{figure}
  \begin{itemize}
    \item Figure compares two CPU sockets (230W TDP) to one MIC (300W TDP plus host)
    \item Performance/Watt only breaks even at largest problem sizes
    \item Haswell-EP doubles performance within same power envelope
    \item $10^4 \times 10^4$ matrix takes 667 GFlops: about 2 seconds
    \item This is an $O(n^{3/2})$ operation on $n$ data
    \item MIC cannot strong scale, no more energy efficient/cost effective
    \item ``hard to program'' versus ``architecture ill-suited for problem''?
  \end{itemize}
\end{frame}
\end{document}