% Default to the notebook output style
% Inherit from the specified cell style.
\documentclass[11pt]{article}
\usepackage[T1]{fontenc}
% Nicer default font (+ math font) than Computer Modern for most use cases
\usepackage{mathpazo}
% Basic figure setup, for now with no caption control since it's done
% automatically by Pandoc (which extracts ![](path) syntax from Markdown).
\usepackage{graphicx}
% We will generate all images so they have a width \maxwidth. This means
% that they will get their normal width if they fit onto the page, but
% are scaled down if they would overflow the margins.
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth
\else\Gin@nat@width\fi}
\makeatother
\let\Oldincludegraphics\includegraphics
% Set max figure width to be 80% of text width, for now hardcoded.
\renewcommand{\includegraphics}[1]{\Oldincludegraphics[width=.8\maxwidth]{#1}}
% Ensure that by default, figures have no caption (until we provide a
% proper Figure object with a Caption API and a way to capture that
% in the conversion process - todo).
\usepackage{caption}
\DeclareCaptionLabelFormat{nolabel}{}
\captionsetup{labelformat=nolabel}
\usepackage{adjustbox} % Used to constrain images to a maximum size
\usepackage{xcolor} % Allow colors to be defined
\usepackage{enumerate} % Needed for markdown enumerations to work
\usepackage{geometry} % Used to adjust the document margins
\usepackage{amsmath} % Equations
\usepackage{amssymb} % Equations
\usepackage{textcomp} % defines textquotesingle
% Hack from http://tex.stackexchange.com/a/47451/13684:
\AtBeginDocument{%
\def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code
}
\usepackage{upquote} % Upright quotes for verbatim code
\usepackage{eurosym} % defines \euro
\usepackage[mathletters]{ucs} % Extended unicode (utf-8) support
\usepackage[utf8x]{inputenc} % Allow utf-8 characters in the tex document
\usepackage{fancyvrb} % verbatim replacement that allows latex
\usepackage{grffile} % extends the file name processing of package graphics
% to support a larger range
% The hyperref package gives us a pdf with properly built
% internal navigation ('pdf bookmarks' for the table of contents,
% internal cross-reference links, web links for URLs, etc.)
\usepackage{hyperref}
\usepackage{longtable} % longtable support required by pandoc >1.10
\usepackage{booktabs} % table support for pandoc > 1.12.2
\usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment)
\usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout)
% normalem makes italics be italics, not underlines
% Colors for the hyperref package
\definecolor{urlcolor}{rgb}{0,.145,.698}
\definecolor{linkcolor}{rgb}{.71,0.21,0.01}
\definecolor{citecolor}{rgb}{.12,.54,.11}
% ANSI colors
\definecolor{ansi-black}{HTML}{3E424D}
\definecolor{ansi-black-intense}{HTML}{282C36}
\definecolor{ansi-red}{HTML}{E75C58}
\definecolor{ansi-red-intense}{HTML}{B22B31}
\definecolor{ansi-green}{HTML}{00A250}
\definecolor{ansi-green-intense}{HTML}{007427}
\definecolor{ansi-yellow}{HTML}{DDB62B}
\definecolor{ansi-yellow-intense}{HTML}{B27D12}
\definecolor{ansi-blue}{HTML}{208FFB}
\definecolor{ansi-blue-intense}{HTML}{0065CA}
\definecolor{ansi-magenta}{HTML}{D160C4}
\definecolor{ansi-magenta-intense}{HTML}{A03196}
\definecolor{ansi-cyan}{HTML}{60C6C8}
\definecolor{ansi-cyan-intense}{HTML}{258F8F}
\definecolor{ansi-white}{HTML}{C5C1B4}
\definecolor{ansi-white-intense}{HTML}{A1A6B2}
% commands and environments needed by pandoc snippets
% extracted from the output of `pandoc -s`
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\newenvironment{Shaded}{}{}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
\newcommand{\RegionMarkerTok}[1]{{#1}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\NormalTok}[1]{{#1}}
% Additional commands for more recent versions of Pandoc
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}}
\newcommand{\ImportTok}[1]{{#1}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}}
\newcommand{\BuiltInTok}[1]{{#1}}
\newcommand{\ExtensionTok}[1]{{#1}}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
% Define a nice break command that doesn't care if a line doesn't already
% exist.
\def\br{\hspace*{\fill} \\* }
% MathJax compatibility definitions
\def\gt{>}
\def\lt{<}
% Document parameters
\title{kNN}
% Pygments definitions
\makeatletter
\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax%
\let\PY@ul=\relax \let\PY@tc=\relax%
\let\PY@bc=\relax \let\PY@ff=\relax}
\def\PY@tok#1{\csname PY@tok@#1\endcsname}
\def\PY@toks#1+{\ifx\relax#1\empty\else%
\PY@tok{#1}\expandafter\PY@toks\fi}
\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{%
\PY@it{\PY@bf{\PY@ff{#1}}}}}}}
\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}}
\expandafter\def\csname PY@tok@w\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\expandafter\def\csname PY@tok@c\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}}
\expandafter\def\csname PY@tok@k\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\expandafter\def\csname PY@tok@o\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@ow\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PY@tok@nb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@nf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@nc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@nn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@ne\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}}
\expandafter\def\csname PY@tok@nv\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@no\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@nl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}}
\expandafter\def\csname PY@tok@ni\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}}
\expandafter\def\csname PY@tok@na\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}}
\expandafter\def\csname PY@tok@nt\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@nd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PY@tok@s\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sd\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@si\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PY@tok@se\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}}
\expandafter\def\csname PY@tok@sr\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PY@tok@ss\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@sx\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@m\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@gh\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@gu\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@gd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@gi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}}
\expandafter\def\csname PY@tok@gr\endcsname{\def\PY@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@ge\endcsname{\let\PY@it=\textit}
\expandafter\def\csname PY@tok@gs\endcsname{\let\PY@bf=\textbf}
\expandafter\def\csname PY@tok@gp\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@go\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}}
\expandafter\def\csname PY@tok@gt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
\expandafter\def\csname PY@tok@err\endcsname{\def\PY@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}
\expandafter\def\csname PY@tok@kc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kd\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kr\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@bp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@fm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@vc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vg\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@sa\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@dl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@s2\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@s1\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@mb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@il\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mo\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@ch\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cm\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cpf\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@c1\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cs\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\def\PYZbs{\char`\\}
\def\PYZus{\char`\_}
\def\PYZob{\char`\{}
\def\PYZcb{\char`\}}
\def\PYZca{\char`\^}
\def\PYZam{\char`\&}
\def\PYZlt{\char`\<}
\def\PYZgt{\char`\>}
\def\PYZsh{\char`\#}
\def\PYZpc{\char`\%}
\def\PYZdl{\char`\$}
\def\PYZhy{\char`\-}
\def\PYZsq{\char`\'}
\def\PYZdq{\char`\"}
\def\PYZti{\char`\~}
% for compatibility with earlier versions
\def\PYZat{@}
\def\PYZlb{[}
\def\PYZrb{]}
\makeatother
% Exact colors from NB
\definecolor{incolor}{rgb}{0.0, 0.0, 0.5}
\definecolor{outcolor}{rgb}{0.545, 0.0, 0.0}
% Prevent overflowing lines due to hard-to-break entities
\sloppy
% Setup hyperref package
\hypersetup{
breaklinks=true, % so long urls are correctly broken across lines
colorlinks=true,
urlcolor=urlcolor,
linkcolor=linkcolor,
citecolor=citecolor,
}
% Slightly bigger margins than the latex defaults
\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
\begin{document}
\maketitle
\hypertarget{cs145-howework-3-part-1-knn}{%
\section{CS145 Homework 3, Part 1:
kNN}\label{cs145-howework-3-part-1-knn}}
{ \textbf{Important Note:} } HW3 is due on \textbf{11:59 PM PT, Nov 9
(Monday, Week 6)}. Please submit through GradeScope.
Note that Homework \#3 has two Jupyter notebooks to complete (Part 1:
kNN and Part 2: Neural Network).
\begin{center}\rule{0.5\linewidth}{0.5pt}\end{center}
\hypertarget{print-out-your-name-and-uid}{%
\subsection{Print Out Your Name and
UID}\label{print-out-your-name-and-uid}}
{ \textbf{Name: Devyan Biswas, UID: 804988161} }
\begin{center}\rule{0.5\linewidth}{0.5pt}\end{center}
\hypertarget{before-you-start}{%
\subsection{Before You Start}\label{before-you-start}}
You need to first create the HW3 conda environment from the given
\texttt{cs145hw3.yml} file, which provides the name and necessary
packages for this task. If you have \texttt{conda} properly installed,
you may create, activate, or deactivate the environment with the following commands:
\begin{verbatim}
conda env create -f cs145hw3.yml
conda activate hw3
conda deactivate
\end{verbatim}
OR
\begin{verbatim}
conda env create --name NAMEOFYOURCHOICE -f cs145hw3.yml
conda activate NAMEOFYOURCHOICE
conda deactivate
\end{verbatim}
To view the list of your environments, use the following command:
\begin{verbatim}
conda env list
\end{verbatim}
More useful information about managing environments can be found
\href{https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html}{here}.
You may also want to quickly review basic Python and the NumPy package,
if needed, for the matrix operations in the code.
You must not delete any code cells in this notebook.
If you change any code (such as hyperparameters) outside the blocks that
you are allowed to edit (between \texttt{START/END\ YOUR\ CODE\ HERE}),
you need to highlight these changes. You may add additional cells
to help explain your results and observations.
\hypertarget{download-and-prepare-the-dataset}{%
\subsection{Download and prepare the
dataset}\label{download-and-prepare-the-dataset}}
Download the CIFAR-10 dataset (file size: \textasciitilde163M). Run the
following from the HW3 directory:
\begin{Shaded}
\begin{Highlighting}[]
\BuiltInTok{cd}\NormalTok{ hw3/data/datasets }
\ExtensionTok{./get\_datasets.sh}
\end{Highlighting}
\end{Shaded}
Make sure the downloaded dataset is placed under the hw3/data/datasets folder.
After downloading the dataset, you can start your notebook from the HW3
directory. Note that the dataset is used in both Jupyter notebooks (kNN
and Neural Network). You only need to download the dataset once for
HW3.
\hypertarget{import-the-appropriate-libraries}{%
\subsection{Import the appropriate
libraries}\label{import-the-appropriate-libraries}}
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}56}]:} \PY{k+kn}{import} \PY{n+nn}{numpy} \PY{k}{as} \PY{n+nn}{np} \PY{c+c1}{\PYZsh{} for doing most of our calculations}
\PY{k+kn}{import} \PY{n+nn}{matplotlib}\PY{n+nn}{.}\PY{n+nn}{pyplot} \PY{k}{as} \PY{n+nn}{plt}\PY{c+c1}{\PYZsh{} for plotting}
\PY{k+kn}{from} \PY{n+nn}{data}\PY{n+nn}{.}\PY{n+nn}{data\PYZus{}utils} \PY{k}{import} \PY{n}{load\PYZus{}CIFAR10} \PY{c+c1}{\PYZsh{} function to load the CIFAR\PYZhy{}10 dataset.}
\PY{c+c1}{\PYZsh{} Load matplotlib images inline}
\PY{o}{\PYZpc{}}\PY{k}{matplotlib} inline
\PY{c+c1}{\PYZsh{} These are important for reloading any code you write in external .py files.}
\PY{c+c1}{\PYZsh{} see http://stackoverflow.com/questions/1907993/autoreload\PYZhy{}of\PYZhy{}modules\PYZhy{}in\PYZhy{}ipython}
\PY{o}{\PYZpc{}}\PY{k}{load\PYZus{}ext} autoreload
\PY{o}{\PYZpc{}}\PY{k}{autoreload} 2
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
The autoreload extension is already loaded. To reload it, use:
\%reload\_ext autoreload
\end{Verbatim}
Now, to verify that the dataset has been successfully set up, the
following code prints the shapes of the train/test data and labels.
The expected shapes for the train/test data are (50000, 32, 32, 3) and
(10000, 32, 32, 3), and for the labels (50000,) and (10000,), respectively.
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}57}]:} \PY{c+c1}{\PYZsh{} Set the path to the CIFAR\PYZhy{}10 data}
\PY{n}{cifar10\PYZus{}dir} \PY{o}{=} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{./data/datasets/cifar\PYZhy{}10\PYZhy{}batches\PYZhy{}py}\PY{l+s+s1}{\PYZsq{}}
\PY{n}{X\PYZus{}train}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{p}{,} \PY{n}{X\PYZus{}test}\PY{p}{,} \PY{n}{y\PYZus{}test} \PY{o}{=} \PY{n}{load\PYZus{}CIFAR10}\PY{p}{(}\PY{n}{cifar10\PYZus{}dir}\PY{p}{)}
\PY{c+c1}{\PYZsh{} As a sanity check, we print out the size of the training and test data.}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Training data shape: }\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{shape}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Training labels shape: }\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{o}{.}\PY{n}{shape}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Test data shape: }\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}{shape}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Test labels shape: }\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y\PYZus{}test}\PY{o}{.}\PY{n}{shape}\PY{p}{)}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
Training data shape: (50000, 32, 32, 3)
Training labels shape: (50000,)
Test data shape: (10000, 32, 32, 3)
Test labels shape: (10000,)
\end{Verbatim}
Now we visualize some examples from the dataset by showing a few
examples of training images from each class.
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}58}]:} \PY{n}{classes} \PY{o}{=} \PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{plane}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{car}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{bird}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{cat}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{deer}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{dog}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{frog}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{horse}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{ship}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{truck}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}
\PY{n}{num\PYZus{}classes} \PY{o}{=} \PY{n+nb}{len}\PY{p}{(}\PY{n}{classes}\PY{p}{)}
\PY{n}{samples\PYZus{}per\PYZus{}class} \PY{o}{=} \PY{l+m+mi}{7}
\PY{k}{for} \PY{n}{y}\PY{p}{,} \PY{n+nb+bp}{cls} \PY{o+ow}{in} \PY{n+nb}{enumerate}\PY{p}{(}\PY{n}{classes}\PY{p}{)}\PY{p}{:}
\PY{n}{idxs} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{flatnonzero}\PY{p}{(}\PY{n}{y\PYZus{}train} \PY{o}{==} \PY{n}{y}\PY{p}{)}
\PY{n}{idxs} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{choice}\PY{p}{(}\PY{n}{idxs}\PY{p}{,} \PY{n}{samples\PYZus{}per\PYZus{}class}\PY{p}{,} \PY{n}{replace}\PY{o}{=}\PY{k+kc}{False}\PY{p}{)}
\PY{k}{for} \PY{n}{i}\PY{p}{,} \PY{n}{idx} \PY{o+ow}{in} \PY{n+nb}{enumerate}\PY{p}{(}\PY{n}{idxs}\PY{p}{)}\PY{p}{:}
\PY{n}{plt\PYZus{}idx} \PY{o}{=} \PY{n}{i} \PY{o}{*} \PY{n}{num\PYZus{}classes} \PY{o}{+} \PY{n}{y} \PY{o}{+} \PY{l+m+mi}{1}
\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{n}{samples\PYZus{}per\PYZus{}class}\PY{p}{,} \PY{n}{num\PYZus{}classes}\PY{p}{,} \PY{n}{plt\PYZus{}idx}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{imshow}\PY{p}{(}\PY{n}{X\PYZus{}train}\PY{p}{[}\PY{n}{idx}\PY{p}{]}\PY{o}{.}\PY{n}{astype}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{uint8}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{axis}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{off}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{k}{if} \PY{n}{i} \PY{o}{==} \PY{l+m+mi}{0}\PY{p}{:}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{n+nb+bp}{cls}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_7_0.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}59}]:} \PY{c+c1}{\PYZsh{} Subsample the data for more efficient code execution in this exercise}
\PY{n}{num\PYZus{}training} \PY{o}{=} \PY{l+m+mi}{5000}
\PY{n}{mask} \PY{o}{=} \PY{n+nb}{list}\PY{p}{(}\PY{n+nb}{range}\PY{p}{(}\PY{n}{num\PYZus{}training}\PY{p}{)}\PY{p}{)}
\PY{n}{X\PYZus{}train} \PY{o}{=} \PY{n}{X\PYZus{}train}\PY{p}{[}\PY{n}{mask}\PY{p}{]}
\PY{n}{y\PYZus{}train} \PY{o}{=} \PY{n}{y\PYZus{}train}\PY{p}{[}\PY{n}{mask}\PY{p}{]}
\PY{n}{num\PYZus{}test} \PY{o}{=} \PY{l+m+mi}{500}
\PY{n}{mask} \PY{o}{=} \PY{n+nb}{list}\PY{p}{(}\PY{n+nb}{range}\PY{p}{(}\PY{n}{num\PYZus{}test}\PY{p}{)}\PY{p}{)}
\PY{n}{X\PYZus{}test} \PY{o}{=} \PY{n}{X\PYZus{}test}\PY{p}{[}\PY{n}{mask}\PY{p}{]}
\PY{n}{y\PYZus{}test} \PY{o}{=} \PY{n}{y\PYZus{}test}\PY{p}{[}\PY{n}{mask}\PY{p}{]}
\PY{c+c1}{\PYZsh{} Reshape the image data into rows}
\PY{n}{X\PYZus{}train} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{reshape}\PY{p}{(}\PY{n}{X\PYZus{}train}\PY{p}{,} \PY{p}{(}\PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{,} \PY{o}{\PYZhy{}}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{)}
\PY{n}{X\PYZus{}test} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{reshape}\PY{p}{(}\PY{n}{X\PYZus{}test}\PY{p}{,} \PY{p}{(}\PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{,} \PY{o}{\PYZhy{}}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{shape}\PY{p}{,} \PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}{shape}\PY{p}{)}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
(5000, 3072) (500, 3072)
\end{Verbatim}
\hypertarget{implement-k-nearest-neighbors-algorithms}{%
\subsection{Implement K-nearest neighbors
algorithms}\label{implement-k-nearest-neighbors-algorithms}}
In the following cells, you will build a KNN classifier and choose
hyperparameters via k-fold cross-validation.
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}60}]:} \PY{c+c1}{\PYZsh{} Import the KNN class}
\PY{k+kn}{from} \PY{n+nn}{hw3code} \PY{k}{import} \PY{n}{KNN}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}61}]:} \PY{c+c1}{\PYZsh{} Declare an instance of the knn class.}
\PY{n}{knn} \PY{o}{=} \PY{n}{KNN}\PY{p}{(}\PY{p}{)}
\PY{c+c1}{\PYZsh{} Train the classifier.}
\PY{c+c1}{\PYZsh{} We have implemented the training of the KNN classifier.}
\PY{c+c1}{\PYZsh{} Look at the train function in the KNN class to see what this does.}
\PY{n}{knn}\PY{o}{.}\PY{n}{train}\PY{p}{(}\PY{n}{X}\PY{o}{=}\PY{n}{X\PYZus{}train}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{n}{y\PYZus{}train}\PY{p}{)}
\end{Verbatim}
\textbf{Questions}
\begin{enumerate}
\def\labelenumi{(\arabic{enumi})}
\item
Describe what is going on in the function knn.train().
\item
What are the pros and cons of this training step of KNN?
\end{enumerate}
\textbf{Answers}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
The \texttt{knn.train()} function is just assigning the passed in X
and y values into the model's X\_train and y\_train. We are using a
lazy learning approach to KNN here, meaning that we don't do any real
training; instead, we store the training values and then do our
prediction/classification only on receiving a new test tuple.
\item
The biggest cons to this method are that the time in prediction goes
up significantly, as well as the amount of memory needed. However, the
``training'' time is much less. Additionally, unlike the eager
approach, the lazy learning approach doesn't stick to one hypothesis.
Instead, it is able to use many local linear functions that can
actually emulate a global function for the dataset (what the notes
term as a global approximation to the target function).
\end{enumerate}
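As a minimal sketch of this lazy-learning step (the class and attribute
names here are illustrative, not necessarily those used in
\texttt{hw3code}):
\begin{verbatim}
import numpy as np

class LazyKNN:
    def train(self, X, y):
        # "Training" is pure memorization: store the data and labels.
        # All real work (distances, voting) is deferred to prediction time.
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)
\end{verbatim}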
\begin{center}\rule{0.5\linewidth}{0.5pt}\end{center}
\hypertarget{knn-prediction}{%
\subsection{KNN prediction}\label{knn-prediction}}
In the following sections, you will implement the functions to calculate
the distances of test points to training points, and from this
information, predict the class of the KNN.
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}62}]:} \PY{c+c1}{\PYZsh{} Implement the function compute\PYZus{}distances() in the KNN class.}
\PY{c+c1}{\PYZsh{} Do not worry about the input \PYZsq{}norm\PYZsq{} for now; use the default definition of the norm}
\PY{c+c1}{\PYZsh{} in the code, which is the 2\PYZhy{}norm.}
\PY{c+c1}{\PYZsh{} You should only have to fill out the clearly marked sections.}
\PY{k+kn}{import} \PY{n+nn}{time}
\PY{n}{time\PYZus{}start} \PY{o}{=}\PY{n}{time}\PY{o}{.}\PY{n}{time}\PY{p}{(}\PY{p}{)}
\PY{n}{dists\PYZus{}L2} \PY{o}{=} \PY{n}{knn}\PY{o}{.}\PY{n}{compute\PYZus{}distances}\PY{p}{(}\PY{n}{X}\PY{o}{=}\PY{n}{X\PYZus{}test}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Time to run code: }\PY{l+s+si}{\PYZob{}\PYZcb{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{format}\PY{p}{(}\PY{n}{time}\PY{o}{.}\PY{n}{time}\PY{p}{(}\PY{p}{)}\PY{o}{\PYZhy{}}\PY{n}{time\PYZus{}start}\PY{p}{)}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Frobenius norm of L2 distances: }\PY{l+s+si}{\PYZob{}\PYZcb{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{format}\PY{p}{(}\PY{n}{np}\PY{o}{.}\PY{n}{linalg}\PY{o}{.}\PY{n}{norm}\PY{p}{(}\PY{n}{dists\PYZus{}L2}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{fro}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
Time to run code: 50.123364210128784
Frobenius norm of L2 distances: 7906696.077040902
\end{Verbatim}
\hypertarget{really-slow-code}{%
\subsubsection{Really slow code?}\label{really-slow-code}}
Note: this probably took a while, because the implementation uses two
for loops. We can increase the speed via vectorization, removing the for
loops. Normally it takes 20-40 seconds.
If you implemented this correctly, evaluating np.linalg.norm(dists\_L2,
`fro') should return: \textasciitilde7906696
\hypertarget{knn-vectorization}{%
\subsubsection{KNN vectorization}\label{knn-vectorization}}
The above code took far too long to run. If we wanted to optimize
hyperparameters, the cost would be prohibitive. Thus, we will speed up
the code by vectorizing it, removing the for loops.
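The standard trick is to expand the squared distance: for a test point
\(x\) and a training point \(y\),
\(\lVert x-y\rVert^2 = \lVert x\rVert^2 - 2\,x\cdot y + \lVert y\rVert^2\),
so all pairwise distances reduce to one matrix product plus broadcast
additions. A hedged standalone sketch of this idea (the actual
\texttt{compute\_L2\_distances\_vectorized} in the KNN class may differ
in its details):
\begin{verbatim}
import numpy as np

def l2_distances_vectorized(X_test, X_train):
    # ||x - y||^2 = ||x||^2 - 2 x.y + ||y||^2 for all test/train pairs,
    # computed with one matrix product and broadcasting (no Python loops).
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)  # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)               # (num_train,)
    cross = X_test @ X_train.T                            # (num_test, num_train)
    d2 = test_sq - 2.0 * cross + train_sq
    np.maximum(d2, 0.0, out=d2)  # clip tiny negatives from rounding
    return np.sqrt(d2)
\end{verbatim}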
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}63}]:} \PY{c+c1}{\PYZsh{} Implement the function compute\PYZus{}L2\PYZus{}distances\PYZus{}vectorized() in the KNN class.}
\PY{c+c1}{\PYZsh{} In this function, you ought to achieve the same L2 distance but WITHOUT any for loops.}
\PY{c+c1}{\PYZsh{} Note, this is SPECIFIC for the L2 norm.}
\PY{n}{time\PYZus{}start} \PY{o}{=}\PY{n}{time}\PY{o}{.}\PY{n}{time}\PY{p}{(}\PY{p}{)}
\PY{n}{dists\PYZus{}L2\PYZus{}vectorized} \PY{o}{=} \PY{n}{knn}\PY{o}{.}\PY{n}{compute\PYZus{}L2\PYZus{}distances\PYZus{}vectorized}\PY{p}{(}\PY{n}{X}\PY{o}{=}\PY{n}{X\PYZus{}test}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Time to run code: }\PY{l+s+si}{\PYZob{}\PYZcb{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{format}\PY{p}{(}\PY{n}{time}\PY{o}{.}\PY{n}{time}\PY{p}{(}\PY{p}{)}\PY{o}{\PYZhy{}}\PY{n}{time\PYZus{}start}\PY{p}{)}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Difference in L2 distances between your KNN implementations (should be 0): }\PY{l+s+si}{\PYZob{}\PYZcb{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{format}\PY{p}{(}\PY{n}{np}\PY{o}{.}\PY{n}{linalg}\PY{o}{.}\PY{n}{norm}\PY{p}{(}\PY{n}{dists\PYZus{}L2} \PY{o}{\PYZhy{}} \PY{n}{dists\PYZus{}L2\PYZus{}vectorized}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{fro}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
Time to run code: 0.388016939163208
Difference in L2 distances between your KNN implementations (should be 0): 0.0
\end{Verbatim}
\hypertarget{speedup}{%
\subsubsection{Speedup}\label{speedup}}
Depending on your computer's speed, you should see a 20-100x speedup
from vectorization and no difference in L2 distances between the two
implementations.
On our computer, the vectorized form took 0.20 seconds while the naive
implementation took 26.88 seconds.
\hypertarget{implementing-the-prediction}{%
\subsection{Implementing the
prediction}\label{implementing-the-prediction}}
Now that we have functions to calculate the distances from a test point
to the training points, we implement the function that predicts the
test point labels.
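One common shape for such a prediction step, shown as a hedged
standalone sketch (the real \texttt{predict\_labels} in the KNN class
uses the training labels stored by \texttt{train()}, so its signature
differs; non-negative integer labels are assumed, as in CIFAR-10):
\begin{verbatim}
import numpy as np

def predict_labels(dists, y_train, k=1):
    # dists has shape (num_test, num_train); row i holds the distances
    # from test point i to every training point.
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test, dtype=np.int64)
    for i in range(num_test):
        nearest = np.argsort(dists[i])[:k]       # indices of the k closest points
        votes = y_train[nearest]                 # labels of those neighbors
        y_pred[i] = np.bincount(votes).argmax()  # majority vote (ties -> smallest label)
    return y_pred
\end{verbatim}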
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}64}]:} \PY{c+c1}{\PYZsh{} Implement the function predict\PYZus{}labels in the KNN class.}
\PY{c+c1}{\PYZsh{} Calculate the training error (num\PYZus{}incorrect / total\PYZus{}samples) }
\PY{c+c1}{\PYZsh{} from running knn.predict\PYZus{}labels with k=1}
\PY{n}{error} \PY{o}{=} \PY{l+m+mi}{1}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} START YOUR CODE HERE}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} Calculate the error rate by calling predict\PYZus{}labels on the test }
\PY{c+c1}{\PYZsh{} data with k = 1. Store the error rate in the variable error.}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{n}{pred\PYZus{}labels} \PY{o}{=} \PY{n}{knn}\PY{o}{.}\PY{n}{predict\PYZus{}labels}\PY{p}{(}\PY{n}{dists\PYZus{}L2\PYZus{}vectorized}\PY{p}{,} \PY{n}{k} \PY{o}{=} \PY{l+m+mi}{1}\PY{p}{)}
\PY{c+c1}{\PYZsh{} print(pred\PYZus{}labels)}
\PY{n}{num\PYZus{}samples} \PY{o}{=} \PY{n}{pred\PYZus{}labels}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}
\PY{n}{count} \PY{o}{=} \PY{l+m+mi}{0}
\PY{k}{for} \PY{n}{i} \PY{o+ow}{in} \PY{n+nb}{range}\PY{p}{(}\PY{n}{num\PYZus{}samples}\PY{p}{)}\PY{p}{:}
\PY{n}{count} \PY{o}{+}\PY{o}{=} \PY{p}{(}\PY{n}{pred\PYZus{}labels}\PY{p}{[}\PY{n}{i}\PY{p}{]} \PY{o}{!=} \PY{n}{y\PYZus{}test}\PY{p}{[}\PY{n}{i}\PY{p}{]}\PY{p}{)}
\PY{n}{error} \PY{o}{=} \PY{n}{count} \PY{o}{/} \PY{n}{num\PYZus{}samples}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} END YOUR CODE HERE}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{error}\PY{p}{)}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
0.726
\end{Verbatim}
If you implemented this correctly, the error should be 0.726. This
means that the k-nearest neighbors classifier is right only 27.4\% of
the time, which is not great.
\hypertarget{questions}{%
\subsubsection{Questions:}\label{questions}}
What could you do to improve the accuracy of the k-nearest neighbor
classifier you just implemented? Write down your answer in less than 30
words.
\hypertarget{answers}{%
\subsubsection{Answers:}\label{answers}}
One way to improve accuracy is to preprocess the dataset to make it
easier to classify, for example with feature/data scaling.
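For instance, a hedged sketch of standardization, fit on the training
set only and then applied to the test set (using \texttt{X\_train} and
\texttt{X\_test} as defined above; this is illustrative, not part of the
assignment code):
\begin{verbatim}
import numpy as np

# Standardize each feature using training-set statistics only,
# then apply the same transform to the test set.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0) + 1e-8   # avoid division by zero on constant features
X_train_scaled = (X_train - mean) / std
X_test_scaled = (X_test - mean) / std
\end{verbatim}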
\hypertarget{optimizing-knn-hyperparameters-k}{%
\subsection{\texorpdfstring{Optimizing KNN hyperparameters
\(k\)}{Optimizing KNN hyperparameters k}}\label{optimizing-knn-hyperparameters-k}}
In this section, we'll take the KNN classifier that you have constructed
and perform cross-validation to choose the best value of \(k\).
If you are not familiar with it, cross-validation is a technique for
evaluating ML models by training several models on subsets of the
available input data and evaluating them on the complementary subset of
the data. Cross-validation helps detect overfitting, i.e., failing to
generalize a pattern. More specifically, in k-fold cross-validation, you
evenly split the input data into k subsets of data (also known as
folds). You train an ML model on all but one (k-1) of the subsets, and
then evaluate the model on the subset that was not used for training.
This process is repeated k times, with a different subset reserved for
evaluation (and excluded from training) each time.
More details of cross validation can be found
\href{https://scikit-learn.org/stable/modules/cross_validation.html}{here}.
However, you are not allowed to use sklearn in your implementation.
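In symbols, writing \(m\) for the number of folds and
\(\mathrm{err}_j(k)\) for the error rate on held-out fold \(j\) after
training on the other \(m-1\) folds, the quantity compared across
candidate values of \(k\) is
\[
\mathrm{CVerror}(k) = \frac{1}{m}\sum_{j=1}^{m} \mathrm{err}_j(k),
\]
and we keep the \(k\) with the smallest average error. (We write \(m\)
here to avoid a clash with the \(k\) of kNN; the folds correspond to
\texttt{num\_folds} in the code below.)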
\hypertarget{create-training-and-validation-folds}{%
\subsubsection{Create training and validation
folds}\label{create-training-and-validation-folds}}
First, we will create the training and validation folds for use in
k-fold cross validation.
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}65}]:} \PY{c+c1}{\PYZsh{} Create the dataset folds for cross\PYZhy{}valdiation.}
\PY{n}{num\PYZus{}folds} \PY{o}{=} \PY{l+m+mi}{5}
\PY{n}{X\PYZus{}train\PYZus{}folds} \PY{o}{=} \PY{p}{[}\PY{p}{]}
\PY{n}{y\PYZus{}train\PYZus{}folds} \PY{o}{=} \PY{p}{[}\PY{p}{]}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} START YOUR CODE HERE}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} Split the training data into num\PYZus{}folds (i.e., 5) folds.}
\PY{c+c1}{\PYZsh{} X\PYZus{}train\PYZus{}folds is a list, where X\PYZus{}train\PYZus{}folds[i] contains the }
\PY{c+c1}{\PYZsh{} data points in fold i.}
\PY{c+c1}{\PYZsh{} y\PYZus{}train\PYZus{}folds is also a list, where y\PYZus{}train\PYZus{}folds[i] contains}
\PY{c+c1}{\PYZsh{} the corresponding labels for the data in X\PYZus{}train\PYZus{}folds[i]}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{n}{num\PYZus{}training\PYZus{}exs} \PY{o}{=} \PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}
\PY{n}{entries\PYZus{}per\PYZus{}fold} \PY{o}{=} \PY{n+nb}{int}\PY{p}{(}\PY{n}{num\PYZus{}training\PYZus{}exs} \PY{o}{/} \PY{n}{num\PYZus{}folds}\PY{p}{)}
\PY{n}{rand\PYZus{}indices} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{permutation}\PY{p}{(}\PY{n}{num\PYZus{}training\PYZus{}exs}\PY{p}{)}
\PY{n}{X\PYZus{}train\PYZus{}folds} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{split}\PY{p}{(}\PY{n}{X\PYZus{}train}\PY{p}{[}\PY{n}{rand\PYZus{}indices}\PY{p}{]}\PY{p}{,} \PY{n}{num\PYZus{}folds}\PY{p}{)}
\PY{n}{y\PYZus{}train\PYZus{}folds} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{split}\PY{p}{(}\PY{n}{y\PYZus{}train}\PY{p}{[}\PY{n}{rand\PYZus{}indices}\PY{p}{]}\PY{p}{,} \PY{n}{num\PYZus{}folds}\PY{p}{)}
\PY{n}{X\PYZus{}train\PYZus{}folds} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{asarray}\PY{p}{(}\PY{n}{X\PYZus{}train\PYZus{}folds}\PY{p}{)}
\PY{n}{y\PYZus{}train\PYZus{}folds} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{asarray}\PY{p}{(}\PY{n}{y\PYZus{}train\PYZus{}folds}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{X\PYZus{}train\PYZus{}folds}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{y\PYZus{}train\PYZus{}folds}\PY{p}{)}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} END YOUR CODE HERE}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
[[[122. 98. 62. {\ldots} 184. 117. 76.]
[206. 235. 226. {\ldots} 135. 120. 101.]
[121. 106. 102. {\ldots} 127. 107. 98.]
{\ldots}
[113. 123. 64. {\ldots} 179. 171. 143.]
[ 53. 65. 53. {\ldots} 49. 50. 41.]
[135. 163. 168. {\ldots} 86. 67. 57.]]
[[209. 205. 197. {\ldots} 193. 161. 92.]
[ 73. 102. 95. {\ldots} 40. 65. 38.]
[186. 180. 184. {\ldots} 112. 117. 103.]
{\ldots}
[219. 205. 205. {\ldots} 18. 16. 30.]
[ 41. 53. 40. {\ldots} 186. 190. 179.]
[ 48. 112. 172. {\ldots} 50. 51. 48.]]
[[ 67. 60. 44. {\ldots} 181. 153. 120.]
[ 49. 41. 30. {\ldots} 204. 184. 178.]
[150. 161. 174. {\ldots} 94. 108. 109.]
{\ldots}
[ 32. 48. 96. {\ldots} 27. 42. 83.]
[149. 158. 189. {\ldots} 127. 132. 140.]
[190. 195. 181. {\ldots} 155. 140. 122.]]
[[ 16. 37. 79. {\ldots} 47. 96. 153.]
[182. 200. 212. {\ldots} 74. 163. 135.]
[125. 127. 126. {\ldots} 106. 132. 91.]
{\ldots}
[152. 161. 175. {\ldots} 96. 79. 79.]
[ 35. 41. 55. {\ldots} 104. 120. 107.]
[ 32. 43. 60. {\ldots} 18. 25. 41.]]
[[180. 63. 80. {\ldots} 20. 19. 17.]
[114. 110. 104. {\ldots} 51. 43. 32.]
[250. 253. 249. {\ldots} 254. 253. 254.]
{\ldots}
[253. 253. 253. {\ldots} 172. 163. 102.]
[ 55. 60. 68. {\ldots} 116. 122. 115.]
[232. 215. 187. {\ldots} 215. 209. 197.]]]
[[2 9 2 {\ldots} 7 4 9]
[2 4 7 {\ldots} 9 8 0]
[4 6 8 {\ldots} 6 8 5]
[8 8 2 {\ldots} 2 4 8]
[9 3 8 {\ldots} 4 5 9]]
\end{Verbatim}
\hypertarget{optimizing-the-number-of-nearest-neighbors-hyperparameter.}{%
\subsubsection{Optimizing the number of nearest neighbors
hyperparameter.}\label{optimizing-the-number-of-nearest-neighbors-hyperparameter.}}
In this section, we select different numbers of nearest neighbors and
assess which one has the lowest k-fold cross-validation error.
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}66}]:} \PY{n}{time\PYZus{}start} \PY{o}{=}\PY{n}{time}\PY{o}{.}\PY{n}{time}\PY{p}{(}\PY{p}{)}
\PY{n}{ks} \PY{o}{=} \PY{p}{[}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{3}\PY{p}{,} \PY{l+m+mi}{5}\PY{p}{,} \PY{l+m+mi}{7}\PY{p}{,} \PY{l+m+mi}{10}\PY{p}{,} \PY{l+m+mi}{15}\PY{p}{,} \PY{l+m+mi}{20}\PY{p}{,} \PY{l+m+mi}{25}\PY{p}{,} \PY{l+m+mi}{30}\PY{p}{]}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} START YOUR CODE HERE}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} Calculate the cross\PYZhy{}validation error for each k in ks, testing}
\PY{c+c1}{\PYZsh{} the trained model on each of the 5 folds. Average these errors}
\PY{c+c1}{\PYZsh{} together and make a plot of k vs. average cross\PYZhy{}validation error. }
\PY{c+c1}{\PYZsh{} Since we assume L2 distance here, please use the vectorized code!}
\PY{c+c1}{\PYZsh{} Otherwise, you might be waiting a long time.}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{n}{knn} \PY{o}{=} \PY{n}{KNN}\PY{p}{(}\PY{p}{)}
\PY{n}{res} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{zeros}\PY{p}{(}\PY{n+nb}{len}\PY{p}{(}\PY{n}{ks}\PY{p}{)}\PY{p}{)}
\PY{k}{for} \PY{n}{index}\PY{p}{,}\PY{n}{k} \PY{o+ow}{in} \PY{n+nb}{enumerate}\PY{p}{(}\PY{n}{ks}\PY{p}{)}\PY{p}{:}
\PY{n}{error} \PY{o}{=} \PY{l+m+mi}{0}
\PY{k}{for} \PY{n}{j} \PY{o+ow}{in} \PY{n+nb}{range}\PY{p}{(}\PY{n}{num\PYZus{}folds}\PY{p}{)}\PY{p}{:}
\PY{n}{x\PYZus{}t\PYZus{}folds} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{concatenate}\PY{p}{(}\PY{p}{[}\PY{n}{X\PYZus{}train\PYZus{}folds}\PY{p}{[}\PY{n}{fold}\PY{p}{]} \PY{k}{for} \PY{n}{fold} \PY{o+ow}{in} \PY{n+nb}{range}\PY{p}{(}\PY{n}{num\PYZus{}folds}\PY{p}{)} \PY{k}{if} \PY{n}{fold} \PY{o}{!=} \PY{n}{j}\PY{p}{]}\PY{p}{)}
\PY{n}{y\PYZus{}t\PYZus{}folds} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{concatenate}\PY{p}{(}\PY{p}{[}\PY{n}{y\PYZus{}train\PYZus{}folds}\PY{p}{[}\PY{n}{fold}\PY{p}{]} \PY{k}{for} \PY{n}{fold} \PY{o+ow}{in} \PY{n+nb}{range}\PY{p}{(}\PY{n}{num\PYZus{}folds}\PY{p}{)} \PY{k}{if} \PY{n}{fold} \PY{o}{!=} \PY{n}{j}\PY{p}{]}\PY{p}{)}
\PY{n}{x\PYZus{}test\PYZus{}fold} \PY{o}{=} \PY{n}{X\PYZus{}train\PYZus{}folds}\PY{p}{[}\PY{n}{j}\PY{p}{]}
\PY{n}{y\PYZus{}test\PYZus{}fold} \PY{o}{=} \PY{n}{y\PYZus{}train\PYZus{}folds}\PY{p}{[}\PY{n}{j}\PY{p}{]}
\PY{n}{knn}\PY{o}{.}\PY{n}{train}\PY{p}{(}\PY{n}{X}\PY{o}{=}\PY{n}{x\PYZus{}t\PYZus{}folds}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{n}{y\PYZus{}t\PYZus{}folds}\PY{p}{)} \PY{c+c1}{\PYZsh{}train on the n\PYZhy{}1 folds}
\PY{n}{distances} \PY{o}{=} \PY{n}{knn}\PY{o}{.}\PY{n}{compute\PYZus{}L2\PYZus{}distances\PYZus{}vectorized}\PY{p}{(}\PY{n}{X}\PY{o}{=}\PY{n}{x\PYZus{}test\PYZus{}fold}\PY{p}{)} \PY{c+c1}{\PYZsh{} check distances}
\PY{n}{pred} \PY{o}{=} \PY{n}{knn}\PY{o}{.}\PY{n}{predict\PYZus{}labels}\PY{p}{(}\PY{n}{distances}\PY{p}{,} \PY{n}{k}\PY{o}{=}\PY{n}{k}\PY{p}{)} \PY{c+c1}{\PYZsh{} run prediction}
\PY{n}{num\PYZus{}incorrect} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{sum}\PY{p}{(}\PY{n}{pred} \PY{o}{!=} \PY{n}{y\PYZus{}test\PYZus{}fold}\PY{p}{)} \PY{c+c1}{\PYZsh{}check number of wrong cases}
\PY{n}{error} \PY{o}{+}\PY{o}{=} \PY{n}{num\PYZus{}incorrect} \PY{o}{/} \PY{n}{y\PYZus{}test\PYZus{}fold}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]} \PY{c+c1}{\PYZsh{}create average error rate for model}
\PY{n}{res}\PY{p}{[}\PY{n}{index}\PY{p}{]} \PY{o}{=} \PY{n}{error} \PY{o}{/} \PY{n}{num\PYZus{}folds} \PY{c+c1}{\PYZsh{} add average error rate to red[index]}
\PY{n}{ks\PYZus{}min} \PY{o}{=} \PY{n}{ks}\PY{p}{[}\PY{n}{np}\PY{o}{.}\PY{n}{argsort}\PY{p}{(}\PY{n}{res}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{]}
\PY{n}{results\PYZus{}min} \PY{o}{=} \PY{n+nb}{min}\PY{p}{(}\PY{n}{res}\PY{p}{)}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} END YOUR CODE HERE}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Set k = }\PY{l+s+si}{\PYZob{}0\PYZcb{}}\PY{l+s+s1}{ and get minimum error as }\PY{l+s+si}{\PYZob{}1\PYZcb{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{format}\PY{p}{(}\PY{n}{ks\PYZus{}min}\PY{p}{,}\PY{n}{results\PYZus{}min}\PY{p}{)}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{ks}\PY{p}{,}\PY{n}{res}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{ks}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Average Cross Validation Error}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Computation time: }\PY{l+s+si}{\PYZpc{}.2f}\PY{l+s+s1}{\PYZsq{}}\PY{o}{\PYZpc{}}\PY{p}{(}\PY{n}{time}\PY{o}{.}\PY{n}{time}\PY{p}{(}\PY{p}{)}\PY{o}{\PYZhy{}}\PY{n}{time\PYZus{}start}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
Set k = 10 and get minimum error as 0.7247999999999999
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_30_1.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Computation time: 47.53
\end{Verbatim}
\textbf{Questions:}
\begin{enumerate}
\def\labelenumi{(\arabic{enumi})}
\item
Why do we typically choose \(k\) as an odd number (for example, in
\texttt{ks})?
\item
What value of \(k\) is best amongst the tested \(k\)'s? What is the
cross-validation error for this value of \(k\)?
\end{enumerate}
\textbf{Answers}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
  An odd \(k\) is generally chosen to avoid situations in which a data
  point has multiple equally-voted options for the label it could be
  assigned; essentially, it is to overcome ties.
\item
  This value depends on the subset of the data taken at the start, but
  for the version in this notebook, the best \(k=10\), and the
  cross-validation error is \(\approx 0.7248\).
\end{enumerate}
\hypertarget{evaluating-the-model-on-the-testing-dataset.}{%
\subsection{Evaluating the model on the testing
dataset.}\label{evaluating-the-model-on-the-testing-dataset.}}
Now, given the optimal \(k\) that you have found, evaluate the testing
error of the k-nearest neighbors model.
\begin{Verbatim}[commandchars=\\\{\}]
{\color{incolor}In [{\color{incolor}67}]:} \PY{n}{error} \PY{o}{=} \PY{l+m+mi}{1}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} START YOUR CODE HERE}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} Evaluate the testing error of the k\PYZhy{}nearest neighbors classifier}
\PY{c+c1}{\PYZsh{} for your optimal hyperparameters found by 5\PYZhy{}fold cross\PYZhy{}validation.}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{n}{knn}\PY{o}{.}\PY{n}{train}\PY{p}{(}\PY{n}{X}\PY{o}{=}\PY{n}{X\PYZus{}train}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{n}{y\PYZus{}train}\PY{p}{)}
\PY{n}{dists\PYZus{}L2\PYZus{}vectorized} \PY{o}{=} \PY{n}{knn}\PY{o}{.}\PY{n}{compute\PYZus{}L2\PYZus{}distances\PYZus{}vectorized}\PY{p}{(}\PY{n}{X}\PY{o}{=}\PY{n}{X\PYZus{}test}\PY{p}{)}
\PY{n}{pred\PYZus{}labels} \PY{o}{=} \PY{n}{knn}\PY{o}{.}\PY{n}{predict\PYZus{}labels}\PY{p}{(}\PY{n}{dists\PYZus{}L2\PYZus{}vectorized}\PY{p}{,}\PY{n}{k}\PY{o}{=}\PY{n}{ks\PYZus{}min}\PY{p}{)}
\PY{n}{num\PYZus{}samples} \PY{o}{=} \PY{n}{pred\PYZus{}labels}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}
\PY{n}{num\PYZus{}errors} \PY{o}{=} \PY{l+m+mi}{0}
\PY{k}{for} \PY{n}{i} \PY{o+ow}{in} \PY{n+nb}{range}\PY{p}{(}\PY{n}{num\PYZus{}samples}\PY{p}{)}\PY{p}{:}
\PY{k}{if} \PY{n}{pred\PYZus{}labels}\PY{p}{[}\PY{n}{i}\PY{p}{]} \PY{o}{!=} \PY{n}{y\PYZus{}test}\PY{p}{[}\PY{n}{i}\PY{p}{]}\PY{p}{:}
\PY{n}{num\PYZus{}errors} \PY{o}{=} \PY{n}{num\PYZus{}errors} \PY{o}{+} \PY{l+m+mi}{1}
\PY{n}{error} \PY{o}{=} \PY{n}{num\PYZus{}errors}\PY{o}{/}\PY{n}{num\PYZus{}samples}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{c+c1}{\PYZsh{} END YOUR CODE HERE}
\PY{c+c1}{\PYZsh{} ================================================================ \PYZsh{}}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Error rate achieved: }\PY{l+s+si}{\PYZob{}\PYZcb{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{format}\PY{p}{(}\PY{n}{error}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\begin{Verbatim}[commandchars=\\\{\}]
Error rate achieved: 0.718
\end{Verbatim}
\textbf{Question:}
How much did your error change by cross-validation over naively choosing
\(k=1\) and using the L2-norm?
\textbf{Answers} Well, the error went from 0.726 to 0.718, so it's an
improvement of 0.008. The dataset may be difficult to separate or
classify without further processing of the data, so even this modest
improvement is meaningful.
\begin{center}\rule{0.5\linewidth}{0.5pt}\end{center}
\hypertarget{end-of-homework-3-part-1}{%
\subsection{End of Homework 3, Part 1
:)}\label{end-of-homework-3-part-1}}
After you've finished both parts of the homework, please print both of
the entire \texttt{ipynb} notebooks and the \texttt{py} files into one
PDF file. Make sure you include the output of code cells and the
answers to questions. Then submit it to GradeScope. Do not include any
dataset in your submission.
% Add a bibliography block to the postdoc
\end{document}