This repository was archived by the owner on Nov 1, 2024. It is now read-only.

Commit

Fix Hierarchical Stuff (#34)
* corrected hierarchical CP/NCP stuff in slides

* corrected hierarchical CP/NCP stuff in stan

* corrected hierarchical CP/NCP stuff in turing
storopoli authored Dec 26, 2022
1 parent 9efd563 commit da2fe5e
Showing 33 changed files with 83 additions and 771 deletions.
1 change: 1 addition & 0 deletions slides/00-Tools.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Tools}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/01-Bayesian_Statistics.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Bayesian Statistics}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/02-Statistical_Distributions.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Probability Distributions}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/03-Priors.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Priors}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/04-Predictive_Checks.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Predictive Checks}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/05-Linear_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Linear Regression}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/06-Logistic_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Logistic Regression}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/07-Ordinal_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Ordinal Regression}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/08-Poisson_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Poisson Regression}

\subsection{Recommended References}
15 changes: 8 additions & 7 deletions slides/09-Robust_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Robust Regression}

\subsection{Recommended References}
@@ -181,9 +182,9 @@ \subsubsection{Student's $t$ instead Binomial}
$z$'s errors, $\epsilon$, are distributed as a Student's $t$ distribution:
$$
\begin{aligned}
y_i & = \begin{cases} 0 & \text{if } z_i < 0 \\ 1 & \text{if } z_i > 0 \end{cases} \\
z_i & = X_i \boldsymbol{\beta} + \epsilon_i \\
\epsilon_i & \sim \text{Student} \left (\nu, 0, \sqrt{\frac{\nu - 2}{\nu}} \right) \\
y_i & = \begin{cases} 0 & \text{if } z_i < 0 \\ 1 & \text{if } z_i > 0 \end{cases} \\
z_i & = X_i \boldsymbol{\beta} + \epsilon_i \\
\epsilon_i & \sim \text{Student} \left (\nu, 0, \sqrt{\frac{\nu - 2}{\nu}} \right) \\
\nu & \sim \text{Gamma}(2, 0.1) \in \left[2, \infty \right)
\end{aligned}
$$
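
A minimal Stan sketch of this latent-variable ("robit") formulation, with $z_i$ marginalized out: by symmetry of the Student's $t$, $P(z_i > 0) = F_t(X_i \boldsymbol{\beta})$. Data names and the prior on $\boldsymbol{\beta}$ are illustrative assumptions, not taken from the course's Stan files.

data {
  int<lower=1> N;                        // observations
  int<lower=1> K;                        // predictors
  matrix[N, K] X;
  array[N] int<lower=0, upper=1> y;
}
parameters {
  vector[K] beta;
  real<lower=2> nu;                      // the slide restricts nu to [2, infinity)
}
model {
  real s = sqrt((nu - 2) / nu);          // error scale from the formula above
  vector[N] eta = X * beta;
  beta ~ normal(0, 2);                   // assumed weakly informative prior
  nu ~ gamma(2, 0.1);
  for (n in 1:N)
    y[n] ~ bernoulli(student_t_cdf(eta[n] | nu, 0, s));
}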
@@ -241,10 +242,10 @@ \subsubsection{Negative Binomial Mixture instead of Poisson}
\begin{aligned}
\mathbf{y}
& \begin{cases}
= 0, & \text{ if } S_i = 0 \\
\sim \text{Negative Binomial} \left( e^{(\alpha + \mathbf{X} \boldsymbol{\beta})}, \phi \right), & \text{ if } S_i = 1
\end{cases} \\
P(S_i = 1) & = \text{Logistic/Probit}(\mathbf{X} \boldsymbol{\gamma}) \\
= 0, & \text{ if } S_i = 0 \\
\sim \text{Negative Binomial} \left( e^{(\alpha + \mathbf{X} \boldsymbol{\beta})}, \phi \right), & \text{ if } S_i = 1
\end{cases} \\
P(S_i = 1) & = \text{Logistic/Probit}(\mathbf{X} \boldsymbol{\gamma}) \\
\gamma & \sim \text{Beta}(1, 1)
\end{aligned}
$$
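
A hedged Stan sketch of this mixture, marginalizing the latent indicator $S_i$ and using a logit link for $P(S_i = 1)$. Variable names and the normal priors are illustrative assumptions rather than the course's actual code.

data {
  int<lower=1> N;
  int<lower=1> K;
  matrix[N, K] X;
  array[N] int<lower=0> y;
}
parameters {
  real alpha;                            // count-component intercept
  vector[K] beta;                        // count-component coefficients
  vector[K] gamma;                       // selection (S_i) coefficients
  real<lower=0> phi;                     // negative binomial overdispersion
}
model {
  vector[N] eta = alpha + X * beta;      // log mean of the count component
  vector[N] prob_s = inv_logit(X * gamma);   // P(S_i = 1)
  alpha ~ normal(0, 2);
  beta ~ normal(0, 2);
  gamma ~ normal(0, 2);
  phi ~ gamma(0.01, 0.01);
  for (n in 1:N) {
    if (y[n] == 0)                       // a zero comes from S_i = 0 or from the NB itself
      target += log_sum_exp(log1m(prob_s[n]),
                            log(prob_s[n]) + neg_binomial_2_log_lpmf(0 | eta[n], phi));
    else                                 // positive counts require S_i = 1
      target += log(prob_s[n]) + neg_binomial_2_log_lpmf(y[n] | eta[n], phi);
  }
}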
122 changes: 23 additions & 99 deletions slides/10-Hierarchical_Models.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Hierarchical Models}

\subsection{Hierarchical Models - Recommended References}
@@ -525,19 +526,11 @@ \subsection{Approaches to Hierarchical Modeling}
\begin{frame}{Approaches to Hierarchical Modeling}
\begin{vfilleditems}
\item \textbf{Varying-intercept} model:
One group-level intercept besides the population-level
intercept and coefficients.
One group-level intercept besides the population-level coefficients.
\item \textbf{Varying-slope} model:
One or more group-level coefficient(s) besides the population-level
intercept and coefficients.
One or more group-level coefficient(s) besides the population-level intercept.
\item \textbf{Varying-intercept-slope} model:
One group-level intercept and one or more group-level coefficient(s) besides the population-level
intercept and coefficients.
\item \textbf{Correlated varying-intercept-slope} model:
One group-level intercept and one or more group-level coefficient(s) besides the population-level
intercept and coefficients.
Here the group-level intercept and coefficients priors
are \textbf{sampled from the same multivariate distribution}.
One group-level intercept and one or more group-level coefficient(s).
\end{vfilleditems}
\end{frame}

@@ -549,11 +542,11 @@ \subsection{Approaches to Hierarchical Modeling}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}\left( \alpha + \alpha_j + \mathbf{X} \cdot \boldsymbol{\beta}, \sigma \right) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\alpha_j & \sim \text{Normal}(0, \tau) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\mathbf{y} & \sim \text{Normal}\left(\alpha_j + \mathbf{X} \cdot \boldsymbol{\beta}, \sigma \right) \\
\alpha_j & \sim \text{Normal}(\alpha, \tau) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
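
A minimal Stan sketch of this centered parameterization, in which the group intercepts are sampled directly around the population intercept. Group indices, dimensions, and the specific hyperprior constants are assumptions for illustration.

data {
  int<lower=1> N;                          // observations
  int<lower=1> K;                          // predictors
  int<lower=1> J;                          // groups
  array[N] int<lower=1, upper=J> idx;      // group of each observation
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  real alpha;                              // population-level intercept
  vector[J] alpha_j;                       // group-level intercepts
  vector[K] beta;
  real<lower=0> tau;                       // between-group standard deviation
  real<lower=0> sigma;
}
model {
  alpha ~ normal(0, 10);
  alpha_j ~ normal(alpha, tau);            // centered: sampled around alpha
  beta ~ normal(0, 2);
  tau ~ cauchy(0, 2);                      // half-Cauchy via the <lower=0> constraint
  sigma ~ exponential(1);
  y ~ normal(alpha_j[idx] + X * beta, sigma);
}

The non-centered version of this same model appears in the additions to slides/11-MCMC.tex further down in this commit.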
@@ -564,98 +557,29 @@ \subsection{Approaches to Hierarchical Modeling}
such as $J_1, J_2, \dots$:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}(\alpha + \alpha_{j1} + \alpha_{j2} + \mathbf{X} \boldsymbol{\beta}, \sigma) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\alpha_{j1} & \sim \text{Normal}(0, \tau_{\alpha j1}) \\
\alpha_{j2} & \sim \text{Normal}(0, \tau_{\alpha j2}) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau_{\alpha j1} & \sim \text{Cauchy}^+(0, \psi_{\alpha j1}) \\
\tau_{\alpha j2} & \sim \text{Cauchy}^+(0, \psi_{\alpha j2}) \\
\mathbf{y} & \sim \text{Normal}(\alpha_{j1} + \alpha_{j2} + \mathbf{X} \boldsymbol{\beta}, \sigma) \\
\alpha_{j1} & \sim \text{Normal}(\alpha_1, \tau_{\alpha j1}) \\
\alpha_{j2} & \sim \text{Normal}(\alpha_2, \tau_{\alpha j2}) \\
\alpha_1 & \sim \text{Normal}(\mu_{\alpha 1}, \sigma_{\alpha 1}) \\
\alpha_2 & \sim \text{Normal}(\mu_{\alpha 2}, \sigma_{\alpha 2}) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau_{\alpha j1} & \sim \text{Cauchy}^+(0, \psi_{\alpha j1}) \\
\tau_{\alpha j2} & \sim \text{Cauchy}^+(0, \psi_{\alpha j2}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
\end{frame}
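
A corresponding Stan sketch with two grouping factors, again with illustrative names and hyperpriors; each factor gets its own mean and between-group scale, as in the formula above.

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J1;
  int<lower=1> J2;
  array[N] int<lower=1, upper=J1> idx1;    // membership in the first grouping factor
  array[N] int<lower=1, upper=J2> idx2;    // membership in the second grouping factor
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  real alpha1;
  real alpha2;
  vector[J1] alpha_j1;
  vector[J2] alpha_j2;
  vector[K] beta;
  real<lower=0> tau1;
  real<lower=0> tau2;
  real<lower=0> sigma;
}
model {
  alpha1 ~ normal(0, 10);
  alpha2 ~ normal(0, 10);
  alpha_j1 ~ normal(alpha1, tau1);
  alpha_j2 ~ normal(alpha2, tau2);
  beta ~ normal(0, 2);
  tau1 ~ cauchy(0, 2);
  tau2 ~ cauchy(0, 2);
  sigma ~ exponential(1);
  y ~ normal(alpha_j1[idx1] + alpha_j2[idx2] + X * beta, sigma);
}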

\begin{frame}{Mathematical Specification -- Varying-Slope Model}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}\left( \alpha + \mathbf{X} \cdot \boldsymbol{\beta} + \mathbf{X} \cdot \boldsymbol{\beta}_j \cdot \boldsymbol{\tau}, \sigma \right) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\boldsymbol{\beta}_j & \sim \text{Normal}(\mu_{\boldsymbol{\beta j}}, \sigma_{\boldsymbol{\beta j}}) \\
\boldsymbol{\tau} & \sim \text{Cauchy}^+(0, \psi_{\beta}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
$\boldsymbol{\tau}$ is a vector of priors for the group-level coefficients'
standard deviation.
\end{frame}

\begin{frame}{Mathematical Specification -- Varying-Slope Model}
If you need to extend to more than one group,
such as $J_1, J_2, \dots$:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}(\alpha + \mathbf{X} \cdot \boldsymbol{\beta} + \mathbf{X} \boldsymbol{\beta}_{j1} \cdot \boldsymbol{\tau}_{j1} + \mathbf{X} \boldsymbol{\beta}_{j2} \cdot \boldsymbol{\tau}_{j2}, \sigma) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\boldsymbol{\beta}_{j1} & \sim \text{Normal}(\mu_{\boldsymbol{\beta j1}}, \sigma_{\boldsymbol{\beta j1}}) \\
\boldsymbol{\beta}_{j2} & \sim \text{Normal}(\mu_{\boldsymbol{\beta j2}}, \sigma_{\boldsymbol{\beta j2}}) \\
\boldsymbol{\tau}_{\beta j1} & \sim \text{Cauchy}^+(0, \psi_{\beta j1}) \\
\boldsymbol{\tau}_{\beta j2} & \sim \text{Cauchy}^+(0, \psi_{\beta j2}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
\end{frame}

\begin{frame}{Mathematical Specification -- Varying-Intercept-Slope Model}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}\left( \alpha + \alpha_j + \mathbf{X} \cdot \boldsymbol{\beta} + \mathbf{X} \cdot \boldsymbol{\beta}_j \cdot \boldsymbol{\tau}_\beta, \sigma \right) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\alpha_j & \sim \text{Normal}(0, \tau_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\boldsymbol{\beta}_j & \sim \text{Normal}(\mu_{\boldsymbol{\beta j}}, \sigma_{\boldsymbol{\beta j}}) \\
\tau_\alpha & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\boldsymbol{\tau}_\beta & \sim \text{Cauchy}^+(0, \psi_{\beta}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
$\boldsymbol{\tau}_\beta$ is a vector of priors for the group-level coefficients'
standard deviation.
\end{frame}

\begin{frame}{Mathematical Specification -- Varying-Intercept-Slope Model}
If you need to extend to more than one group,
such as $J_1, J_2, \dots$:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}(\alpha + \alpha_j + \mathbf{X} \cdot \boldsymbol{\beta} + \mathbf{X} \boldsymbol{\beta}_{j1} \cdot \boldsymbol{\tau}_{j1} + \mathbf{X} \boldsymbol{\beta}_{j2} \cdot \boldsymbol{\tau}_{j2}, \sigma) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\alpha_j & \sim \text{Normal}(0, \tau_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\boldsymbol{\beta}_{j1} & \sim \text{Normal}(\mu_{\boldsymbol{\beta j1}}, \sigma_{\boldsymbol{\beta j1}}) \\
\boldsymbol{\beta}_{j2} & \sim \text{Normal}(\mu_{\boldsymbol{\beta j2}}, \sigma_{\boldsymbol{\beta j2}}) \\
\tau_\alpha & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\boldsymbol{\tau}_{\beta j1} & \sim \text{Cauchy}^+(0, \psi_{\beta j1}) \\
\boldsymbol{\tau}_{\beta j2} & \sim \text{Cauchy}^+(0, \psi_{\beta j2}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
\end{frame}

\begin{frame}{Mathematical Specification -- Correlated Varying-Slope Model}
Here we insert a column filled with $1$s in the data matrix $\mathbf{X}$.
\begin{frame}{Mathematical Specification -- Varying-(Intercept-)Slope Model}
If we want a varying intercept, we just insert a column filled with $1$s in the data matrix $\mathbf{X}$.
\vfill
Mathematically, this makes the column behave like an ``identity'' variable
(because the number $1$ in the multiplication operation $1 \cdot \beta$ is the identity element:
it maps $x \to x$, keeping the value of $x$ intact) and, consequently,
we can interpret the column's coefficient as the model's intercept.
\end{frame}
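
A tiny Stan fragment of the same trick, assuming the predictor matrix arrives without an intercept column; prepending a column of 1s makes the first coefficient act as the intercept. The names here are illustrative.

data {
  int<lower=1> N;
  int<lower=1> K;
  matrix[N, K] X;                              // predictors without an intercept column
}
transformed data {
  // the first column is all 1s, so its coefficient plays the role of the intercept
  matrix[N, K + 1] X_full = append_col(rep_vector(1.0, N), X);
}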

\begin{frame}{Mathematical Specification -- Correlated Varying-Slope Model}
\begin{frame}{Mathematical Specification -- Varying-(Intercept-)Slope Model}
Hence, we have as a data matrix:
$$
\mathbf{X} =
@@ -668,7 +592,7 @@ \subsection{Approaches to Hierarchical Modeling}
$$
\end{frame}

\begin{frame}{Mathematical Specification -- Correlated Varying-Slope Model}
\begin{frame}{Mathematical Specification -- Varying-(Intercept-)Slope Model}
This example is for linear regression:
$$
\begin{aligned}
@@ -681,11 +605,11 @@ \subsection{Approaches to Hierarchical Modeling}
$$
Each coefficient vector $\boldsymbol{\beta}_j$ represents the
model columns $\mathbf{X}$ coefficients for every group $j \in J$.
Also the first column of $\mathbf{X}$ is a column filled with $1$s
Also the first column of $\mathbf{X}$ could be a column filled with $1$s
(intercept).
\end{frame}
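
Since part of the formula is collapsed in this hunk, here is a hedged Stan sketch of one common centered formulation of the idea: each group's coefficient vector (its first entry acting as the intercept, per the 1s column above) is drawn from a shared multivariate normal. Names and hyperpriors are assumptions, not the repo's code.

data {
  int<lower=1> N;
  int<lower=1> K;                          // columns of X; the first assumed to be all 1s
  int<lower=1> J;
  array[N] int<lower=1, upper=J> idx;
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  array[J] vector[K] beta_j;               // one coefficient vector per group
  vector[K] mu_beta;                       // population-level means
  vector<lower=0>[K] tau;                  // group-level scales
  corr_matrix[K] Omega;                    // group-level correlation matrix
  real<lower=0> sigma;
}
model {
  vector[N] mu;
  for (n in 1:N)
    mu[n] = X[n] * beta_j[idx[n]];         // row n of X times its group's coefficient vector
  mu_beta ~ normal(0, 2);
  tau ~ cauchy(0, 2);
  Omega ~ lkj_corr(2);
  beta_j ~ multi_normal(mu_beta, quad_form_diag(Omega, tau));   // centered parameterization
  y ~ normal(mu, sigma);
}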

\begin{frame}{Mathematical Specification -- Correlated Varying-Slope Model}
\begin{frame}{Mathematical Specification -- Varying-(Intercept-)Slope Model}
If you need to extend to more than one group,
such as $J_1, J_2, \dots$:
$$
34 changes: 34 additions & 0 deletions slides/11-MCMC.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Markov Chain Monte Carlo (MCMC) and Model Metrics}

\subsection{Markov Chain Monte Carlo (MCMC) and Model Metrics - Recommended References}
@@ -913,6 +914,39 @@ \subsubsection{Limitations of HMC and NUTS}
$$
\end{frame}

\begin{frame}{Non-Centered Parameterization -- Varying-Intercept Model}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}\left(\alpha_j + \mathbf{X} \cdot \boldsymbol{\beta}, \sigma \right) \\
\alpha_j & = z_j \cdot \tau + \alpha \\
z_j & \sim \text{Normal}(0, 1) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
\end{frame}
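
A minimal Stan sketch of this non-centered parameterization. It matches the centered varying-intercept model earlier in the diff, but HMC now samples the standardized offsets z_j; names and hyperprior constants are illustrative.

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  array[N] int<lower=1, upper=J> idx;
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  real alpha;
  vector[J] z_j;                           // standardized group-level offsets
  vector[K] beta;
  real<lower=0> tau;
  real<lower=0> sigma;
}
transformed parameters {
  vector[J] alpha_j = alpha + z_j * tau;   // recover the group-level intercepts
}
model {
  alpha ~ normal(0, 10);
  z_j ~ std_normal();                      // the sampled quantity is standard normal
  beta ~ normal(0, 2);
  tau ~ cauchy(0, 2);
  sigma ~ exponential(1);
  y ~ normal(alpha_j[idx] + X * beta, sigma);
}

Sampling z_j instead of alpha_j removes the direct prior dependence between the sampled group effects and tau, which is the usual motivation for non-centering when HMC struggles with funnel-shaped posteriors.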

\begin{frame}{Non-Centered Parameterization -- Varying-(Intercept-)Slope Model}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}(\mathbf{X} \boldsymbol{\beta}_{j}, \sigma) \\
\boldsymbol{\beta}_j & = \boldsymbol{\gamma}_j \cdot \boldsymbol{\Sigma} \cdot \boldsymbol{\gamma}_j \\
\boldsymbol{\gamma}_j & \sim \text{Multivariate Normal}(\mathbf{0}, \mathbf{I})
\quad \text{for}\quad j \in \{ 1, \dots, J \} \\
\boldsymbol{\Sigma} & \sim \text{LKJ}(\eta) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
Each coefficient vector $\boldsymbol{\beta}_j$ represents the
model columns $\mathbf{X}$ coefficients for every group $j \in J$.
Also the first column of $\mathbf{X}$ could be a column filled with $1$s
(intercept).
\end{frame}
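
A hedged Stan sketch of how this construction is commonly coded: the covariance is expressed through a Cholesky-factored LKJ correlation prior plus a vector of scales tau, and the group coefficients are built from standard-normal gamma_j. This is a standard implementation pattern, not necessarily the exact form in the course's Stan files; all names are illustrative.

data {
  int<lower=1> N;
  int<lower=1> K;                              // columns of X; the first may be all 1s (intercept)
  int<lower=1> J;
  array[N] int<lower=1, upper=J> idx;
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  matrix[K, J] gamma;                          // standardized group-level effects
  cholesky_factor_corr[K] L_Omega;             // Cholesky factor of the correlation matrix
  vector<lower=0>[K] tau;                      // per-coefficient group-level scales
  real<lower=0> sigma;
}
transformed parameters {
  // column j holds group j's coefficients: beta_j = diag(tau) * L_Omega * gamma[, j]
  matrix[K, J] beta_j = diag_pre_multiply(tau, L_Omega) * gamma;
}
model {
  vector[N] mu;
  for (n in 1:N)
    mu[n] = X[n] * col(beta_j, idx[n]);        // row n of X times its group's coefficients
  to_vector(gamma) ~ std_normal();             // gamma_j ~ MultivariateNormal(0, I)
  L_Omega ~ lkj_corr_cholesky(2);              // eta = 2 assumed
  tau ~ cauchy(0, 2);
  sigma ~ exponential(1);
  y ~ normal(mu, sigma);
}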

\begin{frame}{\texttt{Stan} and NUTS}
\texttt{Stan} was the first MCMC sampler to implement NUTS.
Besides that, it has an automatic optimized adjustment routine for values of $L$ and $\epsilon$ during warmup.
1 change: 1 addition & 0 deletions slides/12-Model_Comparison.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Model Comparison}

\subsection{Model Comparison - Recommended References}
Binary file modified slides/slides.pdf
Binary file not shown.
