This repository was archived by the owner on Nov 1, 2024. It is now read-only.

Commit

Fix Hierarchical Stuff (#34)
* corrected hierarchical CP/NCP stuff in slides

* corrected hierarchical CP/NCP stuff in stan

* corrected hierarchical CP/NCP stuff in turing
storopoli authored Dec 26, 2022
1 parent 9efd563 commit da2fe5e
Showing 33 changed files with 83 additions and 771 deletions.
1 change: 1 addition & 0 deletions slides/00-Tools.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Tools}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/01-Bayesian_Statistics.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Bayesian Statistics}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/02-Statistical_Distributions.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Probability Distributions}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/03-Priors.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Priors}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/04-Predictive_Checks.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Predictive Checks}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/05-Linear_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Linear Regression}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/06-Logistic_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Logistic Regression}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/07-Ordinal_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Ordinal Regression}

\subsection{Recommended References}
1 change: 1 addition & 0 deletions slides/08-Poisson_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Poisson Regression}

\subsection{Recommended References}
15 changes: 8 additions & 7 deletions slides/09-Robust_Regression.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Robust Regression}

\subsection{Recommended References}
@@ -181,9 +182,9 @@ \subsubsection{Student's $t$ instead Binomial}
$z$'s errors, $\epsilon$, are distributed as a Student's $t$ distribution:
$$
\begin{aligned}
y_i & = \begin{cases} 0 & \text{if } z_i < 0 \\ 1 & \text{if } z_i > 0 \end{cases} \\
z_i & = X_i \boldsymbol{\beta} + \epsilon_i \\
\epsilon_i & \sim \text{Student} \left (\nu, 0, \sqrt{\frac{\nu - 2}{\nu}} \right) \\
y_i & = \begin{cases} 0 & \text{if } z_i < 0 \\ 1 & \text{if } z_i > 0 \end{cases} \\
z_i & = X_i \boldsymbol{\beta} + \epsilon_i \\
\epsilon_i & \sim \text{Student} \left (\nu, 0, \sqrt{\frac{\nu - 2}{\nu}} \right) \\
\nu & \sim \text{Gamma}(2, 0.1) \in \left[2, \infty \right)
\end{aligned}
$$
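
A minimal Stan sketch of this latent-variable ("robit") formulation, with $z_i$ marginalized out: by symmetry of the Student's $t$, $P(z_i > 0) = F_t(X_i \boldsymbol{\beta})$. Data names and the prior on $\boldsymbol{\beta}$ are illustrative assumptions, not taken from the course's Stan files.

data {
  int<lower=1> N;                        // observations
  int<lower=1> K;                        // predictors
  matrix[N, K] X;
  array[N] int<lower=0, upper=1> y;
}
parameters {
  vector[K] beta;
  real<lower=2> nu;                      // the slide restricts nu to [2, infinity)
}
model {
  real s = sqrt((nu - 2) / nu);          // error scale from the formula above
  vector[N] eta = X * beta;
  beta ~ normal(0, 2);                   // assumed weakly informative prior
  nu ~ gamma(2, 0.1);
  for (n in 1:N)
    y[n] ~ bernoulli(student_t_cdf(eta[n] | nu, 0, s));
}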
@@ -241,10 +242,10 @@ \subsubsection{Negative Binomial Mixture instead of Poisson}
\begin{aligned}
\mathbf{y}
& \begin{cases}
= 0, & \text{ if } S_i = 0 \\
\sim \text{Negative Binomial} \left( e^{(\alpha + \mathbf{X} \boldsymbol{\beta})}, \phi \right), & \text{ if } S_i = 1
\end{cases} \\
P(S_i = 1) & = \text{Logistic/Probit}(\mathbf{X} \boldsymbol{\gamma}) \\
= 0, & \text{ if } S_i = 0 \\
\sim \text{Negative Binomial} \left( e^{(\alpha + \mathbf{X} \boldsymbol{\beta})}, \phi \right), & \text{ if } S_i = 1
\end{cases} \\
P(S_i = 1) & = \text{Logistic/Probit}(\mathbf{X} \boldsymbol{\gamma}) \\
\gamma & \sim \text{Beta}(1, 1)
\end{aligned}
$$
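
A hedged Stan sketch of this mixture, marginalizing the latent indicator $S_i$ and using a logit link for $P(S_i = 1)$. Variable names and the normal priors are illustrative assumptions rather than the course's actual code.

data {
  int<lower=1> N;
  int<lower=1> K;
  matrix[N, K] X;
  array[N] int<lower=0> y;
}
parameters {
  real alpha;                            // count-component intercept
  vector[K] beta;                        // count-component coefficients
  vector[K] gamma;                       // selection (S_i) coefficients
  real<lower=0> phi;                     // negative binomial overdispersion
}
model {
  vector[N] eta = alpha + X * beta;      // log mean of the count component
  vector[N] prob_s = inv_logit(X * gamma);   // P(S_i = 1)
  alpha ~ normal(0, 2);
  beta ~ normal(0, 2);
  gamma ~ normal(0, 2);
  phi ~ gamma(0.01, 0.01);
  for (n in 1:N) {
    if (y[n] == 0)                       // a zero comes from S_i = 0 or from the NB itself
      target += log_sum_exp(log1m(prob_s[n]),
                            log(prob_s[n]) + neg_binomial_2_log_lpmf(0 | eta[n], phi));
    else                                 // positive counts require S_i = 1
      target += log(prob_s[n]) + neg_binomial_2_log_lpmf(y[n] | eta[n], phi);
  }
}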
122 changes: 23 additions & 99 deletions slides/10-Hierarchical_Models.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Hierarchical Models}

\subsection{Hierarchical Models - Recommended References}
@@ -525,19 +526,11 @@ \subsection{Approaches to Hierarchical Modeling}
\begin{frame}{Approaches to Hierarchical Modeling}
\begin{vfilleditems}
\item \textbf{Varying-intercept} model:
One group-level intercept besides the population-level
intercept and coefficients.
One group-level intercept besides the population-level coefficients.
\item \textbf{Varying-slope} model:
One or more group-level coefficient(s) besides the population-level
intercept and coefficients.
One or more group-level coefficient(s) besides the population-level intercept.
\item \textbf{Varying-intercept-slope} model:
One group-level intercept and one or more group-level coefficient(s) besides the population-level
intercept and coefficients.
\item \textbf{Correlated varying-intercept-slope} model:
One group-level intercept and one or more group-level coefficient(s) besides the population-level
intercept and coefficients.
Here the group-level intercept and coefficients priors
are \textbf{sampled from the same multivariate distribution}.
One group-level intercept and one or more group-level coefficient(s).
\end{vfilleditems}
\end{frame}

@@ -549,11 +542,11 @@ \subsection{Approaches to Hierarchical Modeling}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}\left( \alpha + \alpha_j + \mathbf{X} \cdot \boldsymbol{\beta}, \sigma \right) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\alpha_j & \sim \text{Normal}(0, \tau) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\mathbf{y} & \sim \text{Normal}\left(\alpha_j + \mathbf{X} \cdot \boldsymbol{\beta}, \sigma \right) \\
\alpha_j & \sim \text{Normal}(\alpha, \tau) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
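
A minimal Stan sketch of this centered parameterization, in which the group intercepts are sampled directly around the population intercept. Group indices, dimensions, and the specific hyperprior constants are assumptions for illustration.

data {
  int<lower=1> N;                          // observations
  int<lower=1> K;                          // predictors
  int<lower=1> J;                          // groups
  array[N] int<lower=1, upper=J> idx;      // group of each observation
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  real alpha;                              // population-level intercept
  vector[J] alpha_j;                       // group-level intercepts
  vector[K] beta;
  real<lower=0> tau;                       // between-group standard deviation
  real<lower=0> sigma;
}
model {
  alpha ~ normal(0, 10);
  alpha_j ~ normal(alpha, tau);            // centered: sampled around alpha
  beta ~ normal(0, 2);
  tau ~ cauchy(0, 2);                      // half-Cauchy via the <lower=0> constraint
  sigma ~ exponential(1);
  y ~ normal(alpha_j[idx] + X * beta, sigma);
}

The non-centered version of this same model appears in the additions to slides/11-MCMC.tex further down in this commit.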
@@ -564,98 +557,29 @@ \subsection{Approaches to Hierarchical Modeling}
such as $J_1, J_2, \dots$:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}(\alpha + \alpha_{j1} + \alpha_{j2} + \mathbf{X} \boldsymbol{\beta}, \sigma) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\alpha_{j1} & \sim \text{Normal}(0, \tau_{\alpha j1}) \\
\alpha_{j2} & \sim \text{Normal}(0, \tau_{\alpha j2}) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau_{\alpha j1} & \sim \text{Cauchy}^+(0, \psi_{\alpha j1}) \\
\tau_{\alpha j2} & \sim \text{Cauchy}^+(0, \psi_{\alpha j2}) \\
\mathbf{y} & \sim \text{Normal}(\alpha_{j1} + \alpha_{j2} + \mathbf{X} \boldsymbol{\beta}, \sigma) \\
\alpha_{j1} & \sim \text{Normal}(\alpha_1, \tau_{\alpha j1}) \\
\alpha_{j2} & \sim \text{Normal}(\alpha_2, \tau_{\alpha j2}) \\
\alpha_1 & \sim \text{Normal}(\mu_{\alpha 1}, \sigma_{\alpha 1}) \\
\alpha_2 & \sim \text{Normal}(\mu_{\alpha 2}, \sigma_{\alpha 2}) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau_{\alpha j1} & \sim \text{Cauchy}^+(0, \psi_{\alpha j1}) \\
\tau_{\alpha j2} & \sim \text{Cauchy}^+(0, \psi_{\alpha j2}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
\end{frame}
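
A corresponding Stan sketch with two grouping factors, again with illustrative names and hyperpriors; each factor gets its own mean and between-group scale, as in the formula above.

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J1;
  int<lower=1> J2;
  array[N] int<lower=1, upper=J1> idx1;    // membership in the first grouping factor
  array[N] int<lower=1, upper=J2> idx2;    // membership in the second grouping factor
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  real alpha1;
  real alpha2;
  vector[J1] alpha_j1;
  vector[J2] alpha_j2;
  vector[K] beta;
  real<lower=0> tau1;
  real<lower=0> tau2;
  real<lower=0> sigma;
}
model {
  alpha1 ~ normal(0, 10);
  alpha2 ~ normal(0, 10);
  alpha_j1 ~ normal(alpha1, tau1);
  alpha_j2 ~ normal(alpha2, tau2);
  beta ~ normal(0, 2);
  tau1 ~ cauchy(0, 2);
  tau2 ~ cauchy(0, 2);
  sigma ~ exponential(1);
  y ~ normal(alpha_j1[idx1] + alpha_j2[idx2] + X * beta, sigma);
}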

\begin{frame}{Mathematical Specification -- Varying-Slope Model}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}\left( \alpha + \mathbf{X} \cdot \boldsymbol{\beta} + \mathbf{X} \cdot \boldsymbol{\beta}_j \cdot \boldsymbol{\tau}, \sigma \right) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\boldsymbol{\beta}_j & \sim \text{Normal}(\mu_{\boldsymbol{\beta j}}, \sigma_{\boldsymbol{\beta j}}) \\
\boldsymbol{\tau} & \sim \text{Cauchy}^+(0, \psi_{\beta}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
$\boldsymbol{\tau}$ is a vector of priors for the group-level coefficients'
standard deviation.
\end{frame}

\begin{frame}{Mathematical Specification -- Varying-Slope Model}
If you need to extend to more than one group,
such as $J_1, J_2, \dots$:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}(\alpha + \mathbf{X} \cdot \boldsymbol{\beta} + \mathbf{X} \boldsymbol{\beta}_{j1} \cdot \boldsymbol{\tau}_{j1} + \mathbf{X} \boldsymbol{\beta}_{j2} \cdot \boldsymbol{\tau}_{j2}, \sigma) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\boldsymbol{\beta}_{j1} & \sim \text{Normal}(\mu_{\boldsymbol{\beta j1}}, \sigma_{\boldsymbol{\beta j1}}) \\
\boldsymbol{\beta}_{j2} & \sim \text{Normal}(\mu_{\boldsymbol{\beta j2}}, \sigma_{\boldsymbol{\beta j2}}) \\
\boldsymbol{\tau}_{\beta j1} & \sim \text{Cauchy}^+(0, \psi_{\beta j1}) \\
\boldsymbol{\tau}_{\beta j2} & \sim \text{Cauchy}^+(0, \psi_{\beta j2}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
\end{frame}

\begin{frame}{Mathematical Specification -- Varying-Intercept-Slope Model}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}\left( \alpha + \alpha_j + \mathbf{X} \cdot \boldsymbol{\beta} + \mathbf{X} \cdot \boldsymbol{\beta}_j \cdot \boldsymbol{\tau}_\beta, \sigma \right) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\alpha_j & \sim \text{Normal}(0, \tau_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\boldsymbol{\beta}_j & \sim \text{Normal}(\mu_{\boldsymbol{\beta j}}, \sigma_{\boldsymbol{\beta j}}) \\
\tau_\alpha & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\boldsymbol{\tau}_\beta & \sim \text{Cauchy}^+(0, \psi_{\beta}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
$\boldsymbol{\tau}_\beta$ is a vector of priors for the group-level coefficients'
standard deviation.
\end{frame}

\begin{frame}{Mathematical Specification -- Varying-Intercept-Slope Model}
If you need to extend to more than one group,
such as $J_1, J_2, \dots$:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}(\alpha + \alpha_j + \mathbf{X} \cdot \boldsymbol{\beta} + \mathbf{X} \boldsymbol{\beta}_{j1} \cdot \boldsymbol{\tau}_{j1} + \mathbf{X} \boldsymbol{\beta}_{j2} \cdot \boldsymbol{\tau}_{j2}, \sigma) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\alpha_j & \sim \text{Normal}(0, \tau_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\boldsymbol{\beta}_{j1} & \sim \text{Normal}(\mu_{\boldsymbol{\beta j1}}, \sigma_{\boldsymbol{\beta j1}}) \\
\boldsymbol{\beta}_{j2} & \sim \text{Normal}(\mu_{\boldsymbol{\beta j2}}, \sigma_{\boldsymbol{\beta j2}}) \\
\tau_\alpha & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\boldsymbol{\tau}_{\beta j1} & \sim \text{Cauchy}^+(0, \psi_{\beta j1}) \\
\boldsymbol{\tau}_{\beta j2} & \sim \text{Cauchy}^+(0, \psi_{\beta j2}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
\end{frame}

\begin{frame}{Mathematical Specification -- Correlated Varying-Slope Model}
Here we insert a column filled with $1$s in the data matrix $\mathbf{X}$.
\begin{frame}{Mathematical Specification -- Varying-(Intercept-)Slope Model}
If we want a varying intercept, we just insert a column filled with $1$s in the data matrix $\mathbf{X}$.
\vfill
Mathematically, this makes the column behave like an ``identity'' variable
(because the number $1$ in the multiplication operation $1 \cdot \beta$ is the identity element:
it maps $x \to x$, keeping the value of $x$ intact) and, consequently,
we can interpret the column's coefficient as the model's intercept.
\end{frame}
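
A tiny Stan fragment of the same trick, assuming the predictor matrix arrives without an intercept column; prepending a column of 1s makes the first coefficient act as the intercept. The names here are illustrative.

data {
  int<lower=1> N;
  int<lower=1> K;
  matrix[N, K] X;                              // predictors without an intercept column
}
transformed data {
  // the first column is all 1s, so its coefficient plays the role of the intercept
  matrix[N, K + 1] X_full = append_col(rep_vector(1.0, N), X);
}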

\begin{frame}{Mathematical Specification -- Correlated Varying-Slope Model}
\begin{frame}{Mathematical Specification -- Varying-(Intercept-)Slope Model}
Hence, we have as a data matrix:
$$
\mathbf{X} =
@@ -668,7 +592,7 @@ \subsection{Approaches to Hierarchical Modeling}
$$
\end{frame}

\begin{frame}{Mathematical Specification -- Correlated Varying-Slope Model}
\begin{frame}{Mathematical Specification -- Varying-(Intercept-)Slope Model}
This example is for linear regression:
$$
\begin{aligned}
@@ -681,11 +605,11 @@ \subsection{Approaches to Hierarchical Modeling}
$$
Each coefficient vector $\boldsymbol{\beta}_j$ represents the
model columns $\mathbf{X}$ coefficients for every group $j \in J$.
Also the first column of $\mathbf{X}$ is a column filled with $1$s
Also the first column of $\mathbf{X}$ could be a column filled with $1$s
(intercept).
\end{frame}
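
Since part of the formula is collapsed in this hunk, here is a hedged Stan sketch of one common centered formulation of the idea: each group's coefficient vector (its first entry acting as the intercept, per the 1s column above) is drawn from a shared multivariate normal. Names and hyperpriors are assumptions, not the repo's code.

data {
  int<lower=1> N;
  int<lower=1> K;                          // columns of X; the first assumed to be all 1s
  int<lower=1> J;
  array[N] int<lower=1, upper=J> idx;
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  array[J] vector[K] beta_j;               // one coefficient vector per group
  vector[K] mu_beta;                       // population-level means
  vector<lower=0>[K] tau;                  // group-level scales
  corr_matrix[K] Omega;                    // group-level correlation matrix
  real<lower=0> sigma;
}
model {
  vector[N] mu;
  for (n in 1:N)
    mu[n] = X[n] * beta_j[idx[n]];         // row n of X times its group's coefficient vector
  mu_beta ~ normal(0, 2);
  tau ~ cauchy(0, 2);
  Omega ~ lkj_corr(2);
  beta_j ~ multi_normal(mu_beta, quad_form_diag(Omega, tau));   // centered parameterization
  y ~ normal(mu, sigma);
}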

\begin{frame}{Mathematical Specification -- Correlated Varying-Slope Model}
\begin{frame}{Mathematical Specification -- Varying-(Intercept-)Slope Model}
If you need to extend to more than one group,
such as $J_1, J_2, \dots$:
$$
34 changes: 34 additions & 0 deletions slides/11-MCMC.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Markov Chain Monte Carlo (MCMC) and Model Metrics}

\subsection{Markov Chain Monte Carlo (MCMC) and Model Metrics - Recommended References}
@@ -913,6 +914,39 @@ \subsubsection{Limitations of HMC and NUTS}
$$
\end{frame}

\begin{frame}{Non-Centered Parameterization -- Varying-Intercept Model}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}\left(\alpha_j + \mathbf{X} \cdot \boldsymbol{\beta}, \sigma \right) \\
\alpha_j & = z_j \cdot \tau + \alpha \\
z_j & \sim \text{Normal}(0, 1) \\
\alpha & \sim \text{Normal}(\mu_\alpha, \sigma_\alpha) \\
\boldsymbol{\beta} & \sim \text{Normal}(\mu_{\boldsymbol{\beta}}, \sigma_{\boldsymbol{\beta}}) \\
\tau & \sim \text{Cauchy}^+(0, \psi_{\alpha}) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
\end{frame}
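
A minimal Stan sketch of this non-centered parameterization. It matches the centered varying-intercept model earlier in the diff, but HMC now samples the standardized offsets z_j; names and hyperprior constants are illustrative.

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  array[N] int<lower=1, upper=J> idx;
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  real alpha;
  vector[J] z_j;                           // standardized group-level offsets
  vector[K] beta;
  real<lower=0> tau;
  real<lower=0> sigma;
}
transformed parameters {
  vector[J] alpha_j = alpha + z_j * tau;   // recover the group-level intercepts
}
model {
  alpha ~ normal(0, 10);
  z_j ~ std_normal();                      // the sampled quantity is standard normal
  beta ~ normal(0, 2);
  tau ~ cauchy(0, 2);
  sigma ~ exponential(1);
  y ~ normal(alpha_j[idx] + X * beta, sigma);
}

Sampling z_j instead of alpha_j removes the direct prior dependence between the sampled group effects and tau, which is the usual motivation for non-centering when HMC struggles with funnel-shaped posteriors.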

\begin{frame}{Non-Centered Parameterization -- Varying-(Intercept-)Slope Model}
This example is for linear regression:
$$
\begin{aligned}
\mathbf{y} & \sim \text{Normal}(\mathbf{X} \boldsymbol{\beta}_{j}, \sigma) \\
\boldsymbol{\beta}_j & = \boldsymbol{\gamma}_j \cdot \boldsymbol{\Sigma} \cdot \boldsymbol{\gamma}_j \\
\boldsymbol{\gamma}_j & \sim \text{Multivariate Normal}(\mathbf{0}, \mathbf{I})
\quad \text{for}\quad j \in \{ 1, \dots, J \} \\
\boldsymbol{\Sigma} & \sim \text{LKJ}(\eta) \\
\sigma & \sim \text{Exponential}(\lambda_\sigma)
\end{aligned}
$$
Each coefficient vector $\boldsymbol{\beta}_j$ represents the
model columns $\mathbf{X}$ coefficients for every group $j \in J$.
Also the first column of $\mathbf{X}$ could be a column filled with $1$s
(intercept).
\end{frame}
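
A hedged Stan sketch of how this construction is commonly coded: the covariance is expressed through a Cholesky-factored LKJ correlation prior plus a vector of scales tau, and the group coefficients are built from standard-normal gamma_j. This is a standard implementation pattern, not necessarily the exact form in the course's Stan files; all names are illustrative.

data {
  int<lower=1> N;
  int<lower=1> K;                              // columns of X; the first may be all 1s (intercept)
  int<lower=1> J;
  array[N] int<lower=1, upper=J> idx;
  matrix[N, K] X;
  vector[N] y;
}
parameters {
  matrix[K, J] gamma;                          // standardized group-level effects
  cholesky_factor_corr[K] L_Omega;             // Cholesky factor of the correlation matrix
  vector<lower=0>[K] tau;                      // per-coefficient group-level scales
  real<lower=0> sigma;
}
transformed parameters {
  // column j holds group j's coefficients: beta_j = diag(tau) * L_Omega * gamma[, j]
  matrix[K, J] beta_j = diag_pre_multiply(tau, L_Omega) * gamma;
}
model {
  vector[N] mu;
  for (n in 1:N)
    mu[n] = X[n] * col(beta_j, idx[n]);        // row n of X times its group's coefficients
  to_vector(gamma) ~ std_normal();             // gamma_j ~ MultivariateNormal(0, I)
  L_Omega ~ lkj_corr_cholesky(2);              // eta = 2 assumed
  tau ~ cauchy(0, 2);
  sigma ~ exponential(1);
  y ~ normal(mu, sigma);
}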

\begin{frame}{\texttt{Stan} and NUTS}
\texttt{Stan} was the first MCMC sampler to implement NUTS.
Besides that, it has an automatic optimized adjustment routine for values of $L$ and $\epsilon$ during warmup.
1 change: 1 addition & 0 deletions slides/12-Model_Comparison.tex
@@ -1,3 +1,4 @@
% !TeX root = slides.tex
\section{Model Comparison}

\subsection{Model Comparison - Recommended References}
Binary file modified slides/slides.pdf
Binary file not shown.
