diff --git a/metadpy/bayesian.py b/metadpy/bayesian.py index 95b3606..4d89023 100644 --- a/metadpy/bayesian.py +++ b/metadpy/bayesian.py @@ -275,12 +275,17 @@ def hmetad( # Group level elif (within is None) & (between is None) & (subject is not None): - # pymcData = preprocess_group( - # data, subject, stimuli, accuracy, confidence, nRatings - # ) + pymcData = preprocess_group( + data, subject, stimuli, accuracy, confidence, nRatings + ) + + from metadpy.models.group_level_pymc import hmetad_groupLevel - raise ValueError( - "Invalid backend provided - This model is not implemented yet." + model_output = hmetad_groupLevel( + pymcData, + sample_model=sample_model, + num_chains=num_chains, + num_samples=num_samples, ) ################### @@ -409,90 +414,89 @@ def extractParameters( return data -# TODO: when implementing group level fitting, the following wrapper will be usefull. -# def preprocess_group( -# data: pd.DataFrame, -# subject: str, -# stimuli: str, -# accuracy: str, -# confidence: str, -# nRatings: int, -# ) -> Dict: -# """Preprocess group data. +def preprocess_group( + data: pd.DataFrame, + subject: str, + stimuli: str, + accuracy: str, + confidence: str, + nRatings: int, +) -> Dict: + """Preprocess group data. -# Parameters -# ---------- -# data : :py:class:`pandas.DataFrame` or None -# Dataframe. Note that this function can also directly be used as a -# Pandas method, in which case this argument is no longer needed. -# subject : string or None -# Name of column containing the subject identifier (only required if a -# within-subject or a between-subject factor is provided). -# stimuli : string or None -# Name of the column containing the stimuli. -# accuracy : string or None -# Name of the columns containing the accuracy. -# confidence : string or None -# Name of the column containing the confidence ratings. -# nRatings : int or None -# Number of discrete ratings. 
If a continuous rating scale was used, and -# the number of unique ratings does not match `nRatings`, will convert to -# discrete ratings using :py:func:`metadpy.utils.discreteRatings`. + Parameters + ---------- + data : :py:class:`pandas.DataFrame` or None + Dataframe. Note that this function can also directly be used as a + Pandas method, in which case this argument is no longer needed. + subject : string or None + Name of column containing the subject identifier (only required if a + within-subject or a between-subject factor is provided). + stimuli : string or None + Name of the column containing the stimuli. + accuracy : string or None + Name of the columns containing the accuracy. + confidence : string or None + Name of the column containing the confidence ratings. + nRatings : int or None + Number of discrete ratings. If a continuous rating scale was used, and + the number of unique ratings does not match `nRatings`, will convert to + discrete ratings using :py:func:`metadpy.utils.discreteRatings`. 
-# Return -# ------ -# pymcData : Dict + Return + ------ + pymcData : Dict -# """ -# pymcData = { -# "d1": [], -# "c1": [], -# "nSubj": data[subject].nunique(), -# "subID": np.arange(data[subject].nunique(), dtype="int"), -# "hits": [], -# "falsealarms": [], -# "s": [], -# "n": [], -# "counts": [], -# "nRatings": nRatings, -# "Tol": 1e-05, -# "cr": [], -# "m": [], -# } + """ + pymcData = { + "d1": [], + "c1": [], + "nSubj": data[subject].nunique(), + "subID": np.arange(data[subject].nunique(), dtype="int"), + "hits": [], + "falsealarms": [], + "s": [], + "n": [], + "counts": [], + "nRatings": nRatings, + "Tol": 1e-05, + "cr": [], + "m": [], + } -# for sub in data[subject].unique(): -# nR_S1, nR_S2 = trials2counts( -# data=data[data[subject] == sub], -# stimuli=stimuli, -# accuracy=accuracy, -# confidence=confidence, -# nRatings=nRatings, -# ) - -# this_data = extractParameters(nR_S1, nR_S2) -# pymcData["d1"].append(this_data["d1"]) -# pymcData["c1"].append(this_data["c1"]) -# pymcData["s"].append(this_data["S"]) -# pymcData["n"].append(this_data["N"]) -# pymcData["m"].append(this_data["M"]) -# pymcData["cr"].append(this_data["CR"]) -# pymcData["counts"].append(this_data["counts"]) -# pymcData["hits"].append(this_data["H"]) -# pymcData["falsealarms"].append(this_data["FA"]) - -# pymcData["d1"] = np.array(pymcData["d1"], dtype="float") -# pymcData["c1"] = np.array(pymcData["c1"], dtype="float") -# pymcData["s"] = np.array(pymcData["s"], dtype="int") -# pymcData["n"] = np.array(pymcData["n"], dtype="int") -# pymcData["m"] = np.array(pymcData["m"], dtype="int") -# pymcData["cr"] = np.array(pymcData["cr"], dtype="int") -# pymcData["counts"] = np.array(pymcData["counts"], dtype="int") -# pymcData["hits"] = np.array(pymcData["hits"], dtype="int") -# pymcData["falsealarms"] = np.array(pymcData["falsealarms"], dtype="int") -# pymcData["nSubj"] = data[subject].nunique() -# pymcData["subID"] = np.arange(pymcData["nSubj"], dtype="int") + for sub in data[subject].unique(): + 
nR_S1, nR_S2 = trials2counts( + data=data[data[subject] == sub], + stimuli=stimuli, + accuracy=accuracy, + confidence=confidence, + nRatings=nRatings, + ) -# return pymcData + this_data = extractParameters(nR_S1, nR_S2) + pymcData["d1"].append(this_data["d1"]) + pymcData["c1"].append(this_data["c1"]) + pymcData["s"].append(this_data["S"]) + pymcData["n"].append(this_data["N"]) + pymcData["m"].append(this_data["M"]) + pymcData["cr"].append(this_data["CR"]) + pymcData["counts"].append(this_data["counts"]) + pymcData["hits"].append(this_data["H"]) + pymcData["falsealarms"].append(this_data["FA"]) + + pymcData["d1"] = np.array(pymcData["d1"], dtype="float") + pymcData["c1"] = np.array(pymcData["c1"], dtype="float") + pymcData["s"] = np.array(pymcData["s"], dtype="int") + pymcData["n"] = np.array(pymcData["n"], dtype="int") + pymcData["m"] = np.array(pymcData["m"], dtype="int") + pymcData["cr"] = np.array(pymcData["cr"], dtype="int") + pymcData["counts"] = np.array(pymcData["counts"], dtype="int") + pymcData["hits"] = np.array(pymcData["hits"], dtype="int") + pymcData["falsealarms"] = np.array(pymcData["falsealarms"], dtype="int") + pymcData["nSubj"] = data[subject].nunique() + pymcData["subID"] = np.arange(pymcData["nSubj"], dtype="int") + + return pymcData # def preprocess_rm1way( diff --git a/metadpy/models/group_level_pymc.py b/metadpy/models/group_level_pymc.py new file mode 100644 index 0000000..2da4810 --- /dev/null +++ b/metadpy/models/group_level_pymc.py @@ -0,0 +1,227 @@ +# Author: Nicolas Legrand + +import pytensor.tensor as pt +import numpy as np +from pymc import Binomial, Deterministic, HalfNormal, Model, Multinomial, Normal, sample + + +def phi(x): + """Cumulative normal distribution.""" + return 0.5 + 0.5 * pt.erf(x / pt.sqrt(2)) + + +def hmetad_groupLevel( + data, sample_model=True, num_samples: int = 1000, num_chains: int = 4, **kwargs +): + """Hierarchical Bayesian modeling of meta-d' (group level). + + This is an internal function. 
The group level model must be called using + :py:func:`metadpy.bayesian.hmetad`. + + Parameters + ---------- + data : dict + Response data. + sample_model : boolean + If `False`, only the model is returned without sampling. + num_samples : int + The number of samples per chains to draw (defaults to `1000`). + num_chains : int + The number of chains (defaults to `4`). + **kwargs : keyword arguments + All keyword arguments are passed to `func::pymc.sampling.sample`. + + Returns + ------- + model : :py:class:`pymc.Model` instance + The pymc model. Encapsulates the variables and likelihood factors. + trace : :py:class:`pymc.backends.base.MultiTrace` or + :py:class:`arviz.InferenceData` + A `MultiTrace` or `ArviZ InferenceData` object that contains the samples. + + References + ---------- + .. [#] Fleming, S.M. (2017) HMeta-d: hierarchical Bayesian estimation of + metacognitive efficiency from confidence ratings, Neuroscience of Consciousness, + 3(1) nix007, https://doi.org/10.1093/nc/nix007 + + """ + nRatings = data["nRatings"] + nSubj = data["nSubj"] + + with Model() as model: + + # Group-level hyperpriors + mu_c1 = Normal("mu_c1", mu=0.0, sigma=1) + sigma_c1 = HalfNormal("sigma_c1", sigma=1) + + mu_d1 = Normal("mu_d1", mu=0.0, sigma=1) + sigma_d1 = HalfNormal("sigma_d1", sigma=1) + + mu_logMratio = Normal("mu_logMratio", mu=0.0, sigma=1) + sigma_logMratio = HalfNormal("sigma_logMratio", sigma=1) + + # Group-level hyperpriors for criteria + mu_cS1_hn = HalfNormal("mu_cS1_hn", sigma=1, shape=nRatings - 1) + sigma_cS1_hn = HalfNormal("sigma_cS1_hn", sigma=1) + + mu_cS2_hn = HalfNormal("mu_cS2_hn", sigma=1, shape=nRatings - 1) + sigma_cS2_hn = HalfNormal("sigma_cS2_hn", sigma=1) + + # Subject-level parameters - each drawn from group distribution + c1 = Normal("c1", mu=mu_c1, sigma=sigma_c1, shape=nSubj) + d1 = Normal("d1", mu=mu_d1, sigma=sigma_d1, shape=nSubj) + logMratio = Normal("logMratio", mu=mu_logMratio, sigma=sigma_logMratio, shape=nSubj) + + # Transform to 
meta_d + meta_d = Deterministic("meta_d", d1 * pt.exp(logMratio)) + + # Subject-level criteria + cS1_hn = HalfNormal("cS1_hn", sigma=sigma_cS1_hn, shape=(nSubj, nRatings - 1)) + cS2_hn = HalfNormal("cS2_hn", sigma=sigma_cS2_hn, shape=(nSubj, nRatings - 1)) + + # For each subject, fit the exact same model as the original subject-level model + for s in range(nSubj): + # Get subject-specific parameters + c1_s = c1[s] + d1_s = d1[s] + meta_d_s = meta_d[s] + + # Subject-specific criteria using the exact same pattern as original + cS1_s = Deterministic(f"cS1_{s}", pt.sort(-cS1_hn[s, :]) + (c1_s - data["Tol"])) + cS2_s = Deterministic(f"cS2_{s}", pt.sort(cS2_hn[s, :]) + (c1_s - data["Tol"])) + + # TYPE 1 SDT BINOMIAL MODEL (exact same as original) + h_s = phi(d1_s / 2 - c1_s) + f_s = phi(-d1_s / 2 - c1_s) + H_s = Binomial(f"H_{s}", data["s"][s], h_s, observed=data["hits"][s]) + FA_s = Binomial(f"FA_{s}", data["n"][s], f_s, observed=data["falsealarms"][s]) + + # TYPE 2 SDT MODEL - exact same calculations as original + # Means of SDT distributions + S2mu = pt.flatten(meta_d_s / 2, 1) + S1mu = pt.flatten(-meta_d_s / 2, 1) + + # Calculate normalisation constants + C_area_rS1 = phi(c1_s - S1mu) + I_area_rS1 = phi(c1_s - S2mu) + C_area_rS2 = 1 - phi(c1_s - S2mu) + I_area_rS2 = 1 - phi(c1_s - S1mu) + + # Get nC_rS1 probs - exact same pattern as original + nC_rS1 = phi(cS1_s - S1mu) / C_area_rS1 + nC_rS1 = Deterministic( + f"nC_rS1_{s}", + pt.concatenate( + ( + [ + phi(cS1_s[0] - S1mu) / C_area_rS1, + nC_rS1[1:] - nC_rS1[:-1], + ( + (phi(c1_s - S1mu) - phi(cS1_s[(nRatings - 2)] - S1mu)) + / C_area_rS1 + ), + ] + ), + axis=0, + ), + ) + + # Get nI_rS2 probs - exact same pattern as original + nI_rS2 = (1 - phi(cS2_s - S1mu)) / I_area_rS2 + nI_rS2 = Deterministic( + f"nI_rS2_{s}", + pt.concatenate( + ( + [ + ((1 - phi(c1_s - S1mu)) - (1 - phi(cS2_s[0] - S1mu))) / I_area_rS2, + nI_rS2[:-1] - (1 - phi(cS2_s[1:] - S1mu)) / I_area_rS2, + (1 - phi(cS2_s[nRatings - 2] - S1mu)) / 
I_area_rS2, + ] + ), + axis=0, + ), + ) + + # Get nI_rS1 probs - exact same pattern as original + nI_rS1 = (-phi(cS1_s - S2mu)) / I_area_rS1 + nI_rS1 = Deterministic( + f"nI_rS1_{s}", + pt.concatenate( + ( + [ + phi(cS1_s[0] - S2mu) / I_area_rS1, + nI_rS1[:-1] + (phi(cS1_s[1:] - S2mu)) / I_area_rS1, + (phi(c1_s - S2mu) - phi(cS1_s[(nRatings - 2)] - S2mu)) / I_area_rS1, + ] + ), + axis=0, + ), + ) + + # Get nC_rS2 probs - exact same pattern as original + nC_rS2 = (1 - phi(cS2_s - S2mu)) / C_area_rS2 + nC_rS2 = Deterministic( + f"nC_rS2_{s}", + pt.concatenate( + ( + [ + ((1 - phi(c1_s - S2mu)) - (1 - phi(cS2_s[0] - S2mu))) / C_area_rS2, + nC_rS2[:-1] - ((1 - phi(cS2_s[1:] - S2mu)) / C_area_rS2), + (1 - phi(cS2_s[nRatings - 2] - S2mu)) / C_area_rS2, + ] + ), + axis=0, + ), + ) + + # Avoid underflow of probabilities - exact same as original + nC_rS1 = pt.switch(nC_rS1 < data["Tol"], data["Tol"], nC_rS1) + nI_rS2 = pt.switch(nI_rS2 < data["Tol"], data["Tol"], nI_rS2) + nI_rS1 = pt.switch(nI_rS1 < data["Tol"], data["Tol"], nI_rS1) + nC_rS2 = pt.switch(nC_rS2 < data["Tol"], data["Tol"], nC_rS2) + + # TYPE 2 SDT MODEL Multinomial likelihood - exact same pattern as original + subject_counts = data["counts"][s, :] + + Multinomial( + f"CR_counts_{s}", + n=data["cr"][s], + p=nC_rS1, + shape=nRatings, + observed=subject_counts[:nRatings], + ) + Multinomial( + f"FA_counts_{s}", + n=data["falsealarms"][s], + p=nI_rS2, + shape=nRatings, + observed=subject_counts[nRatings : nRatings * 2], + ) + Multinomial( + f"M_counts_{s}", + n=data["m"][s], + p=nI_rS1, + shape=nRatings, + observed=subject_counts[nRatings * 2 : nRatings * 3], + ) + Multinomial( + f"H_counts_{s}", + n=data["hits"][s], + p=nC_rS2, + shape=nRatings, + observed=subject_counts[nRatings * 3 : nRatings * 4], + ) + + if sample_model is True: + trace = sample( + return_inferencedata=True, + chains=num_chains, + draws=num_samples, + **kwargs + ) + + return model, trace + + else: + return model, None \ No newline at end of file 
diff --git a/metadpy/tests/test_bayesian.py b/metadpy/tests/test_bayesian.py index 000a85b..50cc904 100644 --- a/metadpy/tests/test_bayesian.py +++ b/metadpy/tests/test_bayesian.py @@ -121,6 +121,64 @@ def test_hmetad(self): assert round(pymc_df["meta_d"].values[0], 2) - 1.58 < 0.01 assert round(pymc_df["m_ratio"].values[0], 2) - 1.03 < 0.01 + ################## + # Test group level + ################## + group_df = load_dataset("rm") + test_df = group_df[group_df['Subject'].isin([0, 1])] # Just 2 subjects for testing + + # Test model compilation only + model, _ = hmetad( + data=test_df, + subject='Subject', + nRatings=4, + stimuli='Stimuli', + accuracy='Accuracy', + confidence='Confidence', + sample_model=False + ) + assert isinstance(model, pm.Model) + + # Check that group-level variables exist + model_vars = list(model.named_vars.keys()) + assert 'mu_d1' in model_vars + assert 'mu_c1' in model_vars + assert 'mu_logMratio' in model_vars + assert 'sigma_d1' in model_vars + assert 'sigma_c1' in model_vars + assert 'sigma_logMratio' in model_vars + + # Check that subject-specific variables exist + assert 'c1' in model_vars # Should be shape (nSubj,) + assert 'd1' in model_vars # Should be shape (nSubj,) + assert 'meta_d' in model_vars # Should be shape (nSubj,) + + # Test with sampling (minimal for speed) + model, trace = hmetad( + data=test_df, + subject='Subject', + nRatings=4, + stimuli='Stimuli', + accuracy='Accuracy', + confidence='Confidence', + sample_model=True, + num_samples=10, # Very small for testing + num_chains=1, + tune=10 + ) + assert isinstance(model, pm.Model) + assert hasattr(trace, 'posterior') + + # Check that group parameters are in trace + group_vars = list(trace.posterior.data_vars.keys()) + assert 'mu_d1' in group_vars + assert 'mu_c1' in group_vars + assert 'mu_logMratio' in group_vars + + # Check dimensions + assert trace.posterior.mu_d1.dims == ('chain', 'draw') # Group-level scalar has chain, draw dims + assert trace.posterior.c1.dims == 
('chain', 'draw', 'c1_dim_0') # Vector has additional subject dim + if __name__ == "__main__": unittest.main(argv=["first-arg-is-ignored"], exit=False)