diff --git a/metadpy/bayesian.py b/metadpy/bayesian.py
index 4d89023..95b3606 100644
--- a/metadpy/bayesian.py
+++ b/metadpy/bayesian.py
@@ -275,17 +275,12 @@ def hmetad(
     # Group level
     elif (within is None) & (between is None) & (subject is not None):
 
-        pymcData = preprocess_group(
-            data, subject, stimuli, accuracy, confidence, nRatings
-        )
-
-        from group_level_pymc import hmetad_groupLevel
+        # pymcData = preprocess_group(
+        #     data, subject, stimuli, accuracy, confidence, nRatings
+        # )
 
-        model_output = hmetad_groupLevel(
-            pymcData,
-            sample_model=sample_model,
-            num_chains=num_chains,
-            num_samples=num_samples,
+        raise ValueError(
+            "Invalid backend provided - This model is not implemented yet."
         )
 
     ###################
@@ -414,89 +409,90 @@ def extractParameters(
     return data
 
 
-def preprocess_group(
-    data: pd.DataFrame,
-    subject: str,
-    stimuli: str,
-    accuracy: str,
-    confidence: str,
-    nRatings: int,
-) -> Dict:
-    """Preprocess group data.
+# TODO: when implementing group level fitting, the following wrapper will be usefull.
+# def preprocess_group(
+#     data: pd.DataFrame,
+#     subject: str,
+#     stimuli: str,
+#     accuracy: str,
+#     confidence: str,
+#     nRatings: int,
+# ) -> Dict:
+#     """Preprocess group data.
 
-    Parameters
-    ----------
-    data : :py:class:`pandas.DataFrame` or None
-        Dataframe. Note that this function can also directly be used as a
-        Pandas method, in which case this argument is no longer needed.
-    subject : string or None
-        Name of column containing the subject identifier (only required if a
-        within-subject or a between-subject factor is provided).
-    stimuli : string or None
-        Name of the column containing the stimuli.
-    accuracy : string or None
-        Name of the columns containing the accuracy.
-    confidence : string or None
-        Name of the column containing the confidence ratings.
-    nRatings : int or None
-        Number of discrete ratings. If a continuous rating scale was used, and
-        the number of unique ratings does not match `nRatings`, will convert to
-        discrete ratings using :py:func:`metadpy.utils.discreteRatings`.
+#     Parameters
+#     ----------
+#     data : :py:class:`pandas.DataFrame` or None
+#         Dataframe. Note that this function can also directly be used as a
+#         Pandas method, in which case this argument is no longer needed.
+#     subject : string or None
+#         Name of column containing the subject identifier (only required if a
+#         within-subject or a between-subject factor is provided).
+#     stimuli : string or None
+#         Name of the column containing the stimuli.
+#     accuracy : string or None
+#         Name of the columns containing the accuracy.
+#     confidence : string or None
+#         Name of the column containing the confidence ratings.
+#     nRatings : int or None
+#         Number of discrete ratings. If a continuous rating scale was used, and
+#         the number of unique ratings does not match `nRatings`, will convert to
+#         discrete ratings using :py:func:`metadpy.utils.discreteRatings`.
 
-    Return
-    ------
-    pymcData : Dict
+#     Return
+#     ------
+#     pymcData : Dict
 
-    """
-    pymcData = {
-        "d1": [],
-        "c1": [],
-        "nSubj": data[subject].nunique(),
-        "subID": np.arange(data[subject].nunique(), dtype="int"),
-        "hits": [],
-        "falsealarms": [],
-        "s": [],
-        "n": [],
-        "counts": [],
-        "nRatings": nRatings,
-        "Tol": 1e-05,
-        "cr": [],
-        "m": [],
-    }
+#     """
+#     pymcData = {
+#         "d1": [],
+#         "c1": [],
+#         "nSubj": data[subject].nunique(),
+#         "subID": np.arange(data[subject].nunique(), dtype="int"),
+#         "hits": [],
+#         "falsealarms": [],
+#         "s": [],
+#         "n": [],
+#         "counts": [],
+#         "nRatings": nRatings,
+#         "Tol": 1e-05,
+#         "cr": [],
+#         "m": [],
+#     }
 
-    for sub in data[subject].unique():
-        nR_S1, nR_S2 = trials2counts(
-            data=data[data[subject] == sub],
-            stimuli=stimuli,
-            accuracy=accuracy,
-            confidence=confidence,
-            nRatings=nRatings,
-        )
+#     for sub in data[subject].unique():
+#         nR_S1, nR_S2 = trials2counts(
+#             data=data[data[subject] == sub],
+#             stimuli=stimuli,
+#             accuracy=accuracy,
+#             confidence=confidence,
+#             nRatings=nRatings,
+#         )
+
+#         this_data = extractParameters(nR_S1, nR_S2)
+#         pymcData["d1"].append(this_data["d1"])
+#         pymcData["c1"].append(this_data["c1"])
+#         pymcData["s"].append(this_data["S"])
+#         pymcData["n"].append(this_data["N"])
+#         pymcData["m"].append(this_data["M"])
+#         pymcData["cr"].append(this_data["CR"])
+#         pymcData["counts"].append(this_data["counts"])
+#         pymcData["hits"].append(this_data["H"])
+#         pymcData["falsealarms"].append(this_data["FA"])
+
+#     pymcData["d1"] = np.array(pymcData["d1"], dtype="float")
+#     pymcData["c1"] = np.array(pymcData["c1"], dtype="float")
+#     pymcData["s"] = np.array(pymcData["s"], dtype="int")
+#     pymcData["n"] = np.array(pymcData["n"], dtype="int")
+#     pymcData["m"] = np.array(pymcData["m"], dtype="int")
+#     pymcData["cr"] = np.array(pymcData["cr"], dtype="int")
+#     pymcData["counts"] = np.array(pymcData["counts"], dtype="int")
+#     pymcData["hits"] = np.array(pymcData["hits"], dtype="int")
+#     pymcData["falsealarms"] = np.array(pymcData["falsealarms"], dtype="int")
+#     pymcData["nSubj"] = data[subject].nunique()
+#     pymcData["subID"] = np.arange(pymcData["nSubj"], dtype="int")
 
-        this_data = extractParameters(nR_S1, nR_S2)
-        pymcData["d1"].append(this_data["d1"])
-        pymcData["c1"].append(this_data["c1"])
-        pymcData["s"].append(this_data["S"])
-        pymcData["n"].append(this_data["N"])
-        pymcData["m"].append(this_data["M"])
-        pymcData["cr"].append(this_data["CR"])
-        pymcData["counts"].append(this_data["counts"])
-        pymcData["hits"].append(this_data["H"])
-        pymcData["falsealarms"].append(this_data["FA"])
-
-    pymcData["d1"] = np.array(pymcData["d1"], dtype="float")
-    pymcData["c1"] = np.array(pymcData["c1"], dtype="float")
-    pymcData["s"] = np.array(pymcData["s"], dtype="int")
-    pymcData["n"] = np.array(pymcData["n"], dtype="int")
-    pymcData["m"] = np.array(pymcData["m"], dtype="int")
-    pymcData["cr"] = np.array(pymcData["cr"], dtype="int")
-    pymcData["counts"] = np.array(pymcData["counts"], dtype="int")
-    pymcData["hits"] = np.array(pymcData["hits"], dtype="int")
-    pymcData["falsealarms"] = np.array(pymcData["falsealarms"], dtype="int")
-    pymcData["nSubj"] = data[subject].nunique()
-    pymcData["subID"] = np.arange(pymcData["nSubj"], dtype="int")
-
-    return pymcData
+#     return pymcData
 
 
 # def preprocess_rm1way(
diff --git a/metadpy/models/group_level_pymc.py b/metadpy/models/group_level_pymc.py
deleted file mode 100644
index 2da4810..0000000
--- a/metadpy/models/group_level_pymc.py
+++ /dev/null
@@ -1,227 +0,0 @@
-# Author: Nicolas Legrand <nicolas.legrand@cfin.au.dk>
-
-import pytensor.tensor as pt
-import numpy as np
-from pymc import Binomial, Deterministic, HalfNormal, Model, Multinomial, Normal, sample
-
-
-def phi(x):
-    """Cumulative normal distribution."""
-    return 0.5 + 0.5 * pt.erf(x / pt.sqrt(2))
-
-
-def hmetad_groupLevel(
-    data, sample_model=True, num_samples: int = 1000, num_chains: int = 4, **kwargs
-):
-    """Hierarchical Bayesian modeling of meta-d' (group level).
-
-    This is an internal function. The group level model must be called using
-    :py:func:`metadpy.bayesian.hmetad`.
-
-    Parameters
-    ----------
-    data : dict
-        Response data.
-    sample_model : boolean
-        If `False`, only the model is returned without sampling.
-    num_samples : int
-        The number of samples per chains to draw (defaults to `1000`).
-    num_chains : int
-        The number of chains (defaults to `4`).
-    **kwargs : keyword arguments
-        All keyword arguments are passed to `func::pymc.sampling.sample`.
-
-    Returns
-    -------
-    model : :py:class:`pymc.Model` instance
-        The pymc model. Encapsulates the variables and likelihood factors.
-    trace : :py:class:`pymc.backends.base.MultiTrace` or
-        :py:class:`arviz.InferenceData`
-        A `MultiTrace` or `ArviZ InferenceData` object that contains the samples.
-
-    References
-    ----------
-    .. [#] Fleming, S.M. (2017) HMeta-d: hierarchical Bayesian estimation of
-    metacognitive efficiency from confidence ratings, Neuroscience of Consciousness,
-    3(1) nix007, https://doi.org/10.1093/nc/nix007
-
-    """
-    nRatings = data["nRatings"]
-    nSubj = data["nSubj"]
-    
-    with Model() as model:
-
-        # Group-level hyperpriors
-        mu_c1 = Normal("mu_c1", mu=0.0, sigma=1)
-        sigma_c1 = HalfNormal("sigma_c1", sigma=1)
-        
-        mu_d1 = Normal("mu_d1", mu=0.0, sigma=1)
-        sigma_d1 = HalfNormal("sigma_d1", sigma=1)
-
-        mu_logMratio = Normal("mu_logMratio", mu=0.0, sigma=1)
-        sigma_logMratio = HalfNormal("sigma_logMratio", sigma=1)
-
-        # Group-level hyperpriors for criteria
-        mu_cS1_hn = HalfNormal("mu_cS1_hn", sigma=1, shape=nRatings - 1)
-        sigma_cS1_hn = HalfNormal("sigma_cS1_hn", sigma=1)
-        
-        mu_cS2_hn = HalfNormal("mu_cS2_hn", sigma=1, shape=nRatings - 1)
-        sigma_cS2_hn = HalfNormal("sigma_cS2_hn", sigma=1)
-
-        # Subject-level parameters - each drawn from group distribution
-        c1 = Normal("c1", mu=mu_c1, sigma=sigma_c1, shape=nSubj)
-        d1 = Normal("d1", mu=mu_d1, sigma=sigma_d1, shape=nSubj)
-        logMratio = Normal("logMratio", mu=mu_logMratio, sigma=sigma_logMratio, shape=nSubj)
-        
-        # Transform to meta_d
-        meta_d = Deterministic("meta_d", d1 * pt.exp(logMratio))
-
-        # Subject-level criteria
-        cS1_hn = HalfNormal("cS1_hn", sigma=sigma_cS1_hn, shape=(nSubj, nRatings - 1))
-        cS2_hn = HalfNormal("cS2_hn", sigma=sigma_cS2_hn, shape=(nSubj, nRatings - 1))
-
-        # For each subject, fit the exact same model as the original subject-level model
-        for s in range(nSubj):
-            # Get subject-specific parameters
-            c1_s = c1[s]
-            d1_s = d1[s]
-            meta_d_s = meta_d[s]
-            
-            # Subject-specific criteria using the exact same pattern as original
-            cS1_s = Deterministic(f"cS1_{s}", pt.sort(-cS1_hn[s, :]) + (c1_s - data["Tol"]))
-            cS2_s = Deterministic(f"cS2_{s}", pt.sort(cS2_hn[s, :]) + (c1_s - data["Tol"]))
-
-            # TYPE 1 SDT BINOMIAL MODEL (exact same as original)
-            h_s = phi(d1_s / 2 - c1_s)
-            f_s = phi(-d1_s / 2 - c1_s)
-            H_s = Binomial(f"H_{s}", data["s"][s], h_s, observed=data["hits"][s])
-            FA_s = Binomial(f"FA_{s}", data["n"][s], f_s, observed=data["falsealarms"][s])
-
-            # TYPE 2 SDT MODEL - exact same calculations as original
-            # Means of SDT distributions
-            S2mu = pt.flatten(meta_d_s / 2, 1)
-            S1mu = pt.flatten(-meta_d_s / 2, 1)
-
-            # Calculate normalisation constants
-            C_area_rS1 = phi(c1_s - S1mu)
-            I_area_rS1 = phi(c1_s - S2mu)
-            C_area_rS2 = 1 - phi(c1_s - S2mu)
-            I_area_rS2 = 1 - phi(c1_s - S1mu)
-
-            # Get nC_rS1 probs - exact same pattern as original
-            nC_rS1 = phi(cS1_s - S1mu) / C_area_rS1
-            nC_rS1 = Deterministic(
-                f"nC_rS1_{s}",
-                pt.concatenate(
-                    (
-                        [
-                            phi(cS1_s[0] - S1mu) / C_area_rS1,
-                            nC_rS1[1:] - nC_rS1[:-1],
-                            (
-                                (phi(c1_s - S1mu) - phi(cS1_s[(nRatings - 2)] - S1mu))
-                                / C_area_rS1
-                            ),
-                        ]
-                    ),
-                    axis=0,
-                ),
-            )
-
-            # Get nI_rS2 probs - exact same pattern as original
-            nI_rS2 = (1 - phi(cS2_s - S1mu)) / I_area_rS2
-            nI_rS2 = Deterministic(
-                f"nI_rS2_{s}",
-                pt.concatenate(
-                    (
-                        [
-                            ((1 - phi(c1_s - S1mu)) - (1 - phi(cS2_s[0] - S1mu))) / I_area_rS2,
-                            nI_rS2[:-1] - (1 - phi(cS2_s[1:] - S1mu)) / I_area_rS2,
-                            (1 - phi(cS2_s[nRatings - 2] - S1mu)) / I_area_rS2,
-                        ]
-                    ),
-                    axis=0,
-                ),
-            )
-
-            # Get nI_rS1 probs - exact same pattern as original  
-            nI_rS1 = (-phi(cS1_s - S2mu)) / I_area_rS1
-            nI_rS1 = Deterministic(
-                f"nI_rS1_{s}",
-                pt.concatenate(
-                    (
-                        [
-                            phi(cS1_s[0] - S2mu) / I_area_rS1,
-                            nI_rS1[:-1] + (phi(cS1_s[1:] - S2mu)) / I_area_rS1,
-                            (phi(c1_s - S2mu) - phi(cS1_s[(nRatings - 2)] - S2mu)) / I_area_rS1,
-                        ]
-                    ),
-                    axis=0,
-                ),
-            )
-
-            # Get nC_rS2 probs - exact same pattern as original
-            nC_rS2 = (1 - phi(cS2_s - S2mu)) / C_area_rS2
-            nC_rS2 = Deterministic(
-                f"nC_rS2_{s}",
-                pt.concatenate(
-                    (
-                        [
-                            ((1 - phi(c1_s - S2mu)) - (1 - phi(cS2_s[0] - S2mu))) / C_area_rS2,
-                            nC_rS2[:-1] - ((1 - phi(cS2_s[1:] - S2mu)) / C_area_rS2),
-                            (1 - phi(cS2_s[nRatings - 2] - S2mu)) / C_area_rS2,
-                        ]
-                    ),
-                    axis=0,
-                ),
-            )
-
-            # Avoid underflow of probabilities - exact same as original
-            nC_rS1 = pt.switch(nC_rS1 < data["Tol"], data["Tol"], nC_rS1)
-            nI_rS2 = pt.switch(nI_rS2 < data["Tol"], data["Tol"], nI_rS2)
-            nI_rS1 = pt.switch(nI_rS1 < data["Tol"], data["Tol"], nI_rS1)
-            nC_rS2 = pt.switch(nC_rS2 < data["Tol"], data["Tol"], nC_rS2)
-
-            # TYPE 2 SDT MODEL Multinomial likelihood - exact same pattern as original
-            subject_counts = data["counts"][s, :]
-            
-            Multinomial(
-                f"CR_counts_{s}",
-                n=data["cr"][s],
-                p=nC_rS1,
-                shape=nRatings,
-                observed=subject_counts[:nRatings],
-            )
-            Multinomial(
-                f"FA_counts_{s}",
-                n=data["falsealarms"][s],
-                p=nI_rS2,
-                shape=nRatings,
-                observed=subject_counts[nRatings : nRatings * 2],
-            )
-            Multinomial(
-                f"M_counts_{s}",
-                n=data["m"][s],
-                p=nI_rS1,
-                shape=nRatings,
-                observed=subject_counts[nRatings * 2 : nRatings * 3],
-            )
-            Multinomial(
-                f"H_counts_{s}",
-                n=data["hits"][s],
-                p=nC_rS2,
-                shape=nRatings,
-                observed=subject_counts[nRatings * 3 : nRatings * 4],
-            )
-
-        if sample_model is True:
-            trace = sample(
-                return_inferencedata=True,
-                chains=num_chains,
-                draws=num_samples,
-                **kwargs
-            )
-
-            return model, trace
-
-        else:
-            return model
\ No newline at end of file
diff --git a/metadpy/tests/test_bayesian.py b/metadpy/tests/test_bayesian.py
index 50cc904..000a85b 100644
--- a/metadpy/tests/test_bayesian.py
+++ b/metadpy/tests/test_bayesian.py
@@ -121,64 +121,6 @@ def test_hmetad(self):
         assert round(pymc_df["meta_d"].values[0], 2) - 1.58 < 0.01
         assert round(pymc_df["m_ratio"].values[0], 2) - 1.03 < 0.01
 
-        ##################
-        # Test group level
-        ##################
-        group_df = load_dataset("rm")
-        test_df = group_df[group_df['Subject'].isin([0, 1])]  # Just 2 subjects for testing
-
-        # Test model compilation only
-        model, _ = hmetad(
-            data=test_df,
-            subject='Subject',
-            nRatings=4,
-            stimuli='Stimuli',
-            accuracy='Accuracy',
-            confidence='Confidence',
-            sample_model=False
-        )
-        assert isinstance(model, pm.Model)
-        
-        # Check that group-level variables exist
-        model_vars = list(model.named_vars.keys())
-        assert 'mu_d1' in model_vars
-        assert 'mu_c1' in model_vars
-        assert 'mu_logMratio' in model_vars
-        assert 'sigma_d1' in model_vars
-        assert 'sigma_c1' in model_vars
-        assert 'sigma_logMratio' in model_vars
-        
-        # Check that subject-specific variables exist
-        assert 'c1' in model_vars  # Should be shape (nSubj,)
-        assert 'd1' in model_vars  # Should be shape (nSubj,)
-        assert 'meta_d' in model_vars  # Should be shape (nSubj,)
-        
-        # Test with sampling (minimal for speed)
-        model, trace = hmetad(
-            data=test_df,
-            subject='Subject',
-            nRatings=4,
-            stimuli='Stimuli',
-            accuracy='Accuracy',
-            confidence='Confidence',
-            sample_model=True,
-            num_samples=10,  # Very small for testing
-            num_chains=1,
-            tune=10
-        )
-        assert isinstance(model, pm.Model)
-        assert hasattr(trace, 'posterior')
-        
-        # Check that group parameters are in trace
-        group_vars = list(trace.posterior.data_vars.keys())
-        assert 'mu_d1' in group_vars
-        assert 'mu_c1' in group_vars
-        assert 'mu_logMratio' in group_vars
-        
-        # Check dimensions
-        assert trace.posterior.mu_d1.dims == ('chain', 'draw')  # Group-level scalar has chain, draw dims
-        assert trace.posterior.c1.dims == ('chain', 'draw', 'c1_dim_0')  # Vector has additional subject dim
-
 
 if __name__ == "__main__":
     unittest.main(argv=["first-arg-is-ignored"], exit=False)