diff --git a/metadpy/bayesian.py b/metadpy/bayesian.py index 4d89023..95b3606 100644 --- a/metadpy/bayesian.py +++ b/metadpy/bayesian.py @@ -275,17 +275,12 @@ def hmetad( # Group level elif (within is None) & (between is None) & (subject is not None): - pymcData = preprocess_group( - data, subject, stimuli, accuracy, confidence, nRatings - ) - - from group_level_pymc import hmetad_groupLevel + # pymcData = preprocess_group( + # data, subject, stimuli, accuracy, confidence, nRatings + # ) - model_output = hmetad_groupLevel( - pymcData, - sample_model=sample_model, - num_chains=num_chains, - num_samples=num_samples, + raise ValueError( + "Invalid backend provided - This model is not implemented yet." ) ################### @@ -414,89 +409,90 @@ def extractParameters( return data -def preprocess_group( - data: pd.DataFrame, - subject: str, - stimuli: str, - accuracy: str, - confidence: str, - nRatings: int, -) -> Dict: - """Preprocess group data. +# TODO: when implementing group level fitting, the following wrapper will be usefull. +# def preprocess_group( +# data: pd.DataFrame, +# subject: str, +# stimuli: str, +# accuracy: str, +# confidence: str, +# nRatings: int, +# ) -> Dict: +# """Preprocess group data. - Parameters - ---------- - data : :py:class:`pandas.DataFrame` or None - Dataframe. Note that this function can also directly be used as a - Pandas method, in which case this argument is no longer needed. - subject : string or None - Name of column containing the subject identifier (only required if a - within-subject or a between-subject factor is provided). - stimuli : string or None - Name of the column containing the stimuli. - accuracy : string or None - Name of the columns containing the accuracy. - confidence : string or None - Name of the column containing the confidence ratings. - nRatings : int or None - Number of discrete ratings. If a continuous rating scale was used, and - the number of unique ratings does not match `nRatings`, will convert to - discrete ratings using :py:func:`metadpy.utils.discreteRatings`. +# Parameters +# ---------- +# data : :py:class:`pandas.DataFrame` or None +# Dataframe. Note that this function can also directly be used as a +# Pandas method, in which case this argument is no longer needed. +# subject : string or None +# Name of column containing the subject identifier (only required if a +# within-subject or a between-subject factor is provided). +# stimuli : string or None +# Name of the column containing the stimuli. +# accuracy : string or None +# Name of the columns containing the accuracy. +# confidence : string or None +# Name of the column containing the confidence ratings. +# nRatings : int or None +# Number of discrete ratings. If a continuous rating scale was used, and +# the number of unique ratings does not match `nRatings`, will convert to +# discrete ratings using :py:func:`metadpy.utils.discreteRatings`. - Return - ------ - pymcData : Dict +# Return +# ------ +# pymcData : Dict - """ - pymcData = { - "d1": [], - "c1": [], - "nSubj": data[subject].nunique(), - "subID": np.arange(data[subject].nunique(), dtype="int"), - "hits": [], - "falsealarms": [], - "s": [], - "n": [], - "counts": [], - "nRatings": nRatings, - "Tol": 1e-05, - "cr": [], - "m": [], - } +# """ +# pymcData = { +# "d1": [], +# "c1": [], +# "nSubj": data[subject].nunique(), +# "subID": np.arange(data[subject].nunique(), dtype="int"), +# "hits": [], +# "falsealarms": [], +# "s": [], +# "n": [], +# "counts": [], +# "nRatings": nRatings, +# "Tol": 1e-05, +# "cr": [], +# "m": [], +# } - for sub in data[subject].unique(): - nR_S1, nR_S2 = trials2counts( - data=data[data[subject] == sub], - stimuli=stimuli, - accuracy=accuracy, - confidence=confidence, - nRatings=nRatings, - ) +# for sub in data[subject].unique(): +# nR_S1, nR_S2 = trials2counts( +# data=data[data[subject] == sub], +# stimuli=stimuli, +# accuracy=accuracy, +# confidence=confidence, +# nRatings=nRatings, +# ) + +# this_data = extractParameters(nR_S1, nR_S2) +# pymcData["d1"].append(this_data["d1"]) +# pymcData["c1"].append(this_data["c1"]) +# pymcData["s"].append(this_data["S"]) +# pymcData["n"].append(this_data["N"]) +# pymcData["m"].append(this_data["M"]) +# pymcData["cr"].append(this_data["CR"]) +# pymcData["counts"].append(this_data["counts"]) +# pymcData["hits"].append(this_data["H"]) +# pymcData["falsealarms"].append(this_data["FA"]) + +# pymcData["d1"] = np.array(pymcData["d1"], dtype="float") +# pymcData["c1"] = np.array(pymcData["c1"], dtype="float") +# pymcData["s"] = np.array(pymcData["s"], dtype="int") +# pymcData["n"] = np.array(pymcData["n"], dtype="int") +# pymcData["m"] = np.array(pymcData["m"], dtype="int") +# pymcData["cr"] = np.array(pymcData["cr"], dtype="int") +# pymcData["counts"] = np.array(pymcData["counts"], dtype="int") +# pymcData["hits"] = np.array(pymcData["hits"], dtype="int") +# pymcData["falsealarms"] = np.array(pymcData["falsealarms"], dtype="int") +# pymcData["nSubj"] = data[subject].nunique() +# pymcData["subID"] = np.arange(pymcData["nSubj"], dtype="int") - this_data = extractParameters(nR_S1, nR_S2) - pymcData["d1"].append(this_data["d1"]) - pymcData["c1"].append(this_data["c1"]) - pymcData["s"].append(this_data["S"]) - pymcData["n"].append(this_data["N"]) - pymcData["m"].append(this_data["M"]) - pymcData["cr"].append(this_data["CR"]) - pymcData["counts"].append(this_data["counts"]) - pymcData["hits"].append(this_data["H"]) - pymcData["falsealarms"].append(this_data["FA"]) - - pymcData["d1"] = np.array(pymcData["d1"], dtype="float") - pymcData["c1"] = np.array(pymcData["c1"], dtype="float") - pymcData["s"] = np.array(pymcData["s"], dtype="int") - pymcData["n"] = np.array(pymcData["n"], dtype="int") - pymcData["m"] = np.array(pymcData["m"], dtype="int") - pymcData["cr"] = np.array(pymcData["cr"], dtype="int") - pymcData["counts"] = np.array(pymcData["counts"], dtype="int") - pymcData["hits"] = np.array(pymcData["hits"], dtype="int") - pymcData["falsealarms"] = np.array(pymcData["falsealarms"], dtype="int") - pymcData["nSubj"] = data[subject].nunique() - pymcData["subID"] = np.arange(pymcData["nSubj"], dtype="int") - - return pymcData +# return pymcData # def preprocess_rm1way( diff --git a/metadpy/models/group_level_pymc.py b/metadpy/models/group_level_pymc.py deleted file mode 100644 index 2da4810..0000000 --- a/metadpy/models/group_level_pymc.py +++ /dev/null @@ -1,227 +0,0 @@ -# Author: Nicolas Legrand - -import pytensor.tensor as pt -import numpy as np -from pymc import Binomial, Deterministic, HalfNormal, Model, Multinomial, Normal, sample - - -def phi(x): - """Cumulative normal distribution.""" - return 0.5 + 0.5 * pt.erf(x / pt.sqrt(2)) - - -def hmetad_groupLevel( - data, sample_model=True, num_samples: int = 1000, num_chains: int = 4, **kwargs -): - """Hierarchical Bayesian modeling of meta-d' (group level). - - This is an internal function. The group level model must be called using - :py:func:`metadpy.bayesian.hmetad`. - - Parameters - ---------- - data : dict - Response data. - sample_model : boolean - If `False`, only the model is returned without sampling. - num_samples : int - The number of samples per chains to draw (defaults to `1000`). - num_chains : int - The number of chains (defaults to `4`). - **kwargs : keyword arguments - All keyword arguments are passed to `func::pymc.sampling.sample`. - - Returns - ------- - model : :py:class:`pymc.Model` instance - The pymc model. Encapsulates the variables and likelihood factors. - trace : :py:class:`pymc.backends.base.MultiTrace` or - :py:class:`arviz.InferenceData` - A `MultiTrace` or `ArviZ InferenceData` object that contains the samples. - - References - ---------- - .. [#] Fleming, S.M. (2017) HMeta-d: hierarchical Bayesian estimation of - metacognitive efficiency from confidence ratings, Neuroscience of Consciousness, - 3(1) nix007, https://doi.org/10.1093/nc/nix007 - - """ - nRatings = data["nRatings"] - nSubj = data["nSubj"] - - with Model() as model: - - # Group-level hyperpriors - mu_c1 = Normal("mu_c1", mu=0.0, sigma=1) - sigma_c1 = HalfNormal("sigma_c1", sigma=1) - - mu_d1 = Normal("mu_d1", mu=0.0, sigma=1) - sigma_d1 = HalfNormal("sigma_d1", sigma=1) - - mu_logMratio = Normal("mu_logMratio", mu=0.0, sigma=1) - sigma_logMratio = HalfNormal("sigma_logMratio", sigma=1) - - # Group-level hyperpriors for criteria - mu_cS1_hn = HalfNormal("mu_cS1_hn", sigma=1, shape=nRatings - 1) - sigma_cS1_hn = HalfNormal("sigma_cS1_hn", sigma=1) - - mu_cS2_hn = HalfNormal("mu_cS2_hn", sigma=1, shape=nRatings - 1) - sigma_cS2_hn = HalfNormal("sigma_cS2_hn", sigma=1) - - # Subject-level parameters - each drawn from group distribution - c1 = Normal("c1", mu=mu_c1, sigma=sigma_c1, shape=nSubj) - d1 = Normal("d1", mu=mu_d1, sigma=sigma_d1, shape=nSubj) - logMratio = Normal("logMratio", mu=mu_logMratio, sigma=sigma_logMratio, shape=nSubj) - - # Transform to meta_d - meta_d = Deterministic("meta_d", d1 * pt.exp(logMratio)) - - # Subject-level criteria - cS1_hn = HalfNormal("cS1_hn", sigma=sigma_cS1_hn, shape=(nSubj, nRatings - 1)) - cS2_hn = HalfNormal("cS2_hn", sigma=sigma_cS2_hn, shape=(nSubj, nRatings - 1)) - - # For each subject, fit the exact same model as the original subject-level model - for s in range(nSubj): - # Get subject-specific parameters - c1_s = c1[s] - d1_s = d1[s] - meta_d_s = meta_d[s] - - # Subject-specific criteria using the exact same pattern as original - cS1_s = Deterministic(f"cS1_{s}", pt.sort(-cS1_hn[s, :]) + (c1_s - data["Tol"])) - cS2_s = Deterministic(f"cS2_{s}", pt.sort(cS2_hn[s, :]) + (c1_s - data["Tol"])) - - # TYPE 1 SDT BINOMIAL MODEL (exact same as original) - h_s = phi(d1_s / 2 - c1_s) - f_s = phi(-d1_s / 2 - c1_s) - H_s = Binomial(f"H_{s}", data["s"][s], h_s, observed=data["hits"][s]) - FA_s = Binomial(f"FA_{s}", data["n"][s], f_s, observed=data["falsealarms"][s]) - - # TYPE 2 SDT MODEL - exact same calculations as original - # Means of SDT distributions - S2mu = pt.flatten(meta_d_s / 2, 1) - S1mu = pt.flatten(-meta_d_s / 2, 1) - - # Calculate normalisation constants - C_area_rS1 = phi(c1_s - S1mu) - I_area_rS1 = phi(c1_s - S2mu) - C_area_rS2 = 1 - phi(c1_s - S2mu) - I_area_rS2 = 1 - phi(c1_s - S1mu) - - # Get nC_rS1 probs - exact same pattern as original - nC_rS1 = phi(cS1_s - S1mu) / C_area_rS1 - nC_rS1 = Deterministic( - f"nC_rS1_{s}", - pt.concatenate( - ( - [ - phi(cS1_s[0] - S1mu) / C_area_rS1, - nC_rS1[1:] - nC_rS1[:-1], - ( - (phi(c1_s - S1mu) - phi(cS1_s[(nRatings - 2)] - S1mu)) - / C_area_rS1 - ), - ] - ), - axis=0, - ), - ) - - # Get nI_rS2 probs - exact same pattern as original - nI_rS2 = (1 - phi(cS2_s - S1mu)) / I_area_rS2 - nI_rS2 = Deterministic( - f"nI_rS2_{s}", - pt.concatenate( - ( - [ - ((1 - phi(c1_s - S1mu)) - (1 - phi(cS2_s[0] - S1mu))) / I_area_rS2, - nI_rS2[:-1] - (1 - phi(cS2_s[1:] - S1mu)) / I_area_rS2, - (1 - phi(cS2_s[nRatings - 2] - S1mu)) / I_area_rS2, - ] - ), - axis=0, - ), - ) - - # Get nI_rS1 probs - exact same pattern as original - nI_rS1 = (-phi(cS1_s - S2mu)) / I_area_rS1 - nI_rS1 = Deterministic( - f"nI_rS1_{s}", - pt.concatenate( - ( - [ - phi(cS1_s[0] - S2mu) / I_area_rS1, - nI_rS1[:-1] + (phi(cS1_s[1:] - S2mu)) / I_area_rS1, - (phi(c1_s - S2mu) - phi(cS1_s[(nRatings - 2)] - S2mu)) / I_area_rS1, - ] - ), - axis=0, - ), - ) - - # Get nC_rS2 probs - exact same pattern as original - nC_rS2 = (1 - phi(cS2_s - S2mu)) / C_area_rS2 - nC_rS2 = Deterministic( - f"nC_rS2_{s}", - pt.concatenate( - ( - [ - ((1 - phi(c1_s - S2mu)) - (1 - phi(cS2_s[0] - S2mu))) / C_area_rS2, - nC_rS2[:-1] - ((1 - phi(cS2_s[1:] - S2mu)) / C_area_rS2), - (1 - phi(cS2_s[nRatings - 2] - S2mu)) / C_area_rS2, - ] - ), - axis=0, - ), - ) - - # Avoid underflow of probabilities - exact same as original - nC_rS1 = pt.switch(nC_rS1 < data["Tol"], data["Tol"], nC_rS1) - nI_rS2 = pt.switch(nI_rS2 < data["Tol"], data["Tol"], nI_rS2) - nI_rS1 = pt.switch(nI_rS1 < data["Tol"], data["Tol"], nI_rS1) - nC_rS2 = pt.switch(nC_rS2 < data["Tol"], data["Tol"], nC_rS2) - - # TYPE 2 SDT MODEL Multinomial likelihood - exact same pattern as original - subject_counts = data["counts"][s, :] - - Multinomial( - f"CR_counts_{s}", - n=data["cr"][s], - p=nC_rS1, - shape=nRatings, - observed=subject_counts[:nRatings], - ) - Multinomial( - f"FA_counts_{s}", - n=data["falsealarms"][s], - p=nI_rS2, - shape=nRatings, - observed=subject_counts[nRatings : nRatings * 2], - ) - Multinomial( - f"M_counts_{s}", - n=data["m"][s], - p=nI_rS1, - shape=nRatings, - observed=subject_counts[nRatings * 2 : nRatings * 3], - ) - Multinomial( - f"H_counts_{s}", - n=data["hits"][s], - p=nC_rS2, - shape=nRatings, - observed=subject_counts[nRatings * 3 : nRatings * 4], - ) - - if sample_model is True: - trace = sample( - return_inferencedata=True, - chains=num_chains, - draws=num_samples, - **kwargs - ) - - return model, trace - - else: - return model \ No newline at end of file diff --git a/metadpy/tests/test_bayesian.py b/metadpy/tests/test_bayesian.py index 50cc904..000a85b 100644 --- a/metadpy/tests/test_bayesian.py +++ b/metadpy/tests/test_bayesian.py @@ -121,64 +121,6 @@ def test_hmetad(self): assert round(pymc_df["meta_d"].values[0], 2) - 1.58 < 0.01 assert round(pymc_df["m_ratio"].values[0], 2) - 1.03 < 0.01 - ################## - # Test group level - ################## - group_df = load_dataset("rm") - test_df = group_df[group_df['Subject'].isin([0, 1])] # Just 2 subjects for testing - - # Test model compilation only - model, _ = hmetad( - data=test_df, - subject='Subject', - nRatings=4, - stimuli='Stimuli', - accuracy='Accuracy', - confidence='Confidence', - sample_model=False - ) - assert isinstance(model, pm.Model) - - # Check that group-level variables exist - model_vars = list(model.named_vars.keys()) - assert 'mu_d1' in model_vars - assert 'mu_c1' in model_vars - assert 'mu_logMratio' in model_vars - assert 'sigma_d1' in model_vars - assert 'sigma_c1' in model_vars - assert 'sigma_logMratio' in model_vars - - # Check that subject-specific variables exist - assert 'c1' in model_vars # Should be shape (nSubj,) - assert 'd1' in model_vars # Should be shape (nSubj,) - assert 'meta_d' in model_vars # Should be shape (nSubj,) - - # Test with sampling (minimal for speed) - model, trace = hmetad( - data=test_df, - subject='Subject', - nRatings=4, - stimuli='Stimuli', - accuracy='Accuracy', - confidence='Confidence', - sample_model=True, - num_samples=10, # Very small for testing - num_chains=1, - tune=10 - ) - assert isinstance(model, pm.Model) - assert hasattr(trace, 'posterior') - - # Check that group parameters are in trace - group_vars = list(trace.posterior.data_vars.keys()) - assert 'mu_d1' in group_vars - assert 'mu_c1' in group_vars - assert 'mu_logMratio' in group_vars - - # Check dimensions - assert trace.posterior.mu_d1.dims == ('chain', 'draw') # Group-level scalar has chain, draw dims - assert trace.posterior.c1.dims == ('chain', 'draw', 'c1_dim_0') # Vector has additional subject dim - if __name__ == "__main__": unittest.main(argv=["first-arg-is-ignored"], exit=False)