Skip to content

Commit

Permalink
Merge pull request #87 from sct-pipeline/stat_cov_err
Browse files Browse the repository at this point in the history
Added graph showing intra-subject variability (COV) as a function of atrophy estimation error
  • Loading branch information
PaulBautin authored Jan 16, 2021
2 parents d73bced + ae45092 commit 12bd8e3
Showing 1 changed file with 85 additions and 2 deletions.
87 changes: 85 additions & 2 deletions csa_rescale_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,77 @@ def sample_size(df, config_param):
sample_size.append('inf')
return sample_size

def error_function_of_intra_cov(df, path_output):
"""Scatter plot of intra-subject COV in function of error %
:param df: dataframe for computing stats per subject: df_sub
:param path_output: directory in which plot is saved
"""
fig, ax = plt.subplots(figsize=(7, 7))
# remove rescale=1 because error=0
df = df[df['rescale'] != 1]
# compute linear regression
z = np.polyfit(x=df.loc[:, 'perc_error'], y=df.loc[:, 'cov'], deg=1)
p = np.poly1d(z)
# plot
df.plot.scatter(x='perc_error', y='cov', c='rescale', colormap='viridis')
min_err = min(df['perc_error'].values)
max_err = max(df['perc_error'].values)
plt.plot([min_err, max_err], [min_err*z[0]+z[1], max_err*z[0]+z[1]], ls="--", c=".3")
ax.set_xlabel('perc_error')
ax.set_ylabel('cov')
plt.title('COV in function of % error,\n linear regression: {}'.format(p))
plt.title("COV in function of % error")
ax.legend(loc='upper right')
plt.grid()
output_file = os.path.join(path_output, "fig_err_in_function_of_cov.png")
plt.savefig(output_file, bbox_inches='tight')
print("--> Created figure: {}".format(output_file))


def error_function_of_intra_cov_outlier(df, path_output):
"""Scatter plot of intra-subject COV in function of error % to identify outliers with either high error %
or high intra-subject COV
:param df: dataframe for computing stats per subject: df_sub
:param path_output: directory in which plot is saved
"""
fig, ax = plt.subplots(figsize=(7, 7))
# identified outliers either high error % or high intra-subject COV of t1 images
outliers_t1_all = ['sub-brnoUhb01', 'sub-brnoUhb02', 'sub-brnoUhb03', 'sub-brnoUhb06', 'sub-brnoUhb07', 'sub-brnoUhb08',
'sub-barcelona04', 'sub-barcelona06', 'sub-beijingPrisma03', 'sub-milan03', 'sub-oxfordFmrib01',
'sub-tokyo750w03', 'sub-pavia04']
# identified t1 outliers remaining after subjects removed due to missing vertebrae levels missing CSA
outliers_t1 = ['sub-brnoUhb01', 'sub-brnoUhb08', 'sub-milan03', 'sub-oxfordFmrib01', 'sub-cmrrb05',
'sub-tokyo750w03', 'sub-pavia04']
# identified outliers either high error % or high intra-subject COV of t2 images
outliers_t2_all = ['sub-tokyo750w02', 'sub-tokyo750w04', 'sub-tokyo750w06', 'sub-beijingVerio02',
'sub-beijingVerio03', 'sub-ubc02', 'sub-vuiisIngenia05']
# identified t2 outliers remaining after subjects removed due to missing vertebrae levels missing CSA
outliers_t2 = ['sub-tokyo750w02', 'sub-tokyo750w04', 'sub-tokyo750w06', 'sub-beijingVerio02', 'sub-beijingVerio03',
'sub-ubc02', 'sub-vuiisIngenia05']
# remove rescale=1 because error=0
df = df[df['rescale'] != 1]
ax.scatter(df['perc_error'], df['cov'], color='tab:blue', label='others')
# plot scatter outliers of t1w images
for outlier_t1 in outliers_t1:
df_t1 = df.groupby(['subject']).get_group(outlier_t1)
ax.scatter(df_t1['perc_error'], df_t1['cov'], color='tab:red', label=outlier_t1)
df_t1 = []
# plot scatter outliers of t2w images
for outlier_t2 in outliers_t2:
df_t2 = df.groupby(['subject']).get_group(outlier_t2)
ax.scatter(df_t2['perc_error'], df_t2['cov'], color='tab:olive', label=outlier_t2)
df_t2 = []
# plot
ax.set_xlabel('perc_error')
ax.set_ylabel('cov')
plt.title("COV in function of error %")
ax.legend(loc='upper right')
plt.grid()
# save image
output_file = os.path.join(path_output, "fig_err_in_function_of_cov_outlier.png")
plt.savefig(output_file, bbox_inches='tight')
print("--> Created figure: {}".format(output_file))


def add_columns_df_sub(df):
""" Add columns theoretic CSA values (rX^2 * MEAN(area)) and CSA without rescaling to dataframe
Expand All @@ -257,6 +328,7 @@ def add_columns_df_sub(df):
group.loc[subject, 'theoretic_csa'] = csa_without_rescale.loc[subject, 'mean'] * (rescale ** 2)
df.loc[rescale, 'theoretic_csa'] = group['theoretic_csa'].values
df.loc[rescale, 'csa_without_rescale'] = csa_without_rescale['mean'].values
df.loc[rescale, 'csa_without_rescale'] = csa_without_rescale['mean'].values
df = df.reset_index()
return df

Expand Down Expand Up @@ -285,6 +357,9 @@ def main():
# identify rows with missing values
print("Remove rows with missing values...")
lines_to_drop = df_vert[df_vert['MEAN(area)'] == 'None'].index
df_vert['subject'] = list(sub.split('data_processed/')[1].split('/anat')[0] for sub in df_vert['Filename'])

# remove rows with missing values
df_vert = df_vert.drop(df_vert.index[lines_to_drop])
df_vert['MEAN(area)'] = pd.to_numeric(df_vert['MEAN(area)'])
print(" Rows removed: {}".format(lines_to_drop))
Expand Down Expand Up @@ -338,8 +413,10 @@ def main():
df_sub = add_columns_df_sub(df_sub)
df_sub['rescale_estimated'] = df_sub['mean'].div(df_sub['csa_without_rescale'])
df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa']).abs()
df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).abs().div(df_sub['mean'])
df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).abs().div(df_sub['theoretic_csa'])
print(df_sub)
# save dataframe in a csv file
df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv'))

# Create dataframe for computing stats across subject: df_rescale
print("\n==================== rescaling_dataframe ==========================\n")
Expand All @@ -356,9 +433,12 @@ def main():
df_rescale['mean_rescale_estimated'] = df_sub.groupby('rescale').mean()['rescale_estimated'].values
df_rescale['std_rescale_estimated'] = df_sub.groupby('rescale').std()['rescale_estimated'].values
df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values
df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values
df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values
df_rescale['sample_size'] = sample_size(df_rescale, config_param)
print(df_rescale)
# save dataframe in a csv file
df_sub.to_csv(os.path.join(path_output, r'csa_transfo.csv'))

# plot graph if verbose is present
if arguments.fig:
Expand All @@ -382,7 +462,10 @@ def main():
# std = STD of subjects without rescaling CSA values
# mean_csa = mean CSA value of subjects without rescaling
plot_sample_size(z_conf=z_score_confidence, z_power=z_score_power, std=df_rescale.loc[1, 'std_inter'], mean_csa=df_rescale.loc[1, 'mean_inter'], path_output=path_output)

# scatter plot of COV in function of error %
error_function_of_intra_cov(df_sub, path_output=path_output)
# scatter plot of COV in function of error % to identify outliers
error_function_of_intra_cov_outlier(df_sub, path_output=path_output)

if __name__ == "__main__":
main()

0 comments on commit 12bd8e3

Please sign in to comment.