From 7b000665b45ea49b749bfe89be065f50f4921523 Mon Sep 17 00:00:00 2001 From: Maarten Mennes Date: Wed, 18 Apr 2018 15:54:45 +0200 Subject: [PATCH] refactoring of feature_time_series based on @rtrhd's code and @dangom's suggestions. Massive speed-up associated with this! --- ICA_AROMA_functions.py | 130 +++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 64 deletions(-) diff --git a/ICA_AROMA_functions.py b/ICA_AROMA_functions.py index 8ba20fe..8bdfd92 100644 --- a/ICA_AROMA_functions.py +++ b/ICA_AROMA_functions.py @@ -9,6 +9,7 @@ from builtins import str from builtins import range from past.utils import old_div +import numpy as np def runICA(fslDir, inFile, outDir, melDirIn, mask, dim, TR): @@ -200,9 +201,19 @@ def register2MNI(fslDir, inFile, outFile, affmat, warp): '--premat=' + affmat, '--interp=trilinear'])) +def cross_correlation(a, b): + """Cross Correlations between columns of two matrices""" + assert a.ndim == b.ndim == 2 + _, ncols_a = a.shape + # nb variables in columns rather than rows hence transpose + # extract just the cross terms between cols in a and cols in b + return np.corrcoef(a.T, b.T)[:ncols_a, ncols_a:] + def feature_time_series(melmix, mc): - """ This function extracts the maximum RP correlation feature scores. It determines the maximum robust correlation of each component time-series with a model of 72 realigment parameters. + """ This function extracts the maximum RP correlation feature scores. + It determines the maximum robust correlation of each component time-series + with a model of 72 realignment parameters. Parameters --------------------------------------------------------------------------------- @@ -211,7 +222,8 @@ def feature_time_series(melmix, mc): Returns --------------------------------------------------------------------------------- - maxRPcorr: Array of the maximum RP correlation feature scores for the components of the melodic_mix file""" + maxRPcorr: Array of the maximum RP correlation feature scores for the components + of the melodic_mix file""" # Import required modules import numpy as np @@ -219,71 +231,60 @@ def feature_time_series(melmix, mc): # Read melodic mix file (IC time-series), subsequently define a set of squared time-series mix = np.loadtxt(melmix) - mixsq = np.power(mix, 2) # Read motion parameter file - RP6 = np.loadtxt(mc) + rp6 = np.loadtxt(mc) + _, nparams = rp6.shape # Determine the derivatives of the RPs (add zeros at time-point zero) - RP6_der = np.array(RP6[list(range(1, RP6.shape[0])), :] - RP6[list(range(0, RP6.shape[0] - 1)), :]) - RP6_der = np.concatenate((np.zeros((1, 6)), RP6_der), axis=0) + rp6_der = np.vstack((np.zeros(nparams), + np.diff(rp6, axis=0) + )) + + #RP6_der = np.array(RP6[list(range(1, RP6.shape[0])), :] - RP6[list(range(0, RP6.shape[0] - 1)), :]) + #RP6_der = np.concatenate((np.zeros((1, 6)), RP6_der), axis=0) # Create an RP-model including the RPs and its derivatives - RP12 = np.concatenate((RP6, RP6_der), axis=1) + rp12 = np.hstack((rp6, rp6_der)) + #RP12 = np.concatenate((RP6, RP6_der), axis=1) # Add the squared RP-terms to the model - RP24 = np.concatenate((RP12, np.power(RP12, 2)), axis=1) + # add the fw and bw shifted versions + rp12_1fw = np.vstack(( + np.zeros(2 * nparams), + rp12[:-1] + )) + rp12_1bw = np.vstack(( + rp12[1:], + np.zeros(2 * nparams) + )) + rp_model = np.hstack((rp12, rp12_1fw, rp12_1bw)) - # Derive shifted versions of the RP_model (1 frame for and backwards) - RP24_1fw = np.concatenate((np.zeros((1, 24)), np.array(RP24[list(range(0, RP24.shape[0] - 1)), :])), axis=0) - RP24_1bw = np.concatenate((np.array(RP24[list(range(1, RP24.shape[0])), :]), np.zeros((1, 24))), axis=0) + # Determine the maximum correlation between RPs and IC time-series + nsplits = 1000 + nmixrows, nmixcols = mix.shape + nrows_to_choose = int(round(0.9 * nmixrows)) - # Combine the original and shifted mot_pars into a single model - RP_model = np.concatenate((RP24, RP24_1fw, RP24_1bw), axis=1) + # Max correlations for multiple splits of the dataset (for a robust estimate) + max_correls = np.empty((nsplits, nmixcols)) + for i in range(nsplits): + # Select a random subset of 90% of the dataset rows (*without* replacement) + chosen_rows = random.sample(population=range(nmixrows), + k=nrows_to_choose) - # Define the column indices of respectively the squared or non-squared terms - idx_nonsq = np.array(np.concatenate((list(range(0, 12)), list(range(24, 36)), list(range(48, 60))), axis=0)) - idx_sq = np.array(np.concatenate((list(range(12, 24)), list(range(36, 48)), list(range(60, 72))), axis=0)) + # Combined correlations between RP and IC time-series, squared and non squared + correl_nonsquared = cross_correlation(mix[chosen_rows], + rp_model[chosen_rows]) + correl_squared = cross_correlation(mix[chosen_rows]**2, + rp_model[chosen_rows]**2) + correl_both = np.hstack((correl_squared, correl_nonsquared)) - # Determine the maximum correlation between RPs and IC time-series - nSplits = int(1000) - maxTC = np.zeros((nSplits, mix.shape[1])) - for i in range(0, nSplits): - # Get a random set of 90% of the dataset and get associated RP model and IC time-series matrices - idx = np.array(random.sample(list(range(0, mix.shape[0])), int(round(0.9 * mix.shape[0])))) - RP_model_temp = RP_model[idx, :] - mix_temp = mix[idx, :] - mixsq_temp = mixsq[idx, :] - - # Calculate correlation between non-squared RP/IC time-series - RP_model_nonsq = RP_model_temp[:, idx_nonsq] - cor_nonsq = np.array(np.zeros((mix_temp.shape[1], RP_model_nonsq.shape[1]))) - for j in range(0, mix_temp.shape[1]): - for k in range(0, RP_model_nonsq.shape[1]): - cor_temp = np.corrcoef(mix_temp[:, j], RP_model_nonsq[:, k]) - cor_nonsq[j, k] = cor_temp[0, 1] - - # Calculate correlation between squared RP/IC time-series - RP_model_sq = RP_model_temp[:, idx_sq] - cor_sq = np.array(np.zeros((mix_temp.shape[1], RP_model_sq.shape[1]))) - for j in range(0, mixsq_temp.shape[1]): - for k in range(0, RP_model_sq.shape[1]): - cor_temp = np.corrcoef(mixsq_temp[:, j], RP_model_sq[:, k]) - cor_sq[j, k] = cor_temp[0, 1] - - # Combine the squared an non-squared correlation matrices - corMatrix = np.concatenate((cor_sq, cor_nonsq), axis=1) - - # Get maximum absolute temporal correlation for every IC - corMatrixAbs = np.abs(corMatrix) - maxTC[i, :] = corMatrixAbs.max(axis=1) - - # Get the mean maximum correlation over all random splits - # nanmean to deal with occasional nans popping up in the correlation calculation - maxRPcorr = np.nanmean(maxTC, axis=0) - - # Return the feature score - return maxRPcorr + # Maximum absolute temporal correlation for every IC + max_correls[i] = np.abs(correl_both).max(axis=1) + + # Feature score is the mean of the maximum correlation over all the random splits + # Avoid propagating occasional nans that arise in artificial test cases + return np.nanmean(max_correls, axis=0) def feature_frequency(melFTmix, TR): @@ -497,14 +498,15 @@ def classification(outDir, maxRPcorr, edgeFract, HFC, csfFract): motionICs = np.squeeze(np.array(np.where((proj > 0) + (csfFract > thr_csf) + (HFC > thr_HFC)))) # Put the feature scores in a text file - np.savetxt(os.path.join(outDir, 'feature_scores.txt'), np.vstack((maxRPcorr, edgeFract, HFC, csfFract)).T) + np.savetxt(os.path.join(outDir, 'feature_scores.txt'), + np.vstack((maxRPcorr, edgeFract, HFC, csfFract)).T) # Put the indices of motion-classified ICs in a text file txt = open(os.path.join(outDir, 'classified_motion_ICs.txt'), 'w') if motionICs.size > 1: # and len(motionICs) != 0: if motionICs is not None and - txt.write(','.join(['%.0f' % num for num in (motionICs + 1)])) + txt.write(','.join(['{:.0f}'.format(num) for num in (motionICs + 1)])) elif motionICs.size == 1: - txt.write('%.0f' % (motionICs + 1)) + txt.write('{:.0f}'.format(motionICs + 1)) txt.close() # Create a summary overview of the classification @@ -521,12 +523,12 @@ def classification(outDir, maxRPcorr, edgeFract, HFC, csfFract): classif = "True" else: classif = "False" - txt.write('\t'.join([i + 1, - classif, - maxRPcorr[i], - edgeFract[i], - HFC[i], - csfFract[i]])) + txt.write('\t'.join(['{:d}'.format(i + 1), + classif, + '{:.2f}'.format(maxRPcorr[i]), + '{:.2f}'.format(edgeFract[i]), + '{:.2f}'.format(HFC[i]), + '{:.2f}'.format(csfFract[i])])) txt.write('\n') txt.close()