function [perf,result] = mv_searchlight(cfg, X, clabel)
% Classification using a feature searchlight approach can highlight which
% feature(s) are informative. To this end, classification is performed on
% each feature separately. However, neighbouring features can enter the
% classification together when a matrix of size [features x features]
% specifying the neighbours is provided.
%
% Usage:
% [perf, result] = mv_searchlight(cfg, X, clabel)
%
% Parameters:
% X - [samples x features] data matrix or a
% [samples x features x time] matrix. In the latter case,
% either all time points are used as features (if
%                average=0) or they are averaged into a single time point (average=1)
% clabel - [samples x 1] vector of class labels containing
% 1's (class 1) and 2's (class 2)
%
% cfg - struct with parameters:
% .metric - classifier performance metric, default 'accuracy'. See
% mv_classifier_performance. If set to [] or 'none', the
% raw classifier output (labels, dvals or probabilities
% depending on cfg.output_type) for each sample is returned.
% .nb - [features x features] matrix specifying which features
% are neighbours of each other.
% - EITHER -
% a GRAPH consisting of 0's and 1's. A 1 in the
% (i,j)-th element signifies that feature i and feature j
% are neighbours, and a 0 means they are not neighbours
% - OR -
% a DISTANCE MATRIX, where larger values mean larger distance.
% If no matrix is provided, every feature is only neighbour
% to itself and classification is performed for each feature
% separately.
% .size - if a nb matrix is provided, size defines the
% size of the 'neighbourhood' of a feature.
% if nb is a graph, it gives the number of steps taken
% through the nb matrix to find neighbours:
% 0: only the feature itself is considered (no neighbours)
% 1: the feature and its immediate neighbours
% 2: the feature, its neighbours, and its neighbours'
% neighbours
% 3+: neighbours of neighbours of neighbours etc
% (default 1)
% if nb is a distance matrix, size defines the number of
% neighbouring features that enter the classification
% 0: only the feature itself is considered (no neighbours)
% 1: the feature and its first closest neighbour
% according to the distance matrix
% 2+: the 2 closest neighbours etc.
% .average - if 1 and X is [samples x features x time], the time
%                dimension is averaged to a single feature (default 0). If
% 0, each time point is used as a separate feature
% .normalise - normalises the data across samples, for each time point
% and each feature separately, using 'zscore' or 'demean'
% (default 'zscore'). Set to 'none' or [] to avoid normalisation.
% .feedback - print feedback on the console (default 1)
%
% CROSS-VALIDATION parameters:
% .cv - perform cross-validation, can be set to 'kfold',
% 'leaveout', 'holdout', or 'none' (default 'kfold')
% .k - number of folds in k-fold cross-validation (default 5)
% .p - if cv is 'holdout', p is the fraction of test samples
% (default 0.1)
% .stratify - if 1, the class proportions are approximately preserved
% in each fold (default 1)
% .repeat - number of times the cross-validation is repeated with new
%                randomly assigned folds (default 5)
%
% Returns:
% perf         - [features x 1] vector of classifier performances
%                corresponding to the selected metric
% result       - struct with additional information about the analysis
%                (metric, cross-validation settings, number of classes,
%                classifier); only filled if the second output argument
%                is requested
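%
% Example (illustrative sketch, not part of the original documentation;
% it assumes X is a [samples x features] data matrix and clabel a matching
% vector of class labels):
%
%   cfg        = [];
%   cfg.metric = 'accuracy';
%   cfg.nb     = [0 1 0; 1 0 1; 0 1 0];  % toy graph: 3 features in a chain
%   cfg.size   = 1;                      % each feature plus its direct neighbours
%   perf = mv_searchlight(cfg, X, clabel);
%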
% (c) Matthias Treder
X = double(X);
mv_set_default(cfg,'classifier','lda');
mv_set_default(cfg,'param',[]);
mv_set_default(cfg,'metric','accuracy');
mv_set_default(cfg,'nb',[]);
mv_set_default(cfg,'size',1);
mv_set_default(cfg,'average',0);
mv_set_default(cfg,'feedback',1);
% Cross-validation settings
mv_set_default(cfg,'cv','kfold');
mv_set_default(cfg,'repeat',5);
mv_set_default(cfg,'k',5);
mv_set_default(cfg,'p',0.1);
mv_set_default(cfg,'stratify',1);
switch(cfg.cv)
case 'leaveout', cfg.k = size(X,1);
case 'holdout', cfg.k = 1;
end
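% If requested, average across the time dimension (3rd dimension) so that
% each feature enters the classification as a single value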
if cfg.average && ~ismatrix(X)
X = mean(X,3);
end
[n, nFeat, ~] = size(X);
[clabel, nclasses] = mv_check_clabel(clabel);
mv_check_cfg(cfg);
perf = cell(nFeat,1);
perf_std = cell(nFeat,1);
%% Find the neighbourhood of the requested size
if isempty(cfg.nb)
if cfg.feedback, fprintf('No neighbour matrix provided, considering each feature individually\n'), end
% Do not include neighbours: each feature is only neighbour to itself
nb = eye(nFeat);
elseif numel(cfg.nb)==1 && cfg.nb == 0
nb = eye(nFeat);
else
%%% Decide whether nb is a graph or a distance matrix
    if all(ismember(unique(cfg.nb), [0 1])) % all entries are 0 or 1 -> treat nb as a graph
        % Use a trick from transition matrices of Markov chains: taking the
        % i-th power of the adjacency matrix (with self-connections added via
        % the identity matrix) shows which features can be reached within i
        % steps, including the feature itself
        nb = double( (double(cfg.nb) + eye(nFeat))^cfg.size > 0 );
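        % Illustrative sketch (not executed): for a 3-feature chain graph
        %   A = [0 1 0; 1 0 1; 0 1 0];
        % (A + eye(3))^1 > 0 keeps each feature plus its direct neighbours,
        % whereas (A + eye(3))^2 > 0 is all true, i.e. with size = 2 every
        % feature's neighbourhood spans the whole chain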
    else % distance matrix -> convert it into a graph
        % The resulting graph is not necessarily symmetric: if the closest
        % neighbour of feature i is feature j, the closest neighbour of
        % feature j can still be a different feature. The matrix nb therefore
        % stores the closest neighbours in its rows, i.e. the row nb(i,:)
        % gives the closest neighbours of the i-th feature
        nb = zeros(nFeat); % initialise as a zero matrix
for nn=1:nFeat
[~,soidx] = sort(cfg.nb(nn,:),'ascend');
% put 1's in the row corresponding to the nearest
% neighbours
nb(nn,soidx(1:cfg.size+1)) = 1;
end
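        % Illustrative sketch (not executed): with size = 1 and the
        % 3-feature distance matrix D = [0 1 3; 1 0 2; 3 2 0], the loop
        % above yields nb = [1 1 0; 1 1 0; 0 1 1]. Note the asymmetry:
        % feature 3's closest neighbour is feature 2, whereas feature 2's
        % closest neighbour is feature 1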
end
end
%% Prepare cfg struct for mv_crossvalidate
tmp_cfg = cfg;
tmp_cfg.feedback = 0;
% Store the current state of the random number generator
rng_state = rng;
%% Loop across features
for ff=1:nFeat
    % Identify neighbours: the ff-th row of the nb matrix indicates which
    % features belong to the neighbourhood of feature ff
    neighbours = find(nb(ff,:) > 0);
if cfg.feedback
if numel(neighbours)>1
fprintf('Classifying using feature %d with neighbours %s\n', ff, mat2str(setdiff(neighbours,ff)))
else
fprintf('Classifying using feature %d with no neighbours\n', ff)
end
end
% Extract desired features and reshape into [samples x features]
Xfeat = reshape(X(:,neighbours,:), n, []);
    % Reset the random number generator to the same state in every iteration:
    % this ensures that the same cross-validation folds are used for each
    % feature, making results comparable across features
rng(rng_state);
% Perform cross-validation for specific feature(s)
[perf{ff}, res] = mv_crossvalidate(tmp_cfg, Xfeat, clabel);
perf_std{ff} = res.perf_std;
end
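% Concatenate the per-feature results into [features x ...] arrays (a
% [features x 1] vector when the metric yields a single value per feature)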
perf = cat(2,perf{:})';
perf_std = cat(2,perf_std{:})';
result = [];
if nargout>1
result.function = mfilename;
result.perf = perf;
result.perf_std = perf_std;
result.metric = cfg.metric;
result.cv = cfg.cv;
result.k = cfg.k;
result.n = size(X,1);
result.repeat = cfg.repeat;
result.nclasses = nclasses;
result.classifier = cfg.classifier;
end