-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathicassoGraph.m
executable file
·384 lines (339 loc) · 11.7 KB
/
icassoGraph.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
function icassoGraph(sR, varargin)
%function icassoGraph(sR,['identifier1',value1,'identifier2',value2,...])
%
%PURPOSE
%
%To visualize the estimate space as a 2D projection and the
%similarities between the estimates as a graph. Estimates are
%presented as points that are located so that the distances
%between the points approximate the similarities between the
%estimates. Estimates belonging to the same cluster are bounded by
%a convex hull whose background color expresses its average
%intra-cluster similarity.
%
%EXAMPLES OF BASIC USAGE
%
%Show results for as many estimate-clusters as there are dimensions
%(possibly reduced) and use default visualization parameters:
%
% icassoGraph(sR);
%
%Show results for 9 estimate-clusters. Suppress graph lines for
%similarities under value 0.7 everywhere as well as inside
%those clusters whose intra-cluster similarity is above 0.8:
%
% icassoGraph(sR,'L',9,'graphlimit',0.7,'dense',0.8);
%
%INPUTS
%
%sR (struct) Icasso result structure. The fields
%   sR.projection.coordinates, sR.cluster.partition,
%   sR.cluster.index and sR.cluster.similarity must be non-empty.
%
%Optional input arguments are given as argument identifier - value
%pairs: 'identifier1', value1, 'identifier2', value2,...
%(case insensitive)
%
% 'L' (string) 'rdim' (default) | (integer)
%   sets the number of estimate-clusters; 'rdim' sets it equal to the
%   (reduced) data dimension. A numeric value must be a scalar
%   integer between 1 and the number of rows in sR.cluster.partition.
% 'colorlimit' (vector) default [0.5 0.75 0.9]
%   sets the thresholds for colorscale for graph lines and clusters.
%   Values must lie strictly between 0 and 1; they are sorted into
%   ascending order before use.
% 'line' (string) 'on' (default) | 'off'
%   whether to show the similarity graph lines or not
% 'hull' (string) 'on' (default) | 'off'
%   whether to show the "cluster hulls" (and the colorbar) or not
% 'graphlimit' (scalar) in 0...1 | (string) 'auto' (default)
%   see additional information 2
% 'dense' (scalar) in 0...1 | (string) 'auto' (default)
%   see additional information 3
%
%DETAILS
%
%The function clears and resets the active figure.
%
%The centrotype of each estimate-cluster (see explanation in
%function 'centrotype') is marked on the projection with a yellow
%dot. The estimate-cluster label is written in bold red immediately
%to the left of that centrotype. The area of a cluster is
%roughly related to its density: in general, the smaller the
%cluster area, the more compact it is and the more stable the
%corresponding IC estimate is as well.
%
%When 'hull' is 'on', the colorbar to the right shows the color
%coding of clusters. Legend entries for the graph lines are prepared
%but the legend itself is currently not drawn (the call to 'legend'
%is disabled in the code).
%
%The background color of each cluster hull depends on the average
%intra-cluster similarity (see additional information 1). The color
%thresholds can be changed by input parameter 'colorlimit', e.g.,
%[0.5 0.75 0.9] sets three shades of red for 0.5...0.75 (light
%red), 0.75...0.9, and 0.9...1 (bright red). The same color coding
%is used for the graph lines that present pairwise similarities
%between the estimates. However, for sake of clarity, the
%similarity graph is, by default, suppressed inside those clusters
%whose intra-cluster similarity is above the highest threshold of
%'colorlimit' (by default 0.9). The graph is globally suppressed below
%the lowest limit (by default 0.5). This default mode can be
%changed by using parameters 'dense' and 'graphlimit' (see
%additional information 2 and 3). You can completely suppress
%the similarity graph and/or the convex "cluster hulls" from the
%figure by giving value 'off' for parameters 'line' and/or 'hull'.
%Note that the function may automatically change these thresholds
%in order to keep the number of lines reasonable, i.e., less than
%5000.
%
%ADDITIONAL INFORMATION
%
%(1)The intra-cluster similarities for cluster C are
%the mutual similarities between the estimates in C, and the
%extra-cluster similarities for C are the similarities between the
%estimates in C and the estimates not in C.
%
%(2)Parameter 'graphlimit' sets the lowest similarity value for
%which the graph lines are drawn. The default mode (string 'auto')
%sets this limit equal to the lowest 'colorlimit' (0.5 by default). A
%higher or lower limit can be given explicitly. When you set a
%'graphlimit' lower than the lowest 'colorlimit' the lines in
%between appear as thin gray lines instead of red lines.
%
%(3)Parameter 'dense' sets the limit for intra-cluster similarity
%above which the similarity graph inside a cluster hull
%is suppressed. String 'auto' sets the value equal to highest
%'colorlimit' (0.9 by default).
%
%SEE ALSO
% icassoShow
% icassoDendrogram
%
%COPYRIGHT NOTICE
%This function is a part of Icasso software library
%Copyright (C) 2003-2005 Johan Himberg
%
%This program is free software; you can redistribute it and/or
%modify it under the terms of the GNU General Public License
%as published by the Free Software Foundation; either version 2
%of the License, or any later version.
%
%This program is distributed in the hope that it will be useful,
%but WITHOUT ANY WARRANTY; without even the implied warranty of
%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
%GNU General Public License for more details.
%
%You should have received a copy of the GNU General Public License
%along with this program; if not, write to the Free Software
%Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

% ver 1.2 100105

if nargin<1|isempty(sR),
  error('At least one input argument expected');
end

if isempty(sR.projection.coordinates) | isempty(sR.cluster.partition) | ...
      isempty(sR.cluster.index) | isempty(sR.cluster.similarity),
  error('Missing similarity/projection/cluster information.');
end

% Edge color handed to clusterhull for the hull outlines (dark gray)
EDGECOLOR=[0.3 0.3 0.3];

%% Set defaults and process optional input

default={'line','on','l', 'rdim',...
         'graphlimit','auto','colorlimit',[0.5 0.75 0.9],...
         'dense','auto','hull','on'};

varargin=processvarargin(varargin,default);
num_of_args=length(varargin);

for k=1:2:num_of_args,
  id=varargin{k}; value=varargin{k+1};
  switch lower(id)
   case 'line'
    switch lower(value)
     case 'on'
      graph=1;
     case 'off'
      graph=0;
     otherwise
      error('Option ''line'' must be ''on'' or ''off''.');
    end
   case 'hull'
    switch lower(value)
     case 'on'
      hull=1;
     case 'off'
      hull=0;
     otherwise
      error('Option ''hull'' must be ''on'' or ''off''.');
    end
   case 'l'
    if isnumeric(value),
      level=value;
    else
      switch lower(value)
       case 'rdim'
        level=icassoGet(sR,'rdim');
       otherwise
        error(['Unknown value for identifier ' id]);
      end
    end
   case 'dense'
    if ischar(value),
      switch lower(value)
       case 'auto'
        internallimit='auto';
       otherwise
        error('Option ''dense'' must be string ''auto'' or a scalar in 0...1');
      end
    else
      internallimit=value(1);
      if internallimit<0 | internallimit>1,
        error('Option ''dense'' must be string ''auto'' or a scalar in 0...1');
      end
    end
   case 'graphlimit'
    if ischar(value),
      switch lower(value)
       case 'auto'
        lowlimit='auto';
       otherwise
        error('Option ''graphlimit'' must be string ''auto'' or a scalar in 0...1');
      end
    else
      lowlimit=value(1);
      if lowlimit<0 | lowlimit>1,
        error('Option ''graphlimit'' must be string ''auto'' or a scalar in 0...1');
      end
    end
   case 'colorlimit'
    % An empty or non-numeric value would otherwise only fail later
    % with an obscure indexing error.
    if ~isnumeric(value) | isempty(value),
      error('Option ''colorlimit'' must be a non-empty numeric vector.');
    end
    if any(value(:)==0) | any(value(:)==1),
      error('0 and 1 not allowed in ''colorlimit''.');
    end
    % The limits are later combined with 1 into bin edges for the
    % graph lines, so anything outside 0...1 (or NaN) is meaningless.
    if any(value(:)<0) | any(value(:)>1) | any(isnan(value(:))),
      error('Values in ''colorlimit'' must be between 0 and 1.');
    end
    % Everything below assumes ascending thresholds: element 1 is
    % treated as the lowest limit and later thresholds overwrite the
    % colors assigned by earlier ones.
    clustercolorlimit=sort(value(:));
   otherwise
    % The cluster-level option is called 'L' (there is no 'level').
    error(['Doesn''t recognize option ''' id '''.' sprintf('\n')...
           'Available: ''L'',''dense'',''graphlimit'',' ...
           '''colorlimit'',''hull'', and ''line''.']);
  end
end

% Resolve 'auto' values against the color thresholds
if strcmp(lowlimit,'auto'),
  lowlimit=min(clustercolorlimit);
end

if strcmp(internallimit,'auto'),
  internallimit=max(clustercolorlimit);
end

% Check that the cluster level is a valid row index of the partition
% matrix. The scalar test is done first so that the comparisons below
% always operate on a single value.
maxCluster=size(sR.cluster.partition,1);
if numel(level)~=1,
  error('Cluster level out of range or not specified.');
end
if level~=round(level) | level<1 | level>maxCluster,
  % level~=round(level) also rejects NaN, which would pass a plain
  % range comparison.
  error('Cluster level out of range or not specified.');
end

%%%%% Compute some cluster statistics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Get the partition at the requested level
partition=sR.cluster.partition(level,:);
Ncluster=max(partition);

% cluster statistics (s.internal.avg is used below for coloring and
% for suppressing graph lines inside dense clusters)
c=sR.cluster.similarity;
s=clusterstat(c,partition);

%%%% Get centrotypes %

index2centrotypes=icassoIdx2Centrotype(sR,'partition',partition);

%%%%%%%%%% Visualization %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clf reset; hold on;

p=sR.projection.coordinates;

% define clustercolors: one more color than there are thresholds;
% row 1 belongs to the range below the lowest threshold
clustercolor=redscale(length(clustercolorlimit)+1);

% Reduce similarities
% If partitioning is computed, limit not only by lowlimit but also
% by internallimit: ignore lines inside cluster hulls if average
% internal similarities are over internallimit (dense clusters)

if graph,
  c=reducesim(c,lowlimit,partition,s.internal.avg,internallimit);
end

% set colors for clusters; clusters below every threshold keep NaN.
% Thresholds are ascending, so the highest threshold a cluster
% reaches determines its final color.
faceColorMatrix=NaN(Ncluster,3);
for k=1:length(clustercolorlimit),
  hit=find(s.internal.avg(:)>=clustercolorlimit(k));
  faceColorMatrix(hit,:)=repmat(clustercolor(k+1,:),length(hit),1);
end

% set edgecolors
edgeColorMatrix=repmat(EDGECOLOR,Ncluster,1);

% draw faces for clusters; they have to be in
% bottom; otherwise they shade everything else

if hull,
  h_fill=clusterhull('fill',p,partition,faceColorMatrix);
end

title(sprintf('Estimate space as a 2D %s projection',upper(sR.projection.method)));

% Bin edges and colors for the graph lines. If lines are requested
% below the lowest color threshold, an extra gray bin is prepended.
if lowlimit<clustercolorlimit(1),
  graphlimit=[lowlimit, clustercolorlimit(:)', 1];
  linecolor=[repmat(clustercolor(2,2),1,3).^.5; clustercolor(2:end,:)];
else
  graphlimit=[clustercolorlimit(:)', 1];
  linecolor=clustercolor(2:end,:);
end

if graph,
  h_graph=similaritygraph(p,c, graphlimit, linecolor);
else
  % Only vertices
  h_graph=similaritygraph(p);
end

% Width of the x-axis; used to offset the cluster labels slightly
ax=axis;
xwidth=ax(2)-ax(1);

% Cluster labels
txt=cellstr(num2str([1:Ncluster]'));

% Hull edges...
if hull,
  [h_edge,txtcoord]=clusterhull('edge',p,partition,edgeColorMatrix);
  %h_clusterlabel=text(txtcoord(:,1)-xwidth/100,txtcoord(:,2),txt);
end

%...and centrotypes (yellow dots)

%% Plot centrotypes
h_centrotype=plot(p(index2centrotypes,1),p(index2centrotypes,2),'k.');
set(h_centrotype,'markersize',10,'color','y');

% Cluster labels: right-aligned just left of each centrotype
h_clusterlabel=text(p(index2centrotypes,1)-xwidth/100,p(index2centrotypes,2),txt);
set(h_clusterlabel,'horizontalalignment','right','color',[1 0 0],...
                  'fontsize',15,'fontweight','bold');

set(gca,'box','on');

%% Set colorbar that shows the cluster colors:

if hull,
  caxis([0 size(clustercolor,1)]);
  colormap(clustercolor);
  h_colorbar=colorbar('vert');
  set(h_colorbar,'ytick',[0:size(clustercolor,1)]',...
                 'yticklabel',[0;clustercolorlimit(:); 1]);
  set(get(h_colorbar,'ylabel'),'string','Average intra-cluster similarity (cluster compactness)');
end

%% Prepare legend entries
% NOTE: the legend itself is currently not drawn (the call at the end
% of this section is commented out); the entries are kept so that it
% can be re-enabled.

legendText={'Single-run-estimate','"Best Estimate" (centrotype)'};

if graph,
  % Drop entries whose example handle is NaN
  k=1;
  while k<=length(h_graph.example)
    if isnan(h_graph.example(k)),
      h_graph.example(k)=[];
      h_graph.edge(k)=[];
      h_graph.text(k)=[];
    else
      k=k+1;
    end
  end
  legendText=[legendText, h_graph.text];
  legendHandle=[h_graph.vertex(1);h_centrotype(1);h_graph.example(:)];
else
  legendHandle=[h_graph.vertex(1);h_centrotype(1)];
end

% legend(legendHandle,legendText);

set(gca,'yaxislocation','right');

% Set figure name
set(gcf,'name','Icasso: Similarity Graph');