-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdpeak_manual.m
270 lines (231 loc) · 6.78 KB
/
dpeak_manual.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
function dpeak_manual(dataPath,percent,kernel)
% Aimed at clustering the data with Density Peak Algorithm (DPeak) manually
% -------------------------------------------------------------------------
% Input:
% dataPath - the file path of data
% percent - average percentage of neighbours
% kernel - gaussian or cutoff kernel
tic;
disp('Description of distance.mat: [i, j, dist(i,j)]')
%加载distances矩阵
datafile = [dataPath,'/distances.mat'];
load(datafile);
xx = distances;
%ND或NL即为xx所表达的样本总数
ND = max(xx(:,2));
NL = max(xx(:,1));
if (NL > ND)
ND = NL;
end
%计算xx的总行数,即为n*(n-1)/2
N = size(xx,1);
%完成距离矩阵初始化
if (exist([dataPath,'distMat.mat'],'file'))
load([dataPath,'distMat.mat']);
dist = distMat;
else
for i = 1 : ND %初始化dist 矩阵,所有元素全为0
for j = 1 : ND
dist(i,j) = 0;
end
end
for i = 1 : N %根据xx矩阵,依次回填dist矩阵
ii = xx(i,1);
jj = xx(i,2);
dist(ii,jj) = xx(i,3);
dist(jj,ii) = xx(i,3);
end
save distance_matrix dist;
end
%计算截距dc
fprintf('average percentage of neighbours (hard coded): %5.6f\n', percent);
%计算前N*percent/100的个数
position = round(N*percent/100);
%将xx第3列,即按距离进行从小到大排序
sda = sort(xx(:,3));%get distance of all points
%处在position位置的元素数值,即是cutoff distance
dc = sda(position); %dc is a distance from the paires of some point which index in dataset is equal to value of variable position
fprintf('Computing Rho with gaussian kernel of radius: %5.6f\n', dc);
%计算局部密度
for i = 1 : ND
rho(i) = 0.;
end
switch(kernel)
case 'gaussian'
for i = 1 : ND-1
for j = i+1 : ND
% i与j距离与dc相差越大,exp(-(dist(i,j)/dc)*(dist(i,j)/dc)值越小,
% 最终累积得到rho(i)越小表示与样本点i距离较近的点少,否则表示样本点i周边有很多距离较近的点
rho(i) = rho(i) + exp(-(dist(i,j)/dc) * (dist(i,j)/dc));
rho(j) = rho(j) + exp(-(dist(i,j)/dc) * (dist(i,j)/dc));
end
end
case 'cutoff' % 论文里公式1的卡方函数
neibors = zeros(ND,1);
for i = 1 : ND-1
count = 0;
for j = 1 : ND
if (i ~= j)
if (dist(i,j)<dc)
count = count + 1;
rho(i) = rho(i) + 1.;
neibors(i,count) = j;
end
end
end
end
otherwise
disp('please input the correct kernel')
end
%取出dist矩阵中值最大元素
maxd = max(max(dist));
%rho_sorted是倒序排序后的向量,ordrho是rho_sorted各元素在原向量rho中的位置,即下标向量,
%即ordrho(1)是密度最大的样本点的位置,rho_sorted(1)是密度值,ordrho(i)是密度值排第i位的样本点
[rho_sorted, ordrho]=sort(rho, 'descend');
%计算距离
for ii = 2 : ND
delta(ordrho(ii)) = maxd;
for jj = 1 : ii-1
if(dist(ordrho(ii),ordrho(jj)) < delta(ordrho(ii)))
delta(ordrho(ii)) = dist(ordrho(ii), ordrho(jj)); %取距离的最小值
nneigh(ordrho(ii)) = ordrho(jj);
end
end
end
delta(ordrho(1)) = max(delta(:)); %密度值最大的点对应的距离值
%方案一:绘制决策图(ρ-δ)
disp('Description of decision_graph: [density, delta]')
fid = fopen('decision_graph', 'w');
for i = 1 : ND
fprintf(fid, '%6.2f %6.2f\n', rho(i), delta(i));
end
%选取聚类中心
disp('Tips: Please select a rectangle enclosing cluster centers')
scrsz = get(0, 'ScreenSize');
figure(1)
plot(rho(:),delta(:),'o','MarkerSize',3,'MarkerFaceColor','k','MarkerEdgeColor','k');
title ('Decision Graph','FontSize',15.0)
xlabel ('\rho')
ylabel ('\delta')
figure(1)
rect = getrect(1);
rhomin = rect(1);
deltamin = rect(2);
%统计聚类中心个数
NCLUST = 0;
for i = 1 : ND
cl(i) = -1; %cl为分组标志数组,cl(i)=j表示第i个数据点分组到第j个cluster
end
for i = 1 : ND
if ((rho(i)>rhomin) && (delta(i)>deltamin))
NCLUST = NCLUST + 1;
cl(i) = NCLUST;
icl(NCLUST) = i; %逆映射,icl(j)=i表示第j个cluster的中心为第i个数据点
end
end
fprintf('NUMBER OF CLUSTERS: %i \n', NCLUST);
%分组
disp('Performing assignation')
for i = 1 : ND
if (cl(ordrho(i)) == -1)
cl(ordrho(i)) = cl(nneigh(ordrho(i))); %密度值不小于i点且距离i点最近
end
end
%halo
for i = 1 : ND
halo(i) = cl(i);
end
if (NCLUST > 1)
for i = 1 : NCLUST
bord_rho(i) = 0.; %边界密度阈值
end
for i = 1 : ND-1
for j = i+1 : ND
if ((cl(i)~=cl(j)) && (dist(i,j)<=dc)) %距离足够小但不属于同一个cluster的i和j
rho_aver = (rho(i)+rho(j))/2.;
if (rho_aver > bord_rho(cl(i)))
bord_rho(cl(i)) = rho_aver;
end
if (rho_aver > bord_rho(cl(j)))
bord_rho(cl(j)) = rho_aver;
end
end
end
end
for i = 1 : ND
if (rho(i) < bord_rho(cl(i))) %halo定义
halo(i) = 0;
end
end
end
%统计核心点和光晕点个数
for i = 1 : NCLUST
nc = 0;
nh = 0;
for j = 1 : ND
if (cl(j) == i)
nc = nc + 1;
end
if (halo(j) == i)
nh = nh + 1;
end
end
fprintf('CLUSTER: %i CENTER: %i ELEMENTS: %i CORE: %i HALO: %i \n', i, icl(i), nc, nh, nc-nh);
end
%可视化
cmap = colormap;
for i = 1 : NCLUST
ic = int8((i*64.) / (NCLUST*1.));
figure(2)
hold on
plot(rho(icl(i)),delta(icl(i)),'o','MarkerSize',10,'MarkerFaceColor',cmap(ic,:),'MarkerEdgeColor',cmap(ic,:));
xlabel('\rho');
ylabel('\delta');
end
if (exist([dataPath,'/points.mat'],'file'))
load([dataPath,'/points.mat']);
else
points = mdscale(dist, 2, 'criterion','metricstress');
save 'points.mat' points;
end
for i = 1 : ND
A(i,1) = 0.;
A(i,2) = 0.;
end
for i = 1 : NCLUST
nn = 0;
ic = int8((i*64.)/(NCLUST*1.));
for j = 1 : ND
if (cl(j) == i)
nn = nn + 1;
A(nn,1) = points(j,1);
A(nn,2) = points(j,2);
end
end
hold on
figure(3)
title ('DPEAK')
plot(A(1:nn,1),A(1:nn,2),'o','MarkerSize',5,'MarkerFaceColor',cmap(ic,:),'MarkerEdgeColor',cmap(ic,:));
end
for i = 1 : ND
if (halo(i) > 0)
ic = int8((halo(i)*64.)/(NCLUST*1.));
hold on
plot(points(i,1),points(i,2),'o','MarkerSize',5,'MarkerFaceColor',cmap(ic,:),'MarkerEdgeColor',cmap(ic,:));
end
end
fr = fopen('cluster_assignation', 'w');
disp('Description of cluster_assignation: [id, cluster assignation without halo control, cluster assignation with halo control]');
for i = 1 : ND
fprintf(fr, '%i %i %i\n',i,cl(i),halo(i));
end
result = cl';
save label result
%把peak画出来
for i = 1 : NCLUST
ic = int8((i*64.)/(NCLUST*1.));
figure(3)
plot(points(icl(i),1),points(icl(i),2),'o','MarkerSize',8,'MarkerFaceColor','k','MarkerEdgeColor','k');
hold on
end
toc;