Skip to content

Commit 8ae9029

Browse files
committed
Add KMeans--* and OutRankS1H from DAMI 2025
1 parent d694d38 commit 8ae9029

File tree

7 files changed

+565
-0
lines changed

7 files changed

+565
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
/*
2+
* This file is part of ELKI:
3+
* Environment for Developing KDD-Applications Supported by Index-Structures
4+
*
5+
* Copyright (C) 2025
6+
* ELKI Development Team
7+
*
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU Affero General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU Affero General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU Affero General Public License
19+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
20+
*/
21+
package elki.outlier.clustering;
22+
23+
import java.util.List;
24+
import java.util.Objects;
25+
26+
import elki.clustering.kmeans.KMeansMinusMinus;
27+
import elki.data.Cluster;
28+
import elki.data.Clustering;
29+
import elki.data.NumberVector;
30+
import elki.data.model.ModelUtil;
31+
import elki.data.type.TypeInformation;
32+
import elki.database.datastore.DataStoreFactory;
33+
import elki.database.datastore.DataStoreUtil;
34+
import elki.database.datastore.WritableDoubleDataStore;
35+
import elki.database.ids.DBIDIter;
36+
import elki.database.ids.DBIDs;
37+
import elki.database.relation.DoubleRelation;
38+
import elki.database.relation.MaterializedDoubleRelation;
39+
import elki.database.relation.Relation;
40+
import elki.distance.NumberVectorDistance;
41+
import elki.math.DoubleMinMax;
42+
import elki.outlier.OutlierAlgorithm;
43+
import elki.result.outlier.BasicOutlierScoreMeta;
44+
import elki.result.outlier.OutlierResult;
45+
import elki.result.outlier.OutlierScoreMeta;
46+
import elki.utilities.documentation.Reference;
47+
import elki.utilities.optionhandling.Parameterizer;
48+
import elki.utilities.optionhandling.parameterization.ChainedParameterization;
49+
import elki.utilities.optionhandling.parameterization.ListParameterization;
50+
import elki.utilities.optionhandling.parameterization.Parameterization;
51+
52+
/**
53+
* K-means--* outlier detection. This version uses the distance to the nearest
54+
* cluster center as outlier score, whereas the version in
55+
* {@link KMeansMinusMinusOutlierDetection} uses the binary labeling produced
56+
* from the classic {@link KMeansMinusMinus} algorithm.
57+
* <p>
58+
* Reference:
59+
* <p>
60+
* Braulio V. Sánchez Vinces, Erich Schubert, Arthur Zimek,
61+
* Robson L. F. Cordeiro.<br>
62+
* A comparative evaluation of clustering-based outlier detection<br>
63+
* Data Mining and Knowledge Discovery 39 (13), 2025.
64+
*
65+
* @author Braulio V.S. Vinces
66+
*/
67+
@Reference(authors = "Braulio V. Sánchez Vinces, Erich Schubert, Arthur Zimek, Robson L. F. Cordeiro", //
68+
title = "A comparative evaluation of clustering-based outlier detection", //
69+
booktitle = "Data Mining and Knowledge Discovery 39 (13)", //
70+
bibkey = "dblp:journals/datamine/VincesSZC25", //
71+
url = "https://doi.org/10.1007/s10618-024-01086-z")
72+
public class KMeansMinusMinusStar<O extends NumberVector> implements OutlierAlgorithm {
73+
/**
74+
* Clustering algorithm to use
75+
*/
76+
KMeansMinusMinus<O> clustering;
77+
78+
/**
79+
* Constructor.
80+
*
81+
* @param clustering Clustering algorithm
82+
*/
83+
public KMeansMinusMinusStar(KMeansMinusMinus<O> clustering) {
84+
super();
85+
this.clustering = clustering;
86+
}
87+
88+
public OutlierResult run(Relation<O> relation) {
89+
Clustering<?> c = clustering.run(relation);
90+
DBIDs ids = relation.getDBIDs();
91+
92+
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
93+
DoubleMinMax mm = new DoubleMinMax();
94+
95+
NumberVectorDistance<? super O> distfunc = clustering.getDistance();
96+
List<? extends Cluster<?>> clusters = c.getAllClusters();
97+
for(Cluster<?> cluster : clusters) {
98+
if(cluster.isNoise()) {
99+
for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
100+
final O obj = relation.get(iter);
101+
double score = Double.POSITIVE_INFINITY;
102+
for(Cluster<?> c2 : clusters) {
103+
// avoid itself
104+
if(Objects.equals(cluster, c2)) {
105+
continue;
106+
}
107+
double dist = distfunc.distance(ModelUtil.getPrototype(c2.getModel(), relation), obj);
108+
score = Math.min(dist, score);
109+
}
110+
// distance to the nearest cluster's center:
111+
scores.put(iter, score);
112+
mm.put(score);
113+
}
114+
}
115+
else {
116+
NumberVector mean = ModelUtil.getPrototype(cluster.getModel(), relation);
117+
for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
118+
// distance to the cluster's center
119+
double score = cluster.size() == 1 ? 0. : distfunc.distance(mean, relation.get(iter));
120+
scores.put(iter, score);
121+
mm.put(score);
122+
}
123+
}
124+
}
125+
126+
// Build result representation.
127+
DoubleRelation scoreResult = new MaterializedDoubleRelation("K-means--* outliers", ids, scores);
128+
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
129+
return new OutlierResult(scoreMeta, scoreResult);
130+
}
131+
132+
@Override
133+
public TypeInformation[] getInputTypeRestriction() {
134+
return clustering.getInputTypeRestriction();
135+
}
136+
137+
/**
138+
* Parameterizer.
139+
*
140+
* @author Braulio V.S. Vinces
141+
*/
142+
public static class Par<O extends NumberVector> implements Parameterizer {
143+
/**
144+
* Clustering algorithm to run.
145+
*/
146+
KMeansMinusMinus<O> clustering;
147+
148+
@SuppressWarnings("unchecked")
149+
@Override
150+
public void configure(Parameterization config) {
151+
ChainedParameterization list = new ChainedParameterization(new ListParameterization() //
152+
.addFlag(KMeansMinusMinus.Par.NOISE_FLAG_ID), config);
153+
list.errorsTo(config);
154+
clustering = list.tryInstantiate(KMeansMinusMinus.class);
155+
}
156+
157+
@Override
158+
public KMeansMinusMinusStar<O> make() {
159+
return new KMeansMinusMinusStar<>(clustering);
160+
}
161+
}
162+
}

0 commit comments

Comments
 (0)