Skip to content

Commit d53e15a

Browse files
committed Feb 21, 2025
Local run with almost all modules
1 parent f18ecec commit d53e15a

File tree

3 files changed

+102
-70
lines changed

3 files changed

+102
-70
lines changed
 

‎Clustering_conda.yml

+56-56
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
id: clustering_example
1+
id: clustering_example_conda
22
description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
3-
version: 1.3
3+
version: 1.4
44
benchmarker: "Izaskun Mallona, Daniel Incicau"
55
storage: http://omnibenchmark.org:9000
66
benchmark_yaml_spec: 0.04
77
storage_api: S3
8-
storage_bucket_name: clusteringexample
8+
storage_bucket_name: clusteringexampleconda
99
software_backend: conda
1010
software_environments:
1111
clustbench:
@@ -51,74 +51,74 @@ stages:
5151
- id: data
5252
modules:
5353
- id: clustbench
54-
name: "clustbench datasets"
54+
name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
5555
software_environment: "clustbench"
5656
repository:
5757
url: https://github.com/imallona/clustbench_data
5858
commit: 366c5a2
59-
parameters:
59+
parameters: # each trailing comment lists the dataset's possible cardinalities, then its number of curated labelsets
6060
- values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1
6161
- values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1
62-
# - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2
63-
# - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1
64-
# - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1
65-
# - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2
66-
# - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1
67-
# - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1
68-
# - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1
62+
- values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2
63+
- values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1
64+
- values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1
65+
- values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2
66+
- values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1
67+
- values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1
68+
- values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1
6969
- values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1
7070
- values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6
71-
# - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1
72-
# - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2
73-
# - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1
74-
# - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1
75-
# - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2
76-
# - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2
77-
# - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2
71+
- values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1
72+
- values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2
73+
- values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1
74+
- values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1
75+
- values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2
76+
- values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2
77+
- values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2
7878
- values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] # 3, 5 2
79-
# - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1
80-
# - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1
81-
# - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1
82-
# - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1
83-
# - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1
84-
# - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1
85-
# - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1
86-
# - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5
87-
# - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2
88-
# - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1
79+
- values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1
80+
- values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1
81+
- values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1
82+
- values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1
83+
- values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1
84+
- values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1
85+
- values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1
86+
- values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5
87+
- values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2
88+
- values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1
8989
- values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] # 3, 4 2
9090
- values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3
9191
- values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1
92-
# - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1
93-
# - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1
94-
# - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1
95-
# - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1
96-
# - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1
97-
# - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1
98-
# - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1
99-
# - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1
100-
# - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1
92+
- values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1
93+
- values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1
94+
- values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1
95+
- values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1
96+
- values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1
97+
- values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1
98+
- values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1
99+
- values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1
100+
- values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1
101101
- values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1
102-
# - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1
103-
# - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1
104-
# - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1
105-
# - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1
106-
# - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1
107-
# - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1
108-
# - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1
109-
# - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1
102+
- values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1
103+
- values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1
104+
- values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1
105+
- values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1
106+
- values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1
107+
- values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1
108+
- values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1
109+
- values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1
110110
- values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6 2
111111
- values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1
112-
# - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1
113-
# - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1
114-
# - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1
115-
# - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1
116-
# - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1
117-
# - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1
112+
- values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1
113+
- values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1
114+
- values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1
115+
- values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1
116+
- values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1
117+
- values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1
118118
- values: ["--dataset_generator", "wut", "--dataset_name", "x3"] # 4 1
119-
# - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1
120-
# - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1
121-
# - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1
119+
- values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1
120+
- values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1
121+
- values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1
122122
outputs:
123123
- id: data.matrix
124124
path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
@@ -189,7 +189,7 @@ stages:
189189
- values: ["--method", "FCPS_HDBSCAN_4"]
190190
- values: ["--method", "FCPS_HDBSCAN_8"]
191191
- values: ["--method", "FCPS_Diana"]
192-
# - values: ["--method", "FCPS_Fanny"]
192+
- values: ["--method", "FCPS_Fanny"]
193193
- values: ["--method", "FCPS_Hardcl"]
194194
- values: ["--method", "FCPS_Softcl"]
195195
- values: ["--method", "FCPS_Clara"]

‎Clustering_envmodules.yml

+23-7
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
id: clustering_example
1+
id: clustering_example_envmodules
22
description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
3-
version: 1.2
3+
version: 1.4
44
benchmarker: "Izaskun Mallona, Daniel Incicau"
5-
storage: https://play.min.io
5+
storage: http://omnibenchmark.org:9000
66
benchmark_yaml_spec: 0.04
77
storage_api: S3
8-
storage_bucket_name: clustering_example
8+
storage_bucket_name: clusteringexampleenvmodules
99
software_backend: envmodules
1010
software_environments:
1111
clustbench:
@@ -23,24 +23,40 @@ software_environments:
2323
conda: envs/r.yml
2424
apptainer: envs/r.sif
2525
envmodule: fcps # NOTE: placeholder, not the actual module for this environment
26+
rmarkdown:
27+
description: "R with some plotting dependencies"
28+
conda: envs/rmarkdown.yml
29+
apptainer: envs/r.sif # NOTE: placeholder, not the actual image for this environment
30+
envmodule: fcps # NOTE: placeholder, not the actual module for this environment
2631
fcps:
2732
description: "CRAN's FCPS"
2833
conda: envs/fcps.yml
2934
apptainer: envs/fcps.sif
3035
envmodule: fcps
36+
metric_collectors:
37+
- id: plotting
38+
name: "Single-backend metric collector."
39+
software_environment: "rmarkdown"
40+
repository:
41+
url: https://github.com/imallona/clustering_report
42+
commit: f1a5876
43+
inputs:
44+
- metrics.scores
45+
outputs:
46+
- id: plotting.html
47+
path: "{input}/{name}/plotting_report.html"
3148
stages:
32-
3349
## clustbench data ##########################################################
3450

3551
- id: data
3652
modules:
3753
- id: clustbench
38-
name: "clustbench datasets"
54+
name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
3955
software_environment: "clustbench"
4056
repository:
4157
url: https://github.com/imallona/clustbench_data
4258
commit: 366c5a2
43-
parameters:
59+
parameters: # each trailing comment lists the dataset's possible cardinalities, then its number of curated labelsets
4460
- values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1
4561
- values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1
4662
- values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2

‎Clustering_singularity.yml

+23-7
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
id: clustering_example
1+
id: clustering_example_apptainer
22
description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
3-
version: 1.2
3+
version: 1.4
44
benchmarker: "Izaskun Mallona, Daniel Incicau"
5-
storage: https://play.min.io
5+
storage: http://omnibenchmark.org:9000
66
benchmark_yaml_spec: 0.04
77
storage_api: S3
8-
storage_bucket_name: clustering_example
8+
storage_bucket_name: clusteringexampleapptainer
99
software_backend: apptainer
1010
software_environments:
1111
clustbench:
@@ -23,24 +23,40 @@ software_environments:
2323
conda: envs/r.yml
2424
apptainer: envs/r.sif
2525
envmodule: fcps # NOTE: placeholder, not the actual module for this environment
26+
rmarkdown:
27+
description: "R with some plotting dependencies"
28+
conda: envs/rmarkdown.yml
29+
apptainer: envs/r.sif # NOTE: placeholder, not the actual image for this environment
30+
envmodule: fcps # NOTE: placeholder, not the actual module for this environment
2631
fcps:
2732
description: "CRAN's FCPS"
2833
conda: envs/fcps.yml
2934
apptainer: envs/fcps.sif
3035
envmodule: fcps
36+
metric_collectors:
37+
- id: plotting
38+
name: "Single-backend metric collector."
39+
software_environment: "rmarkdown"
40+
repository:
41+
url: https://github.com/imallona/clustering_report
42+
commit: f1a5876
43+
inputs:
44+
- metrics.scores
45+
outputs:
46+
- id: plotting.html
47+
path: "{input}/{name}/plotting_report.html"
3148
stages:
32-
3349
## clustbench data ##########################################################
3450

3551
- id: data
3652
modules:
3753
- id: clustbench
38-
name: "clustbench datasets"
54+
name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
3955
software_environment: "clustbench"
4056
repository:
4157
url: https://github.com/imallona/clustbench_data
4258
commit: 366c5a2
43-
parameters:
59+
parameters: # each trailing comment lists the dataset's possible cardinalities, then its number of curated labelsets
4460
- values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1
4561
- values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1
4662
- values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2

0 commit comments

Comments
 (0)
Please sign in to comment.