diff --git a/demos/data-lakehouse-iceberg-trino-spark/LakehouseKafkaIngest.xml b/demos/data-lakehouse-iceberg-trino-spark/LakehouseKafkaIngest.xml
index 3ce1d3e3..dd71180e 100644
--- a/demos/data-lakehouse-iceberg-trino-spark/LakehouseKafkaIngest.xml
+++ b/demos/data-lakehouse-iceberg-trino-spark/LakehouseKafkaIngest.xml
@@ -3857,7 +3857,7 @@
Remote URL
- https://www.pegelonline.wsv.de/webservices/rest-api/v2/stations.json
+ https://www.pegelonline.wsv.de/webservices/rest-api/v2/stations.json
disable-http2
@@ -4790,7 +4790,7 @@
Remote URL
- https://www.pegelonline.wsv.de/webservices/rest-api/v2/stations.json
+ https://www.pegelonline.wsv.de/webservices/rest-api/v2/stations.json
disable-http2
@@ -6496,7 +6496,7 @@
Remote URL
- https://www.pegelonline.wsv.de/webservices/rest-api/v2/stations.json
+ https://www.pegelonline.wsv.de/webservices/rest-api/v2/stations.json
disable-http2
diff --git a/demos/demos-v2.yaml b/demos/demos-v2.yaml
index 9f39ab1d..4e04efb6 100644
--- a/demos/demos-v2.yaml
+++ b/demos/demos-v2.yaml
@@ -70,7 +70,7 @@ demos:
supportedNamespaces: ["default"]
resourceRequests:
cpu: 8700m
- memory: 29746Mi
+ memory: 42034Mi
pvc: 75Gi # 30Gi for Kafka
nifi-kafka-druid-water-level-data:
description: Demo ingesting water level data into Kafka using NiFi, streaming it into Druid and creating a Superset dashboard
@@ -91,7 +91,7 @@ demos:
supportedNamespaces: ["default"]
resourceRequests:
cpu: 8900m
- memory: 30042Mi
+ memory: 42330Mi
pvc: 75Gi # 30Gi for Kafka
spark-k8s-anomaly-detection-taxi-data:
description: Demo loading New York taxi data into an S3 bucket and carrying out an anomaly detection analysis on it
@@ -174,7 +174,7 @@ demos:
resourceRequests:
cpu: "80"
memory: 200Gi
- pvc: 1Ti
+ pvc: 300Gi
jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data:
description: Jupyterhub with PySpark and HDFS integration
documentation: https://docs.stackable.tech/stackablectl/stable/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data.html
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/dbeaver_1.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/dbeaver_1.png
index 1dab5dd1..7b8c0d8d 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/dbeaver_1.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/dbeaver_1.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_1.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_1.png
index 95f50815..6146d866 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_1.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_1.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_2.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_2.png
index 52742780..f66bf4a6 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_2.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_2.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_3.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_3.png
index c879fd01..f1b4187b 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_3.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_3.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_4.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_4.png
index 0ff200c8..3bed3cee 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_4.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_4.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_5.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_5.png
index f067af26..cae3ac9c 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_5.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/minio_5.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/nifi_1.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/nifi_1.png
index da238a03..d3eb961e 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/nifi_1.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/nifi_1.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/nifi_2.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/nifi_2.png
index 6ff2f5ea..7fe7514f 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/nifi_2.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/nifi_2.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_1.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_1.png
index 71ba38a9..c0015e1d 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_1.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_1.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_2.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_2.png
index 2c963477..f1c639b3 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_2.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_2.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_3.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_3.png
index aecaef45..4fe8fb2d 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_3.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_3.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_4.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_4.png
index 37fb0b15..193f9dd1 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_4.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_4.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_5.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_5.png
index b5fb7830..3b937b90 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_5.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_5.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_6.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_6.png
index 56a7857d..74d6a976 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_6.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_6.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_7.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_7.png
index 4102cd75..3d4dacd2 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_7.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_7.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_9.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_9.png
index c4174484..f91d9ebf 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_9.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/superset_9.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/topics.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/topics.png
deleted file mode 100644
index ee9107d3..00000000
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/topics.png and /dev/null differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/trino_1.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/trino_1.png
index ad9477ff..95a7bb78 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/trino_1.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/trino_1.png differ
diff --git a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/trino_2.png b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/trino_2.png
index 23c5f860..4412a4bc 100644
Binary files a/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/trino_2.png and b/docs/modules/demos/images/data-lakehouse-iceberg-trino-spark/trino_2.png differ
diff --git a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hbase-table-ui.png b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hbase-table-ui.png
index 7c50211a..ccaf5138 100644
Binary files a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hbase-table-ui.png and b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hbase-table-ui.png differ
diff --git a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hbase-ui-start-page.png b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hbase-ui-start-page.png
index b8dcfb31..5a7cfaaa 100644
Binary files a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hbase-ui-start-page.png and b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hbase-ui-start-page.png differ
diff --git a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data-hfile.png b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data-hfile.png
index 48479bcf..8e224550 100644
Binary files a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data-hfile.png and b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data-hfile.png differ
diff --git a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data-raw.png b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data-raw.png
index d68cf5e8..6e4cb861 100644
Binary files a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data-raw.png and b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data-raw.png differ
diff --git a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data.png b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data.png
index 4a08294f..7b792d31 100644
Binary files a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data.png and b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-data.png differ
diff --git a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-datanode.png b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-datanode.png
index 25ffd1aa..f28b62a8 100644
Binary files a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-datanode.png and b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-datanode.png differ
diff --git a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-overview.png b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-overview.png
index 177d193b..5175a9f5 100644
Binary files a/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-overview.png and b/docs/modules/demos/images/hbase-hdfs-load-cycling-data/hdfs-overview.png differ
diff --git a/docs/modules/demos/images/logging/tenant.png b/docs/modules/demos/images/logging/tenant.png
new file mode 100644
index 00000000..6e9df68c
Binary files /dev/null and b/docs/modules/demos/images/logging/tenant.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_1.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_1.png
index 41d2eac6..f3e8d867 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_1.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_1.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_2.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_2.png
index 887aad0f..dbc9c313 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_2.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_2.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_3.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_3.png
index f9e3f5b7..f4589843 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_3.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_3.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_4.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_4.png
index a5818000..a602fd20 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_4.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_4.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_5.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_5.png
index 9eea932e..8d5a2f60 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_5.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_5.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_6.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_6.png
index f5d4af04..fa4a79d8 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_6.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_6.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_7.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_7.png
index 97edec93..8bf3331f 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_7.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_7.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_8.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_8.png
index 276b127f..5c4906a3 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_8.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/druid_8.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_1.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_1.png
index 804c20e9..730a296d 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_1.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_1.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_2.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_2.png
index 98ca09ec..1464db06 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_2.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_2.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_3.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_3.png
index 21ecf1fa..b3037a02 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_3.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_3.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_4.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_4.png
index 4eb2eac7..5e4dc3ee 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_4.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/minio_4.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_1.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_1.png
index 20f088ad..9882ed17 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_1.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_1.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_2.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_2.png
index ea5993ba..36a8ac12 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_2.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_2.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_3.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_3.png
index 9474cb92..eaaf98ef 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_3.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_3.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_4.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_4.png
index d5517024..967e7303 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_4.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_4.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_5.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_5.png
index 5890ecf4..b4c4bdea 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_5.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/nifi_5.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_1.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_1.png
index 957cb79a..450af118 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_1.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_1.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_10.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_10.png
index 27de0a72..c361003b 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_10.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_10.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_11.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_11.png
index e9b5846c..725d2c7c 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_11.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_11.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_12.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_12.png
index c257cfcd..cbe2e2fb 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_12.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_12.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_2.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_2.png
index a3bda9d3..48dc7f9a 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_2.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_2.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_3.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_3.png
index 93424a04..b03904e5 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_3.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_3.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_4.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_4.png
index 6b1b4687..a00203da 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_4.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_4.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_5.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_5.png
index 2fad187a..61849340 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_5.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_5.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_6.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_6.png
index f3d4de4f..b22215a4 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_6.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_6.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_7.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_7.png
index ee447958..ce5fc2bb 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_7.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_7.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_8.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_8.png
index a82301c6..e3e892ff 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_8.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_8.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_9.png b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_9.png
index 8cd203e8..b32fdc6b 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_9.png and b/docs/modules/demos/images/nifi-kafka-druid-earthquake-data/superset_9.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_1.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_1.png
index d58d1df4..6c752eb4 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_1.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_1.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_2.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_2.png
index e8d6182d..41e658f0 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_2.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_2.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_3.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_3.png
index a86b7614..67518895 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_3.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_3.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_4.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_4.png
index 4814a179..0d25057f 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_4.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_4.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_5.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_5.png
index 8e0c00f4..a291f317 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_5.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_5.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_6.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_6.png
index fa8f86e3..b2febe66 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_6.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_6.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_7.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_7.png
index decdb6ec..ea7362dd 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_7.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_7.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_8.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_8.png
index 2931f39a..5aae8cef 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_8.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/druid_8.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_1.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_1.png
index f2296a3c..b8dcc3d6 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_1.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_1.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_2.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_2.png
index b51e3029..cf80b21e 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_2.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_2.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_3.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_3.png
index 3240d085..eca0ed3d 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_3.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_3.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_4.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_4.png
index 4be64ae0..5cdb721f 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_4.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_4.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_5.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_5.png
index aa0edc14..15cd8db7 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_5.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/minio_5.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_1.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_1.png
index b7301dc4..8922aa4e 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_1.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_1.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_10.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_10.png
index 360bf0de..d4bf96f1 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_10.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_10.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_11.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_11.png
index c8f916d4..ae53f980 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_11.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_11.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_12.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_12.png
index 59f8820a..6ebabb21 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_12.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_12.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_2.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_2.png
index f28f74a9..4e3b86f1 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_2.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_2.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_3.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_3.png
index 91663776..b41bf6bb 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_3.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_3.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_4.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_4.png
index 32f45be8..7e7c41c1 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_4.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_4.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_5.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_5.png
index 309551a4..2943b603 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_5.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_5.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_6.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_6.png
index 054b1d89..baa00472 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_6.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_6.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_7.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_7.png
index 20eb80d2..c33b8bd5 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_7.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_7.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_8.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_8.png
index 72a77f94..cedf8721 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_8.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_8.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_9.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_9.png
index 0ba8b007..e2d3d418 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_9.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/nifi_9.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_1.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_1.png
index 2bb3cc0a..cf199323 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_1.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_1.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_10.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_10.png
index 6d293450..3b836178 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_10.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_10.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_11.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_11.png
index 39eedf1d..b9ca9067 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_11.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_11.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_2.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_2.png
index 9e732632..7aa4fe89 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_2.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_2.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_3.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_3.png
index f53e18f7..28c35fac 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_3.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_3.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_4.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_4.png
index e37cbcf3..72f31947 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_4.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_4.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_5.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_5.png
index 63be3a71..80c4f4c9 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_5.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_5.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_6.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_6.png
index 369fcbfe..e72eecff 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_6.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_6.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_7.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_7.png
index 40497074..9c42c645 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_7.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_7.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_8.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_8.png
index 2ee0feaa..d4c8eda6 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_8.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_8.png differ
diff --git a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_9.png b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_9.png
index be35f89d..60c1c547 100644
Binary files a/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_9.png and b/docs/modules/demos/images/nifi-kafka-druid-water-level-data/superset_9.png differ
diff --git a/docs/modules/demos/pages/airflow-scheduled-job.adoc b/docs/modules/demos/pages/airflow-scheduled-job.adoc
index 769578d4..1f7e6f0c 100644
--- a/docs/modules/demos/pages/airflow-scheduled-job.adoc
+++ b/docs/modules/demos/pages/airflow-scheduled-job.adoc
@@ -98,7 +98,7 @@ continuously:
image::airflow-scheduled-job/airflow_7.png[]
-Click on the `run_every_minute` box in the centre of the page and then select `Log`:
+Click on the `run_every_minute` box in the centre of the page and then select `Logs`:
[WARNING]
====
@@ -118,7 +118,7 @@ image::airflow-scheduled-job/airflow_10.png[]
Go back to DAG overview screen. The `sparkapp_dag` job has a scheduled entry of `None` and a last-execution time
(`2022-09-19, 07:36:55`). This allows a DAG to be executed exactly once, with neither schedule-based runs nor any
-https://airflow.apache.org/docs/apache-airflow/stable/dag-run.html?highlight=backfill#backfill[backfill]. The DAG can
+https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dag-run.html#backfill[backfill]. The DAG can
always be triggered manually again via REST or from within the Webserver UI.
image::airflow-scheduled-job/airflow_11.png[]
diff --git a/docs/modules/demos/pages/data-lakehouse-iceberg-trino-spark.adoc b/docs/modules/demos/pages/data-lakehouse-iceberg-trino-spark.adoc
index c56e4496..7d1e3ac7 100644
--- a/docs/modules/demos/pages/data-lakehouse-iceberg-trino-spark.adoc
+++ b/docs/modules/demos/pages/data-lakehouse-iceberg-trino-spark.adoc
@@ -34,14 +34,14 @@ $ stackablectl demo install data-lakehouse-iceberg-trino-spark
[#system-requirements]
== System requirements
-The demo was developed and tested on a kubernetes cluster with 10 nodes (4 cores (8 threads), 20GB RAM and 30GB HDD).
+The demo was developed and tested on a kubernetes cluster with 10 nodes (4 cores (8 threads), 20GiB RAM and 30GB HDD).
Instance types that loosely correspond to this on the Hyperscalers are:
- *Google*: `e2-standard-8`
- *Azure*: `Standard_D4_v2`
- *AWS*: `m5.2xlarge`
-In addition to these nodes the operators will request multiple persistent volumes with a total capacity of about 1TB.
+In addition to these nodes the operators will request multiple persistent volumes with a total capacity of about 300Gi.
== Overview
@@ -94,7 +94,7 @@ directly into S3 using the https://trino.io/docs/current/connector/hive.html[Hiv
below-mentioned mechanism.
* *Built-in compaction:* Running table maintenance functions such as compacting smaller files (including deleted files)
into larger files for best query performance is recommended. Iceberg offers out-of-the-box tools for this.
-* *Hidden partitioning:* Image you have a table `sales (day varchar, ts timestamp)` partitioned by `day`. Lots of times,
+* *Hidden partitioning:* Imagine you have a table `sales (day varchar, ts timestamp)` partitioned by `day`. Lots of times,
users would run a statement such as `select count(\*) where ts > now() - interval 1 day`, resulting in a full table
scan as the partition column `day` was not filtered in the query. Iceberg resolves this problem by using hidden
partitions. In Iceberg, your table would look like `sales (ts timestamp) with (partitioning = ARRAY['day(ts)'])`. The
@@ -112,35 +112,33 @@ https://iceberg.apache.org[website] or https://github.com/apache/iceberg/[GitHub
To list the installed installed Stackable services run the following command:
-// TODO(Techassi): Update console output below
-
[source,console]
----
$ stackablectl stacklet list
- PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS
-
- hive hive default hive 212.227.224.138:31022
- metrics 212.227.224.138:30459
-
- hive hive-iceberg default hive 212.227.233.131:31511
- metrics 212.227.233.131:30003
-
- kafka kafka default metrics 217.160.118.190:32160
- kafka 217.160.118.190:31736
-
- nifi nifi default https https://217.160.120.117:31499 Admin user: admin, password: adminadmin
-
- opa opa default http http://217.160.222.211:31767
-
- superset superset default external-superset http://212.227.233.47:32393 Admin user: admin, password: adminadmin
- trino trino default coordinator-metrics 212.227.224.138:30610
- coordinator-https https://212.227.224.138:30876
-
- zookeeper zookeeper default zk 212.227.224.138:32321
-
- minio minio default http http://217.160.222.211:32031 Third party service
- console-http http://217.160.222.211:31429 Admin user: admin, password: adminadmin
+┌───────────┬───────────────┬───────────┬────────────────────────────────────────────────────┬─────────────────────────────────┐
+│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
+╞═══════════╪═══════════════╪═══════════╪════════════════════════════════════════════════════╪═════════════════════════════════╡
+│ hive ┆ hive ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ hive ┆ hive-iceberg ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ kafka ┆ kafka ┆ default ┆ metrics 217.160.99.235:31148 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ kafka-tls 217.160.99.235:31202 ┆ │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ nifi ┆ nifi ┆ default ┆ https https://5.250.180.98:31825 ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ opa ┆ opa ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ superset ┆ superset ┆ default ┆ external-http http://87.106.122.58:32452 ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ trino ┆ trino ┆ default ┆ coordinator-metrics 212.227.194.245:31920 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ coordinator-https https://212.227.194.245:30841 ┆ │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ zookeeper ┆ zookeeper ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ minio ┆ minio-console ┆ default ┆ http http://217.160.99.235:30238 ┆ │
+└───────────┴───────────────┴───────────┴────────────────────────────────────────────────────┴─────────────────────────────────┘
----
include::partial$instance-hint.adoc[]
@@ -150,7 +148,7 @@ include::partial$instance-hint.adoc[]
=== Listing Buckets
The S3 provided by MinIO is used as persistent storage to store all the data used. Open the `minio` endpoint
-`console-http` retrieved by the `stackablectl stacklet list` command in your browser (http://217.160.222.211:31429 in
+`http` retrieved by the `stackablectl stacklet list` command in your browser (http://217.160.99.235:30238 in
this case).
image::data-lakehouse-iceberg-trino-spark/minio_1.png[]
@@ -168,7 +166,7 @@ Here, you can see the two buckets contained in the S3:
=== Inspecting Lakehouse
-Click on the blue button `Browse` on the bucket `lakehouse`.
+Click on the bucket `lakehouse`.
image::data-lakehouse-iceberg-trino-spark/minio_3.png[]
@@ -177,7 +175,7 @@ Multiple folders (called prefixes in S3), each containing a different dataset, a
image::data-lakehouse-iceberg-trino-spark/minio_4.png[]
-As you can see, the table `house-sales` is partitioned by day. Go ahead and click on any folder.
+As you can see, the table `house-sales` is partitioned by year. Go ahead and click on any folder.
image::data-lakehouse-iceberg-trino-spark/minio_5.png[]
@@ -199,7 +197,7 @@ sources are statically downloaded (e.g. as CSV), and others are fetched dynamica
=== View ingestion jobs
You can have a look at the ingestion job running in NiFi by opening the NiFi endpoint `https` from your
-`stackablectl stacklet list` command output (https://217.160.120.117:31499 in this case).
+`stackablectl stacklet list` command output (https://5.250.180.98:31825 in this case).
[NOTE]
====
@@ -215,17 +213,18 @@ Log in with the username `admin` and password `adminadmin`.
image::data-lakehouse-iceberg-trino-spark/nifi_2.png[]
As you can see, the NiFi workflow consists of lots of components. You can zoom in by using your mouse and mouse wheel.
-On the left side are two strands, that
+On the left side are three strands, that
. Fetch the list of known water-level stations and ingest them into Kafka.
-. Continuously run a loop fetching the measurements of the last 30 for every measuring station and ingesting them into
+. Fetch measurements of the last 30 days for every measuring station and ingest them into Kafka.
+. Continuously run a loop fetching the measurements for every measuring station and ingesting them into
Kafka.
-On the right side are three strands that
+On the right side are three strands, that
. Fetch the current shared bike station information
. Fetch the current shared bike station status
-. Fetch the current shared bike bike status
+. Fetch the current shared bike status
For details on the NiFi workflow ingesting water-level data, please read the
xref:nifi-kafka-druid-water-level-data.adoc#_nifi[nifi-kafka-druid-water-level-data documentation on NiFi].
@@ -278,7 +277,7 @@ schema = StructType([ \
])
----
-Afterwards, a streaming read from Kafka is started. It reads from our Kafka at `kafka:9090` with the topic
+Afterwards, a streaming read from Kafka is started. It reads from our Kafka at `kafka:9093` with the topic
`water_levels_measurements`. When starting up, the job will ready all the existing messages in Kafka (read from
earliest) and will process 50000000 records as a maximum in a single batch. As Kafka has retention set up, Kafka records
might alter out of the topic before Spark has read the records, which can be the case when the Spark application wasn't
@@ -294,7 +293,7 @@ explanation.
spark \
.readStream \
.format("kafka") \
-.option("kafka.bootstrap.servers", "kafka:9092") \
+.options(**kafkaOptions) \
.option("subscribe", "water_levels_measurements") \
.option("startingOffsets", "earliest") \
.option("maxOffsetsPerTrigger", 50000000) \
@@ -470,7 +469,7 @@ Trino is used to enable SQL access to the data.
=== Accessing the web interface
Open up the the Trino endpoint `coordinator-https` from your `stackablectl stacklet list` command output
-(https://212.227.224.138:30876 in this case).
+(https://212.227.194.245:30841 in this case).
image::data-lakehouse-iceberg-trino-spark/trino_1.png[]
@@ -498,7 +497,7 @@ Here you can see all the available Trino catalogs.
== Superset
Superset provides the ability to execute SQL queries and build dashboards. Open the Superset endpoint
-`external-superset` in your browser (http://212.227.233.47:32393 in this case).
+`external-http` in your browser (http://87.106.122.58:32452 in this case).
image::data-lakehouse-iceberg-trino-spark/superset_1.png[]
@@ -526,7 +525,7 @@ Another dashboard to look at is `Taxi trips`.
image::data-lakehouse-iceberg-trino-spark/superset_6.png[]
-There are multiple other dashboards you can explore on you own.
+There are multiple other dashboards you can explore on your own.
=== Viewing Charts
@@ -534,8 +533,8 @@ The dashboards consist of multiple charts. To list the charts, select the `Chart
=== Executing arbitrary SQL statements
-Within Superset, you can create dashboards and run arbitrary SQL statements. On the top click on the tab `SQL Lab` ->
-`SQL Editor`.
+Within Superset, you can create dashboards and run arbitrary SQL statements. On the top click on the tab `SQL` ->
+`SQL Lab`.
image::data-lakehouse-iceberg-trino-spark/superset_7.png[]
@@ -544,6 +543,12 @@ On the left, select the database `Trino lakehouse`, the schema `house_sales`, an
image::data-lakehouse-iceberg-trino-spark/superset_8.png[]
+[NOTE]
+====
+This older screenshot shows what the table preview would look like. Currently, there is an https://github.com/apache/superset/issues/25307[open issue]
+with previewing Trino tables using the Iceberg connector. This doesn't affect the following execution of the SQL statement.
+====
+
In the right textbox, you can enter the desired SQL statement. If you want to avoid making one up, use the following:
[source,sql]
diff --git a/docs/modules/demos/pages/hbase-hdfs-load-cycling-data.adoc b/docs/modules/demos/pages/hbase-hdfs-load-cycling-data.adoc
index 08885ab5..9eaec607 100644
--- a/docs/modules/demos/pages/hbase-hdfs-load-cycling-data.adoc
+++ b/docs/modules/demos/pages/hbase-hdfs-load-cycling-data.adoc
@@ -51,33 +51,31 @@ image::hbase-hdfs-load-cycling-data/overview.png[]
To list the installed Stackable services run the following command: `stackablectl stacklet list`
-//TODO(Techassi): Update console output
-
[source,console]
----
$ stackablectl stacklet list
-PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS
-
- hbase hbase default regionserver 172.18.0.5:32282
- ui http://172.18.0.5:31527
- metrics 172.18.0.5:31081
-
- hdfs hdfs default datanode-default-0-metrics 172.18.0.2:31441
- datanode-default-0-data 172.18.0.2:32432
- datanode-default-0-http http://172.18.0.2:30758
- datanode-default-0-ipc 172.18.0.2:32323
- journalnode-default-0-metrics 172.18.0.5:31123
- journalnode-default-0-http http://172.18.0.5:30038
- journalnode-default-0-https https://172.18.0.5:31996
- journalnode-default-0-rpc 172.18.0.5:30080
- namenode-default-0-metrics 172.18.0.2:32753
- namenode-default-0-http http://172.18.0.2:32475
- namenode-default-0-rpc 172.18.0.2:31639
- namenode-default-1-metrics 172.18.0.4:32202
- namenode-default-1-http http://172.18.0.4:31486
- namenode-default-1-rpc 172.18.0.4:31874
-
- zookeeper zookeeper default zk 172.18.0.4:32469
+
+┌───────────┬───────────┬───────────┬──────────────────────────────────────────────────────────────┬─────────────────────────────────┐
+│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
+╞═══════════╪═══════════╪═══════════╪══════════════════════════════════════════════════════════════╪═════════════════════════════════╡
+│ hbase ┆ hbase ┆ default ┆ regionserver 172.18.0.2:31521 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ ui-http 172.18.0.2:32064 ┆ │
+│ ┆ ┆ ┆ metrics 172.18.0.2:31372 ┆ │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ hdfs ┆ hdfs ┆ default ┆ datanode-default-0-listener-data 172.18.0.2:31990 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ datanode-default-0-listener-http http://172.18.0.2:30659 ┆ │
+│ ┆ ┆ ┆ datanode-default-0-listener-ipc 172.18.0.2:30678 ┆ │
+│ ┆ ┆ ┆ datanode-default-0-listener-metrics 172.18.0.2:31531 ┆ │
+│ ┆ ┆ ┆ namenode-default-0-http http://172.18.0.2:32543 ┆ │
+│ ┆ ┆ ┆ namenode-default-0-metrics 172.18.0.2:30098 ┆ │
+│ ┆ ┆ ┆ namenode-default-0-rpc 172.18.0.2:30915 ┆ │
+│ ┆ ┆ ┆ namenode-default-1-http http://172.18.0.2:31333 ┆ │
+│ ┆ ┆ ┆ namenode-default-1-metrics 172.18.0.2:30862 ┆ │
+│ ┆ ┆ ┆ namenode-default-1-rpc 172.18.0.2:31440 ┆ │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ zookeeper ┆ zookeeper ┆ default ┆ ┆ Available, Reconciling, Running │
+└───────────┴───────────┴───────────┴──────────────────────────────────────────────────────────────┴─────────────────────────────────┘
+
----
include::partial$instance-hint.adoc[]
@@ -217,18 +215,18 @@ Below you will see the overview of your HDFS cluster.
image::hbase-hdfs-load-cycling-data/hdfs-overview.png[]
-The UI will give you information on the datanodes via the datanodes tab.
+The UI will give you information on the datanodes via the `Datanodes` tab.
image::hbase-hdfs-load-cycling-data/hdfs-datanode.png[]
-You can also browse the filesystem via the Utilities menu.
+You can also browse the file system by clicking on the `Utilities` tab and selecting `Browse the file system`.
image::hbase-hdfs-load-cycling-data/hdfs-data.png[]
-The raw data from the distcp job can be found here.
+Navigate in the file system to the folder `data` and then the `raw` folder. Here you can find the raw data from the distcp job.
image::hbase-hdfs-load-cycling-data/hdfs-data-raw.png[]
-The structure of the Hfiles can be seen here.
+Selecting the folder `data` and then `hfile` instead gives you the structure of the Hfiles.
image::hbase-hdfs-load-cycling-data/hdfs-data-hfile.png[]
diff --git a/docs/modules/demos/pages/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data.adoc b/docs/modules/demos/pages/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data.adoc
index ac836118..cdf63df4 100644
--- a/docs/modules/demos/pages/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data.adoc
+++ b/docs/modules/demos/pages/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data.adoc
@@ -87,7 +87,7 @@ There should be one parquet file containing taxi trip data from September 2020.
== JupyterHub
-Have a look at the available Pods before logging in (operator pods are left out for clarity, you will see more Pods):
+Have a look at the available Pods before logging in:
[source,console]
----
@@ -118,8 +118,8 @@ You should see the JupyterHub login page.
image::jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/jupyter_hub_login.png[]
-Log in with username `admin` and password `adminadmin`. There should appear a new pod called `jupyter-admin` (operator
-pods are left out for clarity, you will see more Pods):
+Log in with username `admin` and password `adminadmin`.
+There should appear a new pod called `jupyter-admin`:
[source,console]
----
@@ -140,14 +140,14 @@ You should arrive at your workspace:
image::jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/jupyter_hub_workspace.png[]
-Now you can click on the `notebooks` folder on the left, open and run the contained file. Click on the double arrow (⏩️) to
-execute the Python scripts.
+Now you can double-click on the `notebook` folder on the left, open and run the contained file.
+Click on the double arrow (⏩️) to execute the Python scripts.
+
+image::jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/jupyter_hub_run_notebook.png[]
You can also inspect the `hdfs` folder where the `core-site.xml` and `hdfs-site.xml` from
the discovery `ConfigMap` of the HDFS cluster are located.
-image::jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/jupyter_hub_run_notebook.png[]
-
[NOTE]
====
The image defined for the spark job must contain all dependencies needed for that job to run. For pyspark jobs, this
diff --git a/docs/modules/demos/pages/logging.adoc b/docs/modules/demos/pages/logging.adoc
index 86b98250..12cd0aad 100644
--- a/docs/modules/demos/pages/logging.adoc
+++ b/docs/modules/demos/pages/logging.adoc
@@ -86,7 +86,12 @@ Log in with the username `admin` and password `adminadmin`.
NOTE: On first login, you will be presented with some options. Feel free to bypass them to get to the logs.
-Click _Discovery_ in the menu to view the recent logs. If you do not see anything, increase the search window to greater than _Last 15 minutes_.
+Select the `Global` tenant to have access to the pre-configured `vector-*` index pattern.
+
+image::logging/tenant.png[]
+
+Click _Discover_ in the menu on the left to view the recent logs.
+If you do not see anything, increase the search window to greater than _Last 15 minutes_.
image::logging/logs.png[]
diff --git a/docs/modules/demos/pages/nifi-kafka-druid-earthquake-data.adoc b/docs/modules/demos/pages/nifi-kafka-druid-earthquake-data.adoc
index dfcdb9b9..847fc05b 100644
--- a/docs/modules/demos/pages/nifi-kafka-druid-earthquake-data.adoc
+++ b/docs/modules/demos/pages/nifi-kafka-druid-earthquake-data.adoc
@@ -1,7 +1,7 @@
= nifi-kafka-druid-earthquake-data
:page-aliases: stable@stackablectl::demos/nifi-kafka-druid-earthquake-data.adoc
-:superset-docs: https://superset.apache.org/docs/creating-charts-dashboards/creating-your-first-dashboard#creating-charts-in-explore-view
+:superset-docs: https://superset.apache.org/docs/using-superset/creating-your-first-dashboard/#creating-charts-in-explore-view
:druid-tutorial: https://druid.apache.org/docs/latest/tutorials/tutorial-kafka.html#loading-data-with-the-data-loader
:k8s-cpu: https://kubernetes.io/docs/tasks/debug/debug-cluster/resource-metrics-pipeline/#cpu
:earthquake: https://earthquake.usgs.gov/earthquakes/feed/v1.0/csv.php
@@ -27,7 +27,7 @@ FQDN service names (including the namespace), so that the used TLS certificates
To run this demo, your system needs at least:
* 9 {k8s-cpu}[cpu units] (core/hyperthread)
-* 30GiB memory
+* 42GiB memory
* 75GiB disk storage
== Overview
@@ -37,7 +37,7 @@ This demo will
* Install the required Stackable operators.
* Spin up the following data products:
** *Superset*: A modern data exploration and visualization platform. This demo utilizes Superset to retrieve data from
- Druid via SQL queries and build dashboards on top of that data
+ Druid via SQL queries and build dashboards on top of that data.
** *Kafka*: A distributed event streaming platform for high-performance data pipelines, streaming analytics and data
integration. This demo uses it as an event streaming platform to stream the data in near real-time.
** *NiFi*: An easy-to-use, robust system to process and distribute data. This demo uses it to fetch earthquake data
@@ -54,39 +54,40 @@ charts. You can see the deployed products and their relationship in the followin
image::nifi-kafka-druid-earthquake-data/overview.png[]
-== Listing the deployed Stackable services
+== List the deployed Stackable services
To list the installed Stackable services run the following command:
[source,console]
----
$ stackablectl stacklet list
-┌───────────┬─────────────┬───────────┬────────────────────────────────────────────────┐
-│ Product ┆ Name ┆ Namespace ┆ Endpoints │
-╞═══════════╪═════════════╪═══════════╪════════════════════════════════════════════════╡
-│ druid ┆ druid ┆ default ┆ broker-metrics 172.18.0.2:32002 │
-│ ┆ ┆ ┆ broker-https https://172.18.0.2:32304 │
-│ ┆ ┆ ┆ coordinator-metrics 172.18.0.2:32058 │
-│ ┆ ┆ ┆ coordinator-https https://172.18.0.2:32545 │
-│ ┆ ┆ ┆ historical-metrics 172.18.0.2:30277 │
-│ ┆ ┆ ┆ historical-https https://172.18.0.2:30903 │
-│ ┆ ┆ ┆ middlemanager-metrics 172.18.0.2:32459 │
-│ ┆ ┆ ┆ middlemanager-https https://172.18.0.2:31967 │
-│ ┆ ┆ ┆ router-metrics 172.18.0.2:31720 │
-│ ┆ ┆ ┆ router-https https://172.18.0.2:32656 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ kafka ┆ kafka ┆ default ┆ metrics 172.18.0.2:31501 │
-│ ┆ ┆ ┆ kafka-tls 172.18.0.2:31237 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ nifi ┆ nifi ┆ default ┆ https https://172.18.0.2:31214 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ superset ┆ superset ┆ default ┆ external-superset http://172.18.0.2:30677 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ zookeeper ┆ zookeeper ┆ default ┆ zk 172.18.0.2:32682 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ minio ┆ minio-druid ┆ default ┆ http http://172.18.0.2:31516 │
-│ ┆ ┆ ┆ console-http http://172.18.0.2:30835 │
-└───────────┴─────────────┴───────────┴────────────────────────────────────────────────┘
+
+┌───────────┬───────────────┬───────────┬─────────────────────────────────────────────────┬─────────────────────────────────┐
+│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
+╞═══════════╪═══════════════╪═══════════╪═════════════════════════════════════════════════╪═════════════════════════════════╡
+│ druid ┆ druid ┆ default ┆ broker-metrics 172.18.0.2:32001 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ broker-https https://172.18.0.2:32141 ┆ │
+│ ┆ ┆ ┆ coordinator-metrics 172.18.0.2:32353 ┆ │
+│ ┆ ┆ ┆ coordinator-https https://172.18.0.2:30738 ┆ │
+│ ┆ ┆ ┆ historical-metrics 172.18.0.2:32009 ┆ │
+│ ┆ ┆ ┆ historical-https https://172.18.0.2:32145 ┆ │
+│ ┆ ┆ ┆ middlemanager-metrics 172.18.0.2:32603 ┆ │
+│ ┆ ┆ ┆ middlemanager-https https://172.18.0.2:30162 ┆ │
+│ ┆ ┆ ┆ router-metrics 172.18.0.2:32763 ┆ │
+│ ┆ ┆ ┆ router-https https://172.18.0.2:31642 ┆ │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ kafka ┆ kafka ┆ default ┆ metrics 172.18.0.2:30429 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ kafka-tls 172.18.0.2:32602 ┆ │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ nifi ┆ nifi ┆ default ┆ https https://172.18.0.2:30596 ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ superset ┆ superset ┆ default ┆ external-http http://172.18.0.2:32569 ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ zookeeper ┆ zookeeper ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ minio ┆ minio-console ┆ default ┆ http http://172.18.0.2:30902 ┆ │
+└───────────┴───────────────┴───────────┴─────────────────────────────────────────────────┴─────────────────────────────────┘
+
----
include::partial$instance-hint.adoc[]
@@ -98,7 +99,7 @@ are structured in dedicated queues called topics. The test data will be put into
are produced (put in) by the test data generator and consumed (read) by Druid afterwards in the same order they were
created.
-As Kafka has no web interface, you must use a Kafka client like h{kcat}[kafkacat]. Kafka uses mutual TLS, so clients
+As Kafka has no web interface, you must use a Kafka client like {kcat}[kcat]. Kafka uses mutual TLS, so clients
wanting to connect to Kafka must present a valid TLS certificate. The easiest way to obtain this is to shell into the
`kafka-broker-default-0` Pod, as we will do in the following section for demonstration purposes. For a production setup,
you should spin up a dedicated Pod provisioned with a certificate acting as a Kafka client instead of shell-ing into the
@@ -108,9 +109,12 @@ Kafka Pod.
You can execute a command on the Kafka broker to list the available topics as follows:
+// In the following commands the kcat-prober container instead of the kafka container is used to send requests to Kafka.
+// This is necessary because kcat cannot use key- and truststore files with empty passwords, which are mounted here to the kafka container.
+// However, the kcat-prober container has TLS certificates mounted, which can be used by kcat to connect to Kafka.
[source,console]
----
-$ kubectl exec -it kafka-broker-default-0 -c kafka -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_server_mount/ca.crt -L"
+$ kubectl exec -it kafka-broker-default-0 -c kcat-prober -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_cert_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_cert_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_cert_server_mount/ca.crt -L"
Metadata for all topics (from broker -1: ssl://localhost:9093/bootstrap):
1 brokers:
broker 1001 at 172.18.0.2:32175 (controller)
@@ -132,7 +136,7 @@ parameter.
[source,console]
----
-$ kubectl exec -it kafka-broker-default-0 -c kafka -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_server_mount/ca.crt -C -t earthquakes -c 1"
+$ kubectl exec -it kafka-broker-default-0 -c kcat-prober -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_cert_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_cert_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_cert_server_mount/ca.crt -C -t earthquakes -c 1"
----
Below is an example of the output of one record:
@@ -171,7 +175,7 @@ The given pattern will print some metadata of the record.
[source,console]
----
-$ kubectl exec -it kafka-broker-default-0 -c kafka -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_server_mount/ca.crt -C -t earthquakes -o -8 -c 8 -f 'Topic %t / Partition %p / Offset: %o / Timestamp: %T\n'"
+$ kubectl exec -it kafka-broker-default-0 -c kcat-prober -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_cert_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_cert_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_cert_server_mount/ca.crt -C -t earthquakes -o -8 -c 8 -f 'Topic %t / Partition %p / Offset: %o / Timestamp: %T\n'"
Topic earthquakes / Partition 0 / Offset: 385011 / Timestamp: 1680607795568
Topic earthquakes / Partition 0 / Offset: 385012 / Timestamp: 1680607795568
Topic earthquakes / Partition 0 / Offset: 385013 / Timestamp: 1680607795570
@@ -195,7 +199,7 @@ Kafka.
=== View the testdata-generation Job
You can have a look at the ingestion job running in NiFi by opening the endpoint `https` from your
-`stackablectl stacklet list` command output. In this case, it is https://172.18.0.3:32558. Open it with your favourite
+`stackablectl stacklet list` command output. In this case, it is https://172.18.0.2:30596. Open it with your favourite
browser. Suppose you get a warning regarding the self-signed certificate generated by the
xref:home:secret-operator:index.adoc[Secret Operator] (e.g. Warning: Potential Security Risk Ahead). In that case, you must
tell your browser to trust the website and continue.
@@ -214,11 +218,11 @@ Double-click on the `InvokeHTTP` processor to show the processor details.
image::nifi-kafka-druid-earthquake-data/nifi_3.png[]
-Head over to the Tab `PROPERTIES`.
+Head over to the tab `PROPERTIES`.
image::nifi-kafka-druid-earthquake-data/nifi_4.png[]
-Here, you can see the setting `Remote URl`, which specifies the download URL from where the CSV file is retrieved. Close
+Here, you can see the setting `HTTP URL`, which specifies the download URL from where the CSV file is retrieved. Close
the processor details popup by clicking `OK`. Afterwards, double-click on the processor `PublishKafkaRecord_2_6`.
image::nifi-kafka-druid-earthquake-data/nifi_5.png[]
@@ -235,12 +239,12 @@ Druid deep storage is based on the S3 store provided by MinIO.
=== View the Ingestion job
-You can have a look at the ingestion job running in Druid by opening the endpoint `router-http` from your
-`stackablectl stacklet list` command output (http://172.18.0.4:30109 in this case).
+You can have a look at the ingestion job running in Druid by opening the endpoint `router-https` from your
+`stackablectl stacklet list` command output (http://172.18.0.2:31642 in this case).
image::nifi-kafka-druid-earthquake-data/druid_1.png[]
-By clicking on `Ingestion` at the top, you can see the running ingestion jobs.
+By clicking on `Supervisors` at the top, you can see the running ingestion jobs.
image::nifi-kafka-druid-earthquake-data/druid_2.png[]
@@ -250,7 +254,7 @@ errors.
image::nifi-kafka-druid-earthquake-data/druid_3.png[]
-The statistics show that Druid is currently ingesting `1251` records/s and has ingested 2.1 million records already. All
+The statistics show that Druid ingested `5074` records during the last minute and has ingested 3 million records already. All
entries have been consumed successfully, indicated by having no `processWithError`, `thrownAway` or `unparseable`
records.
@@ -261,8 +265,8 @@ by clicking on `Datasources` at the top.
image::nifi-kafka-druid-earthquake-data/druid_4.png[]
-You can see the data source's segments by clicking on the `earthquake` data source. In this case, the `earthquake` data
-source is partitioned by the year of the earthquake, resulting in 73 segments.
+You can see the data source's segments by clicking on `segments` under `Availability` for the `earthquake` data source. In this case, the `earthquake` data
+source is partitioned by the year of the earthquakes, resulting in 73 segments.
image::nifi-kafka-druid-earthquake-data/druid_5.png[]
@@ -271,7 +275,7 @@ the top.
image::nifi-kafka-druid-earthquake-data/druid_6.png[]
-You can now enter any arbitrary SQL statement, to e.g. list 10 earthquakes runs
+You can now enter any arbitrary SQL statement. For example, to list 10 earthquakes, run
[source,sql]
----
@@ -296,8 +300,8 @@ image::nifi-kafka-druid-earthquake-data/druid_8.png[]
== Superset
-Superset provides the ability to execute SQL queries and build dashboards. Open the endpoint `external-superset` in your
-browser (http://172.18.0.3:32108 in this case).
+Superset provides the ability to execute SQL queries and build dashboards. Open the endpoint `external-http` in your
+browser (http://172.18.0.2:32569 in this case).
image::nifi-kafka-druid-earthquake-data/superset_1.png[]
@@ -311,7 +315,7 @@ The demo has created a Dashboard to visualize the earthquake data. To open it, c
image::nifi-kafka-druid-earthquake-data/superset_3.png[]
-Click on the dashboard called `Earthquakes`. It might take some time until the dashboards renders all included charts.
+Click on the dashboard called `Earthquakes`. It might take some time until the dashboard renders all included charts.
image::nifi-kafka-druid-earthquake-data/superset_4.png[]
@@ -321,7 +325,7 @@ The dashboard `Earthquakes` consists of multiple charts. To list the charts, cli
image::nifi-kafka-druid-earthquake-data/superset_5.png[]
-Click on the Chart `Number of earthquakes my magnitude`. On the left side you can modify the chart and click on `Run` to
+Click on the Chart `Number of earthquakes by magnitude`. On the left side you can modify the chart and click on `Update Chart` to
see the effect.
image::nifi-kafka-druid-earthquake-data/superset_6.png[]
@@ -349,7 +353,7 @@ image::nifi-kafka-druid-earthquake-data/superset_9.png[]
=== Execute arbitrary SQL statements
Within Superset you can not only create dashboards but also run arbitrary SQL statements. On the top click on the tab
-`SQL Lab` -> `SQL Editor`.
+`SQL` -> `SQL Lab`.
image::nifi-kafka-druid-earthquake-data/superset_10.png[]
@@ -374,7 +378,7 @@ image::nifi-kafka-druid-earthquake-data/superset_12.png[]
== MinIO
The S3 provided by MinIO is used as a persistent deep storage for Druid to store all the data used. Open the `minio`
-endpoint `console-http` in your browser (http://172.18.0.4:31664 in this case).
+endpoint `http` in your browser (http://172.18.0.2:30902 in this case).
image::nifi-kafka-druid-earthquake-data/minio_1.png[]
@@ -382,13 +386,13 @@ Log in with the username `admin` and password `adminadmin`.
image::nifi-kafka-druid-earthquake-data/minio_2.png[]
-Click on the blue button `Browse` on the bucket `druid` and open the folders `data` -> `earthquakes`.
+Click on the bucket `demo` and open the folders `data` -> `earthquakes`.
image::nifi-kafka-druid-earthquake-data/minio_3.png[]
-As you can see druid saved 199MB of data within 73 prefixes (folders). One prefix corresponds to on segment which in
+As you can see Druid saved 201.5 MiB of data within 73 prefixes (folders). One prefix corresponds to one segment which in
turn contains all the data of a year. If you don't see any folders or files, the reason is that Druid has not saved its
-data from memory to the deep storage yet. After waiting a few minutes, the data should have been flushed to S3 and
+data from memory to the deep storage yet. After waiting for roughly an hour, the data should have been flushed to S3 and
show up.
image::nifi-kafka-druid-earthquake-data/minio_4.png[]
@@ -419,7 +423,7 @@ You also can create additional charts and bundle them together in a Dashboard. H
=== Load additional data
You can use the NiFi web interface to collect arbitrary data and write it to Kafka (it's recommended to use new Kafka
-topics for that). Alternatively, you can use a Kafka client like {kcat}[kafkacat] to create new topics and ingest data.
+topics for that). Alternatively, you can use a Kafka client like {kcat}[kcat] to create new topics and ingest data.
Using the Druid web interface, you can start an ingestion job that consumes and stores the data in an internal data
source. There is an excellent {druid-tutorial}[tutorial] from Druid on how to do this. Afterwards, the data source can
be analyzed within Druid and Superset, like the earthquake data.
diff --git a/docs/modules/demos/pages/nifi-kafka-druid-water-level-data.adoc b/docs/modules/demos/pages/nifi-kafka-druid-water-level-data.adoc
index d3cd4934..54e2924b 100644
--- a/docs/modules/demos/pages/nifi-kafka-druid-water-level-data.adoc
+++ b/docs/modules/demos/pages/nifi-kafka-druid-water-level-data.adoc
@@ -1,7 +1,7 @@
= nifi-kafka-druid-water-level-data
:page-aliases: stable@stackablectl::demos/nifi-kafka-druid-water-level-data.adoc
-:superset: https://superset.apache.org/docs/creating-charts-dashboards/creating-your-first-dashboard#creating-charts-in-explore-view
+:superset: https://superset.apache.org/docs/using-superset/creating-your-first-dashboard/#creating-charts-in-explore-view
:druid-tutorial: https://druid.apache.org/docs/latest/tutorials/tutorial-kafka.html#loading-data-with-the-data-loader
:k8s-cpu: https://kubernetes.io/docs/tasks/debug/debug-cluster/resource-metrics-pipeline/#cpu
:pegelonline-rest: https://www.pegelonline.wsv.de/webservice/dokuRestapi
@@ -27,7 +27,7 @@ FQDN service names (including the namespace), so that the used TLS certificates
To run this demo, your system needs at least:
* 9 {k8s-cpu}[cpu units] (core/hyperthread)
-* 30GiB memory
+* 42GiB memory
* 75GiB disk storage
== Overview
@@ -48,7 +48,7 @@ This demo will
* Ingest water level data from the {pegelonline}[PEGELONLINE web service] into Kafka. The data contains measured water
levels of different measuring stations all around Germany. If the web service is unavailable, this demo will not work,
as it needs the web service to ingest the data.
-** First, historical data from the last 31 days will be fetched and ingested.
+** First, historical data from the last 30 days will be fetched and ingested.
** Afterwards, the demo will fetch the current measurement of every station approximately every two minutes and ingest it
near-real-time into Kafka.
* Start a Druid ingestion job that ingests the data into the Druid instance.
@@ -66,44 +66,44 @@ To list the installed Stackable services run the following command:
[source,console]
----
$ stackablectl stacklet list
-┌───────────┬─────────────┬───────────┬────────────────────────────────────────────────┐
-│ Product ┆ Name ┆ Namespace ┆ Endpoints │
-╞═══════════╪═════════════╪═══════════╪════════════════════════════════════════════════╡
-│ druid ┆ druid ┆ default ┆ broker-metrics 172.18.0.2:31804 │
-│ ┆ ┆ ┆ broker-https https://172.18.0.2:31725 │
-│ ┆ ┆ ┆ coordinator-metrics 172.18.0.2:30547 │
-│ ┆ ┆ ┆ coordinator-https https://172.18.0.2:31186 │
-│ ┆ ┆ ┆ historical-metrics 172.18.0.2:32024 │
-│ ┆ ┆ ┆ historical-https https://172.18.0.2:31239 │
-│ ┆ ┆ ┆ middlemanager-metrics 172.18.0.2:32213 │
-│ ┆ ┆ ┆ middlemanager-https https://172.18.0.2:31641 │
-│ ┆ ┆ ┆ router-metrics 172.18.0.2:30950 │
-│ ┆ ┆ ┆ router-https https://172.18.0.2:30175 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ kafka ┆ kafka ┆ default ┆ metrics 172.18.0.2:30145 │
-│ ┆ ┆ ┆ kafka-tls 172.18.0.2:31662 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ nifi ┆ nifi ┆ default ┆ https https://172.18.0.2:30306 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ superset ┆ superset ┆ default ┆ external-superset http://172.18.0.2:30963 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ zookeeper ┆ zookeeper ┆ default ┆ zk 172.18.0.2:32710 │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ minio ┆ minio-druid ┆ default ┆ http http://172.18.0.2:30337 │
-│ ┆ ┆ ┆ console-http http://172.18.0.2:31775 │
-└───────────┴─────────────┴───────────┴────────────────────────────────────────────────┘
+
+┌───────────┬───────────────┬───────────┬─────────────────────────────────────────────────┬─────────────────────────────────┐
+│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
+╞═══════════╪═══════════════╪═══════════╪═════════════════════════════════════════════════╪═════════════════════════════════╡
+│ druid ┆ druid ┆ default ┆ broker-metrics 172.18.0.2:32661 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ broker-https https://172.18.0.2:32130 ┆ │
+│ ┆ ┆ ┆ coordinator-metrics 172.18.0.2:31609 ┆ │
+│ ┆ ┆ ┆ coordinator-https https://172.18.0.2:32209 ┆ │
+│ ┆ ┆ ┆ historical-metrics 172.18.0.2:32644 ┆ │
+│ ┆ ┆ ┆ historical-https https://172.18.0.2:30865 ┆ │
+│ ┆ ┆ ┆ middlemanager-metrics 172.18.0.2:31184 ┆ │
+│ ┆ ┆ ┆ middlemanager-https https://172.18.0.2:30239 ┆ │
+│ ┆ ┆ ┆ router-metrics 172.18.0.2:31912 ┆ │
+│ ┆ ┆ ┆ router-https https://172.18.0.2:30616 ┆ │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ kafka ┆ kafka ┆ default ┆ metrics 172.18.0.2:31035 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ kafka-tls 172.18.0.2:31563 ┆ │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ nifi ┆ nifi ┆ default ┆ https https://172.18.0.2:30198 ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ superset ┆ superset ┆ default ┆ external-http http://172.18.0.2:31037 ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ zookeeper ┆ zookeeper ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ minio ┆ minio-console ┆ default ┆ http http://172.18.0.2:32345 ┆ │
+└───────────┴───────────────┴───────────┴─────────────────────────────────────────────────┴─────────────────────────────────┘
----
include::partial$instance-hint.adoc[]
-== Inspect data in Kafka
+== Inspect the data in Kafka
Kafka is an event streaming platform to stream the data in near real-time. All the messages put in and read from Kafka
-are structured in dedicated queues called topics. The test data will be put into a topic called earthquakes. The records
+are structured in dedicated queues called topics. The test data will be put into the topics `stations` and `measurements`. The records
are produced (put in) by the test data generator and consumed (read) by Druid afterwards in the same order they were
created.
-As Kafka has no web interface, you must use a Kafka client like h{kcat}[kafkacat]. Kafka uses mutual TLS, so clients
+As Kafka has no web interface, you must use a Kafka client like {kcat}[kcat]. Kafka uses mutual TLS, so clients
wanting to connect to Kafka must present a valid TLS certificate. The easiest way to obtain this is to shell into the
`kafka-broker-default-0` Pod, as we will do in the following section for demonstration purposes. For a production setup,
you should spin up a dedicated Pod provisioned with a certificate acting as a Kafka client instead of shell-ing into the
@@ -113,9 +113,12 @@ Kafka Pod.
You can execute a command on the Kafka broker to list the available topics as follows:
+// In the following commands the kcat-prober container instead of the kafka container is used to send requests to Kafka.
+// This is necessary because kcat cannot use key- and truststore files with empty passwords, which are mounted here to the kafka container.
+// However, the kcat-prober container has TLS certificates mounted, which can be used by kcat to connect to Kafka.
[source,console]
----
-$ kubectl exec -it kafka-broker-default-0 -c kafka -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_server_mount/ca.crt -L"
+$ kubectl exec -it kafka-broker-default-0 -c kcat-prober -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_cert_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_cert_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_cert_server_mount/ca.crt -L"
Metadata for all topics (from broker -1: ssl://localhost:9093/bootstrap):
1 brokers:
broker 1001 at 172.18.0.2:31146 (controller)
@@ -140,7 +143,7 @@ Metadata for all topics (from broker -1: ssl://localhost:9093/bootstrap):
partition 7, leader 1001, replicas: 1001, isrs: 1001
----
-You can see that Kafka consists of one broker, and the topic `stations` and `measurements` have been created with eight
+You can see that Kafka consists of one broker, and the topics `stations` and `measurements` have been created with eight
partitions each.
=== Show Sample Records
@@ -150,7 +153,7 @@ print via the `-c` parameter.
[source,console]
----
-$ kubectl exec -it kafka-broker-default-0 -c kafka -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_server_mount/ca.crt -C -t stations -c 2"
+$ kubectl exec -it kafka-broker-default-0 -c kcat-prober -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_cert_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_cert_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_cert_server_mount/ca.crt -C -t stations -c 2"
----
Below is an example of the output of two records:
@@ -189,7 +192,7 @@ Below is an example of the output of two records:
[source,console]
----
-$ kubectl exec -it kafka-broker-default-0 -c kafka -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_server_mount/ca.crt -C -t measurements -c 3"
+$ kubectl exec -it kafka-broker-default-0 -c kcat-prober -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_cert_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_cert_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_cert_server_mount/ca.crt -C -t measurements -c 3"
----
Below is an example of the output of three records:
@@ -254,7 +257,7 @@ The given pattern will print some metadata of the record.
[source,console]
----
-$ kubectl exec -it kafka-broker-default-0 -c kafka -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_server_mount/ca.crt -C -t measurements -o -8 -c 8 -f 'Topic %t / Partition %p / Offset: %o / Timestamp: %T\n'"
+$ kubectl exec -it kafka-broker-default-0 -c kcat-prober -- /bin/bash -c "/stackable/kcat -b localhost:9093 -X security.protocol=SSL -X ssl.key.location=/stackable/tls_cert_server_mount/tls.key -X ssl.certificate.location=/stackable/tls_cert_server_mount/tls.crt -X ssl.ca.location=/stackable/tls_cert_server_mount/ca.crt -C -t measurements -o -8 -c 8 -f 'Topic %t / Partition %p / Offset: %o / Timestamp: %T\n'"
Topic measurements / Partition 0 / Offset: 1324098 / Timestamp: 1680606104652
Topic measurements / Partition 1 / Offset: 1346816 / Timestamp: 1680606100462
Topic measurements / Partition 2 / Offset: 1339363 / Timestamp: 1680606100461
@@ -278,7 +281,7 @@ keeps streaming near-real-time updates for every available measuring station.
=== View the testdata-generation Job
You can look at the ingestion job running in NiFi by opening the endpoint `https` from your `stackablectl stacklet list`
-command output. You have to use the endpoint from your command output. In this case, it is https://172.18.0.3:32440.
+command output. You have to use the endpoint from your command output. In this case, it is https://172.18.0.2:30198.
Open it with your favourite browser. Suppose you get a warning regarding the self-signed certificate generated by the
xref:secret-operator::index.adoc[Secret Operator] (e.g. Warning: Potential Security Risk Ahead). In that case, you must
tell your browser to trust the website and continue.
@@ -289,10 +292,11 @@ Log in with the username `admin` and password `adminadmin`.
image::nifi-kafka-druid-water-level-data/nifi_2.png[]
-As you can see, the NiFi workflow consists of lots of components. It is split into two main components:
+As you can see, the NiFi workflow consists of lots of components. It is split into three main components:
-. On the left is the part bulk-loading all the known stations and the historical data for the last 30 days
-. On the right is the other part iterating over all stations and emitting the current measurement in an endless loop
+. On the left is the first part bulk-loading all the known stations
+. In the middle is the second part bulk-loading the historical data for the last 30 days
+. On the right is the third part iterating over all stations and emitting the current measurement in an endless loop
You can zoom in by using your mouse and mouse wheel.
@@ -304,17 +308,15 @@ The left workflows works as follows:
. The `Get station list` processors fetch the current list of stations as JSON via HTTP from the
{pegelonline}[PEGELONLINE web service].
. `Produce stations records` takes the list and produces a Kafka record for every station into the topic `stations`.
-. `SplitRecords` simultaneously takes the single FlowFile (NiFI record) containing all the stations and crates a new
+. `SplitRecords` simultaneously takes the single FlowFile (NiFi Record) containing all the stations and creates a new
FlowFile for every station.
. `Extract station_uuid` takes every FlowFile representing a station and extract the attribute `station_uuid` into the
metadata of the FlowFile.
. `Get historic measurements` calls the {pegelonline}[PEGELONLINE web service] for every station and fetches the
- measurements of the last 30 days. All failures are routed to the `LogAttribute` processor to inspect them in case any
- failure occurs.
+ measurements of the last 30 days.
. `Add station_uuid` will add the attribute `station_uuid` to the JSON list of measurements returned from the
{pegelonline}[PEGELONLINE web service], which is missing.
-. `PublishKafkaRecord_2_6` finally emits every measurement as a Kafka record to the topic `measurements`. All failures
- are routed to the `LogAttribute` processor to inspect them in case any failures occur.
+. `PublishKafkaRecord_2_6` finally emits every measurement as a Kafka record to the topic `measurements`.
The right side works similarly but is executed in an endless loop to stream the data in near-realtime. Double-click on
the `Get station list` processor to show the processor details.
@@ -325,7 +327,7 @@ Head over to the tab `PROPERTIES`.
image::nifi-kafka-druid-water-level-data/nifi_6.png[]
-Here, you can see the setting `Remote URl`, which specifies the download URL from where the JSON file containing the
+Here, you can see the setting `HTTP URL`, which specifies the download URL from where the JSON file containing the
stations is retrieved. Close the processor details popup by clicking `OK`. You can also have a detailed view of the
`Produce station records` processor by double-clicking it.
@@ -333,15 +335,15 @@ image::nifi-kafka-druid-water-level-data/nifi_7.png[]
The Kafka connection details within this processor - like broker addresses and topic names - are specified. It uses the
`JsonTreeReader` to parse the downloaded JSON and the `JsonRecordSetWriter` to split it into individual JSON records
-before writing it out. Double-click the `Get historic measurements` processor.
+before writing it out. Double-click the `Get 30 days historic data` processor.
image::nifi-kafka-druid-water-level-data/nifi_8.png[]
-This processor fetched the historical data for every station. Click on the `Remote URL` property.
+This processor fetches the historical data for every station. Click on the `HTTP URL` property.
image::nifi-kafka-druid-water-level-data/nifi_9.png[]
-The `Remote URL` does contain the `$\{station_uuid\}` placeholder, which gets replaced for every station.
+The `HTTP URL` does contain the `$\{station_uuid\}` placeholder, which gets replaced for every station.
Double-click the `PublishKafkaRecord_2_6` processor.
@@ -371,12 +373,12 @@ deep storage. The Druid deep storage is based on the S3 store provided by MinIO.
=== View the Ingestion job
-You can have a look at the ingestion jobs running in Druid by opening the Druid endpoint `router-http` from your
-`stackablectl stacklet list` command output (http://172.18.0.4:30899 in this case).
+You can have a look at the ingestion jobs running in Druid by opening the Druid endpoint `router-https` from your
+`stackablectl stacklet list` command output (https://172.18.0.2:31616 in this case).
image::nifi-kafka-druid-water-level-data/druid_1.png[]
-By clicking on `Ingestion` at the top you can see the running ingestion jobs.
+By clicking on `Supervisors` at the top you can see the running ingestion jobs.
image::nifi-kafka-druid-water-level-data/druid_2.png[]
@@ -386,7 +388,7 @@ number of processed records as well as the number of errors.
image::nifi-kafka-druid-water-level-data/druid_3.png[]
-The statistics show that Druid is currently ingesting `3597` records/s and has already ingested ~10 million. All records
+The statistics show that Druid ingested `2435` records during the last minute and has already ingested ~30 million records in total. All records
have been ingested successfully, indicated by having no `processWithError`, `thrownAway` or `unparseable` records.
=== Query the Data Source
@@ -396,11 +398,11 @@ the available data sources by clicking on `Datasources` at the top.
image::nifi-kafka-druid-water-level-data/druid_4.png[]
-The `Avg. row size (bytes)` column shows that a typical `measurement` record has `4` bytes, while a station record has
-`213`, more than 50 times the size. So, by choosing two dedicated topics over a single topic, this demo saved 50x of
+The `Avg. row size (bytes)` column shows that a typical `measurement` record has `3` bytes, while a station record has
+`222`, more than 50 times the size. So, by choosing two dedicated topics over a single topic, this demo saved 50x of
storage and computation costs.
-By clicking on the `measurements` data source, you can see the segments of the data source. In this case, the
+By clicking on `segments` under `Availability` for the `measurements` data source, you can see the segments of the data source. In this case, the
`measurements` data source is partitioned by the measurement day, resulting in 33 segments.
image::nifi-kafka-druid-water-level-data/druid_5.png[]
@@ -436,7 +438,7 @@ image::nifi-kafka-druid-water-level-data/druid_8.png[]
== Superset
Superset provides the ability to execute SQL queries and build dashboards. Open the Superset endpoint
-`external-superset` in your browser (http://172.18.0.4:32251 in this case).
+`external-http` in your browser (http://172.18.0.2:31037 in this case).
image::nifi-kafka-druid-water-level-data/superset_1.png[]
@@ -450,7 +452,7 @@ The demo has created a Dashboard to visualize the water level data. To open it,
image::nifi-kafka-druid-water-level-data/superset_3.png[]
-Click on the dashboard called `Water level data`. It might take some time until the dashboards renders all the included
+Click on the dashboard called `Water level data`. It might take some time until the dashboard renders all the included
charts.
image::nifi-kafka-druid-water-level-data/superset_4.png[]
@@ -461,18 +463,18 @@ The dashboard `Water level data` consists of multiple charts. To list the charts
image::nifi-kafka-druid-water-level-data/superset_5.png[]
-Click on the Chart `Measurements / hour`. On the left side, you can modify the chart and click on `Run` to see the
+Click on the Chart `Measurements / hour`. On the left side, you can modify the chart and click on `Update Chart` to see the
effect.
image::nifi-kafka-druid-water-level-data/superset_6.png[]
-You can see that starting from `2022/08/12` some stations didn't measure or transmit their data. They started sending
-measurements again at `2022/08/14`.
+You can see that starting from `2024/06/16` some stations didn't measure or transmit their data. They started sending
+measurements again at `2024/06/17`.
=== View the Station Distribution on the World Map
To look at the stations' geographical distribution, you have to click on the tab `Charts` at the top again. Afterwards,
-click on the chart `Stations` distribution.
+click on the chart `Stations distribution`.
image::nifi-kafka-druid-water-level-data/superset_7.png[]
@@ -484,8 +486,8 @@ image::nifi-kafka-druid-water-level-data/superset_8.png[]
=== Execute arbitrary SQL statements
-Within Superset, you can create dashboards and run arbitrary SQL statements. On the top, click on the tab `SQL Lab` ->
-`SQL Editor`.
+Within Superset, you can create dashboards and run arbitrary SQL statements. On the top, click on the tab `SQL` ->
+`SQL Lab`.
image::nifi-kafka-druid-water-level-data/superset_9.png[]
@@ -526,11 +528,11 @@ What might also be interesting is the average and current measurement of the sta
[source,sql]
----
-select
+select
stations.longname as station,
avg("value") as avg_measurement,
- latest("value") as current_measurement,
- latest("value") - avg("value") as diff
+ latest_by("value", measurements."__time") as current_measurement,
+ latest_by("value", measurements."__time") - avg("value") as diff
from measurements inner join stations on stations.uuid = measurements.station_uuid
group by 1
order by 2 desc
@@ -541,7 +543,7 @@ image::nifi-kafka-druid-water-level-data/superset_13.png[]
== MinIO
The S3 MinIO store provides persistent deep storage for Druid to store all the data used. Open the MinIO endpoint
-`console-http` retrieved by `stackablectl stacklet list` in your browser (http://172.18.0.5:32595 in this case).
+`http` retrieved by `stackablectl stacklet list` in your browser (http://172.18.0.2:32345 in this case).
image::nifi-kafka-druid-water-level-data/minio_1.png[]
@@ -549,17 +551,17 @@ Log in with the username `admin` and password `adminadmin`.
image::nifi-kafka-druid-water-level-data/minio_2.png[]
-Click on the blue button `Browse` on the bucket `druid` and open the folders `data`.
+Click on the bucket `demo` and open the folder `data`.
image::nifi-kafka-druid-water-level-data/minio_3.png[]
-You can see the druid has created a folder for both data sources. Go ahead and open the folder `measurements`.
+You can see that Druid has created a folder for both data sources. Go ahead and open the folder `measurements`.
image::nifi-kafka-druid-water-level-data/minio_4.png[]
-Druid saved 35MB of data within 33 prefixes (folders). One prefix corresponds to one segment, which contains all the
+Druid saved 51.5 MiB of data within 33 prefixes (folders). One prefix corresponds to one segment, which contains all the
measurements of a day. If you don't see any folders or files, the reason is that Druid still needs to save its data from
-memory to the deep storage. After waiting a few minutes, the data should have been flushed to S3 and show up.
+memory to the deep storage. After waiting for roughly an hour, the data should have been flushed to S3 and show up.
image::nifi-kafka-druid-water-level-data/minio_5.png[]
@@ -567,7 +569,7 @@ If you open up a prefix for a specific day, you can see that Druid has placed a
== Summary
-The demo put station records into the Kafka stream pipeline topic `station`. It also streamed ~30,000 measurements/s for
+The demo puts station records into the Kafka stream pipeline topic `stations`. It also streams ~30,000 measurements/s for
a total of ~11 million measurements into the topic `measurements`. Druid ingested the data near real-time into its data
source and enabled SQL access to it. Superset was used as a web-based frontend to execute SQL statements and build
dashboards.
@@ -575,22 +577,22 @@ dashboards.
== Where to go from here
There are multiple paths to go from here. The following sections give you some ideas on what to explore next. You can
-find the description of the water level data on the {pegelonline-rest}[on the PEGELONLINE REST API documentation
+find the description of the water level data on the {pegelonline-rest}[PEGELONLINE REST API documentation
(German only)].
-=== Execute Arbitrary SQL Statements
+=== Execute arbitrary SQL statements
Within Superset (or the Druid web interface), you can execute arbitrary SQL statements to explore the water level data.
-=== Create Additional Dashboards
+=== Create additional dashboards
You also can create additional charts and bundle them together in a Dashboard. Have a look at
{superset}[the Superset documentation] on how to do that.
-=== Load Additional Data
+=== Load additional data
You can use the NiFi web interface to collect arbitrary data and write it to Kafka (it's recommended to use new Kafka
-topics for that). Alternatively, you can use a Kafka client like {kcat}[kafkacat] to create new topics and ingest data.
+topics for that). Alternatively, you can use a Kafka client like {kcat}[kcat] to create new topics and ingest data.
Using the Druid web interface, you can start an ingestion job that consumes and stores the data in an internal data
source. There is an excellent {druid-tutorial}[tutorial] from Druid on how to do this. Afterwards, the data source can
-be analyzed within Druid and Superset, like the earthquake data.
+be analyzed within Druid and Superset, like the water level data.
diff --git a/docs/modules/demos/pages/signal-processing.adoc b/docs/modules/demos/pages/signal-processing.adoc
index 7eb637b7..4da8589c 100644
--- a/docs/modules/demos/pages/signal-processing.adoc
+++ b/docs/modules/demos/pages/signal-processing.adoc
@@ -61,7 +61,7 @@ Now access the JupyterHub web interface via http://localhost:8000.
You should see the JupyterHub login page where you can login with username `admin` and password `adminadmin`.
-You should arrive at your workspace where you can click on the `notebooks` folder on the left, open the file and run it. Click on the double arrow to execute the Python scripts:
+You should arrive at your workspace where you can double-click on the `notebook` folder on the left, open the file and run it. Click on the double arrow to execute the Python scripts:
image::signal-processing/notebook.png[]
@@ -82,16 +82,16 @@ Grafana can be reached by first looking up the service endpoint:
[source,console]
----
$ stackablectl stacklet list
-┌───────────┬───────────┬───────────┬──────────────────────────────────┬─────────────────────────────────────────┐
-│ Product ┆ Name ┆ Namespace ┆ Endpoints ┆ Info │
-╞═══════════╪═══════════╪═══════════╪══════════════════════════════════╪═════════════════════════════════════════╡
-│ nifi ┆ nifi ┆ default ┆ https https://172.19.0.3:30176 ┆ │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ zookeeper ┆ zookeeper ┆ default ┆ ┆ │
-├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ grafana ┆ grafana ┆ default ┆ service 172.19.0.3:30387 ┆ Third party service │
-│ ┆ ┆ ┆ ┆ Admin user: admin, password: adminadmin │
-└───────────┴───────────┴───────────┴──────────────────────────────────┴─────────────────────────────────────────┘
+
+┌───────────┬───────────┬───────────┬───────────────────────────────────┬─────────────────────────────────┐
+│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
+╞═══════════╪═══════════╪═══════════╪═══════════════════════════════════╪═════════════════════════════════╡
+│ nifi ┆ nifi ┆ default ┆ https https://172.18.0.3:30851 ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ zookeeper ┆ zookeeper ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ grafana ┆ grafana ┆ default ┆ service 172.18.0.3:30499 ┆ │
+└───────────┴───────────┴───────────┴───────────────────────────────────┴─────────────────────────────────┘
----
@@ -99,7 +99,7 @@ Log in to Grafana with username `admin` and password `adminadmin` and navigate t
=== Measurements
-This is the original data. The first graph plots two measurments (`r1`, `r2`), togther with the model scores (`r1_score`, `r2_score`, `r1_score_lttb`). These are superimposed on each other for ease of comparison.
+This is the original data. The first graph plots two measurements (`r1`, `r2`), together with the model scores (`r1_score`, `r2_score`, `r1_score_lttb`). These are superimposed on each other for ease of comparison.
image::signal-processing/measurements.png[]
diff --git a/docs/modules/demos/pages/spark-k8s-anomaly-detection-taxi-data.adoc b/docs/modules/demos/pages/spark-k8s-anomaly-detection-taxi-data.adoc
index a3016393..6b212922 100644
--- a/docs/modules/demos/pages/spark-k8s-anomaly-detection-taxi-data.adoc
+++ b/docs/modules/demos/pages/spark-k8s-anomaly-detection-taxi-data.adoc
@@ -59,28 +59,26 @@ image::spark-k8s-anomaly-detection-taxi-data/overview.png[]
To list the installed Stackable services run the following command:
-// TODO(Techassi): Update console output
-
[source,console]
----
$ stackablectl stacklet list
-PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS
-
- hive hive spark-k8s-ad-taxi-data hive 172.18.0.2:31912
- metrics 172.18.0.2:30812
-
- hive hive-iceberg spark-k8s-ad-taxi-data hive 172.18.0.4:32133
- metrics 172.18.0.4:32125
-
- opa opa spark-k8s-ad-taxi-data http http://172.18.0.3:31450
-
- superset superset spark-k8s-ad-taxi-data external-superset http://172.18.0.2:31339 Admin user: admin, password: adminadmin
-
- trino trino spark-k8s-ad-taxi-data coordinator-metrics 172.18.0.3:32168
- coordinator-https https://172.18.0.3:31408
- minio minio-trino spark-k8s-ad-taxi-data http http://172.18.0.3:30589 Third party service
- console-http http://172.18.0.3:31452 Admin user: admin, password: adminadmin
+┌──────────┬───────────────┬───────────┬───────────────────────────────────────────────┬─────────────────────────────────┐
+│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
+╞══════════╪═══════════════╪═══════════╪═══════════════════════════════════════════════╪═════════════════════════════════╡
+│ hive ┆ hive ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ hive ┆ hive-iceberg ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ opa ┆ opa ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ superset ┆ superset ┆ default ┆ external-http http://172.18.0.2:30562 ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ trino ┆ trino ┆ default ┆ coordinator-metrics 172.18.0.2:31980 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ coordinator-https https://172.18.0.2:32186 ┆ │
+├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ minio ┆ minio-console ┆ default ┆ http http://172.18.0.2:32276 ┆ │
+└──────────┴───────────────┴───────────┴───────────────────────────────────────────────┴─────────────────────────────────┘
----
include::partial$instance-hint.adoc[]
@@ -89,8 +87,8 @@ include::partial$instance-hint.adoc[]
=== List Buckets
-The S3 provided by MinIO is used as persistent storage to store all the data used. Open the endpoint `console-http`
-retrieved by `stackablectl stacklet list` in your browser (http://172.18.0.3:31452 in this case).
+The S3 provided by MinIO is used as persistent storage to store all the data used. Open the endpoint `http`
+retrieved by `stackablectl stacklet list` in your browser (http://172.18.0.2:32276 in this case).
image::spark-k8s-anomaly-detection-taxi-data/minio_0.png[]
@@ -107,16 +105,16 @@ Here, you can see the two buckets the S3 is split into:
=== Inspect raw data
-Click on the blue button `Browse` on the bucket `demo`.
+Click on the bucket `demo` and then on `ny-taxi-data` and `raw` respectively.
image::spark-k8s-anomaly-detection-taxi-data/minio_3.png[]
-A folder (called prefixes in S3) contains a dataset of similarly structured data files. The data is partitioned by month
+This folder (called a prefix in S3) contains a dataset of similarly structured data files. The data is partitioned by month
and contains several hundred MBs, which may seem small for a dataset. Still, the model is a time-series model where the
data has decreasing relevance the "older" it is, especially when the data is subject to multiple external factors, many
of which are unknown and fluctuating in scope and effect.
-The second bucket prediction contains the output from the model scoring process:
+The second bucket prediction contains the output from the model scoring process under `prediction/anomaly-detection/iforest/data`:
image::spark-k8s-anomaly-detection-taxi-data/minio_4.png[]
@@ -147,7 +145,9 @@ image::spark-k8s-anomaly-detection-taxi-data/spark_job.png[]
== Dashboard
-The anomaly detection dashboard is pre-defined and accessible under `Dashboards` when logged in to Superset:
+Open the `external-http` Superset endpoint found in the output of the `stackablectl stacklet list` command. The anomaly detection
+dashboard is pre-defined and accessible under the `Dashboards` tab when logged in to Superset using the username `admin`
+and password `adminadmin`:
image::spark-k8s-anomaly-detection-taxi-data/superset_anomaly_scores.png[]
diff --git a/docs/modules/demos/pages/trino-iceberg.adoc b/docs/modules/demos/pages/trino-iceberg.adoc
index dd3827dd..ef9601b6 100644
--- a/docs/modules/demos/pages/trino-iceberg.adoc
+++ b/docs/modules/demos/pages/trino-iceberg.adoc
@@ -46,25 +46,25 @@ This demo will
== List the deployed Stackable services
-To list the installed installed Stackable services run the following command:
+To list the installed Stackable services run the following command:
[source,console]
----
$ stackablectl stacklet list
-┌─────────┬──────────────┬───────────┬──────────────────────────────────────────────┐
-│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS │
-╞═════════╪══════════════╪═══════════╪══════════════════════════════════════════════╡
-│ hive ┆ hive-iceberg ┆ default ┆ hive 172.18.0.4:30637 │
-│ ┆ ┆ ┆ metrics 172.18.0.4:30176 │
-├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ opa ┆ opa ┆ default ┆ http http://172.18.0.2:32470 │
-├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ trino ┆ trino ┆ default ┆ coordinator-metrics 172.18.0.2:32402 │
-│ ┆ ┆ ┆ coordinator-https https://172.18.0.2:31605 │
-├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-│ minio ┆ minio ┆ default ┆ http http://172.18.0.2:30357 │
-│ ┆ ┆ ┆ console-http http://172.18.0.2:30310 │
-└─────────┴──────────────┴───────────┴──────────────────────────────────────────────┘
+
+┌─────────┬───────────────┬───────────┬───────────────────────────────────────────────┬─────────────────────────────────┐
+│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
+╞═════════╪═══════════════╪═══════════╪═══════════════════════════════════════════════╪═════════════════════════════════╡
+│ hive ┆ hive-iceberg ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ opa ┆ opa ┆ default ┆ ┆ Available, Reconciling, Running │
+├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ trino ┆ trino ┆ default ┆ coordinator-metrics 172.18.0.2:32612 ┆ Available, Reconciling, Running │
+│ ┆ ┆ ┆ coordinator-https https://172.18.0.2:30856 ┆ │
+├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ minio ┆ minio-console ┆ default ┆ http http://172.18.0.2:32489 ┆ │
+└─────────┴───────────────┴───────────┴───────────────────────────────────────────────┴─────────────────────────────────┘
+
----
include::partial$instance-hint.adoc[]
@@ -72,16 +72,17 @@ include::partial$instance-hint.adoc[]
== MinIO
You can view the available buckets and objects (think of files) described in the
-xref:data-lakehouse-iceberg-trino-spark.adoc#_minio[data-lakehouse-iceberg-trino-spark] demo.
+xref:data-lakehouse-iceberg-trino-spark.adoc#_minio[data-lakehouse-iceberg-trino-spark] demo. Currently, the bucket `lakehouse`
+is still empty, but will be filled during the next steps.
== Connect to Trino
-Have a look at the xref:data-lakehouse-iceberg-trino-spark.adoc#_connect_with_dbeaver[documentation] on how to
+Have a look at the xref:trino:usage-guide/connect_to_trino.adoc#_connect_with_dbeaver[documentation] on how to
connect with DBeaver. As an alternative, you can use https://trino.io/download.html[trino-cli] by running:
[source,console]
----
-$ java -jar ~/Downloads/trino-cli-396-executable.jar --user admin --insecure --password --server https://172.18.0.3:31250
+$ java -jar ~/Downloads/trino-cli-451-executable.jar --user admin --insecure --password --server https://172.18.0.2:30856
----
Make sure to replace the server endpoint with the endpoint listed in the `stackablectl stacklet list` output. When
@@ -131,7 +132,7 @@ show tables in tpch.sf5;
(8 rows)
----
-The dataset comes with different scale factors. This demo is intended to run on a Laptop, so it starts with a scale
+The dataset comes with different scale factors. This demo is intended to run on a laptop, so it starts with a scale
factor of 5 (hence the `sf5` in the above query). If you have a sufficiently large S3 and Trino deployed, you can easily
re-run the statements below with a different scale factor. This demo has been tested up to a scale factor of 10000, but
you can choose any scale in between or even more if desired.
@@ -498,91 +499,6 @@ try to spin up 8 Trino workers with 16GB RAM each, chances are high that Pods wi
resources required can't be fulfilled.
====
-=== Scale S3
-
-That should be the preferred option if you can access a managed S3, for example, from a Cloud provider with an excellent
-network interconnection.
-
-You can change the endpoint of the S3 by running `kubectl edit s3connection minio -o yaml` and `kubectl edit secret
-minio-s3-credentials`. Please note that the credentials need to be base64 encoded.
-
-.Example IONOS configuration
-[%collapsible]
-====
-[source,sql]
-----
-apiVersion: s3.stackable.tech/v1alpha1
-kind: S3Connection
-metadata:
- name: ionos
-spec:
- host: s3-eu-central-1.ionoscloud.com
- port: 443
- tls:
- verification:
- server:
- caCert:
- webPki: {}
- credentials:
- secretClass: ionos-s3-credentials
----
-apiVersion: secrets.stackable.tech/v1alpha1
-kind: SecretClass
-metadata:
- name: ionos-s3-credentials
-spec:
- backend:
- k8sSearch:
- searchNamespace:
- pod: {}
----
-apiVersion: v1
-kind: Secret
-metadata:
- name: ionos-s3-credentials
- labels:
- secrets.stackable.tech/class: ionos-s3-credentials
-stringData:
- accessKey: ""
- secretKey: ""
-----
-====
-
-If you don't have access to a managed S3 or don't want to use it, you can also scale up the MinIO cluster. You can see
-the available replicas using the following command:
-
-[source,console]
-----
-$ kubectl get statefulsets.apps minio
-
-NAME READY AGE
-minio 2/2 4m16s
-----
-
-You can edit the MinIO cluster using `kubectl edit statefulsets.apps minio`.
-Especially interesting are the following options:
-
-[source,yaml]
-----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
- name: minio
-spec:
- replicas: 5 # Number of MinIO nodes
- template:
- spec:
- containers:
- - name: minio
- resources:
- requests:
- cpu: 1000m # Guaranteed CPU available (one core in this case)
- memory: 4Gi # RAM available
-----
-
-For example, set `spec.replicas` to `5` and save the changes. You can re-run `kubectl get statefulsets.apps minio` to see the
-effect.
-
=== Scale Trino
Run `kubectl edit trinocluster trino`. Modify the following settings to your needs:
diff --git a/docs/modules/demos/pages/trino-taxi-data.adoc b/docs/modules/demos/pages/trino-taxi-data.adoc
index 0855cbda..c5d51ae3 100644
--- a/docs/modules/demos/pages/trino-taxi-data.adoc
+++ b/docs/modules/demos/pages/trino-taxi-data.adoc
@@ -81,7 +81,7 @@ include::partial$instance-hint.adoc[]
== Inspect the data in S3
The S3 provided by MinIO is used as a persistent storage to store all the data used. You can look at the test data
-within the MinIO web interface by opening the endpoint `console-http` from your `stackablectl stacklet list` command
+within the MinIO web interface by opening the endpoint `http` from your `stackablectl stacklet list` command
output. You have to use the endpoint from your command output. In this case, it is http://172.18.0.2:32654. Open it with
your favourite browser.
@@ -115,7 +115,7 @@ When you start executing SQL queries, you will see the queries getting processed
== Use the Superset web interface
-Superset gives the ability to execute SQL queries and build dashboards. Open the endpoint `external-superset`
+Superset gives the ability to execute SQL queries and build dashboards. Open the endpoint `external-http`
in your browser (`http://172.18.0.2:31312` in this case).
image::trino-taxi-data/superset_1.png[]
@@ -138,8 +138,8 @@ You can clearly see the impact of COVID-19 on the taxi business.
=== Execute arbitrary SQL statements
-Within Superset, you can create dashboards and run arbitrary SQL statements. On the top, click on the tab `SQL Lab` ->
-`SQL Editor`.
+Within Superset, you can create dashboards and run arbitrary SQL statements. On the top, click on the tab `SQL` ->
+`SQL Lab`.
image::trino-taxi-data/superset_5.png[]
@@ -180,7 +180,7 @@ Within Superset you can execute arbitrary SQL statements to explore the taxi dat
questions by executing SQL statements? The {trino-language-docs}[Trino documentation on their SQL language] might help
you.
-How many taxi trips there where in the year 2021?
+How many taxi trips took place in the year 2021?
.See the answer
[%collapsible]
@@ -199,7 +199,7 @@ returns 30.903.982 trips.
What was the maximum amount of passengers?
-.See the Answer
+.See the answer
[%collapsible]
====
@@ -269,7 +269,7 @@ Pretty cheap for that amount of people! These are probably invalid records.
What was the highest tip (measured in percentage of the original fee) ever given?
-.See the Answer
+.See the answer
[%collapsible]
====
@@ -309,7 +309,7 @@ You also have the possibility to create additional charts and bundle them togeth
You can use the MinIO webinterface to upload additional data. As an alternative you can use the S3 API with an S3 client
like https://s3tools.org/s3cmd[s3cmd]. It is recommended to put the data into a folder (prefix) in the `demo` bucket.
-Have a look at the defined tables inside the `hive`.`demo` schema on how to inform Trino about the newly available data.
+Have a look at the defined tables inside the `hive.demo` schema on how to inform Trino about the newly available data.
.Table Definitions
[%collapsible]
@@ -372,5 +372,5 @@ AND tpep_pickup_datetime <= from_iso8601_timestamp('2022-05-31T00:00:00')
=== Connect to Trino via CLI, Python or DBeaver
-If you prefer running your SQL statements via command-line, a Python script or a graphical Database manager like DBeaver
+If you prefer running your SQL statements via command-line, a Python script or a graphical database manager like DBeaver
please have a look at the {trino-client-docs}[the Trino documentation] on how to do that.
diff --git a/stacks/data-lakehouse-iceberg-trino-spark/trino.yaml b/stacks/data-lakehouse-iceberg-trino-spark/trino.yaml
index 0e7e9330..a86ccb46 100644
--- a/stacks/data-lakehouse-iceberg-trino-spark/trino.yaml
+++ b/stacks/data-lakehouse-iceberg-trino-spark/trino.yaml
@@ -129,22 +129,22 @@ data:
trino.rego: |
package trino
- import future.keywords.in
+ import rego.v1
default allow = false
# Allow non-batched access
- allow {
+ allow if {
is_admin
}
# Allow batched access
- extended[i] {
+ batch contains i if {
some i
input.action.filterResources[i]
is_admin
}
# Corner case: filtering columns is done with a single table item, and many columns inside
- extended[i] {
+ batch contains i if {
some i
input.action.operation == "FilterColumns"
count(input.action.filterResources) == 1
@@ -152,6 +152,6 @@ data:
is_admin
}
- is_admin() {
+ is_admin() if {
input.context.identity.user == "admin"
}
diff --git a/stacks/dual-hive-hdfs-s3/trino.yaml b/stacks/dual-hive-hdfs-s3/trino.yaml
index f31642f6..5af415fe 100644
--- a/stacks/dual-hive-hdfs-s3/trino.yaml
+++ b/stacks/dual-hive-hdfs-s3/trino.yaml
@@ -102,22 +102,22 @@ data:
trino.rego: |
package trino
- import future.keywords.in
+ import rego.v1
default allow = false
# Allow non-batched access
- allow {
+ allow if {
is_admin
}
# Allow batched access
- extended[i] {
+ batch contains i if {
some i
input.action.filterResources[i]
is_admin
}
# Corner case: filtering columns is done with a single table item, and many columns inside
- extended[i] {
+ batch contains i if {
some i
input.action.operation == "FilterColumns"
count(input.action.filterResources) == 1
@@ -125,6 +125,6 @@ data:
is_admin
}
- is_admin() {
+ is_admin() if {
input.context.identity.user == "admin"
}
diff --git a/stacks/nifi-kafka-druid-superset-s3/druid.yaml b/stacks/nifi-kafka-druid-superset-s3/druid.yaml
index c8ab557e..c2ee0369 100644
--- a/stacks/nifi-kafka-druid-superset-s3/druid.yaml
+++ b/stacks/nifi-kafka-druid-superset-s3/druid.yaml
@@ -23,7 +23,7 @@ spec:
bucketName: demo
connection:
inline:
- host: minio-druid
+ host: minio
port: 9000
accessStyle: Path
credentials:
@@ -62,7 +62,7 @@ spec:
min: "1"
max: "4" # Need enough CPU cores to run multiple ingestion jobs at once
memory:
- limit: 4Gi
+ limit: 16Gi
routers:
roleGroups:
default:
diff --git a/stacks/signal-processing/nifi.yaml b/stacks/signal-processing/nifi.yaml
index c643c767..bdb9cede 100644
--- a/stacks/signal-processing/nifi.yaml
+++ b/stacks/signal-processing/nifi.yaml
@@ -7,7 +7,6 @@ spec:
image:
productVersion: 1.25.0
custom: docker.stackable.tech/demos/nifi:1.25.0-postgresql
- pullPolicy: Never
clusterConfig:
listenerClass: external-unstable
zookeeperConfigMapName: nifi-znode
diff --git a/stacks/stacks-v2.yaml b/stacks/stacks-v2.yaml
index 075a1b49..ec899364 100644
--- a/stacks/stacks-v2.yaml
+++ b/stacks/stacks-v2.yaml
@@ -220,7 +220,7 @@ stacks:
supportedNamespaces: []
resourceRequests:
cpu: 8900m
- memory: 30042Mi
+ memory: 42330Mi
pvc: 75Gi
parameters:
- name: nifiAdminPassword