From 6f109d7d048a3bca4e0cdea33a8a8f4aec9e8835 Mon Sep 17 00:00:00 2001
From: Fabrice Jammes
Date: Wed, 6 Nov 2024 16:39:26 +0100
Subject: [PATCH 1/5] Replace minio with HDFS

- Install stackable operators with argoCD
- Improve ignoreDifferences field in Argo
  * add /spec/names/categories and shortNames
- Install HDFS with argoCD
- Reduce memory for spark pods to 1Gi
- Bump spark-operator to v2.1.0
- Add hdfs init for user 185 inside chart
---
 .ciux                                        |  2 +-
 .github/workflows/e2e-common.yml             |  8 +--
 TODO.913-replace-minio-s3-with-hdfs.org      | 39 +++++++++++
 chart/templates/_helpers.tpl                 |  4 ++
 chart/templates/job-hdfs-init.yaml           | 25 +++++++
 chart/templates/spark-fink-distribution.yaml |  6 ++
 chart/templates/spark-fink-raw2science.yaml  |  6 ++
 chart/templates/spark-fink-stream2raw.yaml   |  6 ++
 chart/values-ci-noscience.yaml               |  4 --
 chart/values.yaml                            |  5 +-
 doc/troubleshoot.md                          | 29 +++++++-
 e2e/argocd.sh                                |  3 +-
 e2e/run.sh                                   |  4 +-
 fink_broker/spark_utils.py                   | 12 +++-
 hdfs-tmp/README.md                           | 11 +++
 hdfs-tmp/install.sh                          | 11 +++
 hdfs-tmp/mkdir.sh                            | 31 ++++++++
 hdfs-tmp/test-hdfs.sh                        | 74 ++++++++++++++++++++
 18 files changed, 265 insertions(+), 15 deletions(-)
 create mode 100644 TODO.913-replace-minio-s3-with-hdfs.org
 create mode 100644 chart/templates/job-hdfs-init.yaml
 create mode 100644 hdfs-tmp/README.md
 create mode 100755 hdfs-tmp/install.sh
 create mode 100755 hdfs-tmp/mkdir.sh
 create mode 100755 hdfs-tmp/test-hdfs.sh

diff --git a/.ciux b/.ciux
index 1e7af2ce..ddb29554 100644
--- a/.ciux
+++ b/.ciux
@@ -31,7 +31,7 @@ dependencies:
   - image: gitlab-registry.in2p3.fr/astrolabsoftware/fink/spark-py:k8s-3.4.1
     labels:
       build: "true"
-  - package: github.com/k8s-school/ktbx@v1.1.4-rc1
+  - package: github.com/k8s-school/ktbx@v1.1.4-rc3
     labels:
       itest: "optional"
   - package: github.com/astrolabsoftware/finkctl/v3@v3.1.3-rc1
diff --git a/.github/workflows/e2e-common.yml b/.github/workflows/e2e-common.yml
index 8c9f5395..7e962f93 100644
--- a/.github/workflows/e2e-common.yml
+++ b/.github/workflows/e2e-common.yml
@@ -24,7 +24,7 @@ on:
       private_registry_token:
         required: true
 env:
-  CIUX_VERSION: v0.0.4-rc9
+  CIUX_VERSION: v0.0.4-rc10
   GHA_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   SUFFIX: ${{ inputs.suffix }}
   CI_REPO: ${{ inputs.ci_repo }}
@@ -134,13 +134,13 @@ jobs:
         else
           echo "Using pre-existing image from registry (See "Ciux project ignition" section)"
         fi
-      - name: Run argoCD
-        run: |
-          ./e2e/argocd.sh
       # - name: Setup tmate session
       #   uses: mxschmitt/action-tmate@v3
       #   with:
       #     detached: true
+      - name: Run argoCD
+        run: |
+          ./e2e/argocd.sh
       - name: Check results
         run: |
           ./e2e/check-results.sh
diff --git a/TODO.913-replace-minio-s3-with-hdfs.org b/TODO.913-replace-minio-s3-with-hdfs.org
new file mode 100644
index 00000000..919703d2
--- /dev/null
+++ b/TODO.913-replace-minio-s3-with-hdfs.org
@@ -0,0 +1,39 @@
+* DONE Reduce mem limit for pod journalnode !!!!
+* Upgrade stackable operator to v2.1.0
+
+* TODO hdfs operator
+
+** limit and request for memory: see https://github.com/stackabletech/hdfs-operator/issues/625
+** TODO: open issue: zkfc on datanode is not compliant with memory setting
+
+In the example below the memory limit is 256Mi for the nameNode in the hdfscluster CR, but it becomes 768Mi in each related pod because the `zkfc` container is not impacted by the CR configuration.
+This should be fixed because it prevents running the setup on CI platforms with low memory, like GitHub Actions for instance.
+
+kubectl get -n hdfs hdfscluster simple-hdfs -o yaml -o jsonpath -o=jsonpath='{.spec.nameNodes.config.resources}'
+{"cpu":{"min":"0"},"memory":{"limit":"256Mi"}}
+
+kubectl describe nodes | grep namenode
+  hdfs    simple-hdfs-namenode-default-0    100m (0%)    1400m (1%)    768Mi (0%)    768Mi (0%)    34m
+  hdfs    simple-hdfs-namenode-default-1    100m (0%)    1400m (1%)    768Mi (0%)    768Mi (0%)    31m
+
+kubectl get pods -n hdfs simple-hdfs-namenode-default-0 -o jsonpath -o=jsonpath='{.spec.containers[1].name}'
+zkfc
+
+kubectl get pods -n hdfs simple-hdfs-namenode-default-0 -o jsonpath -o=jsonpath='{.spec.containers[1].resources}' | jq
+{
+  "limits": {
+    "cpu": "400m",
+    "memory": "512Mi"
+  },
+  "requests": {
+    "cpu": "100m",
+    "memory": "512Mi"
+  }
+}
+
+
+** management of argoCD default values (jqpath expression): https://github.com/stackabletech/hdfs-operator/issues/626
+** TODO: open issue: be able to run only one dataNode on CI
+
+* Add helm option on HDFS cpu.min (also for operators!)
+* Move fink image to docker.stackable.tech/stackable/hadoop:3.3.6-stackable24.11.0
diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl
index 0cc22e35..87d3a69b 100644
--- a/chart/templates/_helpers.tpl
+++ b/chart/templates/_helpers.tpl
@@ -85,7 +85,11 @@ restartPolicy:
 - '-log_level'
 - '{{ .Values.log_level }}'
 - '-online_data_prefix'
+{{- if .Values.online_data_prefix }}
+- '{{ .Values.online_data_prefix }}'
+{{- else }}
 - 's3a://{{ tpl .Values.s3.bucket . }}'
+{{- end }}
 - '-producer'
 - '{{ .Values.producer }}'
 - '-tinterval'
diff --git a/chart/templates/job-hdfs-init.yaml b/chart/templates/job-hdfs-init.yaml
new file mode 100644
index 00000000..0ab3941b
--- /dev/null
+++ b/chart/templates/job-hdfs-init.yaml
@@ -0,0 +1,25 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: hdfs-init
+  namespace: hdfs
+  annotations:
+    "helm.sh/hook": "pre-install"
+spec:
+  template:
+    spec:
+      containers:
+      - name: hdfs-client
+        image: apache/hadoop:3.4.0
+        command: ["sh", "-c"]
+        args:
+          - |
+            hdfs dfs -fs $HDFS_URL -mkdir -p /user/185 && \
+            hdfs dfs -fs $HDFS_URL -chown 185:hdfs /user/185 && \
+            hdfs dfs -fs $HDFS_URL -chmod 700 /user/185
+        env:
+        - name: HDFS_URL
+          value: hdfs://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.hdfs:8020
+        - name: HADOOP_USER_NAME
+          value: stackable
+      restartPolicy: OnFailure
diff --git a/chart/templates/spark-fink-distribution.yaml b/chart/templates/spark-fink-distribution.yaml
index 3fb5232e..df6aba2d 100644
--- a/chart/templates/spark-fink-distribution.yaml
+++ b/chart/templates/spark-fink-distribution.yaml
@@ -23,6 +23,9 @@ spec:
   driver:
     cores: {{ tpl .Values.distribution.cores . }}
     coreRequest: "{{ tpl .Values.distribution.coreRequest . }}"
+    env:
+    - name: SPARK_USER
+      value: "{{ .Values.hadoop_user_name }}"
     memory: "{{ tpl .Values.distribution.memory . }}"
     javaOptions: "-Divy.cache.dir=/tmp -Divy.home=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true"
     labels:
@@ -31,6 +34,9 @@ spec:
   executor:
     cores: {{ tpl .Values.distribution.cores . }}
     coreRequest: "{{ tpl .Values.distribution.coreRequest . }}"
+    env:
+    - name: SPARK_USER
+      value: "{{ .Values.hadoop_user_name }}"
     memory: "{{ tpl .Values.distribution.memory . }}"
     instances: {{ tpl .Values.distribution.instances . }}
     javaOptions: "-Djava.security.auth.login.config=/etc/fink-broker/kafka-jaas.conf -Dcom.amazonaws.sdk.disableCertChecking=true"
diff --git a/chart/templates/spark-fink-raw2science.yaml b/chart/templates/spark-fink-raw2science.yaml
index bdc56953..04936078 100644
--- a/chart/templates/spark-fink-raw2science.yaml
+++ b/chart/templates/spark-fink-raw2science.yaml
@@ -12,6 +12,9 @@ spec:
   driver:
     cores: {{ tpl .Values.raw2science.cores . }}
     coreRequest: "{{ tpl .Values.raw2science.coreRequest . }}"
+    env:
+    - name: SPARK_USER
+      value: "{{ .Values.hadoop_user_name }}"
     memory: "{{ tpl .Values.raw2science.memory . }}"
     javaOptions: "-Divy.cache.dir=/tmp -Divy.home=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true"
     labels:
@@ -20,6 +23,9 @@ spec:
   executor:
     cores: {{ tpl .Values.raw2science.cores . }}
     coreRequest: "{{ tpl .Values.raw2science.coreRequest . }}"
+    env:
+    - name: SPARK_USER
+      value: "{{ .Values.hadoop_user_name }}"
     memory: "{{ tpl .Values.raw2science.memory . }}"
     javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true"
     instances: {{ tpl .Values.raw2science.instances . }}
diff --git a/chart/templates/spark-fink-stream2raw.yaml b/chart/templates/spark-fink-stream2raw.yaml
index c54a68a6..75b04064 100644
--- a/chart/templates/spark-fink-stream2raw.yaml
+++ b/chart/templates/spark-fink-stream2raw.yaml
@@ -21,6 +21,9 @@ spec:
   driver:
     cores: {{ tpl .Values.distribution.cores . }}
     coreRequest: "{{ tpl .Values.stream2raw.coreRequest . }}"
+    env:
+    - name: SPARK_USER
+      value: "{{ .Values.hadoop_user_name }}"
     memory: "{{ tpl .Values.stream2raw.memory . }}"
     labels:
       version: 3.4.1
@@ -29,6 +32,9 @@ spec:
   executor:
     cores: {{ tpl .Values.distribution.cores . }}
    coreRequest: "{{ tpl .Values.stream2raw.coreRequest . }}"
+    env:
+    - name: SPARK_USER
+      value: "{{ .Values.hadoop_user_name }}"
     memory: "{{ tpl .Values.stream2raw.memory . }}"
     instances: {{ tpl .Values.distribution.instances . }}
     javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true"
diff --git a/chart/values-ci-noscience.yaml b/chart/values-ci-noscience.yaml
index 3a7c0dc4..1d2ce651 100644
--- a/chart/values-ci-noscience.yaml
+++ b/chart/values-ci-noscience.yaml
@@ -9,10 +9,6 @@ instances: 1
 
 fink_trigger_update: "2"
 
-# Can be overridden using --image option
-
-# Default to s3a://
-# online_data_prefix: s3a://fink-broker-online
 producer: sims
 
 log_level: INFO
diff --git a/chart/values.yaml b/chart/values.yaml
index 81e17cf8..41312f44 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -3,6 +3,7 @@
 
 night: "20240101"
 
+hadoop_user_name: "185"
 image:
   pullPolicy: IfNotPresent
   repository: gitlab-registry.in2p3.fr/astrolabsoftware/fink
@@ -13,7 +14,7 @@ image:
 cores: 1
 coreRequest: 0
 instances: 1
-memory: 1500m
+memory: "1000m"
 
 # instances: 1
 fink_trigger_update: "2"
@@ -21,7 +22,7 @@ fink_trigger_update: "2"
 # Can be overridden using --image option
 
 # Default to s3a://
-# online_data_prefix: s3a://fink-broker-online
+online_data_prefix: hdfs://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.hdfs:8020///user/185
 producer: sims
 
 log_level: INFO
diff --git a/doc/troubleshoot.md b/doc/troubleshoot.md
index dfaf0e78..704d10da 100644
--- a/doc/troubleshoot.md
+++ b/doc/troubleshoot.md
@@ -1,6 +1,5 @@
 # Troubleshooting guide
 
-
 ## Run s5cmd (s3 client)
 
 From inside the k8s cluster:
@@ -22,4 +21,32 @@ kubectl run -it --rm s5cmd --image=peakcom/s5cmd --env AWS_ACCESS_KEY_ID=minio -
 ## Use --all if needed
 kubectl delete -n spark sparkapplication fink-broker-distribution
 argocd app sync fink-broker
+```
+
+## Debug fink-broker helm chart
+
+```shell
+cd fink-broker
+helm install --debug fink ./chart -f ./chart/values-ci-noscience.yaml --dry-run
+```
+
+## ArgoCD
+
+### Access argoCD web UI
+
+```bash
+kubectl port-forward -n argocd $(kubectl get pods --selector=app.kubernetes.io/name=argocd-server -n argocd --output=jsonpath="{.items..metadata.name}") 8080
+# Login is "admin", password is set to "password"; fix this in production
+kubectl -n argocd patch secret argocd-secret -p '{"stringData": {"admin.password": "$2a$10$rRyBsGSHK6.uc8fntPwVIuLVHgsAhAX7TcdrqW/RADU0uh7CaChLa", "admin.passwordMtime": "'$(date +%FT%T%Z)'" }}'
+```
+
+### Fine-tune "ignoreDifferences" field of an ArgoCD Application
+
+```bash
+# Install yq
+sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq && sudo chmod +x /usr/bin/yq
+# Retrieve failedsyncmanifest.yaml file in ArgoCD web UI
+yq failedsyncmanifest.yaml -o json > failedsyncmanifest.json
+# Fine-tune 'jqPathExpressions'
+cat failedsyncmanifest.json | jq '.spec.versions[].additionalPrinterColumns | select(. == [])'
+```
\ No newline at end of file
diff --git a/e2e/argocd.sh b/e2e/argocd.sh
index 2e2bd3df..6f163418 100755
--- a/e2e/argocd.sh
+++ b/e2e/argocd.sh
@@ -76,7 +76,8 @@ argocd app wait -l app.kubernetes.io/part-of=fink,app.kubernetes.io/component=st
 # Sync fink-broker
 argocd app sync -l app.kubernetes.io/instance=fink
 
-if [ $e2e_enabled == "true" ]; then
+if [ $e2e_enabled == "true" ]
+then
   echo "Retrieve kafka secrets for e2e tests"
   while ! kubectl get secret fink-producer --namespace kafka
   do
diff --git a/e2e/run.sh b/e2e/run.sh
index ab7c7cda..711727a8 100755
--- a/e2e/run.sh
+++ b/e2e/run.sh
@@ -21,7 +21,7 @@ usage () {
 
 SUFFIX="noscience"
 
-ciux_version=v0.0.4-rc8
+ciux_version=v0.0.4-rc10
 export CIUXCONFIG=$HOME/.ciux/ciux.sh
 
 src_dir=$DIR/..
@@ -30,6 +30,7 @@ build=false
 e2e=false
 monitoring=false
 push=false
+CIUX_IMAGE_URL="undefined"
 
 token="${TOKEN:-}"
 
@@ -120,6 +121,7 @@ then
 fi
 
 $DIR/prereq-install.sh $monitoring_opt
+
 . $CIUXCONFIG
 if [ $CIUX_BUILD = true ]; then
     kind load docker-image $CIUX_IMAGE_URL --name "$cluster"
diff --git a/fink_broker/spark_utils.py b/fink_broker/spark_utils.py
index 0df4470b..ebe2d6b3 100644
--- a/fink_broker/spark_utils.py
+++ b/fink_broker/spark_utils.py
@@ -336,7 +336,17 @@ def connect_to_raw_database(basepath: str, path: str, latestfirst: bool) -> Data
         wait_sec = increase_wait_time(wait_sec)
 
     # Create a DF from the database
-    userschema = spark.read.parquet(basepath).schema
+    # We need to wait for the schema to be available
+    while True:
+        try:
+            userschema = spark.read.parquet(basepath).schema
+        except Exception as e:
+            _LOG.error("Error while reading %s, %s", basepath, e)
+            time.sleep(wait_sec)
+            wait_sec = increase_wait_time(wait_sec)
+            continue
+        else:
+            break
 
     df = (
         spark.readStream.format("parquet")
diff --git a/hdfs-tmp/README.md b/hdfs-tmp/README.md
new file mode 100644
index 00000000..c21051b4
--- /dev/null
+++ b/hdfs-tmp/README.md
@@ -0,0 +1,11 @@
+# Minio should be disabled in fink-cd!
+
+kubectl run --image apache/hadoop:3.4.0 hdfs-client -- sleep infinity
+
+kubectl exec -it hdfs-client -- bash
+  hdfs dfs -fs hdfs://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.default.svc.cluster.local:8020 -df
+  hdfs dfs -fs hdfs://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.default.svc.cluster.local:8020 -ls
+  export HADOOP_USER_NAME=stackable
+  hdfs dfs -fs hdfs://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.default.svc.cluster.local:8020 -touch /toto
+  hdfs dfs -fs hdfs://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.default.svc.cluster.local:8020 -ls /
+
diff --git a/hdfs-tmp/install.sh b/hdfs-tmp/install.sh
new file mode 100755
index 00000000..8e076bd8
--- /dev/null
+++ b/hdfs-tmp/install.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -euxo pipefail
+
+DIR=$(cd "$(dirname "$0")"; pwd -P)
+
+STCK_BIN="/tmp/stackable"
+
+# TODO move to a job inside argoCD
+echo "Create hdfs directory"
+$DIR/mkdir.sh
diff --git a/hdfs-tmp/mkdir.sh b/hdfs-tmp/mkdir.sh
new file mode 100755
index 00000000..b7cddb55
--- /dev/null
+++ b/hdfs-tmp/mkdir.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# This script creates a directory in HDFS and sets the owner to user 185
+
+set -euxo pipefail
+
+DIR=$(cd "$(dirname "$0")"; pwd -P)
+
+NS=hdfs
+
+timeout=300s
+
+# Wait for HDFS statefulset to be available
+# TODO improve this
+kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=hdfs --timeout=$timeout -n $NS
+kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=zookeeper --timeout=$timeout -n $NS
+sleep 60
+
+hdfs_url="hdfs://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.$NS:8020"
+
+# Check if pod hdfs-client exists
+if ! kubectl get -n "$NS" pod hdfs-client &> /dev/null; then
+    kubectl run -n "$NS" --image apache/hadoop:3.4.0 hdfs-client -- sleep infinity
+fi
+
+kubectl wait -n "$NS" --for=condition=ready pod/hdfs-client --timeout=$timeout
+
+kubectl exec -n "$NS" -it hdfs-client -- sh -c "export HADOOP_USER_NAME=stackable && \
+    hdfs dfs -fs $hdfs_url -mkdir -p /user/185 && \
+    hdfs dfs -fs $hdfs_url -chown 185:hdfs /user/185 && \
+    hdfs dfs -fs $hdfs_url -chmod 700 /user/185"
diff --git a/hdfs-tmp/test-hdfs.sh b/hdfs-tmp/test-hdfs.sh
new file mode 100755
index 00000000..3339692f
--- /dev/null
+++ b/hdfs-tmp/test-hdfs.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+set -euo pipefail
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+# Step 1: Create the webhdfs.yaml file
+cat <<EOF > $DIR/webhdfs.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: webhdfs
+  labels:
+    app: webhdfs
+spec:
+  replicas: 1
+  serviceName: webhdfs-svc
+  selector:
+    matchLabels:
+      app: webhdfs
+  template:
+    metadata:
+      labels:
+        app: webhdfs
+    spec:
+      containers:
+      - name: webhdfs
+        image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable0.0.0-dev
+        stdin: true
+        tty: true
+EOF
+
+# Step 2: Apply the StatefulSet and monitor progress
+echo "Applying webhdfs StatefulSet..."
+kubectl apply -f $DIR/webhdfs.yaml
+echo "Waiting for webhdfs pod to be ready..."
+kubectl rollout status --watch --timeout=5m statefulset/webhdfs
+
+# Step 3: Check the root directory status in HDFS (should be empty)
+echo "Checking root directory in HDFS..."
+kubectl exec -n default webhdfs-0 -- curl -s -XGET "http://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.default.svc.cluster.local:9870/webhdfs/v1/?op=LISTSTATUS"
+
+# Step 4: Create a sample file for uploading
+echo "Creating sample file testdata.txt..."
+echo "This is a test file for HDFS upload." > $DIR/testdata.txt
+
+# Step 5: Copy the file to the helper pod
+echo "Copying testdata.txt to webhdfs pod..."
+kubectl cp -n default $DIR/testdata.txt webhdfs-0:/tmp
+
+# Step 6: Initiate a two-step PUT request to create the file in HDFS
+echo "Initiating file creation in HDFS (first step)..."
+create_response=$(kubectl exec -n default webhdfs-0 -- \
+curl -s -XPUT -T /tmp/testdata.txt "http://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.default.svc.cluster.local:9870/webhdfs/v1/testdata.txt?user.name=stackable&op=CREATE&noredirect=true")
+
+# Extract the location for the second PUT request
+location=$(echo "$create_response" | grep -o 'http://[^"]*')
+echo "Location for second PUT request: $location"
+
+# Step 7: Complete the file upload
+echo "Completing file creation in HDFS (second step)..."
+kubectl exec -n default webhdfs-0 -- curl -s -XPUT -T /tmp/testdata.txt "$location"
+
+# Step 8: Verify that the file has been created in HDFS
+echo "Re-checking root directory in HDFS to verify file creation..."
+kubectl exec -n default webhdfs-0 -- curl -s -XGET "http://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.default.svc.cluster.local:9870/webhdfs/v1/?op=LISTSTATUS"
+
+# Step 9: Delete the file from HDFS to clean up
+echo "Deleting testdata.txt from HDFS..."
+kubectl exec -n default webhdfs-0 -- curl -s -XDELETE "http://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.default.svc.cluster.local:9870/webhdfs/v1/testdata.txt?user.name=stackable&op=DELETE"
+
+# Clean up local files
+rm $DIR/webhdfs.yaml $DIR/testdata.txt
+echo "Cleanup completed. HDFS testing script finished."
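
Note on the two-step upload performed by test-hdfs.sh above: with `noredirect=true`, the namenode answers the CREATE request with a JSON body containing the datanode `Location` instead of an HTTP 307 redirect, and the file content is then PUT to that location. The Python sketch below illustrates the same handshake; it assumes the same in-cluster namenode address and `user.name=stackable` as the script, and is illustrative only, not part of this patch series.

```python
# Illustrative sketch (not part of the patch series): the two-step WebHDFS
# CREATE handshake used by test-hdfs.sh, driven from Python with requests.
# The namenode URL and user below are assumptions matching the script.
import requests

NAMENODE = (
    "http://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default"
    ".default.svc.cluster.local:9870"
)


def webhdfs_put(local_path: str, hdfs_path: str, user: str = "stackable") -> None:
    # Step 1: ask the namenode where to write; with noredirect=true the
    # datanode URL is returned in the JSON body instead of a 307 redirect.
    url = f"{NAMENODE}/webhdfs/v1{hdfs_path}?user.name={user}&op=CREATE&noredirect=true"
    location = requests.put(url, timeout=30).json()["Location"]
    # Step 2: upload the file content to the datanode location.
    with open(local_path, "rb") as f:
        resp = requests.put(location, data=f, timeout=30)
    resp.raise_for_status()


if __name__ == "__main__":
    webhdfs_put("testdata.txt", "/testdata.txt")
```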
From fb010a11b8ca913cb9d8b3fd6d73d64122960485 Mon Sep 17 00:00:00 2001
From: Fabrice Jammes
Date: Fri, 20 Dec 2024 15:35:51 +0100
Subject: [PATCH 2/5] Cleanup and merge todo files

---
 Dockerfile                              |  1 +
 TODO.913-replace-minio-s3-with-hdfs.org | 39 --------------
 TODO.argocd                             |  3 --
 TODO.org                                | 68 ++++++++++++++-----------
 4 files changed, 38 insertions(+), 73 deletions(-)
 delete mode 100644 TODO.913-replace-minio-s3-with-hdfs.org
 delete mode 100644 TODO.argocd

diff --git a/Dockerfile b/Dockerfile
index d5ba75bc..7e1eaa48 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -27,6 +27,7 @@ RUN apt-get update && \
     apt install -y --no-install-recommends wget git apt-transport-https ca-certificates gnupg-agent apt-utils build-essential && \
     rm -rf /var/cache/apt/*
 
+# Download and install Spark dependencies listed in jars-urls.txt
 ADD deps/jars-urls.txt $FINK_HOME/
 RUN xargs -n 1 curl --fail --output-dir /opt/spark/jars -O < $FINK_HOME/jars-urls.txt
 
diff --git a/TODO.913-replace-minio-s3-with-hdfs.org b/TODO.913-replace-minio-s3-with-hdfs.org
deleted file mode 100644
index 919703d2..00000000
--- a/TODO.913-replace-minio-s3-with-hdfs.org
+++ /dev/null
@@ -1,39 +0,0 @@
-* DONE Reduce mem limit for pod journalnode !!!!
-* Upgrade stackable operator to v2.1.0
-
-* TODO hdfs operator
-
-** limit and request for memory: see https://github.com/stackabletech/hdfs-operator/issues/625
-** TODO: open issue: zkfc on datanode is not compliant with memory setting
-
-In the example below the memory limit is 256Mi for the nameNode in the hdfscluster CR, but it becomes 768Mi in each related pod because the `zkfc` container is not impacted by the CR configuration.
-This should be fixed because it prevents running the setup on CI platforms with low memory, like GitHub Actions for instance.
-
-kubectl get -n hdfs hdfscluster simple-hdfs -o yaml -o jsonpath -o=jsonpath='{.spec.nameNodes.config.resources}'
-{"cpu":{"min":"0"},"memory":{"limit":"256Mi"}}
-
-kubectl describe nodes | grep namenode
-  hdfs    simple-hdfs-namenode-default-0    100m (0%)    1400m (1%)    768Mi (0%)    768Mi (0%)    34m
-  hdfs    simple-hdfs-namenode-default-1    100m (0%)    1400m (1%)    768Mi (0%)    768Mi (0%)    31m
-
-kubectl get pods -n hdfs simple-hdfs-namenode-default-0 -o jsonpath -o=jsonpath='{.spec.containers[1].name}'
-zkfc
-
-kubectl get pods -n hdfs simple-hdfs-namenode-default-0 -o jsonpath -o=jsonpath='{.spec.containers[1].resources}' | jq
-{
-  "limits": {
-    "cpu": "400m",
-    "memory": "512Mi"
-  },
-  "requests": {
-    "cpu": "100m",
-    "memory": "512Mi"
-  }
-}
-
-
-** management of argoCD default values (jqpath expression): https://github.com/stackabletech/hdfs-operator/issues/626
-** TODO: open issue: be able to run only one dataNode on CI
-
-* Add helm option on HDFS cpu.min (also for operators!)
-* Move fink image to docker.stackable.tech/stackable/hadoop:3.3.6-stackable24.11.0
diff --git a/TODO.argocd b/TODO.argocd
deleted file mode 100644
index a35dfb35..00000000
--- a/TODO.argocd
+++ /dev/null
@@ -1,3 +0,0 @@
-- check https://stackoverflow.com/questions/78922618/how-to-enforce-sync-order-in-argocd-app-of-apps-pattern and argocd issue
-- WIP4 upgrade spark-operator chart to 2.0.0-rc0 and remove prereq from fink-cd
-- check https://github.com/argoproj/argocd-example-apps/tree/master/helm-dependency to install spark-operator, and other operator
diff --git a/TODO.org b/TODO.org
index eac8e4f5..cb9efa6c 100644
--- a/TODO.org
+++ b/TODO.org
@@ -1,9 +1,3 @@
-* DONE use gitlab@virtualdata as a CI repo
-* DONE check fink-alert-simulator error message in CI:
-  ⚠ fink-alert-simulator-cjxv2  main  fink-alert-simulator-cjxv2  5m  Error (exit code 1): pods "fink-alert-simulator-cjxv2" is forbidden: User "system:serviceaccount:argocd:default" cannot patch resource "pods" in API group "" in the namespace "argocd"
-* DONE trigger ci for OOMkill
-* 729
-** DONE use "kubectl get kafkatopics.kafka.strimzi.io -n kafka" to check success of integration tests, maybe in finkctl?
 ** TODO DELAYED BECAUSE IT DOES NOT BLOCK BUT ONLY WARNS: create topic in distribute before sending alerts in order to avoid the error below: https://fink-broker.slack.com/archives/D03KJ390F17/p1692008729660549
 So it works with a user account; however, I did not enable authorizations in Kafka because fink-alert-simulator would no longer have been able to write to the topic without authentication.
 12 h 28
@@ -15,24 +9,9 @@ It is actually because the topic does not exist; it works if the distribute job
 
 Do you think we could pre-create the topics to avoid this problem
 @JulienPeloton
 ?
-** DONE add user authentication in kafka https://stackoverflow.com/questions/65729535/how-to-do-i-connect-kafka-python-to-accept-username-and-password-for-jaas-like-i
 * TODO Enable authZ in kafka (require authN setup in fink-alert-simulator)
-* TODO [#B] distribute should wait for data to appear instead of crashing in connect_to_raw_database()
 * TODO move nodeport to internal for svc kafka-cluster-kafka-external-bootstrap
-* DONE improve final test in CI (check Kafka with fink-client https://github.com/astrolabsoftware/fink-client)
 * TODO run code-check.sh in CI
-* DONE add unit test for schema_converter
-* TODO https://stackoverflow.com/questions/30385981/how-to-access-s3a-files-from-apache-spark
-Document +add SO post?:
-Download hadoop binary release: https://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-3.2.4/hadoop-3.2.4.tar.gz
-extract and copy jar:
-  fjammes@clrinfopo18  ~/Downloads/hadoop-3.2.4  cp ./share/hadoop/tools/lib/hadoop-aws-3.2.4.jar ~/src/k8s-spark-py/custom/jars
-  fjammes@clrinfopo18  ~/Downloads/hadoop-3.2.4  cp ./share/hadoop/tools/lib/aws-java-sdk-bundle-1.11.901.jar ~/src/k8s-spark-py/custom/jars
-  // WARNING package are not deployed in spark-executor
-  // see https://stackoverflow.com/a/67299668/2784039
-* TODO document hack to retrieve Maven URLs
-kubectl logs stream2raw-py-f529af864f8dee60-driver | grep downlo | cut -d' ' -f2 > jars-urls.txt
-OR add mvn copy:dependencies when building the image?
 * TODO manage dependencies
 What to do with:
 1. hbase-spark-hbase2.4_spark3_scala2.12_hadoop3.2.jar
 hbase-spark-protocol-shaded-hbase2.4_spark3_scala2.12_hadoop3.2.jar
 which are both in k8s-spark-py/custom and fink-broker/libs (cf. FINK_JARS)
 cf. Julien are they required?
 2. custom/jars/commons-pool2-2.6.2.jar which was in k8s-spark-py/custom
-* DONE document minio install and bucket creation:
-   5  curl https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o $HOME/minio-binaries/mc
-   6  chmod +x $HOME/minio-binaries/mc
-  15  export PATH=$PATH:$HOME/minio-binaries/
-  17  mc alias set s3 http://minio.minio:9000 minioadmin minioadmin
-  19  mc ls s3
-  27  mc mb s3/fink-broker-online
-  mc ls f1 --recursive fink-broker-online/
-* TODO test removal of options below
+* TODO test removal of options below when using hdfs
++ --conf spark.driver.extraJavaOptions="-Divy.cache.dir=/tmp -Divy.home=/tmp" \
  --conf spark.hadoop.fs.s3a.path.style.access=true \
++ --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
-* DONE INSTALL MINIO https://min.io/docs/minio/kubernetes/upstream/index.html?
+* TODO hdfs operator management
+
+** limit and request for memory: see https://github.com/stackabletech/hdfs-operator/issues/625
+** TODO: open issue: zkfc on datanode is not compliant with memory setting
+
+In the example below the memory limit is 256Mi for the nameNode in the hdfscluster CR, but it becomes 768Mi in each related pod because the `zkfc` container is not impacted by the CR configuration.
+This should be fixed because it prevents running the setup on CI platforms with low memory, like GitHub Actions for instance.
+
+kubectl get -n hdfs hdfscluster simple-hdfs -o yaml -o jsonpath -o=jsonpath='{.spec.nameNodes.config.resources}'
+{"cpu":{"min":"0"},"memory":{"limit":"256Mi"}}
+
+kubectl describe nodes | grep namenode
+  hdfs    simple-hdfs-namenode-default-0    100m (0%)    1400m (1%)    768Mi (0%)    768Mi (0%)    34m
+  hdfs    simple-hdfs-namenode-default-1    100m (0%)    1400m (1%)    768Mi (0%)    768Mi (0%)    31m
+
+kubectl get pods -n hdfs simple-hdfs-namenode-default-0 -o jsonpath -o=jsonpath='{.spec.containers[1].name}'
+zkfc
+
+kubectl get pods -n hdfs simple-hdfs-namenode-default-0 -o jsonpath -o=jsonpath='{.spec.containers[1].resources}' | jq
+{
+  "limits": {
+    "cpu": "400m",
+    "memory": "512Mi"
+  },
+  "requests": {
+    "cpu": "100m",
+    "memory": "512Mi"
+  }
+}
+
+
+** management of argoCD default values (jqpath expression): https://github.com/stackabletech/hdfs-operator/issues/626
+** TODO: open issue: be able to run only one dataNode on CI
+
+* Add helm option on HDFS cpu.min (also for operators!)
+* Move fink image to docker.stackable.tech/stackable/hadoop:3.3.6-stackable24.11.0
\ No newline at end of file

From 2605ba70dff1edffaf620238fe26bbe74d2dd4ce Mon Sep 17 00:00:00 2001
From: Fabrice Jammes
Date: Mon, 23 Dec 2024 12:07:05 +0100
Subject: [PATCH 3/5] Split hdfs/s3 configuration

---
 .ciux                                        |  2 +-
 TODO.org                                     | 66 ++++++++++----------
 chart/templates/_helpers.tpl                 |  9 +++
 chart/templates/spark-fink-distribution.yaml |  8 +--
 chart/templates/spark-fink-raw2science.yaml  |  8 +--
 chart/templates/spark-fink-stream2raw.yaml   |  8 +--
 chart/values.yaml                            |  7 ++-
 7 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/.ciux b/.ciux
index ddb29554..6f05e384 100644
--- a/.ciux
+++ b/.ciux
@@ -31,7 +31,7 @@ dependencies:
   - image: gitlab-registry.in2p3.fr/astrolabsoftware/fink/spark-py:k8s-3.4.1
     labels:
       build: "true"
-  - package: github.com/k8s-school/ktbx@v1.1.4-rc3
+  - package: github.com/k8s-school/ktbx@v1.1.4-rc4
     labels:
       itest: "optional"
   - package: github.com/astrolabsoftware/finkctl/v3@v3.1.3-rc1
diff --git a/TODO.org b/TODO.org
index cb9efa6c..2da5df81 100644
--- a/TODO.org
+++ b/TODO.org
@@ -1,33 +1,8 @@
-** TODO DELAYED BECAUSE IT DOES NOT BLOCK BUT ONLY WARNS: create topic in distribute before sending alerts in order to avoid the error below: https://fink-broker.slack.com/archives/D03KJ390F17/p1692008729660549
-So it works with a user account; however, I did not enable authorizations in Kafka because fink-alert-simulator would no longer have been able to write to the topic without authentication.
-12 h 28
-I now get this error message:
-23/08/14 10:26:52 WARN NetworkClient: [Producer clientId=producer-1] Error while fetching metadata with correlation id 29 : {fink_simbad_grav_candidates_ztf=LEADER_NOT_AVAILABLE}
-12 h 32
-It is actually because the topic does not exist; it works if the distribute job is relaunched...
-12 h 33
-Do you think we could pre-create the topics to avoid this problem
-@JulienPeloton
-?
-* TODO Enable authZ in kafka (require authN setup in fink-alert-simulator)
-* TODO move nodeport to internal for svc kafka-cluster-kafka-external-bootstrap
-* TODO run code-check.sh in CI
-* TODO manage dependencies
-What to do with:
-1. hbase-spark-hbase2.4_spark3_scala2.12_hadoop3.2.jar
-hbase-spark-protocol-shaded-hbase2.4_spark3_scala2.12_hadoop3.2.jar
-which are both in k8s-spark-py/custom and fink-broker/libs (cf. FINK_JARS)
-cf. Julien are they required?
-2. custom/jars/commons-pool2-2.6.2.jar which was in k8s-spark-py/custom
-* TODO test removal of options below when using hdfs
-+ --conf spark.driver.extraJavaOptions="-Divy.cache.dir=/tmp -Divy.home=/tmp" \
- --conf spark.hadoop.fs.s3a.path.style.access=true \
-+ --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
+#+TITLE: current
 
 * TODO hdfs operator management
 
-** limit and request for memory: see https://github.com/stackabletech/hdfs-operator/issues/625
-** TODO: open issue: zkfc on datanode is not compliant with memory setting
-
+** TODO limit and request for memory: monitor issue https://github.com/stackabletech/hdfs-operator/issues/625
+** TODO open issue: zkfc on datanode is not compliant with memory setting
 In the example below the memory limit is 256Mi for the nameNode in the hdfscluster CR, but it becomes 768Mi in each related pod because the `zkfc` container is not impacted by the CR configuration.
 This should be fixed because it prevents running the setup on CI platforms with low memory, like GitHub Actions for instance.
@@ -54,8 +29,35 @@ kubectl get pods -n hdfs simple-hdfs-namenode-default-0 -o jsonpath -o=jsonpath
 }
 
-** management of argoCD default values (jqpath expression): https://github.com/stackabletech/hdfs-operator/issues/626
-** TODO: open issue: be able to run only one dataNode on CI
+** TODO management of argoCD default values (jqpath expression): monitor issue https://github.com/stackabletech/hdfs-operator/issues/626
+** TODO open issue: be able to run only one dataNode on CI
 
-* Add helm option on HDFS cpu.min (also for operators!)
-* Move fink image to docker.stackable.tech/stackable/hadoop:3.3.6-stackable24.11.0
\ No newline at end of file
+** TODO Add helm option on HDFS cpu.min (also for operators!)
+** TODO Move fink image to docker.stackable.tech/stackable/hadoop:3.3.6-stackable24.11.0
+
+#+TITLE: previous
+* TODO DELAYED BECAUSE IT DOES NOT BLOCK BUT ONLY WARNS: create topic in distribute before sending alerts in order to avoid the error below: https://fink-broker.slack.com/archives/D03KJ390F17/p1692008729660549
+So it works with a user account; however, I did not enable authorizations in Kafka because fink-alert-simulator would no longer have been able to write to the topic without authentication.
+12 h 28
+I now get this error message:
+23/08/14 10:26:52 WARN NetworkClient: [Producer clientId=producer-1] Error while fetching metadata with correlation id 29 : {fink_simbad_grav_candidates_ztf=LEADER_NOT_AVAILABLE}
+12 h 32
+It is actually because the topic does not exist; it works if the distribute job is relaunched...
+12 h 33
+Do you think we could pre-create the topics to avoid this problem
+@JulienPeloton
+?
+* TODO Enable authZ in kafka (require authN setup in fink-alert-simulator)
+* TODO move nodeport to internal for svc kafka-cluster-kafka-external-bootstrap
+* TODO run code-check.sh in CI
+* TODO manage dependencies
+What to do with:
+1. hbase-spark-hbase2.4_spark3_scala2.12_hadoop3.2.jar
+hbase-spark-protocol-shaded-hbase2.4_spark3_scala2.12_hadoop3.2.jar
+which are both in k8s-spark-py/custom and fink-broker/libs (cf. FINK_JARS)
+cf. Julien are they required?
+2. custom/jars/commons-pool2-2.6.2.jar which was in k8s-spark-py/custom
+* TODO test removal of options below when using hdfs
++ --conf spark.driver.extraJavaOptions="-Divy.cache.dir=/tmp -Divy.home=/tmp" \
+ --conf spark.hadoop.fs.s3a.path.style.access=true \
++ --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl
index 87d3a69b..f37207e5 100644
--- a/chart/templates/_helpers.tpl
+++ b/chart/templates/_helpers.tpl
@@ -52,6 +52,7 @@ app.kubernetes.io/instance: {{ .Release.Name }}
 
 {{/* Generate s3 configuration */}}
 {{- define "fink.s3config" -}}
+{{ if eq .Values.storage "s3" -}}
 spark.hadoop.fs.s3a.endpoint: {{ .Values.s3.endpoint }}
 spark.hadoop.fs.s3a.access.key: {{ .Values.s3.access_key }}
 spark.hadoop.fs.s3a.secret.key: {{ .Values.s3.secret_key }}
@@ -62,7 +63,15 @@ spark.hadoop.fs.s3a.path.style.access: "true"
 spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"
 spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem"
 {{- end }}
+{{- end }}
 
+{{/* Generate hdfs configuration */}}
+{{- define "fink.hdfsconfig" -}}
+{{ if eq .Values.storage "hdfs" -}}
+- name: SPARK_USER
+  value: "{{ .Values.hdfs.hadoop_user_name }}"
+{{- end }}
+{{- end }}
 
 {{/* Generate common configuration */}}
 {{- define "fink.common" -}}
diff --git a/chart/templates/spark-fink-distribution.yaml b/chart/templates/spark-fink-distribution.yaml
index df6aba2d..3ac81084 100644
--- a/chart/templates/spark-fink-distribution.yaml
+++ b/chart/templates/spark-fink-distribution.yaml
@@ -23,9 +23,7 @@ spec:
   driver:
     cores: {{ tpl .Values.distribution.cores . }}
     coreRequest: "{{ tpl .Values.distribution.coreRequest . }}"
-    env:
-    - name: SPARK_USER
-      value: "{{ .Values.hadoop_user_name }}"
+    env: {{- include "fink.hdfsconfig" . | nindent 6 }}
     memory: "{{ tpl .Values.distribution.memory . }}"
     javaOptions: "-Divy.cache.dir=/tmp -Divy.home=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true"
     labels:
@@ -34,9 +32,7 @@ spec:
   executor:
     cores: {{ tpl .Values.distribution.cores . }}
     coreRequest: "{{ tpl .Values.distribution.coreRequest . }}"
-    env:
-    - name: SPARK_USER
-      value: "{{ .Values.hadoop_user_name }}"
+    env: {{- include "fink.hdfsconfig" . | nindent 6 }}
     memory: "{{ tpl .Values.distribution.memory . }}"
     instances: {{ tpl .Values.distribution.instances . }}
     javaOptions: "-Djava.security.auth.login.config=/etc/fink-broker/kafka-jaas.conf -Dcom.amazonaws.sdk.disableCertChecking=true"
diff --git a/chart/templates/spark-fink-raw2science.yaml b/chart/templates/spark-fink-raw2science.yaml
index 04936078..b08920da 100644
--- a/chart/templates/spark-fink-raw2science.yaml
+++ b/chart/templates/spark-fink-raw2science.yaml
@@ -12,9 +12,7 @@ spec:
   driver:
     cores: {{ tpl .Values.raw2science.cores . }}
     coreRequest: "{{ tpl .Values.raw2science.coreRequest . }}"
-    env:
-    - name: SPARK_USER
-      value: "{{ .Values.hadoop_user_name }}"
+    env: {{- include "fink.hdfsconfig" . | nindent 6 }}
     memory: "{{ tpl .Values.raw2science.memory . }}"
     javaOptions: "-Divy.cache.dir=/tmp -Divy.home=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true"
     labels:
@@ -23,9 +21,7 @@ spec:
   executor:
     cores: {{ tpl .Values.raw2science.cores . }}
     coreRequest: "{{ tpl .Values.raw2science.coreRequest . }}"
-    env:
-    - name: SPARK_USER
-      value: "{{ .Values.hadoop_user_name }}"
+    env: {{- include "fink.hdfsconfig" . | nindent 6 }}
     memory: "{{ tpl .Values.raw2science.memory . }}"
     javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true"
     instances: {{ tpl .Values.raw2science.instances . }}
diff --git a/chart/templates/spark-fink-stream2raw.yaml b/chart/templates/spark-fink-stream2raw.yaml
index 75b04064..783bdb83 100644
--- a/chart/templates/spark-fink-stream2raw.yaml
+++ b/chart/templates/spark-fink-stream2raw.yaml
@@ -21,9 +21,7 @@ spec:
   driver:
     cores: {{ tpl .Values.distribution.cores . }}
     coreRequest: "{{ tpl .Values.stream2raw.coreRequest . }}"
-    env:
-    - name: SPARK_USER
-      value: "{{ .Values.hadoop_user_name }}"
+    env: {{- include "fink.hdfsconfig" . | nindent 6 }}
     memory: "{{ tpl .Values.stream2raw.memory . }}"
     labels:
       version: 3.4.1
@@ -32,9 +30,7 @@ spec:
   executor:
     cores: {{ tpl .Values.distribution.cores . }}
     coreRequest: "{{ tpl .Values.stream2raw.coreRequest . }}"
-    env:
-    - name: SPARK_USER
-      value: "{{ .Values.hadoop_user_name }}"
+    env: {{- include "fink.hdfsconfig" . | nindent 6 }}
     memory: "{{ tpl .Values.stream2raw.memory . }}"
     instances: {{ tpl .Values.distribution.instances . }}
     javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true"
diff --git a/chart/values.yaml b/chart/values.yaml
index 41312f44..115e00d5 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -3,7 +3,6 @@
 
 night: "20240101"
 
-hadoop_user_name: "185"
 image:
   pullPolicy: IfNotPresent
   repository: gitlab-registry.in2p3.fr/astrolabsoftware/fink
@@ -63,6 +62,9 @@ distribution:
   schema: "/home/fink/fink-alert-schemas/ztf/distribution_schema_0p2.avsc"
   substream_prefix: "fink_"
 
+
+storage: hdfs
+
 #
 # Parameters used to access the S3 bucket
 #
@@ -74,6 +76,9 @@ s3:
   access_key: "minio"
   secret_key: "minio123"
 
+hdfs:
+  hadoop_user_name: "185"
+
 serviceAccount:
   # Specifies whether a service account should be created
   create: true

From cc842db0cbdb7112eb58348b06f05fa5e395c2a4 Mon Sep 17 00:00:00 2001
From: Fabrice Jammes
Date: Fri, 27 Dec 2024 11:53:58 +0100
Subject: [PATCH 4/5] Bump fink hdfs image to v24.11.0

---
 .ciux          | 3 +++
 doc/release.md | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.ciux b/.ciux
index 6f05e384..469da244 100644
--- a/.ciux
+++ b/.ciux
@@ -28,6 +28,9 @@ dependencies:
       dev: "true"
       itest: "true"
       release: "true"
+  - image: gitlab-registry.in2p3.fr/astrolabsoftware/fink/stackable-hadoop:v24.11.0
+    labels:
+      itest: "true"
   - image: gitlab-registry.in2p3.fr/astrolabsoftware/fink/spark-py:k8s-3.4.1
     labels:
       build: "true"
diff --git a/doc/release.md b/doc/release.md
index ecb6ad97..5708c430 100644
--- a/doc/release.md
+++ b/doc/release.md
@@ -17,7 +17,7 @@ Url for the CI is: https://github.com/astrolabsoftware/fink-broker/actions
 ciux get deps ./fink-broker -l release
 ```
 
-  Clone all the necessary repositories and ensure you are using their `main` branch.
+  Clone all the necessary repositories and ensure you are using their `master/main` branch.
 ## Get Release Tag

From 676bff65e641ad2410b0346949bb5209736c2a2c Mon Sep 17 00:00:00 2001
From: Fabrice Jammes
Date: Fri, 27 Dec 2024 12:38:57 +0100
Subject: [PATCH 5/5] Test both hdfs and s3 in GHA

---
 .github/workflows/e2e-common.yml   | 18 ++++++++------
 .github/workflows/e2e-gha.yml      |  4 ---
 chart/templates/job-hdfs-init.yaml |  2 ++
 e2e/argocd.sh                      | 40 +++++++++++++++++++++++++++++-
 e2e/run.sh                         | 19 ++++++++------
 fink_broker/spark_utils.py         |  2 +-
 6 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/e2e-common.yml b/.github/workflows/e2e-common.yml
index 7e962f93..98e40a50 100644
--- a/.github/workflows/e2e-common.yml
+++ b/.github/workflows/e2e-common.yml
@@ -6,28 +6,29 @@ on:
         required: true
         type: string
       ci_repo:
-        required: true
+        description: 'Intermediate registry to use'
+        required: false
         type: string
+        default: ""
       runner:
         required: true
         type: string
       kind_version:
-        required: true
+        description: 'Kind version to use'
+        required: false
         type: string
+        default: "v0.20.0"
     secrets:
       registry_username:
         required: true
       registry_token:
         required: true
-      private_registry_username:
-        required: true
-      private_registry_token:
-        required: true
 env:
   CIUX_VERSION: v0.0.4-rc10
   GHA_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   SUFFIX: ${{ inputs.suffix }}
   CI_REPO: ${{ inputs.ci_repo }}
+  STORAGE: ${{ inputs.storage }}
   # Override the self-hosted runner value
   POD_NAMESPACE: default
 jobs:
@@ -78,6 +79,9 @@ jobs:
         name: docker-artifact
         path: artifacts
   integration-tests:
+    strategy:
+      matrix:
+        storage: [hdfs, s3]
     name: Run integration tests
     runs-on: ${{ fromJSON(inputs.runner) }}
     outputs:
@@ -140,7 +144,7 @@ jobs:
       #     detached: true
       - name: Run argoCD
         run: |
-          ./e2e/argocd.sh
+          ./e2e/argocd.sh -S "${{ matrix.storage }}"
       - name: Check results
         run: |
           ./e2e/check-results.sh
diff --git a/.github/workflows/e2e-gha.yml b/.github/workflows/e2e-gha.yml
index 52874541..26dc2803 100644
--- a/.github/workflows/e2e-gha.yml
+++ b/.github/workflows/e2e-gha.yml
@@ -12,11 +12,7 @@ jobs:
     uses: ./.github/workflows/e2e-common.yml
     with:
       suffix: "noscience"
-      ci_repo: ""
       runner: "['ubuntu-22.04']"
-      kind_version: "v0.20.0"
     secrets:
       registry_username: ${{ secrets.REGISTRY_USERNAME }}
       registry_token: ${{ secrets.REGISTRY_TOKEN }}
-      private_registry_username: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
-      private_registry_token: ${{ secrets.PRIVATE_REGISTRY_TOKEN }}
diff --git a/chart/templates/job-hdfs-init.yaml b/chart/templates/job-hdfs-init.yaml
index 0ab3941b..4f966be6 100644
--- a/chart/templates/job-hdfs-init.yaml
+++ b/chart/templates/job-hdfs-init.yaml
@@ -1,3 +1,4 @@
+{{ if eq .Values.storage "hdfs" -}}
 apiVersion: batch/v1
 kind: Job
 metadata:
@@ -23,3 +24,4 @@ spec:
         - name: HADOOP_USER_NAME
           value: stackable
       restartPolicy: OnFailure
+{{- end }}
diff --git a/e2e/argocd.sh b/e2e/argocd.sh
index 6f163418..27f2a968 100755
--- a/e2e/argocd.sh
+++ b/e2e/argocd.sh
@@ -9,6 +9,27 @@ set -euxo pipefail
 
 DIR=$(cd "$(dirname "$0")"; pwd -P)
 
+storage="hdfs"
+
+usage() {
+    cat << EOD
+Usage: $(basename "$0") [options]
+Available options:
+  -h          This message
+  -S          Storage to use (hdfs or s3)
+EOD
+}
+
+# Get the options
+while getopts hS: c ; do
+    case $c in
+        h) usage ; exit 0 ;;
+        S) storage="$OPTARG" ;;
+        \?) usage ; exit 2 ;;
+    esac
+done
+shift "$((OPTIND-1))"
+
 CIUXCONFIG=${CIUXCONFIG:-"$HOME/.ciux/ciux.sh"}
 . $CIUXCONFIG
@@ -36,11 +57,26 @@ e2e_enabled="true"
 argocd login --core
 kubectl config set-context --current --namespace="$NS"
 
+if [ $storage == "s3" ]
+then
+    hdfs_enabled="false"
+    s3_enabled="true"
+    online_data_prefix=""
+elif [ $storage == "hdfs" ]
+then
+    hdfs_enabled="true"
+    s3_enabled="false"
+    online_data_prefix="hdfs://simple-hdfs-namenode-default-0.simple-hdfs-namenode-default.hdfs:8020///user/185"
+fi
+
+
 # Create fink app
 argocd app create fink --dest-server https://kubernetes.default.svc \
     --dest-namespace "$NS" \
     --repo https://github.com/astrolabsoftware/fink-cd.git \
     --path apps --revision "$FINK_CD_WORKBRANCH" \
+    -p s3.enabled="$s3_enabled" \
+    -p hdfs.enabled="$hdfs_enabled" \
     -p spec.source.targetRevision.default="$FINK_CD_WORKBRANCH" \
     -p spec.source.targetRevision.finkbroker="$FINK_BROKER_WORKBRANCH" \
     -p spec.source.targetRevision.finkalertsimulator="$FINK_ALERT_SIMULATOR_WORKBRANCH"
@@ -61,7 +97,9 @@ argocd app set fink-broker -p image.repository="$CIUX_IMAGE_REGISTRY" \
     -p e2e.enabled="$e2e_enabled" \
     -p image.tag="$CIUX_IMAGE_TAG" \
     -p log_level="DEBUG" \
-    -p night="20200101"
+    -p night="20200101" \
+    -p online_data_prefix="$online_data_prefix" \
+    -p storage="$storage"
 
 argocd app set fink-alert-simulator -p image.tag="$FINK_ALERT_SIMULATOR_VERSION"
 
diff --git a/e2e/run.sh b/e2e/run.sh
index 711727a8..fc58b8aa 100755
--- a/e2e/run.sh
+++ b/e2e/run.sh
@@ -30,26 +30,29 @@ build=false
 e2e=false
 monitoring=false
 push=false
+storage="hdfs"
 CIUX_IMAGE_URL="undefined"
 
 token="${TOKEN:-}"
 
 # Get options for suffix
-while getopts hcms opt; do
+while getopts hcmsS: opt; do
   case ${opt} in
-    s )
-      SUFFIX=""
-      ;;
+
     c )
      cleanup=true
      ;;
-    m )
-      monitoring=true
-      ;;
    h )
      usage
      exit 0
      ;;
+    m )
+      monitoring=true
+      ;;
+    s )
+      SUFFIX=""
+      ;;
+    S) storage="$OPTARG" ;;
    \? )
      usage
      exit 1
@@ -128,7 +131,7 @@ if [ $CIUX_BUILD = true ]; then
 fi
 
 echo "Run ArgoCD to install the whole fink e2e tests stack"
-$DIR/argocd.sh
+$DIR/argocd.sh -S "$storage"
 
 echo "Check the results of the tests."
 $DIR/check-results.sh
diff --git a/fink_broker/spark_utils.py b/fink_broker/spark_utils.py
index ebe2d6b3..088e477b 100644
--- a/fink_broker/spark_utils.py
+++ b/fink_broker/spark_utils.py
@@ -340,7 +340,7 @@ def connect_to_raw_database(basepath: str, path: str, latestfirst: bool) -> Data
     while True:
         try:
             userschema = spark.read.parquet(basepath).schema
-        except Exception as e:
+        except Exception as e:  # noqa: PERF203
             _LOG.error("Error while reading %s, %s", basepath, e)
             time.sleep(wait_sec)
             wait_sec = increase_wait_time(wait_sec)
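
Note on the retry loop in `connect_to_raw_database()` touched by the last hunk: `increase_wait_time()` is not shown in this series. A minimal sketch of the capped exponential backoff it is assumed to implement is given below; the doubling factor and the 60-second cap are assumptions, not values taken from fink-broker.

```python
# Illustrative sketch only: a capped exponential backoff matching the retry
# loop in connect_to_raw_database(). The doubling factor and 60 s cap are
# assumptions; fink-broker's actual increase_wait_time() may differ.
import time


def increase_wait_time(wait_sec: float, factor: float = 2.0, cap: float = 60.0) -> float:
    """Return the next delay, growing geometrically up to a cap."""
    return min(wait_sec * factor, cap)


def wait_for(action, wait_sec: float = 1.0):
    """Retry action() until it succeeds, backing off between attempts."""
    while True:
        try:
            return action()
        except Exception:
            time.sleep(wait_sec)
            wait_sec = increase_wait_time(wait_sec)
```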