diff --git a/.github/workflows/lint-test-high-availability.yaml b/.github/workflows/lint-test-high-availability.yaml
index eae1ee0..dff1304 100644
--- a/.github/workflows/lint-test-high-availability.yaml
+++ b/.github/workflows/lint-test-high-availability.yaml
@@ -3,7 +3,7 @@ name: Lint and Test Charts High Availability
 on:
   pull_request:
     paths:
-      - 'charts/memgraph-high-availability/**'
+      - "charts/memgraph-high-availability/**"
 
 jobs:
   lint-test:
@@ -21,7 +21,7 @@ jobs:
 
       - uses: actions/setup-python@v4
         with:
-          python-version: '3.12'
+          python-version: "3.12"
           check-latest: true
 
      - name: Download dependencies
@@ -46,7 +46,7 @@ jobs:
 
       - name: Run chart-testing (lint)
         if: steps.list-changed.outputs.changed == 'true'
-        run: ct lint --target-branch ${{ github.event.repository.default_branch }} --check-version-increment false --charts charts/memgraph-high-availability
+        run: ct lint --target-branch ${{ github.event.repository.default_branch }} --check-version-increment false --lint-conf lintconf.yaml --charts charts/memgraph-high-availability
 
       # 🔹 Minikube instead of kind
       - name: Set up Minikube
@@ -90,12 +90,13 @@ jobs:
         if: steps.list-changed.outputs.changed == 'true'
         run: |
           echo "Waiting for all pods in the namespace to be Ready..."
-          for i in {1..60}; do
+          all_ready=false
+          for _ in {1..60}; do
             echo "--- Pod status at $(date) ---"
-            kubectl get pods -o wide
+            kubectl get pods -o wide | grep -E '(NAME|memgraph-coordinator|memgraph-data)'
 
             # If any pods are Pending, describe them
-            pending=$(kubectl get pods --no-headers | awk '$3=="Pending"{print $1}')
+            pending=$(kubectl get pods --no-headers | grep -E 'memgraph-(coordinator|data)' | awk '$3=="Pending"{print $1}')
             if [ -n "$pending" ]; then
               echo "⚠️ Some pods are Pending, describing..."
               for pod in $pending; do
@@ -104,16 +105,22 @@ jobs:
              done
            fi
 
-            # Check if all pods are ready
-            not_ready=$(kubectl get pods --no-headers | awk '{print $2}' | grep -vE '^([0-9]+)/\1$' || true)
+            # Check if all pods are Ready
+            not_ready=$(kubectl get pods --no-headers | grep -E 'memgraph-(coordinator|data)' | awk '{print $2}' | grep -vE '^([0-9]+)/\1$' || true)
             if [ -z "$not_ready" ]; then
               echo "✅ All pods are Ready"
+              all_ready=true
               break
             fi
-
             sleep 10
           done
 
+          if [ "$all_ready" = false ]; then
+            echo "❌ ERROR: Timeout waiting for pods to become ready after 10 minutes"
+            kubectl get pods | grep -E '(NAME|memgraph-coordinator|memgraph-data)'
+            exit 1
+          fi
+
       - name: Run Helm tests (create test Jobs)
         if: steps.list-changed.outputs.changed == 'true'
         run: |
diff --git a/.gitignore b/.gitignore
index 7fed4d3..3ff9b80 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ docker/
 .DS_Store
 Thumbs.db
 charts/memgraph-high-availability/charts/
+.vscode/
diff --git a/charts/memgraph-high-availability/templates/cluster-setup.yaml b/charts/memgraph-high-availability/templates/cluster-setup.yaml
new file mode 100644
index 0000000..da624f2
--- /dev/null
+++ b/charts/memgraph-high-availability/templates/cluster-setup.yaml
@@ -0,0 +1,72 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: cluster-setup
+  annotations:
+    "helm.sh/hook": post-install
+spec:
+  template:
+    spec:
+      restartPolicy: Never
+      containers:
+        - name: cluster-setup
+          image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag }}"
+          command: ["/bin/bash", "-c"]
+          args:
+            - |
+              set -e # Exit on error
+
+              echo "Waiting for Memgraph coordinators to be ready..."
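+              # NOTE (editorial annotation): wait_for_service (defined below)
+              # probes TCP reachability with bash's virtual /dev/tcp/<host>/<port>
+              # path: redirecting to it opens a raw TCP connection, and
+              # `timeout 5` bounds each attempt, so no extra client tools
+              # (nc, curl) are needed in the Memgraph image.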
+
+              # Function to wait for a service to be ready
+              wait_for_service() {
+                local host=$1
+                local port=$2
+                local max_attempts=60
+                local attempt=0
+
+                while [ $attempt -lt $max_attempts ]; do
+                  echo "Checking $host:$port (attempt $((attempt+1))/$max_attempts)"
+
+                  if timeout 5 bash -c "cat < /dev/null > /dev/tcp/$host/$port" 2>/dev/null; then
+                    echo "✅ $host:$port is ready"
+                    return 0
+                  fi
+
+                  attempt=$((attempt+1))
+                  sleep 5
+                done
+
+                echo "❌ Timeout waiting for $host:$port"
+                return 1
+              }
+
+              # Wait for all coordinators
+              {{- range .Values.coordinators }}
+              wait_for_service "memgraph-coordinator-{{ .id }}.{{ $.Release.Namespace }}.svc.cluster.local" {{ $.Values.ports.boltPort }}
+              {{- end }}
+
+              # Wait for all data instances
+              {{- range .Values.data }}
+              wait_for_service "memgraph-data-{{ .id }}.{{ $.Release.Namespace }}.svc.cluster.local" {{ $.Values.ports.boltPort }}
+              {{- end }}
+
+              # Check if instances are already registered
+              INSTANCE_COUNT=$(echo "SHOW INSTANCES;" | mgconsole --host memgraph-coordinator-{{ (index .Values.coordinators 0).id }}.{{ .Release.Namespace }}.svc.cluster.local --port {{ $.Values.ports.boltPort }} --output-format=csv 2>/dev/null | tail -n +2 | wc -l)
+              if [ "$INSTANCE_COUNT" -gt 1 ]; then
+                echo "✅ Cluster already configured with $INSTANCE_COUNT instances. Skipping registration."
+                exit 0
+              fi
+
+              # Add all coordinators
+              {{- range .Values.coordinators }}
+              echo 'ADD COORDINATOR {{ .id }} WITH CONFIG {"bolt_server": "memgraph-coordinator-{{ .id }}.{{ $.Release.Namespace }}.svc.cluster.local:{{ $.Values.ports.boltPort }}", "management_server": "memgraph-coordinator-{{ .id }}.{{ $.Release.Namespace }}.svc.cluster.local:{{ $.Values.ports.managementPort }}", "coordinator_server": "memgraph-coordinator-{{ .id }}.{{ $.Release.Namespace }}.svc.cluster.local:{{ $.Values.ports.coordinatorPort }}"};' | mgconsole --host memgraph-coordinator-{{ (index $.Values.coordinators 0).id }}.{{ $.Release.Namespace }}.svc.cluster.local --port {{ $.Values.ports.boltPort }}
+              {{- end }}
+
+              # Register all data instances
+              {{- range $index, $instance := .Values.data }}
+              echo 'REGISTER INSTANCE instance_{{ add $index 1 }} WITH CONFIG {"bolt_server": "memgraph-data-{{ $instance.id }}.{{ $.Release.Namespace }}.svc.cluster.local:{{ $.Values.ports.boltPort }}", "management_server": "memgraph-data-{{ $instance.id }}.{{ $.Release.Namespace }}.svc.cluster.local:{{ $.Values.ports.managementPort }}", "replication_server": "memgraph-data-{{ $instance.id }}.{{ $.Release.Namespace }}.svc.cluster.local:{{ $.Values.ports.replicationPort }}"};' | mgconsole --host memgraph-coordinator-{{ (index $.Values.coordinators 0).id }}.{{ $.Release.Namespace }}.svc.cluster.local --port {{ $.Values.ports.boltPort }}
+              {{- end }}
+
+              # Set first data instance as MAIN
+              echo 'SET INSTANCE instance_1 TO MAIN;' | mgconsole --host memgraph-coordinator-{{ (index .Values.coordinators 0).id }}.{{ .Release.Namespace }}.svc.cluster.local --port {{ $.Values.ports.boltPort }}
diff --git a/charts/memgraph-high-availability/templates/tests/test-connection.yaml b/charts/memgraph-high-availability/templates/tests/test-connection.yaml
index f52b29f..ef78222 100644
--- a/charts/memgraph-high-availability/templates/tests/test-connection.yaml
+++ b/charts/memgraph-high-availability/templates/tests/test-connection.yaml
@@ -19,41 +19,6 @@ spec:
       - |
         PORT=7687
 
-        echo "=== Cluster init via coordinator-1 ==="
-        cat > /tmp/cluster-setup.cypher <<'EOF'
-        ADD COORDINATOR 1 WITH CONFIG {
-          "bolt_server": "memgraph-coordinator-1.default.svc.cluster.local:7687",
"memgraph-coordinator-1.default.svc.cluster.local:7687", - "management_server": "memgraph-coordinator-1.default.svc.cluster.local:10000", - "coordinator_server": "memgraph-coordinator-1.default.svc.cluster.local:12000" - }; - ADD COORDINATOR 2 WITH CONFIG { - "bolt_server": "memgraph-coordinator-2.default.svc.cluster.local:7687", - "management_server": "memgraph-coordinator-2.default.svc.cluster.local:10000", - "coordinator_server": "memgraph-coordinator-2.default.svc.cluster.local:12000" - }; - ADD COORDINATOR 3 WITH CONFIG { - "bolt_server": "memgraph-coordinator-3.default.svc.cluster.local:7687", - "management_server": "memgraph-coordinator-3.default.svc.cluster.local:10000", - "coordinator_server": "memgraph-coordinator-3.default.svc.cluster.local:12000" - }; - REGISTER INSTANCE instance_0 WITH CONFIG { - "bolt_server": "memgraph-data-0.default.svc.cluster.local:7687", - "management_server": "memgraph-data-0.default.svc.cluster.local:10000", - "replication_server": "memgraph-data-0.default.svc.cluster.local:20000" - }; - REGISTER INSTANCE instance_1 WITH CONFIG { - "bolt_server": "memgraph-data-1.default.svc.cluster.local:7687", - "management_server": "memgraph-data-1.default.svc.cluster.local:10000", - "replication_server": "memgraph-data-1.default.svc.cluster.local:20000" - }; - SET INSTANCE instance_0 TO MAIN; - EOF - - echo "Applying cluster setup..." - mgconsole \ - --host "memgraph-coordinator-1.default.svc.cluster.local" \ - --port "$PORT" < /tmp/cluster-setup.cypher - hosts_coordinators=" memgraph-coordinator-1.default.svc.cluster.local memgraph-coordinator-2.default.svc.cluster.local diff --git a/charts/memgraph-high-availability/values.yaml b/charts/memgraph-high-availability/values.yaml index 18dcdef..b5e1ea2 100644 --- a/charts/memgraph-high-availability/values.yaml +++ b/charts/memgraph-high-availability/values.yaml @@ -40,10 +40,10 @@ storage: coreDumpsMountPath: /var/core/memgraph ports: - boltPort: 7687 # If you change this value, change it also in probes definition + boltPort: 7687 # If you change this value, change it also in probes definition managementPort: 10000 replicationPort: 20000 - coordinatorPort: 12000 # If you change this value, change it also in probes definition + coordinatorPort: 12000 # If you change this value, change it also in probes definition externalAccessConfig: dataInstance: @@ -56,7 +56,7 @@ externalAccessConfig: annotations: {} headlessService: - enabled: false # If set to true, each data and coordinator instance will use headless service + enabled: false # If set to true, each data and coordinator instance will use headless service # Affinity controls the scheduling of the memgraph-high-availability pods. # By default data pods will avoid being scheduled on the same node as other data pods, @@ -114,33 +114,33 @@ container: data: readinessProbe: tcpSocket: - port: 7687 # If you change bolt port, change this also + port: 7687 # If you change bolt port, change this also failureThreshold: 20 timeoutSeconds: 10 periodSeconds: 5 livenessProbe: tcpSocket: - port: 7687 # If you change bolt port, change this also + port: 7687 # If you change bolt port, change this also failureThreshold: 20 timeoutSeconds: 10 periodSeconds: 5 # When restoring Memgraph from a backup, it is important to give enough time app to start. Here, we set it to 2h by default. 
     startupProbe:
       tcpSocket:
-        port: 7687  # If you change bolt port, change this also
+        port: 7687 # If you change bolt port, change this also
       failureThreshold: 1440
       timeoutSeconds: 10
       periodSeconds: 5
   coordinators:
     readinessProbe:
       tcpSocket:
-        port: 12000  # If you change coordinator port, change this also
+        port: 12000 # If you change coordinator port, change this also
       failureThreshold: 20
       timeoutSeconds: 10
       periodSeconds: 5
     livenessProbe:
       tcpSocket:
-        port: 12000  # If you change coordinator port, change this also
+        port: 12000 # If you change coordinator port, change this also
       failureThreshold: 20
       timeoutSeconds: 10
       periodSeconds: 5
@@ -155,13 +155,12 @@ resources:
   data: {}
   coordinators: {}
 
-
 updateStrategy:
-  type: RollingUpdate  # Set to OnDelete to support ISSU
+  type: RollingUpdate # Set to OnDelete to support ISSU
 
 prometheus:
   enabled: false
-  namespace: monitoring  # Namespace where K8s resources from mg-exporter.yaml will be installed and where your kube-prometheus-stack chart is installed
+  namespace: monitoring # Namespace where K8s resources from mg-exporter.yaml will be installed and where your kube-prometheus-stack chart is installed
   memgraphExporter:
     port: 9115
     pullFrequencySeconds: 5
@@ -188,7 +187,6 @@ extraEnv:
 #   - name: COORD_TWO
 #     value: "two_rr"
 
-
 ## @param initContainers Add additional init containers for data instances and coordinators
 ## ref: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
 ## e.g:
@@ -206,74 +204,70 @@ initContainers:
 # Setting the Memgraph's memory limit to more than the available resources can trigger pod eviction and restarts before Memgraph can make a query exception and continue running
 # the pod.
 data:
-- id: "0"
-  restoreDataFromSnapshot: false
-  volumeSnapshotName: data-0-snap  # Used only if restoreDataFromSnapshot is set to true
-  args:
-  - "--management-port=10000"
-  - "--bolt-port=7687"
-  - "--also-log-to-stderr"
-  - "--log-level=TRACE"
-  - "--log-file=/var/log/memgraph/memgraph.log"
-  - "--data-directory=/var/lib/memgraph/mg_data"
+  - id: "0"
+    restoreDataFromSnapshot: false
+    volumeSnapshotName: data-0-snap # Used only if restoreDataFromSnapshot is set to true
+    args:
+      - "--management-port=10000"
+      - "--bolt-port=7687"
+      - "--also-log-to-stderr"
+      - "--log-level=TRACE"
+      - "--log-file=/var/log/memgraph/memgraph.log"
+      - "--data-directory=/var/lib/memgraph/mg_data"
 
-- id: "1"
-  restoreDataFromSnapshot: false
-  volumeSnapshotName: data-1-snap  # Used only if restoreDataFromSnapshot is set to true
-  args:
-  - "--management-port=10000"
-  - "--bolt-port=7687"
-  - "--also-log-to-stderr"
-  - "--log-level=TRACE"
-  - "--log-file=/var/log/memgraph/memgraph.log"
-  - "--data-directory=/var/lib/memgraph/mg_data"
+  - id: "1"
+    restoreDataFromSnapshot: false
+    volumeSnapshotName: data-1-snap # Used only if restoreDataFromSnapshot is set to true
+    args:
+      - "--management-port=10000"
+      - "--bolt-port=7687"
+      - "--also-log-to-stderr"
+      - "--log-level=TRACE"
+      - "--log-file=/var/log/memgraph/memgraph.log"
+      - "--data-directory=/var/lib/memgraph/mg_data"
 
 coordinators:
-- id: "1"
-  restoreDataFromSnapshot: false
-  volumeSnapshotName: coord-1-snap  # Used only if restoreDataFromSnapshot is set to true
-  args:
-  - "--coordinator-id=1"
-  - "--coordinator-port=12000"
-  - "--management-port=10000"
-  - "--bolt-port=7687"
-  - "--also-log-to-stderr"
-  - "--log-level=TRACE"
-  - "--coordinator-hostname=memgraph-coordinator-1.default.svc.cluster.local"
-  - "--log-file=/var/log/memgraph/memgraph.log"
-  - "--nuraft-log-file=/var/log/memgraph/memgraph.log"
- "--data-directory=/var/lib/memgraph/mg_data" - -- id: "2" - restoreDataFromSnapshot: false - volumeSnapshotName: coord-2-snap # Used only if restoreDataFromSnapshot is set to true - args: - - "--coordinator-id=2" - - "--coordinator-port=12000" - - "--management-port=10000" - - "--bolt-port=7687" - - "--also-log-to-stderr" - - "--log-level=TRACE" - - "--coordinator-hostname=memgraph-coordinator-2.default.svc.cluster.local" - - "--log-file=/var/log/memgraph/memgraph.log" - - "--nuraft-log-file=/var/log/memgraph/memgraph.log" - - "--data-directory=/var/lib/memgraph/mg_data" + - id: "1" + restoreDataFromSnapshot: false + volumeSnapshotName: coord-1-snap # Used only if restoreDataFromSnapshot is set to true + args: + - "--coordinator-id=1" + - "--coordinator-port=12000" + - "--management-port=10000" + - "--bolt-port=7687" + - "--also-log-to-stderr" + - "--log-level=TRACE" + - "--coordinator-hostname=memgraph-coordinator-1.default.svc.cluster.local" + - "--log-file=/var/log/memgraph/memgraph.log" + - "--data-directory=/var/lib/memgraph/mg_data" -- id: "3" - restoreDataFromSnapshot: false - volumeSnapshotName: coord-3-snap # Used only if restoreDataFromSnapshot is set to true - args: - - "--coordinator-id=3" - - "--coordinator-port=12000" - - "--management-port=10000" - - "--bolt-port=7687" - - "--also-log-to-stderr" - - "--log-level=TRACE" - - "--coordinator-hostname=memgraph-coordinator-3.default.svc.cluster.local" - - "--log-file=/var/log/memgraph/memgraph.log" - - "--nuraft-log-file=/var/log/memgraph/memgraph.log" - - "--data-directory=/var/lib/memgraph/mg_data" + - id: "2" + restoreDataFromSnapshot: false + volumeSnapshotName: coord-2-snap # Used only if restoreDataFromSnapshot is set to true + args: + - "--coordinator-id=2" + - "--coordinator-port=12000" + - "--management-port=10000" + - "--bolt-port=7687" + - "--also-log-to-stderr" + - "--log-level=TRACE" + - "--coordinator-hostname=memgraph-coordinator-2.default.svc.cluster.local" + - "--log-file=/var/log/memgraph/memgraph.log" + - "--data-directory=/var/lib/memgraph/mg_data" + - id: "3" + restoreDataFromSnapshot: false + volumeSnapshotName: coord-3-snap # Used only if restoreDataFromSnapshot is set to true + args: + - "--coordinator-id=3" + - "--coordinator-port=12000" + - "--management-port=10000" + - "--bolt-port=7687" + - "--also-log-to-stderr" + - "--log-level=TRACE" + - "--coordinator-hostname=memgraph-coordinator-3.default.svc.cluster.local" + - "--log-file=/var/log/memgraph/memgraph.log" + - "--data-directory=/var/lib/memgraph/mg_data" userContainers: data: [] diff --git a/lintconf.yaml b/lintconf.yaml new file mode 100644 index 0000000..99aa38f --- /dev/null +++ b/lintconf.yaml @@ -0,0 +1,42 @@ +--- +rules: + braces: + min-spaces-inside: 0 + max-spaces-inside: 0 + min-spaces-inside-empty: -1 + max-spaces-inside-empty: -1 + brackets: + min-spaces-inside: 0 + max-spaces-inside: 0 + min-spaces-inside-empty: -1 + max-spaces-inside-empty: -1 + colons: + max-spaces-before: 0 + max-spaces-after: 1 + commas: + max-spaces-before: 0 + min-spaces-after: 1 + max-spaces-after: 1 + comments: + require-starting-space: true + min-spaces-from-content: 1 + document-end: disable + document-start: disable # No --- to start a file + empty-lines: + max: 2 + max-start: 0 + max-end: 0 + hyphens: + max-spaces-after: 1 + indentation: + spaces: consistent + indent-sequences: whatever + check-multi-line-strings: false + key-duplicates: enable + line-length: disable # Lines can be any length + new-line-at-end-of-file: enable + new-lines: + type: unix + 
+  trailing-spaces: enable
+  truthy:
+    level: warning
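
A quick local sanity check for the pieces this patch touches (a sketch, not part
of the patch itself; it assumes ct and kubectl are on PATH, that the release is
installed in the default namespace with the default branch named main, and that
the first coordinator pod is named memgraph-coordinator-1-0 per the chart's
StatefulSets):

    # Lint the chart the same way the CI job now does, with lintconf.yaml
    ct lint --target-branch main --check-version-increment false \
      --lint-conf lintconf.yaml --charts charts/memgraph-high-availability

    # After `helm install`, registration is done by the post-install hook Job
    kubectl logs job/cluster-setup

    # The coordinator should report 3 coordinators and 2 data instances
    echo "SHOW INSTANCES;" | kubectl exec -i memgraph-coordinator-1-0 -- \
      mgconsole --output-format=csv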