Commit b3f769a (parent c931f96)

Add ConfigMap/Secret, multi-container labs, diagnose script, exam notes, CI

8 files changed: 341 additions, 1 deletion

.github/workflows/validate.yml (+42, new file)

```yaml
name: Validate manifests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Validate YAML syntax
        run: |
          echo "Checking YAML files..."
          ERRORS=0
          for f in $(find labs -name '*.yml' -o -name '*.yaml'); do
            # list() forces the lazy generator so parse errors actually raise
            if ! python3 -c "import yaml; list(yaml.safe_load_all(open('$f')))" 2>/dev/null; then
              echo "FAIL: $f"
              ERRORS=$((ERRORS + 1))
            else
              echo "  ok: $f"
            fi
          done
          if [ "$ERRORS" -gt 0 ]; then
            echo "$ERRORS file(s) failed validation"
            exit 1
          fi
          echo "All YAML files valid"

      - name: Lint with kubeval
        uses: instrumenta/kubeval-action@master
        with:
          files: labs
        continue-on-error: true

      - name: shellcheck scripts
        run: |
          sudo apt-get update && sudo apt-get install -y shellcheck
          shellcheck scripts/*.sh || true
```
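The YAML-syntax step is worth sanity-checking locally before pushing. One subtlety: `yaml.safe_load_all` returns a lazy generator, so it must be consumed (e.g. via `list(...)`) before parse errors surface. A standalone sketch of the same loop, run against a throwaway directory with one good and one deliberately broken file (both file names are made up for the demo; assumes `python3` with PyYAML available, as on the `ubuntu-latest` runner):

```shell
# Recreate the CI validation loop against hypothetical test files.
DIR=$(mktemp -d)
printf 'a: 1\nb: [2, 3]\n' > "$DIR/good.yaml"
printf 'a: [unclosed\n' > "$DIR/bad.yaml"

ERRORS=0
for f in $(find "$DIR" -name '*.yml' -o -name '*.yaml'); do
  # list() forces the generator so malformed YAML actually raises
  if ! python3 -c "import yaml; list(yaml.safe_load_all(open('$f')))" 2>/dev/null; then
    echo "FAIL: $f"
    ERRORS=$((ERRORS + 1))
  else
    echo "  ok: $f"
  fi
done
echo "$ERRORS file(s) failed"
```

Running this reports one `FAIL` (the unclosed flow sequence) and one `ok`.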

README.md (+6, −1)

```diff
@@ -9,7 +9,7 @@ Built on a bare-metal cluster (3x Raspberry Pi 4, kubeadm, Calico CNI).
 | Domain | Weight | Labs |
 |--------|--------|------|
 | [Cluster Architecture & Installation](labs/cluster-setup/) | 25% | kubeadm init/join, etcd backup/restore, upgrade |
-| [Workloads & Scheduling](labs/workloads/) | 15% | Deployments, DaemonSets, resource limits, scheduling |
+| [Workloads & Scheduling](labs/workloads/) | 15% | Deployments, DaemonSets, ConfigMaps/Secrets, multi-container pods, scheduling |
 | [Services & Networking](labs/networking/) | 20% | Services, Ingress, NetworkPolicy, DNS |
 | [Storage](labs/storage/) | 10% | PV, PVC, StorageClasses |
 | [Troubleshooting](labs/troubleshooting/) | 30% | Broken nodes, CrashLoopBackOff, DNS failures |
@@ -54,6 +54,11 @@ k expose deploy web --port=80 --type=NodePort $do > svc.yml
 |--------|---------|
 | [`scripts/etcd-backup.sh`](scripts/etcd-backup.sh) | Snapshot etcd and verify restore |
 | [`scripts/cluster-upgrade.sh`](scripts/cluster-upgrade.sh) | Step-by-step kubeadm upgrade |
+| [`labs/troubleshooting/diagnose.sh`](labs/troubleshooting/diagnose.sh) | Quick cluster health check |
+
+## Notes
+
+- [`notes/exam-tips.md`](notes/exam-tips.md) — Shortcuts, jsonpath examples, common mistakes
 
 ## Resources
 
```

labs/troubleshooting/README.md (+4)

```diff
@@ -2,6 +2,10 @@
 
 Largest exam domain. Practice diagnosing issues without looking at solutions first.
 
+## Scripts
+
+- [`diagnose.sh`](diagnose.sh) — Quick cluster health check (nodes, control plane, DNS, problem pods, warnings)
+
 ## Scenarios
 
 ### 1. Node NotReady
```

labs/troubleshooting/diagnose.sh (+82, new file)

```bash
#!/bin/bash
# Quick cluster health check — useful when things look wrong
set -euo pipefail

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

pass() { echo -e "${GREEN}[OK]${NC} $1"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
fail() { echo -e "${RED}[FAIL]${NC} $1"; }

echo "=== Cluster Health Check ==="
echo ""

# 1. Node status
echo "--- Nodes ---"
NOT_READY=$(kubectl get nodes --no-headers | grep -v " Ready" || true)
if [ -z "$NOT_READY" ]; then
  pass "All nodes are Ready"
else
  fail "Nodes not ready:"
  echo "$NOT_READY"
fi
kubectl get nodes -o wide --no-headers
echo ""

# 2. Control plane pods
echo "--- Control Plane ---"
for component in kube-apiserver kube-controller-manager kube-scheduler etcd; do
  STATUS=$(kubectl get pods -n kube-system -l component="$component" --no-headers 2>/dev/null | awk '{print $3}' | head -1 || true)
  if [ "$STATUS" = "Running" ]; then
    pass "$component"
  else
    fail "$component — status: ${STATUS:-not found}"
  fi
done
echo ""

# 3. CoreDNS
echo "--- CoreDNS ---"
DNS_PODS=$(kubectl get pods -n kube-system -l k8s-app=kube-dns --no-headers 2>/dev/null || true)
DNS_RUNNING=$(echo "$DNS_PODS" | grep -c "Running" || true)
# grep -c . counts non-empty lines, so an empty result is 0 rather than 1
DNS_TOTAL=$(echo "$DNS_PODS" | grep -c . || true)
if [ "$DNS_TOTAL" -gt 0 ] && [ "$DNS_RUNNING" -eq "$DNS_TOTAL" ]; then
  pass "CoreDNS ($DNS_RUNNING/$DNS_TOTAL running)"
else
  warn "CoreDNS ($DNS_RUNNING/$DNS_TOTAL running)"
fi
echo ""

# 4. Pods not running
echo "--- Problem Pods ---"
BAD_PODS=$(kubectl get pods -A --no-headers --field-selector=status.phase!=Running,status.phase!=Succeeded 2>/dev/null || true)
if [ -z "$BAD_PODS" ]; then
  pass "No pods in bad state"
else
  warn "Pods not Running/Succeeded:"
  echo "$BAD_PODS"
fi
echo ""

# 5. Recent events (warnings only)
echo "--- Recent Warnings (last 10) ---"
WARNINGS=$(kubectl get events -A --field-selector type=Warning --sort-by=.lastTimestamp 2>/dev/null | tail -10 || true)
if [ -z "$WARNINGS" ]; then
  pass "No recent warnings"
else
  echo "$WARNINGS"
fi
echo ""

# 6. Resource pressure
echo "--- Resource Usage ---"
if kubectl top nodes &>/dev/null; then
  kubectl top nodes
else
  warn "Metrics server not available (kubectl top won't work)"
fi
echo ""
echo "=== Done ==="
```
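One pitfall the CoreDNS tally has to work around: `echo "" | wc -l` prints 1, so an empty pod list would look like one pod. A small sketch of a counter that treats empty input as zero (the function name and sample data are made up for the demo):

```shell
# Count non-empty lines; grep -c . exits 1 on zero matches, hence || true.
count_lines() {
  printf '%s' "$1" | grep -c . || true
}

PODS="coredns-abc 1/1 Running
coredns-def 1/1 Running"

echo "pods: $(count_lines "$PODS")"   # pods: 2
echo "empty: $(count_lines "")"       # empty: 0
```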

labs/workloads/README.md (+2)

```diff
@@ -8,6 +8,8 @@
 - [`resource-limits.yml`](resource-limits.yml) — Pod with requests/limits and LimitRange
 - [`node-affinity.yml`](node-affinity.yml) — Schedule pods to specific nodes
 - [`taint-toleration.yml`](taint-toleration.yml) — Taint a node, schedule with toleration
+- [`configmap-secret.yml`](configmap-secret.yml) — ConfigMap + Secret as env vars and volume mounts
+- [`multi-container.yml`](multi-container.yml) — Init container + sidecar logging pattern
 
 ## Key Concepts
 
```

labs/workloads/configmap-secret.yml (+67, new file)

```yaml
# App config via ConfigMap + database credentials via Secret
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-config
  namespace: default
data:
  APP_ENV: "production"
  LOG_LEVEL: "info"
  MAX_CONNECTIONS: "100"
  config.yaml: |
    server:
      port: 8080
      read_timeout: 30s
      write_timeout: 30s
    cache:
      ttl: 300
---
apiVersion: v1
kind: Secret
metadata:
  name: db-credentials
  namespace: default
type: Opaque
stringData:
  DB_HOST: "postgres.database.svc.cluster.local"
  DB_USER: "app"
  DB_PASSWORD: "changeme"
---
apiVersion: v1
kind: Pod
metadata:
  name: app
  labels:
    app: backend
spec:
  containers:
    - name: app
      image: busybox:1.36
      command: ["sh", "-c", "echo \"DB=$DB_HOST user=$DB_USER\" && cat /etc/app/config.yaml && sleep 3600"]
      # All Secret keys as env vars
      envFrom:
        - secretRef:
            name: db-credentials
      # Individual ConfigMap keys as env vars
      env:
        - name: APP_ENV
          valueFrom:
            configMapKeyRef:
              name: app-config
              key: APP_ENV
        - name: LOG_LEVEL
          valueFrom:
            configMapKeyRef:
              name: app-config
              key: LOG_LEVEL
      # File as volume mount
      volumeMounts:
        - name: config-volume
          mountPath: /etc/app
          readOnly: true
  volumes:
    - name: config-volume
      configMap:
        name: app-config
        items:
          - key: config.yaml
            path: config.yaml
```
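A note on the Secret: `stringData` is a write-time convenience, and the API server stores the values base64-encoded under `data`. That is encoding, not encryption, and it is trivially reversible. The round trip, using the password from the manifest:

```shell
# base64 is reversible — anyone who can read the Secret can read the password.
PLAIN='changeme'
ENCODED=$(printf '%s' "$PLAIN" | base64)
DECODED=$(printf '%s' "$ENCODED" | base64 -d)
echo "stored as: $ENCODED"   # stored as: Y2hhbmdlbWU=
echo "decoded:   $DECODED"   # decoded:   changeme
```

This is why `kubectl get secret db-credentials -o yaml` effectively exposes the plaintext to anyone with read access.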

labs/workloads/multi-container.yml (+58, new file)

```yaml
# Init container waits for a dependency, sidecar ships logs
apiVersion: v1
kind: Pod
metadata:
  name: web-app
  labels:
    app: web
spec:
  initContainers:
    # Wait until the database service is resolvable via DNS
    - name: wait-for-db
      image: busybox:1.36
      command:
        - sh
        - -c
        - |
          echo "Waiting for postgres to become available..."
          until nslookup postgres.default.svc.cluster.local; do
            echo "  ...not ready, retrying in 2s"
            sleep 2
          done
          echo "postgres is up"
  containers:
    # Main application container
    - name: app
      image: nginx:1.27
      ports:
        - containerPort: 80
      volumeMounts:
        - name: logs
          mountPath: /var/log/nginx
      resources:
        requests:
          cpu: 100m
          memory: 128Mi
        limits:
          cpu: 250m
          memory: 256Mi

    # Sidecar: tails nginx access log to stdout (collected by cluster logging)
    - name: log-shipper
      image: busybox:1.36
      command: ["sh", "-c", "tail -F /var/log/nginx/access.log"]
      volumeMounts:
        - name: logs
          mountPath: /var/log/nginx
          readOnly: true
      resources:
        requests:
          cpu: 25m
          memory: 32Mi
        limits:
          cpu: 50m
          memory: 64Mi

  volumes:
    - name: logs
      emptyDir: {}
```
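The `until nslookup` loop in the init container retries forever; if the dependency never appears, the pod just sits in `Init:0/1`. The same pattern with a bounded retry count can be sketched as follows (the helper name and attempt counts are made up for illustration):

```shell
# Retry a probe command up to a fixed number of attempts.
wait_for() {
  tries=$1; shift
  i=0
  until "$@"; do
    i=$((i + 1))
    [ "$i" -ge "$tries" ] && return 1
    sleep 0.1
  done
}

wait_for 5 true && echo "dependency up"           # dependency up
wait_for 3 false || echo "gave up after 3 tries"  # gave up after 3 tries
```

In an init container, exiting nonzero after the cap lets the kubelet restart the probe with backoff instead of spinning silently.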

notes/exam-tips.md (+80, new file)

# Exam Notes

Things I keep forgetting or that cost me time in practice runs.

## Time Management

- 2 hours, 17 questions — roughly 7 min per question
- Flag hard ones and come back; don't get stuck on a single task
- Some questions are worth 4%, others 7-8% — prioritize the high-value ones

## Shortcuts That Save Time

```bash
# Set these up FIRST, before touching any question
alias k='kubectl'
alias kgp='kubectl get pods -A'
alias kgn='kubectl get nodes'
alias kd='kubectl describe'
export do='--dry-run=client -o yaml'
export now='--grace-period=0 --force'

# vim settings (add to ~/.vimrc)
set tabstop=2
set shiftwidth=2
set expandtab
```
## jsonpath

Comes up all the time. I always forget the syntax.

```bash
# Get internal IPs of all nodes
kubectl get nodes -o jsonpath='{.items[*].status.addresses[?(@.type=="InternalIP")].address}'

# List all container images running in a namespace
kubectl get pods -n kube-system -o jsonpath='{range .items[*]}{.spec.containers[*].image}{"\n"}{end}'

# Get PVs sorted by capacity
kubectl get pv --sort-by=.spec.capacity.storage

# Custom columns
kubectl get pods -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName
```

## etcd

Always need the certs. Check the etcd pod manifest if unsure:

```bash
grep -E 'cert|key|cacert' /etc/kubernetes/manifests/etcd.yaml
```
## Common Mistakes

- Forgetting `--namespace` — always double-check which namespace the question asks for
- NetworkPolicy: once ANY policy selects a pod, traffic of that policy's type (ingress/egress) is denied unless explicitly allowed
- PV/PVC: accessModes and capacity must match, otherwise the PVC stays Pending
- `kubeadm upgrade apply` only on the control plane, `kubeadm upgrade node` on workers
- Static pod manifests go in `/etc/kubernetes/manifests/`, not applied via kubectl
- After editing a static pod manifest, the kubelet picks it up automatically — no restart needed
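The NetworkPolicy bullet is the one that bites hardest in practice. A minimal default-deny that triggers that behavior looks like this (the namespace name is hypothetical, just for illustration):

```yaml
# Selects every pod in the namespace; from this point on, only ingress
# explicitly allowed by some other policy gets through.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-ingress
  namespace: staging   # hypothetical namespace
spec:
  podSelector: {}      # empty selector = all pods in the namespace
  policyTypes:
    - Ingress
```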
## kubectl Tricks

```bash
# Generate YAML without applying
kubectl run tmp --image=nginx $do > pod.yml

# Quick debug pod
kubectl run debug --image=busybox:1.36 --rm -it -- sh

# Check if RBAC allows something
kubectl auth can-i create deployments --as=dev -n staging

# See why a pod isn't scheduled
kubectl describe pod <name> | grep -A5 Events

# Diff before applying
kubectl diff -f manifest.yml
```
