From d470ce92909efacfe2d9e28ff61e3dc1015d3e3a Mon Sep 17 00:00:00 2001 From: aymericdd Date: Mon, 24 Nov 2025 18:01:49 +0100 Subject: [PATCH] fix(pod-replacement): exclude container requirement Pod replacement operates at the pod level and should not require container information. The feature was failing because the injector expected target containers to be specified, but pod replacement works directly with pods identified by their IP address. This fix adds logic to detect when the pod-replacement command is running and skip the container validation, creating a single config without container dependencies instead. Jira: CHAOSPLT-1330 --- api/v1beta1/pod_replacement.go | 8 +++-- cli/injector/main.go | 56 +++++++++++++++++++++------------ cli/injector/pod_replacement.go | 6 ++-- examples/demo.yaml | 24 ++++++++++++++ 4 files changed, 70 insertions(+), 24 deletions(-) diff --git a/api/v1beta1/pod_replacement.go b/api/v1beta1/pod_replacement.go index ba8981d897..cbff98a83a 100644 --- a/api/v1beta1/pod_replacement.go +++ b/api/v1beta1/pod_replacement.go @@ -5,7 +5,11 @@ package v1beta1 -import "strconv" +import ( + "strconv" + + chaostypes "github.com/DataDog/chaos-controller/types" +) // PodReplacementSpec represents a pod replacement disruption type PodReplacementSpec struct { @@ -27,7 +31,7 @@ func (s *PodReplacementSpec) Validate() error { // GenerateArgs generates injection or cleanup pod arguments for the given spec func (s *PodReplacementSpec) GenerateArgs() []string { args := []string{ - "pod-replacement", + chaostypes.DisruptionKindPodReplacement, "inject", } diff --git a/cli/injector/main.go b/cli/injector/main.go index de904108be..8be89ce6fd 100644 --- a/cli/injector/main.go +++ b/cli/injector/main.go @@ -222,6 +222,16 @@ func initConfig() { // assign to the pointer to level the new value to persist it after this method disruptionArgs.Level = chaostypes.DisruptionLevel(disruptionLevelRaw) + // check if we're running pod-replacement command which doesn't need 
containers + isPodReplacement := false + + for _, arg := range os.Args { + if arg == chaostypes.DisruptionKindPodReplacement { + isPodReplacement = true + break + } + } + switch disruptionArgs.Level { case chaostypes.DisruptionLevelPod: // check for container ID flag @@ -231,33 +241,39 @@ func initConfig() { return } - for containerName, containerID := range disruptionArgs.TargetContainers { - // retrieve container info - ctn, err := container.New(containerID, containerName) - if err != nil { - log.Fatalw("can't create container object", tags.ErrorKey, err) + if !isPodReplacement { + // Pod replacement operates at the pod level and doesn't need container information + for containerName, containerID := range disruptionArgs.TargetContainers { + // retrieve container info + ctn, err := container.New(containerID, containerName) + if err != nil { + log.Fatalw("can't create container object", tags.ErrorKey, err) - return - } + return + } - log.Infow("injector targeting container", tags.ContainerIDKey, containerID, tags.ContainerNameKey, containerName) + log.Infow("injector targeting container", tags.ContainerIDKey, containerID, tags.ContainerNameKey, containerName) - pid := ctn.PID() + pid := ctn.PID() - // keep pid for later if this is a chaos handler container - if disruptionArgs.OnInit && ctn.Name() == chaosInitContName { - handlerPID = pid - } + // keep pid for later if this is a chaos handler container + if disruptionArgs.OnInit && ctn.Name() == chaosInitContName { + handlerPID = pid + } - ctns = append(ctns, ctn) - pids = append(pids, pid) - } + ctns = append(ctns, ctn) + pids = append(pids, pid) + } + } else { + // check for pod IP flag + if disruptionArgs.TargetPodIP == "" { + log.Fatal("--target-pod-ip flag must be passed when --level=pod") - // check for pod IP flag - if disruptionArgs.TargetPodIP == "" { - log.Fatal("--target-pod-ip flag must be passed when --level=pod") + return + } - return + pids = []uint32{1} + ctns = []container.Container{nil} } case 
chaostypes.DisruptionLevelNode: pids = []uint32{1} diff --git a/cli/injector/pod_replacement.go b/cli/injector/pod_replacement.go index d3e29eb9c4..c2552216d1 100644 --- a/cli/injector/pod_replacement.go +++ b/cli/injector/pod_replacement.go @@ -8,14 +8,16 @@ package main import ( "strconv" + "github.com/spf13/cobra" + "github.com/DataDog/chaos-controller/api/v1beta1" "github.com/DataDog/chaos-controller/injector" "github.com/DataDog/chaos-controller/o11y/tags" - "github.com/spf13/cobra" + chaostypes "github.com/DataDog/chaos-controller/types" ) var podReplacementCmd = &cobra.Command{ - Use: "pod-replacement", + Use: chaostypes.DisruptionKindPodReplacement, Short: "Pod replacement subcommands", Run: injectAndWait, PreRun: func(cmd *cobra.Command, args []string) { diff --git a/examples/demo.yaml b/examples/demo.yaml index e9b7ee1ee2..21042c464f 100644 --- a/examples/demo.yaml +++ b/examples/demo.yaml @@ -217,6 +217,30 @@ spec: requests: memory: 16Mi cpu: 10m + ## Create a second container to ensure the pod replace is working as expected + - name: storage-monitor-2 + image: alpine:latest + command: ["/bin/sh"] + args: + - -c + - | + echo "Storage monitoring container started" + # Create a test file to demonstrate persistence + echo "demo-storage-$(date)" > /mnt/shared/storage-info.txt + # Keep container running and periodically update the file + while true; do + echo "$(date): Storage is available" >> /mnt/shared/heartbeat.log + sleep 30 + done + volumeMounts: + - mountPath: /mnt/shared + name: shared-storage + resources: + limits: + memory: 16Mi + cpu: 10m + requests: + memory: 16Mi volumeClaimTemplates: - metadata: name: shared-storage