From 5cd6d5a41cfc4a2fb09a2bd01b58e9c9b9e78e48 Mon Sep 17 00:00:00 2001 From: Andrew Hu Date: Wed, 24 Nov 2021 11:13:33 -0500 Subject: [PATCH 1/5] support draining multiple nodes refactor the drainNode function to support draining multiple nodes Signed-off-by: Andrew Hu --- chaoslib/litmus/node-drain/lib/node-drain.go | 75 ++++++++++++-------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index 52a7535fe..4ea8be0cc 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -6,6 +6,7 @@ import ( "os/exec" "os/signal" "strconv" + "strings" "syscall" "time" @@ -19,6 +20,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/litmuschaos/litmus-go/pkg/utils/retry" "github.com/pkg/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -111,37 +113,50 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli // drainNode drain the application node func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { - select { - case <-inject: - // stopping the chaos execution, if abort signal received - os.Exit(0) - default: - log.Infof("[Inject]: Draining the %v node", experimentsDetails.TargetNode) - - command := exec.Command("kubectl", "drain", experimentsDetails.TargetNode, "--ignore-daemonsets", "--delete-local-data", "--force", "--timeout", strconv.Itoa(experimentsDetails.ChaosDuration)+"s") - var out, stderr bytes.Buffer - command.Stdout = &out - command.Stderr = &stderr - if err := command.Run(); err != nil { - log.Infof("Error String: %v", stderr.String()) - return errors.Errorf("Unable to drain the %v node, err: %v", experimentsDetails.TargetNode, err) - } + targetNodes := strings.Split(experimentsDetails.TargetNode, ",") + + 
log.Infof("Target nodes list: %v", targetNodes) + for _, targetNode := range targetNodes { + + select { + case <-inject: + // stopping the chaos execution, if abort signal received + os.Exit(0) + default: + log.Infof("[Inject]: Draining the %v node", targetNode) + + command := exec.Command("kubectl", "drain", targetNode, "--ignore-daemonsets", "--delete-emptydir-data", "--force", "--timeout", strconv.Itoa(experimentsDetails.ChaosDuration)+"s") + var out, stderr bytes.Buffer + command.Stdout = &out + command.Stderr = &stderr + if err := command.Run(); err != nil { + log.Infof("Error String: %v", stderr.String()) + return errors.Errorf("Unable to drain the %v node, err: %v", experimentsDetails.TargetNode, err) + } - common.SetTargets(experimentsDetails.TargetNode, "injected", "node", chaosDetails) - - return retry. - Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). - Wait(time.Duration(experimentsDetails.Delay) * time.Second). - Try(func(attempt uint) error { - nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.TargetNode, v1.GetOptions{}) - if err != nil { - return err - } - if !nodeSpec.Spec.Unschedulable { - return errors.Errorf("%v node is not in unschedulable state", experimentsDetails.TargetNode) - } - return nil - }) + common.SetTargets(targetNode, "injected", "node", chaosDetails) + + err = retry. + Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). + Wait(time.Duration(experimentsDetails.Delay) * time.Second). 
+ Try(func(attempt uint) error { + nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, v1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + return nil + } else { + return err + } + } + if !nodeSpec.Spec.Unschedulable { + return errors.Errorf("%v node is not in unschedulable state", targetNode) + } + return nil + }) + if err != nil { + return err + } + } } return nil } From 4abdd9c5e3817372517dfe7f253adb0843824d96 Mon Sep 17 00:00:00 2001 From: Caleb Xu Date: Fri, 14 Jan 2022 14:38:13 -0500 Subject: [PATCH 2/5] node-drain: return error if no nodes specified Signed-off-by: Caleb Xu --- chaoslib/litmus/node-drain/lib/node-drain.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index c2d8b6ae4..2da5f64a7 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -115,6 +115,9 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { targetNodes := strings.Split(experimentsDetails.TargetNode, ",") + if len(targetNodes) == 0 { + return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") + } log.Infof("Target nodes list: %v", targetNodes) for _, targetNode := range targetNodes { @@ -165,7 +168,11 @@ func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients cl // uncordonNode uncordon the application node func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { - targetNodes := strings.Split(experimentsDetails.TargetNode, ",") + targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") + if len(targetNodes) == 0 { + return errors.Errorf("No 
target nodes provided, expected the comma-separated names of one or more nodes") + } + for _, targetNode := range targetNodes { //Check node exist before uncordon the node @@ -196,7 +203,11 @@ func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). Wait(time.Duration(experimentsDetails.Delay) * time.Second). Try(func(attempt uint) error { - targetNodes := strings.Split(experimentsDetails.TargetNode, ",") + targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") + if len(targetNodes) == 0 { + return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") + } + for _, targetNode := range targetNodes { nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, v1.GetOptions{}) if err != nil { From d449bc454c605ffed14a6b5733d3072ecab3ed48 Mon Sep 17 00:00:00 2001 From: Caleb Xu Date: Fri, 14 Jan 2022 15:56:54 -0500 Subject: [PATCH 3/5] node-drain: ExperimentDetails.TargetNode is now Experiment.TargetNodes Signed-off-by: Caleb Xu --- chaoslib/litmus/node-drain/lib/node-drain.go | 12 ++++++------ .../generic/node-drain/experiment/node-drain.go | 6 +++--- pkg/generic/node-drain/environment/environment.go | 2 +- pkg/generic/node-drain/types/types.go | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index 2da5f64a7..bc1451447 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -49,16 +49,16 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli common.WaitForDuration(experimentsDetails.RampTime) } - if experimentsDetails.TargetNode == "" { + if experimentsDetails.TargetNodes == "" { //Select node for kubelet-service-kill - experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, 
experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) + experimentsDetails.TargetNodes, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) if err != nil { return err } } if experimentsDetails.EngineName != "" { - msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNode + " node" + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNodes + " node" types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") } @@ -114,7 +114,7 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli // drainNode drain the application node func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { - targetNodes := strings.Split(experimentsDetails.TargetNode, ",") + targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") if len(targetNodes) == 0 { return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") } @@ -135,7 +135,7 @@ func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients cl command.Stderr = &stderr if err := command.Run(); err != nil { log.Infof("Error String: %v", stderr.String()) - return errors.Errorf("Unable to drain the %v node, err: %v", experimentsDetails.TargetNode, err) + return errors.Errorf("Unable to drain the %v node, err: %v", targetNode, err) } common.SetTargets(targetNode, "injected", "node", chaosDetails) @@ -218,7 +218,7 @@ func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients } } if nodeSpec.Spec.Unschedulable { - return errors.Errorf("%v node is in unschedulable state", experimentsDetails.TargetNode) + return errors.Errorf("%v node is in unschedulable state", 
targetNode) } } return nil diff --git a/experiments/generic/node-drain/experiment/node-drain.go b/experiments/generic/node-drain/experiment/node-drain.go index d83cb7034..9ed5796e7 100644 --- a/experiments/generic/node-drain/experiment/node-drain.go +++ b/experiments/generic/node-drain/experiment/node-drain.go @@ -64,7 +64,7 @@ func NodeDrain(clients clients.ClientSets) { //DISPLAY THE APP INFORMATION log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ "Node Label": experimentsDetails.NodeLabel, - "Target Node": experimentsDetails.TargetNode, + "Target Nodes": experimentsDetails.TargetNodes, "Chaos Duration": experimentsDetails.ChaosDuration, }) @@ -93,7 +93,7 @@ func NodeDrain(clients clients.ClientSets) { // Checking the status of target nodes log.Info("[Status]: Getting the status of target nodes") - if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Target nodes are not in the ready state, err: %v", err) failStep := "[pre-chaos]: Failed to verify the status of nodes, err: " + err.Error() types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) @@ -166,7 +166,7 @@ func NodeDrain(clients clients.ClientSets) { // Checking the status of target nodes log.Info("[Status]: Getting the status of target nodes") - if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) 
types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") diff --git a/pkg/generic/node-drain/environment/environment.go b/pkg/generic/node-drain/environment/environment.go index d0d1069a4..326a06d5f 100644 --- a/pkg/generic/node-drain/environment/environment.go +++ b/pkg/generic/node-drain/environment/environment.go @@ -24,7 +24,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "") experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "") experimentDetails.AuxiliaryAppInfo = types.Getenv("AUXILIARY_APPINFO", "") - experimentDetails.TargetNode = types.Getenv("TARGET_NODE", "") + experimentDetails.TargetNodes = types.Getenv("TARGET_NODES", "") experimentDetails.Delay, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_DELAY", "2")) experimentDetails.Timeout, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_TIMEOUT", "180")) experimentDetails.TargetContainer = types.Getenv("TARGET_CONTAINER", "") diff --git a/pkg/generic/node-drain/types/types.go b/pkg/generic/node-drain/types/types.go index 722cc7f82..4d594ece9 100644 --- a/pkg/generic/node-drain/types/types.go +++ b/pkg/generic/node-drain/types/types.go @@ -18,7 +18,7 @@ type ExperimentDetails struct { InstanceID string ChaosNamespace string ChaosPodName string - TargetNode string + TargetNodes string AuxiliaryAppInfo string Timeout int Delay int From 05726d30beb0ac2e5193961714834bc9d71d52fc Mon Sep 17 00:00:00 2001 From: Caleb Xu Date: Mon, 17 Jan 2022 14:09:54 -0500 Subject: [PATCH 4/5] node-drain: run pre/post checks on all target nodes Signed-off-by: Caleb Xu --- .../node-drain/experiment/node-drain.go | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/experiments/generic/node-drain/experiment/node-drain.go b/experiments/generic/node-drain/experiment/node-drain.go index 
9ed5796e7..36663ba21 100644 --- a/experiments/generic/node-drain/experiment/node-drain.go +++ b/experiments/generic/node-drain/experiment/node-drain.go @@ -2,6 +2,7 @@ package experiment import ( "os" + "strings" "github.com/litmuschaos/chaos-operator/pkg/apis/litmuschaos/v1alpha1" litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/node-drain/lib" @@ -68,6 +69,14 @@ func NodeDrain(clients clients.ClientSets) { "Chaos Duration": experimentsDetails.ChaosDuration, }) + targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") + if len(targetNodes) == 0 { + log.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") + failStep := "[pre-chaos]: No target nodes provided, expected the comma-separated names of one or more nodes" + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) @@ -93,13 +102,15 @@ func NodeDrain(clients clients.ClientSets) { // Checking the status of target nodes log.Info("[Status]: Getting the status of target nodes") - if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - log.Errorf("Target nodes are not in the ready state, err: %v", err) - failStep := "[pre-chaos]: Failed to verify the status of nodes, err: " + err.Error() - types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") - result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) - return + for _, targetNode := range targetNodes { + if err := status.CheckNodeStatus(targetNode, 
experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Errorf("Target nodes are not in the ready state, err: %v", err) + failStep := "[pre-chaos]: Failed to verify the status of nodes, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } } if experimentsDetails.EngineName != "" { @@ -166,10 +177,12 @@ func NodeDrain(clients clients.ClientSets) { // Checking the status of target nodes log.Info("[Status]: Getting the status of target nodes") - if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + for _, targetNode := range targetNodes { + if err := status.CheckNodeStatus(targetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } } if experimentsDetails.EngineName != "" { From 96a94ec8d128b957611bbc390dac756cc9560458 Mon Sep 17 00:00:00 2001 From: Caleb Xu Date: Wed, 19 Jan 2022 08:37:32 -0500 Subject: [PATCH 5/5] node-drain: fix code formatting Signed-off-by: Caleb Xu --- chaoslib/litmus/node-drain/lib/node-drain.go | 2 +- 
experiments/generic/node-drain/experiment/node-drain.go | 2 +- pkg/generic/node-drain/types/types.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index bc1451447..b80c7b985 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -207,7 +207,7 @@ func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients if len(targetNodes) == 0 { return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") } - + for _, targetNode := range targetNodes { nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, v1.GetOptions{}) if err != nil { diff --git a/experiments/generic/node-drain/experiment/node-drain.go b/experiments/generic/node-drain/experiment/node-drain.go index 36663ba21..5a7e49010 100644 --- a/experiments/generic/node-drain/experiment/node-drain.go +++ b/experiments/generic/node-drain/experiment/node-drain.go @@ -65,7 +65,7 @@ func NodeDrain(clients clients.ClientSets) { //DISPLAY THE APP INFORMATION log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ "Node Label": experimentsDetails.NodeLabel, - "Target Nodes": experimentsDetails.TargetNodes, + "Target Nodes": experimentsDetails.TargetNodes, "Chaos Duration": experimentsDetails.ChaosDuration, }) diff --git a/pkg/generic/node-drain/types/types.go b/pkg/generic/node-drain/types/types.go index 4d594ece9..287f6dc31 100644 --- a/pkg/generic/node-drain/types/types.go +++ b/pkg/generic/node-drain/types/types.go @@ -18,7 +18,7 @@ type ExperimentDetails struct { InstanceID string ChaosNamespace string ChaosPodName string - TargetNodes string + TargetNodes string AuxiliaryAppInfo string Timeout int Delay int