diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index 65ae77563..b80c7b985 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -49,16 +49,16 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli common.WaitForDuration(experimentsDetails.RampTime) } - if experimentsDetails.TargetNode == "" { + if experimentsDetails.TargetNodes == "" { //Select node for kubelet-service-kill - experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) + experimentsDetails.TargetNodes, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) if err != nil { return err } } if experimentsDetails.EngineName != "" { - msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNode + " node" + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNodes + " node" types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") } @@ -114,37 +114,53 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli // drainNode drain the application node func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { - select { - case <-inject: - // stopping the chaos execution, if abort signal received - os.Exit(0) - default: - log.Infof("[Inject]: Draining the %v node", experimentsDetails.TargetNode) + targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") + if len(targetNodes) == 0 { + return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") + } - command := exec.Command("kubectl", "drain", experimentsDetails.TargetNode, "--ignore-daemonsets", "--delete-local-data", "--force", "--timeout", strconv.Itoa(experimentsDetails.ChaosDuration)+"s") - var out, stderr bytes.Buffer - command.Stdout = &out - command.Stderr = &stderr - if err := command.Run(); err != nil { - log.Infof("Error String: %v", stderr.String()) - return errors.Errorf("Unable to drain the %v node, err: %v", experimentsDetails.TargetNode, err) - } + log.Infof("Target nodes list: %v", targetNodes) + for _, targetNode := range targetNodes { - common.SetTargets(experimentsDetails.TargetNode, "injected", "node", chaosDetails) + select { + case <-inject: + // stopping the chaos execution, if abort signal received + os.Exit(0) + default: + log.Infof("[Inject]: Draining the %v node", targetNode) + + command := exec.Command("kubectl", "drain", targetNode, "--ignore-daemonsets", "--delete-emptydir-data", "--force", "--timeout", strconv.Itoa(experimentsDetails.ChaosDuration)+"s") + var out, stderr bytes.Buffer + command.Stdout = &out + command.Stderr = &stderr + if err := command.Run(); err != nil { + log.Infof("Error String: %v", stderr.String()) + return errors.Errorf("Unable to drain the %v node, err: %v", targetNode, err) + } - return retry. - Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). - Wait(time.Duration(experimentsDetails.Delay) * time.Second). - Try(func(attempt uint) error { - nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.TargetNode, v1.GetOptions{}) - if err != nil { - return err - } - if !nodeSpec.Spec.Unschedulable { - return errors.Errorf("%v node is not in unschedulable state", experimentsDetails.TargetNode) - } - return nil - }) + common.SetTargets(targetNode, "injected", "node", chaosDetails) + + err = retry. + Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). + Wait(time.Duration(experimentsDetails.Delay) * time.Second). + Try(func(attempt uint) error { + nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, v1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + return nil + } else { + return err + } + } + if !nodeSpec.Spec.Unschedulable { + return errors.Errorf("%v node is not in unschedulable state", targetNode) + } + return nil + }) + if err != nil { + return err + } + } } return nil } @@ -152,7 +168,11 @@ func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients cl // uncordonNode uncordon the application node func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { - targetNodes := strings.Split(experimentsDetails.TargetNode, ",") + targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") + if len(targetNodes) == 0 { + return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") + } + for _, targetNode := range targetNodes { //Check node exist before uncordon the node @@ -183,7 +203,11 @@ func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). Wait(time.Duration(experimentsDetails.Delay) * time.Second). Try(func(attempt uint) error { - targetNodes := strings.Split(experimentsDetails.TargetNode, ",") + targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") + if len(targetNodes) == 0 { + return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") + } + for _, targetNode := range targetNodes { nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, v1.GetOptions{}) if err != nil { @@ -194,7 +218,7 @@ func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients } } if nodeSpec.Spec.Unschedulable { - return errors.Errorf("%v node is in unschedulable state", experimentsDetails.TargetNode) + return errors.Errorf("%v node is in unschedulable state", targetNode) } } return nil diff --git a/experiments/generic/node-drain/experiment/node-drain.go b/experiments/generic/node-drain/experiment/node-drain.go index d83cb7034..5a7e49010 100644 --- a/experiments/generic/node-drain/experiment/node-drain.go +++ b/experiments/generic/node-drain/experiment/node-drain.go @@ -2,6 +2,7 @@ package experiment import ( "os" + "strings" "github.com/litmuschaos/chaos-operator/pkg/apis/litmuschaos/v1alpha1" litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/node-drain/lib" @@ -64,10 +65,18 @@ func NodeDrain(clients clients.ClientSets) { //DISPLAY THE APP INFORMATION log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ "Node Label": experimentsDetails.NodeLabel, - "Target Node": experimentsDetails.TargetNode, + "Target Nodes": experimentsDetails.TargetNodes, "Chaos Duration": experimentsDetails.ChaosDuration, }) + targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") + if len(targetNodes) == 0 { + log.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") + failStep := "[pre-chaos]: No target nodes provided, expected the comma-separated names of one or more nodes" + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) @@ -93,13 +102,15 @@ func NodeDrain(clients clients.ClientSets) { // Checking the status of target nodes log.Info("[Status]: Getting the status of target nodes") - if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - log.Errorf("Target nodes are not in the ready state, err: %v", err) - failStep := "[pre-chaos]: Failed to verify the status of nodes, err: " + err.Error() - types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") - result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) - return + for _, targetNode := range targetNodes { + if err := status.CheckNodeStatus(targetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Errorf("Target nodes are not in the ready state, err: %v", err) + failStep := "[pre-chaos]: Failed to verify the status of nodes, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } } if experimentsDetails.EngineName != "" { @@ -166,10 +177,12 @@ func NodeDrain(clients clients.ClientSets) { // Checking the status of target nodes log.Info("[Status]: Getting the status of target nodes") - if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + for _, targetNode := range targetNodes { + if err := status.CheckNodeStatus(targetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } } if experimentsDetails.EngineName != "" { diff --git a/pkg/generic/node-drain/environment/environment.go b/pkg/generic/node-drain/environment/environment.go index d0d1069a4..326a06d5f 100644 --- a/pkg/generic/node-drain/environment/environment.go +++ b/pkg/generic/node-drain/environment/environment.go @@ -24,7 +24,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "") experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "") experimentDetails.AuxiliaryAppInfo = types.Getenv("AUXILIARY_APPINFO", "") - experimentDetails.TargetNode = types.Getenv("TARGET_NODE", "") + experimentDetails.TargetNodes = types.Getenv("TARGET_NODES", "") experimentDetails.Delay, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_DELAY", "2")) experimentDetails.Timeout, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_TIMEOUT", "180")) experimentDetails.TargetContainer = types.Getenv("TARGET_CONTAINER", "") diff --git a/pkg/generic/node-drain/types/types.go b/pkg/generic/node-drain/types/types.go index 722cc7f82..287f6dc31 100644 --- a/pkg/generic/node-drain/types/types.go +++ b/pkg/generic/node-drain/types/types.go @@ -18,7 +18,7 @@ type ExperimentDetails struct { InstanceID string ChaosNamespace string ChaosPodName string - TargetNode string + TargetNodes string AuxiliaryAppInfo string Timeout int Delay int