diff --git a/bin/experiment/experiment.go b/bin/experiment/experiment.go
index 1a0be860f..fad4f4257 100755
--- a/bin/experiment/experiment.go
+++ b/bin/experiment/experiment.go
@@ -56,6 +56,7 @@ import (
 	ebsLossByTag "github.com/litmuschaos/litmus-go/experiments/kube-aws/ebs-loss-by-tag/experiment"
 	ec2TerminateByID "github.com/litmuschaos/litmus-go/experiments/kube-aws/ec2-terminate-by-id/experiment"
 	ec2TerminateByTag "github.com/litmuschaos/litmus-go/experiments/kube-aws/ec2-terminate-by-tag/experiment"
+	rdsInstanceStop "github.com/litmuschaos/litmus-go/experiments/kube-aws/rds-instance-stop/experiment"
 	k6Loadgen "github.com/litmuschaos/litmus-go/experiments/load/k6-loadgen/experiment"
 	springBootFaults "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-faults/experiment"
 	vmpoweroff "github.com/litmuschaos/litmus-go/experiments/vmware/vm-poweroff/experiment"
@@ -149,6 +150,8 @@ func main() {
 		ebsLossByID.EBSLossByID(clients)
 	case "ebs-loss-by-tag":
 		ebsLossByTag.EBSLossByTag(clients)
+	case "rds-instance-stop":
+		rdsInstanceStop.RDSInstanceStop(clients)
 	case "node-restart":
 		nodeRestart.NodeRestart(clients)
 	case "pod-dns-error":
diff --git a/chaoslib/litmus/rds-instance-stop/lib/rds-instance-stop.go b/chaoslib/litmus/rds-instance-stop/lib/rds-instance-stop.go
new file mode 100644
index 000000000..55a977117
--- /dev/null
+++ b/chaoslib/litmus/rds-instance-stop/lib/rds-instance-stop.go
@@ -0,0 +1,262 @@
+package lib
+
+import (
+	"fmt"
+	"os"
+	"os/signal"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/litmuschaos/litmus-go/pkg/cerrors"
+	awslib "github.com/litmuschaos/litmus-go/pkg/cloud/aws/rds"
+	"github.com/litmuschaos/litmus-go/pkg/events"
+	experimentTypes "github.com/litmuschaos/litmus-go/pkg/kube-aws/rds-instance-stop/types"
+	"github.com/litmuschaos/litmus-go/pkg/probe"
+	"github.com/palantir/stacktrace"
+
+	"github.com/litmuschaos/litmus-go/pkg/clients"
+	"github.com/litmuschaos/litmus-go/pkg/log"
+	"github.com/litmuschaos/litmus-go/pkg/types"
+	"github.com/litmuschaos/litmus-go/pkg/utils/common"
+)
+
+// inject and abort both receive the interrupt/termination signals registered in PrepareRDSInstanceStop
+var (
+	inject, abort chan os.Signal
+)
+
+// PrepareRDSInstanceStop contains the prerequisites and the chaos injection steps of the experiment.
+// It waits for the ramp time, selects the targets from the comma-separated RDSInstanceIdentifier field,
+// starts the abort watcher and then injects the chaos in the configured sequence (serial or parallel).
+func PrepareRDSInstanceStop(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
+
+	// Inject channel is used to transmit signal notifications.
+	inject = make(chan os.Signal, 1)
+	// Catch and relay certain signal(s) to inject channel.
+	signal.Notify(inject, os.Interrupt, syscall.SIGTERM)
+
+	// Abort channel is used to transmit signal notifications.
+	abort = make(chan os.Signal, 1)
+	// Catch and relay certain signal(s) to abort channel.
+	signal.Notify(abort, os.Interrupt, syscall.SIGTERM)
+
+	// Waiting for the ramp time before chaos injection
+	if experimentsDetails.RampTime != 0 {
+		log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime)
+		common.WaitForDuration(experimentsDetails.RampTime)
+	}
+
+	// Get the instance identifier or list of instance identifiers
+	instanceIdentifierList := strings.Split(experimentsDetails.RDSInstanceIdentifier, ",")
+	if experimentsDetails.RDSInstanceIdentifier == "" || len(instanceIdentifierList) == 0 {
+		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no RDS instance identifier found to stop"}
+	}
+
+	instanceIdentifierList = common.FilterBasedOnPercentage(experimentsDetails.InstanceAffectedPerc, instanceIdentifierList)
+	log.Infof("[Chaos]:Number of Instance targeted: %v", len(instanceIdentifierList))
+
+	// Watching for the abort signal and revert the chaos
+	go abortWatcher(experimentsDetails, instanceIdentifierList, chaosDetails)
+
+	switch strings.ToLower(experimentsDetails.Sequence) {
+	case "serial":
+		if err := injectChaosInSerialMode(experimentsDetails, instanceIdentifierList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			return stacktrace.Propagate(err, "could not run chaos in serial mode")
+		}
+	case "parallel":
+		if err := injectChaosInParallelMode(experimentsDetails, instanceIdentifierList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
+		}
+	default:
+		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+	}
+
+	// Waiting for the ramp time after chaos injection
+	if experimentsDetails.RampTime != 0 {
+		log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime)
+		common.WaitForDuration(experimentsDetails.RampTime)
+	}
+	return nil
+}
+
+// injectChaosInSerialMode stops and restarts the target rds instances one after the other,
+// repeating the cycle until the chaos duration has elapsed
+func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetails, instanceIdentifierList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
+
+	select {
+	case <-inject:
+		// Stopping the chaos execution, if abort signal received
+		os.Exit(0)
+	default:
+		// ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin
+		ChaosStartTimeStamp := time.Now()
+		duration := int(time.Since(ChaosStartTimeStamp).Seconds())
+
+		for duration < experimentsDetails.ChaosDuration {
+
+			log.Infof("[Info]: Target instance identifier list, %v", instanceIdentifierList)
+
+			if experimentsDetails.EngineName != "" {
+				msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on rds instance"
+				types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails)
+				events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine")
+			}
+
+			for i, identifier := range instanceIdentifierList {
+
+				// Stopping the RDS instance
+				log.Info("[Chaos]: Stopping the desired RDS instance")
+				if err := awslib.RDSInstanceStop(identifier, experimentsDetails.Region); err != nil {
+					return stacktrace.Propagate(err, "rds instance failed to stop")
+				}
+
+				common.SetTargets(identifier, "injected", "RDS", chaosDetails)
+
+				// Wait for rds instance to completely stop
+				log.Infof("[Wait]: Wait for RDS instance '%v' to get in stopped state", identifier)
+				if err := awslib.WaitForRDSInstanceDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
+					return stacktrace.Propagate(err, "rds instance failed to stop")
+				}
+
+				// Run the probes during chaos
+				// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
+				if len(resultDetails.ProbeDetails) != 0 && i == 0 {
+					if err := probe.RunProbes(chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+						return stacktrace.Propagate(err, "failed to run probes")
+					}
+				}
+
+				// Wait for chaos interval
+				log.Infof("[Wait]: Waiting for chaos interval of %vs", experimentsDetails.ChaosInterval)
+				time.Sleep(time.Duration(experimentsDetails.ChaosInterval) * time.Second)
+
+				// Starting the RDS instance
+				log.Info("[Chaos]: Starting back the RDS instance")
+				if err := awslib.RDSInstanceStart(identifier, experimentsDetails.Region); err != nil {
+					return stacktrace.Propagate(err, "rds instance failed to start")
+				}
+
+				// Wait for rds instance to get in available state
+				log.Infof("[Wait]: Wait for RDS instance '%v' to get in available state", identifier)
+				if err := awslib.WaitForRDSInstanceUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
+					return stacktrace.Propagate(err, "rds instance failed to start")
+				}
+
+				common.SetTargets(identifier, "reverted", "RDS", chaosDetails)
+			}
+			duration = int(time.Since(ChaosStartTimeStamp).Seconds())
+		}
+	}
+	return nil
+}
+
+// injectChaosInParallelMode stops all the target rds instances at once and then restarts them,
+// repeating the cycle until the chaos duration has elapsed
+func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDetails, instanceIdentifierList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
+
+	select {
+	case <-inject:
+		// stopping the chaos execution, if abort signal received
+		os.Exit(0)
+	default:
+		//ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin
+		ChaosStartTimeStamp := time.Now()
+		duration := int(time.Since(ChaosStartTimeStamp).Seconds())
+
+		for duration < experimentsDetails.ChaosDuration {
+
+			log.Infof("[Info]: Target instance identifier list, %v", instanceIdentifierList)
+
+			if experimentsDetails.EngineName != "" {
+				msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on rds instance"
+				types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails)
+				events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine")
+			}
+
+			// Stop all the target instances
+			for _, identifier := range instanceIdentifierList {
+				// Stopping the RDS instance
+				log.Info("[Chaos]: Stopping the desired RDS instance")
+				if err := awslib.RDSInstanceStop(identifier, experimentsDetails.Region); err != nil {
+					return stacktrace.Propagate(err, "rds instance failed to stop")
+				}
+				common.SetTargets(identifier, "injected", "RDS", chaosDetails)
+			}
+
+			for _, identifier := range instanceIdentifierList {
+				// Wait for rds instance to completely stop
+				log.Infof("[Wait]: Wait for RDS instance '%v' to get in stopped state", identifier)
+				if err := awslib.WaitForRDSInstanceDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
+					return stacktrace.Propagate(err, "rds instance failed to stop")
+				}
+			}
+
+			// Run the probes during chaos
+			if len(resultDetails.ProbeDetails) != 0 {
+				if err := probe.RunProbes(chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+					return stacktrace.Propagate(err, "failed to run probes")
+				}
+			}
+
+			// Wait for chaos interval
+			log.Infof("[Wait]: Waiting for chaos interval of %vs", experimentsDetails.ChaosInterval)
+			time.Sleep(time.Duration(experimentsDetails.ChaosInterval) * time.Second)
+
+			// Starting the RDS instance
+			for _, identifier := range instanceIdentifierList {
+				log.Info("[Chaos]: Starting back the RDS instance")
+				if err := awslib.RDSInstanceStart(identifier, experimentsDetails.Region); err != nil {
+					return stacktrace.Propagate(err, "rds instance failed to start")
+				}
+			}
+
+			for _, identifier := range instanceIdentifierList {
+				// Wait for rds instance to get in available state
+				log.Infof("[Wait]: Wait for RDS instance '%v' to get in available state", identifier)
+				if err := awslib.WaitForRDSInstanceUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
+					return stacktrace.Propagate(err, "rds instance failed to start")
+				}
+			}
+
+			for _, identifier := range instanceIdentifierList {
+				common.SetTargets(identifier, "reverted", "RDS", chaosDetails)
+			}
+			duration = int(time.Since(ChaosStartTimeStamp).Seconds())
+		}
+	}
+	return nil
+}
+
+// abortWatcher waits for the abort signal, starts back the target rds instances that are
+// not already up, and then exits the process with exit code 1
+func abortWatcher(experimentsDetails *experimentTypes.ExperimentDetails, instanceIdentifierList []string, chaosDetails *types.ChaosDetails) {
+
+	<-abort
+
+	log.Info("[Abort]: Chaos Revert Started")
+	for _, identifier := range instanceIdentifierList {
+		instanceState, err := awslib.GetRDSInstanceStatus(identifier, experimentsDetails.Region)
+		if err != nil {
+			log.Errorf("Failed to get instance status when an abort signal is received: %v", err)
+		}
+		// RDS reports a healthy instance as "available" and a restarting one as "starting";
+		// only the instances that are stopping or stopped have to be started again
+		if instanceState != "available" && instanceState != "starting" {
+
+			log.Info("[Abort]: Waiting for the RDS instance to get down")
+			if err := awslib.WaitForRDSInstanceDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
+				log.Errorf("Unable to wait till stop of the instance: %v", err)
+			}
+
+			log.Info("[Abort]: Starting RDS instance as abort signal received")
+			err := awslib.RDSInstanceStart(identifier, experimentsDetails.Region)
+			if err != nil {
+				log.Errorf("RDS instance failed to start when an abort signal is received: %v", err)
+			}
+		}
+		common.SetTargets(identifier, "reverted", "RDS", chaosDetails)
+	}
+	log.Info("[Abort]: Chaos Revert Completed")
+	os.Exit(1)
+}
diff --git a/experiments/kube-aws/rds-instance-stop/README.md b/experiments/kube-aws/rds-instance-stop/README.md
new file mode 100644
index 000000000..6165430b0
--- /dev/null
+++ b/experiments/kube-aws/rds-instance-stop/README.md
@@ -0,0 +1,14 @@
+## Experiment Metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
Name Description Documentation Link
RDS Instance Stop This experiment stops an RDS instance, selected by its instance identifier, and brings it back to the available state after the specified chaos interval. The number of target instances can be controlled using the instance affected percentage. Here
diff --git a/experiments/kube-aws/rds-instance-stop/experiment/rds-instance-stop.go b/experiments/kube-aws/rds-instance-stop/experiment/rds-instance-stop.go
new file mode 100644
index 000000000..c014029cb
--- /dev/null
+++ b/experiments/kube-aws/rds-instance-stop/experiment/rds-instance-stop.go
@@ -0,0 +1,189 @@
+package experiment
+
+import (
+	"os"
+
+	"github.com/litmuschaos/chaos-operator/api/litmuschaos/v1alpha1"
+	litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/rds-instance-stop/lib"
+	"github.com/litmuschaos/litmus-go/pkg/clients"
+	aws "github.com/litmuschaos/litmus-go/pkg/cloud/aws/rds"
+	"github.com/litmuschaos/litmus-go/pkg/events"
+	experimentEnv "github.com/litmuschaos/litmus-go/pkg/kube-aws/rds-instance-stop/environment"
+	experimentTypes "github.com/litmuschaos/litmus-go/pkg/kube-aws/rds-instance-stop/types"
+	"github.com/litmuschaos/litmus-go/pkg/log"
+	"github.com/litmuschaos/litmus-go/pkg/probe"
+	"github.com/litmuschaos/litmus-go/pkg/result"
+	"github.com/litmuschaos/litmus-go/pkg/types"
+	"github.com/litmuschaos/litmus-go/pkg/utils/common"
+	"github.com/sirupsen/logrus"
+)
+
+// RDSInstanceStop will stop an aws rds instance
+func RDSInstanceStop(clients clients.ClientSets) {
+
+	var (
+		err error
+	)
+	experimentsDetails := experimentTypes.ExperimentDetails{}
+	resultDetails := types.ResultDetails{}
+	eventsDetails := types.EventDetails{}
+	chaosDetails := types.ChaosDetails{}
+
+	// Fetching all the ENV passed from the runner pod
+	log.Infof("[PreReq]: Getting the ENV for the %v experiment", os.Getenv("EXPERIMENT_NAME"))
+	experimentEnv.GetENV(&experimentsDetails)
+
+	// Initialize the chaos attributes
+	types.InitialiseChaosVariables(&chaosDetails)
+
+	// Initialize Chaos Result Parameters
+	types.SetResultAttributes(&resultDetails, chaosDetails)
+
+	if experimentsDetails.EngineName != "" {
+		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
+		if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
+			log.Errorf("Unable to initialize the probes: %v", err)
+			return
+		}
+	}
+
+	// Updating the chaos result in the beginning of experiment
+	log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName)
+	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
+		log.Errorf("Unable to create the chaosresult: %v", err)
+		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		return
+	}
+
+	// Set the chaos result uid
+	result.SetResultUID(&resultDetails, clients, &chaosDetails)
+
+	// Generating the event in chaosresult to mark the verdict as awaited
+	msg := "experiment: " + experimentsDetails.ExperimentName + ", Result: Awaited"
+	types.SetResultEventAttributes(&eventsDetails, types.AwaitedVerdict, msg, "Normal", &resultDetails)
+	if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult"); eventErr != nil {
+		log.Errorf("Failed to create %v event inside chaosresult", types.AwaitedVerdict)
+	}
+
+	// DISPLAY THE INSTANCE INFORMATION
+	log.InfoWithValues("The instance information is as follows", logrus.Fields{
+		"Chaos Duration":               experimentsDetails.ChaosDuration,
+		"Chaos Namespace":              experimentsDetails.ChaosNamespace,
+		"Instance Identifier":          experimentsDetails.RDSInstanceIdentifier,
+		"Instance Affected Percentage": experimentsDetails.InstanceAffectedPerc,
+		"Sequence":                     experimentsDetails.Sequence,
+	})
+
+	// Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result
+	go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails)
+
+	if experimentsDetails.EngineName != "" {
+		// Marking AUT as running, as we already checked the status of application under test
+		msg := "AUT: Running"
+
+		// Run the probes in the pre-chaos check
+		if len(resultDetails.ProbeDetails) != 0 {
+
+			if err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil {
+				log.Errorf("Probe Failed: %v", err)
+				msg := "AUT: Running, Probes: Unsuccessful"
+				types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
+				if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
+					log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
+				}
+				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				return
+			}
+			msg = "AUT: Running, Probes: Successful"
+		}
+		// Generating the events for the pre-chaos check
+		types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails)
+		if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
+			log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
+		}
+	}
+
+	// Verify the aws rds instance is available (pre-chaos)
+	if chaosDetails.DefaultHealthCheck {
+		log.Info("[Status]: Verify that the aws rds instances are in available state (pre-chaos)")
+		if err = aws.InstanceStatusCheckByInstanceIdentifier(experimentsDetails.RDSInstanceIdentifier, experimentsDetails.Region); err != nil {
+			log.Errorf("RDS instance status check failed, err: %v", err)
+			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			return
+		}
+		log.Info("[Status]: RDS instance is in available state (pre-chaos)")
+	}
+
+	chaosDetails.Phase = types.ChaosInjectPhase
+
+	if err = litmusLIB.PrepareRDSInstanceStop(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
+		log.Errorf("Chaos injection failed, err: %v", err)
+		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		return
+	}
+
+	log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName)
+	resultDetails.Verdict = v1alpha1.ResultVerdictPassed
+
+	chaosDetails.Phase = types.PostChaosPhase
+
+	// Verify the aws rds instance is available (post-chaos)
+	if chaosDetails.DefaultHealthCheck {
+		log.Info("[Status]: Verify that the aws rds instances are in available state (post-chaos)")
+		if err = aws.InstanceStatusCheckByInstanceIdentifier(experimentsDetails.RDSInstanceIdentifier, experimentsDetails.Region); err != nil {
+			log.Errorf("RDS instance status check failed, err: %v", err)
+			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			return
+		}
+		log.Info("[Status]: RDS instance is in available state (post-chaos)")
+	}
+
+	if experimentsDetails.EngineName != "" {
+		// Marking AUT as running, as we already checked the status of application under test
+		msg := "AUT: Running"
+
+		// Run the probes in the post-chaos check
+		if len(resultDetails.ProbeDetails) != 0 {
+			if err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil {
+				log.Errorf("Probes Failed: %v", err)
+				msg := "AUT: Running, Probes: Unsuccessful"
+				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
+				if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
+					log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
+				}
+				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				return
+			}
+			msg = "AUT: Running, Probes: Successful"
+		}
+
+		// Generating post chaos event
+		types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Normal", &chaosDetails)
+		if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
+			log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
+		}
+	}
+
+	// Updating the chaosResult in the end of experiment
+	log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
+	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
+		log.Errorf("Unable to update the chaosresult: %v", err)
+		return
+	}
+
+	// Generating the event in chaosresult to mark the verdict as pass/fail
+	msg = "experiment: " + experimentsDetails.ExperimentName + ", Result: " + string(resultDetails.Verdict)
+	reason, eventType := types.GetChaosResultVerdictEvent(resultDetails.Verdict)
+	types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails)
+	if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult"); eventErr != nil {
+		log.Errorf("Failed to create %v event inside chaosresult", reason)
+	}
+
+	if experimentsDetails.EngineName != "" {
+		msg := experimentsDetails.ExperimentName + " experiment has been " + string(resultDetails.Verdict) + "ed"
+		types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails)
+		if eventErr := events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine"); eventErr != nil {
+			log.Errorf("Failed to create %v event inside chaosengine", types.Summary)
+		}
+	}
+}
diff --git a/experiments/kube-aws/rds-instance-stop/rbac.yaml b/experiments/kube-aws/rds-instance-stop/rbac.yaml
new file mode 100644
index 000000000..46564ff33
--- /dev/null
+++ b/experiments/kube-aws/rds-instance-stop/rbac.yaml
@@ -0,0 +1,49 @@
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: rds-instance-stop-sa
+  namespace: default
+  labels:
+    name: rds-instance-stop-sa
+    app.kubernetes.io/part-of: litmus
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: rds-instance-stop-sa
+  labels:
+    name: rds-instance-stop-sa
+    app.kubernetes.io/part-of: litmus
+rules:
+  - apiGroups: [""]
+    resources: ["pods","events","secrets"]
+    verbs: ["create","list","get","patch","update","delete","deletecollection"]
+  - apiGroups: [""]
+    resources: ["pods/exec","pods/log"]
+    verbs: ["create","list","get"]
+  - apiGroups: ["batch"]
+    resources: ["jobs"]
+    verbs: ["create","list","get","delete","deletecollection"]
+  - apiGroups: ["litmuschaos.io"]
+    resources: ["chaosengines","chaosexperiments","chaosresults"]
+    verbs: ["create","list","get","patch","update"]
+  - apiGroups: [""]
+    resources: ["nodes"]
+    verbs: ["patch","get","list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: rds-instance-stop-sa
+  labels:
+    name: rds-instance-stop-sa
+    app.kubernetes.io/part-of: litmus
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: rds-instance-stop-sa
+subjects:
+  - kind: ServiceAccount
+    name: rds-instance-stop-sa
+    namespace: default
\ No newline at end of file
diff --git a/experiments/kube-aws/rds-instance-stop/test/test.yml b/experiments/kube-aws/rds-instance-stop/test/test.yml
new file mode 100644
index 000000000..c213687d4
--- /dev/null
+++ b/experiments/kube-aws/rds-instance-stop/test/test.yml
@@ -0,0 +1,43 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: litmus-experiment
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: litmus-experiment
+  template:
+    metadata:
+      labels:
+        app: litmus-experiment
+    spec:
+      serviceAccountName: rds-instance-stop-sa
+      containers:
+      - name: gotest
+        image: busybox
+        command:
+          - sleep
+          - "3600"
+        env:
+          - name: CHAOS_NAMESPACE
+            value: 'default'
+
+          - name: RDS_INSTANCE_IDENTIFIER
+            value: ''
+
+          - name: REGION
+            value: ''
+
+          - name: RAMP_TIME
+            value: ''
+
+          - name: POD_NAME
+            valueFrom:
+              fieldRef:
+                fieldPath: metadata.name
+
+      secrets:
+        - name: cloud-secret
+          mountPath: /tmp/
\ No newline at end of file
diff --git a/pkg/cloud/aws/rds/rds-instance-status.go b/pkg/cloud/aws/rds/rds-instance-status.go
new file mode 100644
index 000000000..2b3e28754
--- /dev/null
+++ b/pkg/cloud/aws/rds/rds-instance-status.go
@@ -0,0 +1,82 @@
+package aws
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/service/rds"
+	"github.com/litmuschaos/litmus-go/pkg/cerrors"
+	"github.com/litmuschaos/litmus-go/pkg/cloud/aws/common"
+	"github.com/litmuschaos/litmus-go/pkg/log"
+)
+
+// GetRDSInstanceStatus returns the current status (for example "available" or "stopped")
+// of the rds instance with the given identifier in the given region.
+func GetRDSInstanceStatus(instanceIdentifier, region string) (string, error) {
+
+	// Load session from shared config
+	sess := common.GetAWSSession(region)
+
+	// Create new RDS client
+	rdsSvc := rds.New(sess)
+
+	// Describe only the requested instance, so the lookup does not depend on which
+	// page of the account-wide instance list the target happens to be on
+	result, err := rdsSvc.DescribeDBInstances(&rds.DescribeDBInstancesInput{
+		DBInstanceIdentifier: aws.String(instanceIdentifier),
+	})
+	if err != nil {
+		return "", cerrors.Error{
+			ErrorCode: cerrors.ErrorTypeStatusChecks,
+			Reason:    fmt.Sprintf("failed to describe the instances: %v", err),
+			Target:    fmt.Sprintf("{RDS Instance Identifier: %v, Region: %v}", instanceIdentifier, region),
+		}
+	}
+
+	for _, instanceDetails := range result.DBInstances {
+		// aws.StringValue returns "" for nil pointers, so a sparse response cannot panic here
+		if strings.EqualFold(aws.StringValue(instanceDetails.DBInstanceIdentifier), instanceIdentifier) {
+			return aws.StringValue(instanceDetails.DBInstanceStatus), nil
+		}
+	}
+	return "", cerrors.Error{
+		ErrorCode: cerrors.ErrorTypeStatusChecks,
+		Reason:    "failed to get the status of RDS instance",
+		Target:    fmt.Sprintf("{RDS Instance Identifier: %v, Region: %v}", instanceIdentifier, region),
+	}
+}
+
+// InstanceStatusCheckByInstanceIdentifier is used to check the instance status of all the instance under chaos.
+func InstanceStatusCheckByInstanceIdentifier(instanceIdentifier, region string) error {
+
+	instanceIdentifierList := strings.Split(instanceIdentifier, ",")
+	if instanceIdentifier == "" || len(instanceIdentifierList) == 0 {
+		return cerrors.Error{
+			ErrorCode: cerrors.ErrorTypeStatusChecks,
+			Reason:    "no instance identifier provided to stop",
+			Target:    fmt.Sprintf("{RDS Instance Identifier: %v, Region: %v}", instanceIdentifier, region),
+		}
+	}
+	log.Infof("[Info]: The instances under chaos(IUC) are: %v", instanceIdentifierList)
+	return InstanceStatusCheck(instanceIdentifierList, region)
+}
+
+// InstanceStatusCheck is used to check the instance status of the instances.
+func InstanceStatusCheck(instanceIdentifierList []string, region string) error {
+
+	for _, id := range instanceIdentifierList {
+		instanceState, err := GetRDSInstanceStatus(id, region)
+		if err != nil {
+			return err
+		}
+		if instanceState != "available" {
+			return cerrors.Error{
+				ErrorCode: cerrors.ErrorTypeStatusChecks,
+				Reason:    fmt.Sprintf("rds instance is not in available state, current state: %v", instanceState),
+				Target:    fmt.Sprintf("{RDS Instance Identifier: %v, Region: %v}", id, region),
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/cloud/aws/rds/rds-operations.go b/pkg/cloud/aws/rds/rds-operations.go
new file mode 100644
index 000000000..76c1ef10e
--- /dev/null
+++ b/pkg/cloud/aws/rds/rds-operations.go
@@ -0,0 +1,118 @@
+package aws
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/service/rds"
+	"github.com/litmuschaos/litmus-go/pkg/cerrors"
+	"github.com/litmuschaos/litmus-go/pkg/cloud/aws/common"
+	"github.com/litmuschaos/litmus-go/pkg/log"
+	"github.com/litmuschaos/litmus-go/pkg/utils/retry"
+	"github.com/palantir/stacktrace"
+	"github.com/sirupsen/logrus"
+)
+
+// RDSInstanceStop will stop an aws rds instance
+func RDSInstanceStop(identifier, region string) error {
+
+	// Load session from shared config
+	sess := common.GetAWSSession(region)
+
+	// Create new RDS client
+	rdsSvc := rds.New(sess)
+
+	input := &rds.StopDBInstanceInput{
+		DBInstanceIdentifier: aws.String(identifier),
+	}
+	result, err := rdsSvc.StopDBInstance(input)
+	if err != nil {
+		return cerrors.Error{
+			ErrorCode: cerrors.ErrorTypeChaosInject,
+			Reason:    fmt.Sprintf("failed to stop RDS instance: %v", common.CheckAWSError(err).Error()),
+			Target:    fmt.Sprintf("{RDS Instance Identifier: %v, Region: %v}", identifier, region),
+		}
+	}
+
+	log.InfoWithValues("Stopping RDS instance:", logrus.Fields{
+		"DBInstanceStatus":     *result.DBInstance.DBInstanceStatus,
+		"DBInstanceIdentifier": *result.DBInstance.DBInstanceIdentifier,
+	})
+
+	return nil
+}
+
+// RDSInstanceStart will start an aws rds instance
+func RDSInstanceStart(identifier, region string) error {
+
+	// Load session from shared config
+	sess := common.GetAWSSession(region)
+
+	// Create new RDS client
+	rdsSvc := rds.New(sess)
+
+	input := &rds.StartDBInstanceInput{
+		DBInstanceIdentifier: aws.String(identifier),
+	}
+	result, err := rdsSvc.StartDBInstance(input)
+	if err != nil {
+		return cerrors.Error{
+			ErrorCode: cerrors.ErrorTypeChaosInject,
+			Reason:    fmt.Sprintf("failed to start RDS instance: %v", common.CheckAWSError(err).Error()),
+			Target:    fmt.Sprintf("{RDS Instance Identifier: %v, Region: %v}", identifier, region),
+		}
+	}
+
+	log.InfoWithValues("Starting RDS instance:", logrus.Fields{
+		"DBInstanceStatus":     *result.DBInstance.DBInstanceStatus,
+		"DBInstanceIdentifier": *result.DBInstance.DBInstanceIdentifier,
+	})
+
+	return nil
+}
+
+// WaitForRDSInstanceDown will wait for the rds instance to get in stopped state
+func WaitForRDSInstanceDown(timeout, delay int, region, identifier string) error {
+	return waitForRDSInstanceState(timeout, delay, region, identifier, "stopped")
+}
+
+// WaitForRDSInstanceUp will wait for the rds instance to get in available state
+func WaitForRDSInstanceUp(timeout, delay int, region, identifier string) error {
+	return waitForRDSInstanceState(timeout, delay, region, identifier, "available")
+}
+
+// waitForRDSInstanceState checks the instance status until it equals desiredState, waiting delay seconds
+// between the checks and using timeout/delay as the retry count. It returns an error if delay is not positive.
+func waitForRDSInstanceState(timeout, delay int, region, identifier, desiredState string) error {
+
+	// delay is the divisor below; a zero value (e.g. an unparsable STATUS_CHECK_DELAY) would panic
+	if delay <= 0 {
+		return cerrors.Error{
+			ErrorCode: cerrors.ErrorTypeStatusChecks,
+			Reason:    fmt.Sprintf("status check delay must be greater than zero, got: %v", delay),
+			Target:    fmt.Sprintf("{RDS Instance Identifier: %v, Region: %v}", identifier, region),
+		}
+	}
+
+	log.Info("[Status]: Checking RDS instance status")
+	return retry.
+		Times(uint(timeout / delay)).
+		Wait(time.Duration(delay) * time.Second).
+		Try(func(attempt uint) error {
+
+			instanceState, err := GetRDSInstanceStatus(identifier, region)
+			if err != nil {
+				return stacktrace.Propagate(err, "failed to get the status of RDS instance")
+			}
+			log.Infof("The instance state is %v", instanceState)
+			if instanceState != desiredState {
+				return cerrors.Error{
+					ErrorCode: cerrors.ErrorTypeStatusChecks,
+					Reason:    fmt.Sprintf("RDS instance is not in %v state", desiredState),
+					Target:    fmt.Sprintf("{RDS Instance Identifier: %v, Region: %v}", identifier, region),
+				}
+			}
+			return nil
+		})
+}
diff --git a/pkg/kube-aws/rds-instance-stop/environment/environment.go b/pkg/kube-aws/rds-instance-stop/environment/environment.go
new file mode 100644
index 000000000..c54301656
--- /dev/null
+++ b/pkg/kube-aws/rds-instance-stop/environment/environment.go
@@ -0,0 +1,29 @@
+package environment
+
+import (
+	"strconv"
+
+	clientTypes "k8s.io/apimachinery/pkg/types"
+
+	experimentTypes "github.com/litmuschaos/litmus-go/pkg/kube-aws/rds-instance-stop/types"
+	"github.com/litmuschaos/litmus-go/pkg/types"
+)
+
+// GetENV fetches all the env variables from the runner pod
+func GetENV(experimentDetails *experimentTypes.ExperimentDetails) {
+	experimentDetails.ExperimentName = types.Getenv("EXPERIMENT_NAME", "rds-instance-stop")
+	experimentDetails.ChaosNamespace = types.Getenv("CHAOS_NAMESPACE", "litmus")
+	experimentDetails.EngineName = types.Getenv("CHAOSENGINE", "")
+	experimentDetails.ChaosDuration, _ = strconv.Atoi(types.Getenv("TOTAL_CHAOS_DURATION", "30"))
+	experimentDetails.ChaosInterval, _ = strconv.Atoi(types.Getenv("CHAOS_INTERVAL", "30"))
+	experimentDetails.RampTime, _ = strconv.Atoi(types.Getenv("RAMP_TIME", "0"))
+	experimentDetails.ChaosUID = clientTypes.UID(types.Getenv("CHAOS_UID", ""))
+	experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "")
+	experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "")
+	experimentDetails.Delay, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_DELAY", "2"))
+	experimentDetails.Timeout, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_TIMEOUT", "600"))
+	experimentDetails.RDSInstanceIdentifier = types.Getenv("RDS_INSTANCE_IDENTIFIER", "")
+	experimentDetails.Region = types.Getenv("REGION", "")
+	experimentDetails.InstanceAffectedPerc, _ = strconv.Atoi(types.Getenv("INSTANCE_AFFECTED_PERC", "100"))
+	experimentDetails.Sequence = types.Getenv("SEQUENCE", "parallel")
+}
diff --git a/pkg/kube-aws/rds-instance-stop/types/types.go b/pkg/kube-aws/rds-instance-stop/types/types.go
new file mode 100644
index 000000000..44a6fc988
--- /dev/null
+++ b/pkg/kube-aws/rds-instance-stop/types/types.go
@@ -0,0 +1,24 @@
+package types
+
+import (
+	clientTypes "k8s.io/apimachinery/pkg/types"
+)
+
+// ExperimentDetails is for collecting all the experiment-related details
+type ExperimentDetails struct {
+	ExperimentName        string
+	EngineName            string
+	RampTime              int
+	ChaosDuration         int
+	ChaosInterval         int
+	ChaosUID              clientTypes.UID
+	InstanceID            string
+	ChaosNamespace        string
+	ChaosPodName          string
+	Timeout               int
+	Delay                 int
+	RDSInstanceIdentifier string
+	Region                string
+	InstanceAffectedPerc  int
+	Sequence              string
+}