From 238784bb694533ea8cf7bea2050dcdb85ffff684 Mon Sep 17 00:00:00 2001 From: litmusbot Date: Thu, 14 Apr 2022 08:10:28 +0000 Subject: [PATCH] 2165995170: version upgraded for chaos-charts --- charts/aws-ssm/experiments.yaml | 48 +- charts/azure/experiments.yaml | 86 +- charts/gcp/experiments.yaml | 116 +- charts/generic/experiments.yaml | 2168 +++++++++++++++--------------- charts/kube-aws/experiments.yaml | 52 +- charts/openebs/experiments.yaml | 694 +++++----- 6 files changed, 1582 insertions(+), 1582 deletions(-) diff --git a/charts/aws-ssm/experiments.yaml b/charts/aws-ssm/experiments.yaml index 53a9c067b..6915473cb 100644 --- a/charts/aws-ssm/experiments.yaml +++ b/charts/aws-ssm/experiments.yaml @@ -1,12 +1,12 @@ apiVersion: litmuschaos.io/v1alpha1 description: message: | - Execute AWS SSM Chaos on given ec2 instance Tag + Execute AWS SSM Chaos on given ec2 instance IDs kind: ChaosExperiment metadata: - name: aws-ssm-chaos-by-tag + name: aws-ssm-chaos-by-id labels: - name: aws-ssm-chaos-by-tag + name: aws-ssm-chaos-by-id app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -46,7 +46,7 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name aws-ssm-chaos-by-tag + - ./experiments -name aws-ssm-chaos-by-id command: - /bin/bash env: @@ -55,14 +55,14 @@ spec: - name: CHAOS_INTERVAL value: '60' - + # Period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - # provide tag of the target ec2 instances - # ex: team:devops (key:value) - - name: EC2_INSTANCE_TAG + # Instance ID of the target ec2 instance + # Multiple IDs can also be provided as comma separated values ex: id1,id2 + - name: EC2_INSTANCE_ID value: '' - name: REGION @@ -77,10 +77,6 @@ spec: - name: AWS_SHARED_CREDENTIALS_FILE value: '/tmp/cloud_config.yml' - # percentage of total instance to target - - name: INSTANCE_AFFECTED_PERC - value: '' - # Provide the name of ssm doc # if not using the default stress docs - name: DOCUMENT_NAME @@ -113,7 +109,7 @@ spec: - name: MEMORY_PERCENTAGE value: '80' - # provide the CPU chores to comsumed + # provide the CPU chores to be comsumed # 0 will consume all the available cpu cores - name: CPU_CORE value: '0' @@ -124,7 +120,7 @@ spec: value: 'litmus' labels: - name: aws-ssm-chaos-by-tag + name: aws-ssm-chaos-by-id app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest @@ -136,12 +132,12 @@ spec: apiVersion: litmuschaos.io/v1alpha1 description: message: | - Execute AWS SSM Chaos on given ec2 instance IDs + Execute AWS SSM Chaos on given ec2 instance Tag kind: ChaosExperiment metadata: - name: aws-ssm-chaos-by-id + name: aws-ssm-chaos-by-tag labels: - name: aws-ssm-chaos-by-id + name: aws-ssm-chaos-by-tag app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -181,7 +177,7 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name aws-ssm-chaos-by-id + - ./experiments -name aws-ssm-chaos-by-tag command: - /bin/bash env: @@ -190,14 +186,14 @@ spec: - name: CHAOS_INTERVAL value: '60' - + # Period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - # Instance ID of the target ec2 instance - # Multiple IDs can also be provided as comma separated values ex: id1,id2 - - name: EC2_INSTANCE_ID + # provide tag of the target ec2 instances + # ex: team:devops (key:value) + - name: EC2_INSTANCE_TAG value: '' - name: REGION @@ -212,6 +208,10 @@ spec: - name: AWS_SHARED_CREDENTIALS_FILE value: '/tmp/cloud_config.yml' + # percentage of total instance to target + - name: INSTANCE_AFFECTED_PERC + value: '' + # Provide the name of ssm doc # if not using the default stress docs - name: DOCUMENT_NAME @@ -244,7 +244,7 @@ spec: - name: MEMORY_PERCENTAGE value: '80' - # provide the CPU chores to be comsumed + # provide the CPU chores to comsumed # 0 will consume all the available cpu cores - name: CPU_CORE value: '0' @@ -255,7 +255,7 @@ spec: value: 'litmus' labels: - name: aws-ssm-chaos-by-id + name: aws-ssm-chaos-by-tag app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest diff --git a/charts/azure/experiments.yaml b/charts/azure/experiments.yaml index 06d46d207..27045bcc4 100644 --- a/charts/azure/experiments.yaml +++ b/charts/azure/experiments.yaml @@ -1,12 +1,12 @@ apiVersion: litmuschaos.io/v1alpha1 description: message: | - Terminating azure VM instance + Detaches disk from the VM and then re-attaches disk to the VM kind: ChaosExperiment metadata: - name: azure-instance-stop + name: azure-disk-loss labels: - name: azure-instance-stop + name: azure-disk-loss app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -46,25 +46,24 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name azure-instance-stop + - ./experiments -name azure-disk-loss command: - /bin/bash env: - name: TOTAL_CHAOS_DURATION - value: '30' + value: '30' - name: CHAOS_INTERVAL value: '30' + - name: LIB + value: 'litmus' + # Period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - - # provide the target instance name(s) (comma separated if multiple) - - name: AZURE_INSTANCE_NAME - value: '' - + # provide the resource group of the instance - name: RESOURCE_GROUP value: '' @@ -72,39 +71,39 @@ spec: # accepts enable/disable, default is disable - name: SCALE_SET value: '' - - # Provide the path of aks credentials mounted from secret + + # provide the virtual disk names (comma separated if multiple) + - name: VIRTUAL_DISK_NAMES + value: '' + + # provide the sequence type for the run. Options: serial/parallel + - name: SEQUENCE + value: 'parallel' + + # provide the path to aks credentials mounted from secret - name: AZURE_AUTH_LOCATION value: '/tmp/azure.auth' - - name: SEQUENCE - value: 'parallel' - - # provide the LIB - # only litmus supported - - name: LIB - value: 'litmus' - labels: - name: azure-instance-stop + name: azure-disk-loss app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest + secrets: - name: cloud-secret mountPath: /tmp/ - --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Detaches disk from the VM and then re-attaches disk to the VM + Terminating azure VM instance kind: ChaosExperiment metadata: - name: azure-disk-loss + name: azure-instance-stop labels: - name: azure-disk-loss + name: azure-instance-stop app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -144,24 +143,25 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name azure-disk-loss + - ./experiments -name azure-instance-stop command: - /bin/bash env: - name: TOTAL_CHAOS_DURATION - value: '30' + value: '30' - name: CHAOS_INTERVAL value: '30' - - name: LIB - value: 'litmus' - # Period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - + + # provide the target instance name(s) (comma separated if multiple) + - name: AZURE_INSTANCE_NAME + value: '' + # provide the resource group of the instance - name: RESOURCE_GROUP value: '' @@ -169,27 +169,27 @@ spec: # accepts enable/disable, default is disable - name: SCALE_SET value: '' - - # provide the virtual disk names (comma separated if multiple) - - name: VIRTUAL_DISK_NAMES - value: '' - - # provide the sequence type for the run. Options: serial/parallel - - name: SEQUENCE - value: 'parallel' - - # provide the path to aks credentials mounted from secret + + # Provide the path of aks credentials mounted from secret - name: AZURE_AUTH_LOCATION value: '/tmp/azure.auth' + - name: SEQUENCE + value: 'parallel' + + # provide the LIB + # only litmus supported + - name: LIB + value: 'litmus' + labels: - name: azure-disk-loss + name: azure-instance-stop app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest - secrets: - name: cloud-secret mountPath: /tmp/ + --- diff --git a/charts/gcp/experiments.yaml b/charts/gcp/experiments.yaml index 6c9982d00..5cf6be28b 100644 --- a/charts/gcp/experiments.yaml +++ b/charts/gcp/experiments.yaml @@ -1,12 +1,12 @@ apiVersion: litmuschaos.io/v1alpha1 description: message: | - Stops GCP VM instances and GKE nodes for a specified duration of time and later restarts them + Causes loss of a non-boot storage persistent disk from a GCP VM instance for a specified duration of time kind: ChaosExperiment metadata: - name: gcp-vm-instance-stop + name: gcp-vm-disk-loss labels: - name: gcp-vm-instance-stop + name: gcp-vm-disk-loss app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -42,15 +42,11 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name gcp-vm-instance-stop + - ./experiments -name gcp-vm-disk-loss command: - /bin/bash env: @@ -61,40 +57,40 @@ spec: - name: CHAOS_INTERVAL value: '30' - # parallel or serial; determines how the VM instances are terminated, all at once or one at a time - - name: SEQUENCE - value: 'parallel' - - # provide the LIB - # only litmus supported - name: LIB value: 'litmus' - - # period to wait before and after injection of chaos in sec + + # Period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - # enable or disable; shall be enabled if the target instance is a part of an auto scaling group. - - name: AUTO_SCALING_GROUP - value: 'disable' - - # Instance name of the target vm instance(s) - # Multiple instance names can be provided as comma separated values ex: instance1,instance2 - - name: VM_INSTANCE_NAMES - value: '' - - # GCP project ID to which the vm instances belong + # parallel or serial; determines how chaos is injected + - name: SEQUENCE + value: 'parallel' + + # set the GCP project id - name: GCP_PROJECT_ID value: '' - # Instance zone(s) of the target vm instance(s) - # If more than one instance is targetted, provide zone for each in the order of their - # respective instance name in VM_INSTANCE_NAME as comma separated values ex: zone1,zone2 - - name: INSTANCE_ZONES + # set the disk volume name(s) as comma seperated values + # eg. volume1,volume2,... + - name: DISK_VOLUME_NAMES + value: '' + + # set the disk zone(s) as comma seperated values in the corresponding + # order of DISK_VOLUME_NAME + # eg. zone1,zone2,... + - name: DISK_ZONES + value: '' + + # set the device name(s) as comma seperated values in the corresponding + # order of DISK_VOLUME_NAME + # eg. device1,device2,... + - name: DEVICE_NAMES value: '' labels: - name: gcp-vm-instance-stop + name: gcp-vm-disk-loss app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest @@ -106,12 +102,12 @@ spec: apiVersion: litmuschaos.io/v1alpha1 description: message: | - Causes loss of a non-boot storage persistent disk from a GCP VM instance for a specified duration of time + Stops GCP VM instances and GKE nodes for a specified duration of time and later restarts them kind: ChaosExperiment metadata: - name: gcp-vm-disk-loss + name: gcp-vm-instance-stop labels: - name: gcp-vm-disk-loss + name: gcp-vm-instance-stop app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -147,11 +143,15 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name gcp-vm-disk-loss + - ./experiments -name gcp-vm-instance-stop command: - /bin/bash env: @@ -162,40 +162,40 @@ spec: - name: CHAOS_INTERVAL value: '30' + # parallel or serial; determines how the VM instances are terminated, all at once or one at a time + - name: SEQUENCE + value: 'parallel' + + # provide the LIB + # only litmus supported - name: LIB value: 'litmus' - - # Period to wait before and after injection of chaos in sec + + # period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - # parallel or serial; determines how chaos is injected - - name: SEQUENCE - value: 'parallel' - - # set the GCP project id + # enable or disable; shall be enabled if the target instance is a part of an auto scaling group. + - name: AUTO_SCALING_GROUP + value: 'disable' + + # Instance name of the target vm instance(s) + # Multiple instance names can be provided as comma separated values ex: instance1,instance2 + - name: VM_INSTANCE_NAMES + value: '' + + # GCP project ID to which the vm instances belong - name: GCP_PROJECT_ID value: '' - # set the disk volume name(s) as comma seperated values - # eg. volume1,volume2,... - - name: DISK_VOLUME_NAMES - value: '' - - # set the disk zone(s) as comma seperated values in the corresponding - # order of DISK_VOLUME_NAME - # eg. zone1,zone2,... - - name: DISK_ZONES - value: '' - - # set the device name(s) as comma seperated values in the corresponding - # order of DISK_VOLUME_NAME - # eg. device1,device2,... - - name: DEVICE_NAMES + # Instance zone(s) of the target vm instance(s) + # If more than one instance is targetted, provide zone for each in the order of their + # respective instance name in VM_INSTANCE_NAME as comma separated values ex: zone1,zone2 + - name: INSTANCE_ZONES value: '' labels: - name: gcp-vm-disk-loss + name: gcp-vm-instance-stop app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest diff --git a/charts/generic/experiments.yaml b/charts/generic/experiments.yaml index 12e7202da..6733c2bbc 100644 --- a/charts/generic/experiments.yaml +++ b/charts/generic/experiments.yaml @@ -1,19 +1,18 @@ ---- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Taint the node where application pod is scheduled + Injects cpu consumption on pods belonging to an app deployment kind: ChaosExperiment metadata: - name: node-taint + name: pod-cpu-hog labels: - name: node-taint + name: pod-cpu-hog app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Cluster + scope: Namespaced permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -33,12 +32,24 @@ spec: verbs: ["get","list","watch"] # for creating and managing to execute comands inside target container - apiGroups: [""] - resources: ["pods/exec","pods/eviction"] + resources: ["pods/exec"] verbs: ["get","list","create"] - # ignore daemonsets while draining the node + # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) - apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["list","get","delete"] + resources: ["deployments","statefulsets","replicasets", "daemonsets"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: ["apps.openshift.io"] + resources: ["deploymentconfigs"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: [""] + resources: ["replicationcontrollers"] + verbs: ["get","list"] + # deriving the parent/owner details of the pod(if parent is argo-rollouts) + - apiGroups: ["argoproj.io"] + resources: ["rollouts"] + verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -47,65 +58,91 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list","patch","update"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name node-taint + - ./experiments -name pod-cpu-hog command: - /bin/bash env: + - name: TOTAL_CHAOS_DURATION + value: '60' - - name: TARGET_NODE - value: '' + ## Number of CPU cores to stress + - name: CPU_CORES + value: '1' - - name: NODE_LABEL + ## LOAD CPU WITH GIVEN PERCENT LOADING FOR THE CPU STRESS WORKERS. + ## 0 IS EFFECTIVELY A SLEEP (NO LOAD) AND 100 IS FULL LOADING + - name: CPU_LOAD + value: '100' + + ## Percentage of total pods to target + - name: PODS_AFFECTED_PERC value: '' - - name: TOTAL_CHAOS_DURATION - value: '60' + ## Period to wait before and after injection of chaos in sec + - name: RAMP_TIME + value: '' - # Provide the LIB here - # Only litmus supported + ## env var that describes the library used to execute the chaos + ## default: litmus. Supported values: litmus, pumba - name: LIB value: 'litmus' - # Period to wait before and after injection of chaos in sec - - name: RAMP_TIME + ## It is used in pumba lib only + - name: LIB_IMAGE + value: 'litmuschaos/go-runner:latest' + + ## It is used in pumba lib only + - name: STRESS_IMAGE + value: 'alexeiled/stress-ng:latest-ubuntu' + + ## provide the cluster runtime + - name: CONTAINER_RUNTIME + value: 'docker' + + # provide the socket file path + - name: SOCKET_PATH + value: '/var/run/docker.sock' + + - name: TARGET_PODS value: '' - # set taint label & effect - # key=value:effect or key:effect - - name: TAINTS - value: '' + # To select pods on specific node(s) + - name: NODE_LABEL + value: '' + ## it defines the sequence of chaos execution for multiple target pods + ## supported values: serial, parallel + - name: SEQUENCE + value: 'parallel' + labels: - name: node-taint + name: pod-cpu-hog app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job + app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Give IO disk stress on a node belonging to a deployment + Fillup Ephemeral Storage of a Resource kind: ChaosExperiment metadata: - name: node-io-stress + name: disk-fill labels: - name: node-io-stress + name: disk-fill app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Cluster - permissions: + scope: Namespaced + permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] resources: ["pods"] @@ -126,6 +163,22 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] + # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) + - apiGroups: ["apps"] + resources: ["deployments","statefulsets","replicasets", "daemonsets"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: ["apps.openshift.io"] + resources: ["deploymentconfigs"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: [""] + resources: ["replicationcontrollers"] + verbs: ["get","list"] + # deriving the parent/owner details of the pod(if parent is argo-rollouts) + - apiGroups: ["argoproj.io"] + resources: ["rollouts"] + verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -134,95 +187,88 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name node-io-stress + - ./experiments -name disk-fill command: - /bin/bash env: - - name: TOTAL_CHAOS_DURATION - value: '120' + - name: TARGET_CONTAINER + value: '' + + - name: FILL_PERCENTAGE + value: '80' - ## specify the size as percentage of free space on the file system - ## default value 90 (in percentage) - - name: FILESYSTEM_UTILIZATION_PERCENTAGE - value: '10' + - name: TOTAL_CHAOS_DURATION + value: '60' - ## we can specify the size in Gigabyte (Gb) also in place of percentage of free space - ## NOTE: for selecting this option FILESYSTEM_UTILIZATION_PERCENTAGE should be empty - - name: FILESYSTEM_UTILIZATION_BYTES + # Period to wait before and after injection of chaos in sec + - name: RAMP_TIME value: '' - ## Number of core of CPU - - name: CPU - value: '1' + # Provide the LIB here + # Only litmus supported + - name: LIB + value: 'litmus' - ## Total number of workers default value is 4 - - name: NUMBER_OF_WORKERS - value: '4' + # provide the data block size + # supported unit is KB + - name: DATA_BLOCK_SIZE + value: '256' - ## Total number of vm workers - - name: VM_WORKERS - value: '1' + - name: TARGET_PODS + value: '' - ## enter the comma separated target nodes name - - name: TARGET_NODES + - name: EPHEMERAL_STORAGE_MEBIBYTES value: '' + # To select pods on specific node(s) - name: NODE_LABEL - value: '' - - # Period to wait before and after injection of chaos in sec - - name: RAMP_TIME value: '' - # Provide the LIB here - # Only litmus supported - - name: LIB - value: 'litmus' + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: '' - # provide lib image - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' + value: 'litmuschaos/go-runner:latest' - ## percentage of total nodes to target - - name: NODES_AFFECTED_PERC - value: '' + # Provide the container runtime path + # Default set to docker container path + - name: CONTAINER_PATH + value: '/var/lib/docker/containers' - ## it defines the sequence of chaos execution for multiple target nodes + ## it defines the sequence of chaos execution for multiple target pods ## supported values: serial, parallel - name: SEQUENCE value: 'parallel' - + labels: - name: node-io-stress + name: disk-fill app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job + app.kubernetes.io/host-path-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - poweroff node + Injects network latency on pods belonging to an app deployment kind: ChaosExperiment metadata: - name: node-poweroff + name: pod-network-latency labels: - name: node-poweroff + name: pod-network-latency app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Cluster + scope: Namespaced permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -232,9 +278,9 @@ spec: - apiGroups: [""] resources: ["events"] verbs: ["create","get","list","patch","update"] - # Fetch configmaps & secrets details and mount it to the experiment pod (if specified) + # Fetch configmaps details and mount it to the experiment pod (if specified) - apiGroups: [""] - resources: ["configmaps","secrets"] + resources: ["configmaps"] verbs: ["get","list",] # Track and get the runner, experiment, and helper pods log - apiGroups: [""] @@ -244,6 +290,22 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] + # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) + - apiGroups: ["apps"] + resources: ["deployments","statefulsets","replicasets", "daemonsets"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: ["apps.openshift.io"] + resources: ["deploymentconfigs"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: [""] + resources: ["replicationcontrollers"] + verbs: ["get","list"] + # deriving the parent/owner details of the pod(if parent is argo-rollouts) + - apiGroups: ["argoproj.io"] + resources: ["rollouts"] + verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -252,70 +314,98 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name node-restart + - ./experiments -name pod-network-latency command: - /bin/bash env: - - name: SSH_USER - value: 'root' - - - name: TOTAL_CHAOS_DURATION - value: '60' - - - name: REBOOT_COMMAND - value: '-o ServerAliveInterval=1 -o ServerAliveCountMax=1 "sudo systemctl poweroff --force --force" ; true' - - # Period to wait before and after injection of chaos in sec - - name: RAMP_TIME + + - name: TARGET_CONTAINER value: '' - # PROVIDE THE LIB HERE - # ONLY LITMUS SUPPORTED - - name: LIB - value: 'litmus' + - name: NETWORK_INTERFACE + value: 'eth0' # provide lib image - name: LIB_IMAGE - value: "litmuschaos/go-runner:latest" + value: 'litmuschaos/go-runner:latest' - # ENTER THE TARGET NODE NAME - - name: TARGET_NODE - value: '' + - name: TC_IMAGE + value: 'gaiadocker/iproute2' - - name: NODE_LABEL - value: '' + - name: NETWORK_LATENCY + value: '2000' #in ms - # ENTER THE TARGET NODE IP - - name: TARGET_NODE_IP + - name: TOTAL_CHAOS_DURATION + value: '60' # in seconds + + # Time period to wait before and after injection of chaos in sec + - name: RAMP_TIME + value: '' + + - name: JITTER + value: '0' + + # lib can be litmus or pumba + - name: LIB + value: 'litmus' + + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: '' + + - name: TARGET_PODS + value: '' + + # provide the name of container runtime + # for litmus LIB, it supports docker, containerd, crio + # for pumba LIB, it supports docker only + - name: CONTAINER_RUNTIME + value: 'docker' + + # provide the destination ips + # chaos injection will be triggered for these destination ips + - name: DESTINATION_IPS + value: '' + + # provide the destination hosts + # chaos injection will be triggered for these destination hosts + - name: DESTINATION_HOSTS + value: '' + + # provide the socket file path + - name: SOCKET_PATH + value: '/var/run/docker.sock' + + # To select pods on specific node(s) + - name: NODE_LABEL value: '' + ## it defines the sequence of chaos execution for multiple target pods + ## supported values: serial, parallel + - name: SEQUENCE + value: 'parallel' + labels: - name: node-poweroff + name: pod-network-latency app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job + app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest - secrets: - - name: id-rsa - mountPath: /mnt/ --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Injects 100% network packet loss on pods belonging to an app deployment + Pod DNS Error injects dns failure/error in target pod containers kind: ChaosExperiment metadata: - name: pod-network-partition + name: pod-dns-error labels: - name: pod-network-partition + name: pod-dns-error app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -343,87 +433,102 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] + # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) + - apiGroups: ["apps"] + resources: ["deployments","statefulsets","replicasets", "daemonsets"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: ["apps.openshift.io"] + resources: ["deploymentconfigs"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: [""] + resources: ["replicationcontrollers"] + verbs: ["get","list"] + # deriving the parent/owner details of the pod(if parent is argo-rollouts) + - apiGroups: ["argoproj.io"] + resources: ["rollouts"] + verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] verbs: ["create","list","get","delete","deletecollection"] - # performs CRUD operations on the network policies - - apiGroups: ["networking.k8s.io"] - resources: ["networkpolicies"] - verbs: ["create","delete","list","get"] # for creation, status polling and deletion of litmus chaos resources used within a chaos workflow - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] image: "litmuschaos/go-runner:latest" - imagePullPolicy: Always args: - - -c - - ./experiments -name pod-network-partition + - -c + - ./experiments -name pod-dns-error command: - - /bin/bash + - /bin/bash env: - - - name: TOTAL_CHAOS_DURATION - value: '60' # in seconds + - name: TARGET_CONTAINER + value: "" - # ime period to wait before and after injection of chaos in sec - - name: RAMP_TIME - value: '' + # provide lib image + - name: LIB_IMAGE + value: "litmuschaos/go-runner:latest" - # it should be litmus - - name: LIB - value: 'litmus' + - name: TOTAL_CHAOS_DURATION + value: "60" # in seconds - # provide the destination ips - # chaos injection will be triggered for these destination ips - - name: DESTINATION_IPS - value: '' + # Time period to wait before and after injection of chaos in sec + - name: RAMP_TIME + value: "" - # provide the destination hosts - # chaos injection will be triggered for these destination hosts - - name: DESTINATION_HOSTS - value: '' + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: "" - # provide network policy type - # support ingress, egress, all values - - name: POLICY_TYPES - value: 'all' + - name: TARGET_PODS + value: "" - # provide labels of the destination pods - - name: POD_SELECTOR - value: '' + # provide the name of container runtime, it supports docker, containerd, crio + - name: CONTAINER_RUNTIME + value: "docker" - # provide labels the destination namespaces - - name: NAMESPACE_SELECTOR - value: '' + # provide the socket file path + - name: SOCKET_PATH + value: "/var/run/docker.sock" - # provide comma separated ports - - name: PORTS - value: '' + ## it defines the sequence of chaos execution for multiple target pods + ## supported values: serial, parallel + - name: SEQUENCE + value: "parallel" + + # list of the target hostnames or kewywords eg. '["litmuschaos","chaosnative.io"]' . If empty all hostnames are targets + - name: TARGET_HOSTNAMES + value: "" + + # can be either exact or substring, determines whether the dns query has to match exactly with one of the targets or can have any of the targets as substring + - name: MATCH_SCHEME + value: "exact" labels: - name: pod-network-partition + experiment: pod-dns-error app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job + app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Injects cpu consumption on pods belonging to an app deployment + Give a cpu spike on a node belonging to a deployment kind: ChaosExperiment metadata: - name: pod-cpu-hog-exec + name: node-cpu-hog labels: - name: pod-cpu-hog-exec + name: node-cpu-hog app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Namespaced + scope: Cluster permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -445,22 +550,6 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] - # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) - - apiGroups: ["apps"] - resources: ["deployments","statefulsets","replicasets", "daemonsets"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: ["apps.openshift.io"] - resources: ["deploymentconfigs"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: [""] - resources: ["replicationcontrollers"] - verbs: ["get","list"] - # deriving the parent/owner details of the pod(if parent is argo-rollouts) - - apiGroups: ["argoproj.io"] - resources: ["rollouts"] - verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -469,48 +558,63 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name pod-cpu-hog-exec + - ./experiments -name node-cpu-hog command: - /bin/bash env: + - name: TOTAL_CHAOS_DURATION value: '60' - ## Number of CPU cores to stress - - name: CPU_CORES - value: '1' + # Period to wait before and after injection of chaos in sec + - name: RAMP_TIME + value: '' - ## Percentage of total pods to target - - name: PODS_AFFECTED_PERC + ## ENTER THE NUMBER OF CORES OF CPU FOR CPU HOGGING + ## OPTIONAL VALUE IN CASE OF EMPTY VALUE IT WILL TAKE NODE CPU CAPACITY + - name: NODE_CPU_CORE value: '' - ## Period to wait before and after injection of chaos in sec - - name: RAMP_TIME + ## LOAD CPU WITH GIVEN PERCENT LOADING FOR THE CPU STRESS WORKERS. + ## 0 IS EFFECTIVELY A SLEEP (NO LOAD) AND 100 IS FULL LOADING + - name: CPU_LOAD + value: '100' + + # ENTER THE COMMA SEPARATED TARGET NODES NAME + - name: TARGET_NODES value: '' - ## env var that describes the library used to execute the chaos - ## default: litmus. Supported values: litmus + - name: NODE_LABEL + value: '' + + # PROVIDE THE LIB HERE + # ONLY LITMUS SUPPORTED - name: LIB value: 'litmus' - - # The command to kill the chaos process - - name: CHAOS_KILL_COMMAND - value: "kill $(find /proc -name exe -lname '*/md5sum' 2>&1 | grep -v 'Permission denied' | awk -F/ '{print $(NF-1)}')" - - name: TARGET_PODS + # provide lib image + - name: LIB_IMAGE + value: 'litmuschaos/go-runner:latest' + + ## percentage of total nodes to target + - name: NODES_AFFECTED_PERC value: '' - ## it defines the sequence of chaos execution for multiple target pods + ## it defines the sequence of chaos execution for multiple target nodes ## supported values: serial, parallel - name: SEQUENCE value: 'parallel' - + labels: - name: pod-cpu-hog-exec + name: node-cpu-hog app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest @@ -523,9 +627,9 @@ description: Injects memory consumption on pods belonging to an app deployment kind: ChaosExperiment metadata: - name: pod-memory-hog-exec + name: pod-memory-hog labels: - name: pod-memory-hog-exec + name: pod-memory-hog app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -581,7 +685,7 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name pod-memory-hog-exec + - ./experiments -name pod-memory-hog command: - /bin/bash env: @@ -591,6 +695,10 @@ spec: ## enter the amount of memory in megabytes to be consumed by the application pod - name: MEMORY_CONSUMPTION value: '500' + + ## Number of workers to perform stress + - name: NUMBER_OF_WORKERS + value: '1' ## percentage of total pods to target - name: PODS_AFFECTED_PERC @@ -601,131 +709,61 @@ spec: value: '' ## env var that describes the library used to execute the chaos - ## default: litmus. Supported values: litmus + ## default: litmus. Supported values: litmus, pumba - name: LIB value: 'litmus' - - # The command to kill the chaos process - - name: CHAOS_KILL_COMMAND - value: "kill $(find /proc -name exe -lname '*/dd' 2>&1 | grep -v 'Permission denied' | awk -F/ '{print $(NF-1)}' | head -n 1)" - - ## it defines the sequence of chaos execution for multiple target pods - ## supported values: serial, parallel - - name: SEQUENCE - value: 'parallel' - - - name: TARGET_PODS - value: '' - labels: - name: pod-memory-hog-exec - app.kubernetes.io/part-of: litmus - app.kubernetes.io/component: experiment-job - app.kubernetes.io/version: latest + ## It is used in pumba lib only + - name: LIB_IMAGE + value: 'litmuschaos/go-runner:latest' ---- ---- -apiVersion: litmuschaos.io/v1alpha1 -description: - message: | - Drain the node where application pod is scheduled -kind: ChaosExperiment -metadata: - name: node-drain - labels: - name: node-drain - app.kubernetes.io/part-of: litmus - app.kubernetes.io/component: chaosexperiment - app.kubernetes.io/version: latest -spec: - definition: - scope: Cluster - permissions: - # Create and monitor the experiment & helper pods - - apiGroups: [""] - resources: ["pods"] - verbs: ["create","delete","get","list","patch","update", "deletecollection"] - # Performs CRUD operations on the events inside chaosengine and chaosresult - - apiGroups: [""] - resources: ["events"] - verbs: ["create","get","list","patch","update"] - # Fetch configmaps details and mount it to the experiment pod (if specified) - - apiGroups: [""] - resources: ["configmaps"] - verbs: ["get","list",] - # Track and get the runner, experiment, and helper pods log - - apiGroups: [""] - resources: ["pods/log"] - verbs: ["get","list","watch"] - # for creating and managing to execute comands inside target container - - apiGroups: [""] - resources: ["pods/exec","pods/eviction"] - verbs: ["get","list","create"] - # ignore daemonsets while draining the node - - apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["list","get","delete"] - # for configuring and monitor the experiment job by the chaos-runner pod - - apiGroups: ["batch"] - resources: ["jobs"] - verbs: ["create","list","get","delete","deletecollection"] - # for creation, status polling and deletion of litmus chaos resources used within a chaos workflow - - apiGroups: ["litmuschaos.io"] - resources: ["chaosengines","chaosexperiments","chaosresults"] - verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list","patch"] - image: "litmuschaos/go-runner:latest" - imagePullPolicy: Always - args: - - -c - - ./experiments -name node-drain - command: - - /bin/bash - env: - - - name: TARGET_NODE - value: '' + ## It is used in pumba lib only + - name: STRESS_IMAGE + value: 'alexeiled/stress-ng:latest-ubuntu' - - name: NODE_LABEL - value: '' + ## provide the cluster runtime + - name: CONTAINER_RUNTIME + value: 'docker' - - name: TOTAL_CHAOS_DURATION - value: '60' + # provide the socket file path + - name: SOCKET_PATH + value: '/var/run/docker.sock' + + ## it defines the sequence of chaos execution for multiple target pods + ## supported values: serial, parallel + - name: SEQUENCE + value: 'parallel' - # Provide the LIB here - # Only litmus supported - - name: LIB - value: 'litmus' + - name: TARGET_PODS + value: '' + + # To select pods on specific node(s) + - name: NODE_LABEL + value: '' - # Period to wait before and after injection of chaos in sec - - name: RAMP_TIME - value: '' - labels: - name: node-drain + name: pod-memory-hog app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job + app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Give a cpu spike on a node belonging to a deployment + Injects 100% network packet loss on pods belonging to an app deployment kind: ChaosExperiment metadata: - name: node-cpu-hog + name: pod-network-partition labels: - name: node-cpu-hog + name: pod-network-partition app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Cluster + scope: Namespaced permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -751,67 +789,63 @@ spec: - apiGroups: ["batch"] resources: ["jobs"] verbs: ["create","list","get","delete","deletecollection"] + # performs CRUD operations on the network policies + - apiGroups: ["networking.k8s.io"] + resources: ["networkpolicies"] + verbs: ["create","delete","list","get"] # for creation, status polling and deletion of litmus chaos resources used within a chaos workflow - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name node-cpu-hog + - ./experiments -name pod-network-partition command: - /bin/bash env: - + - name: TOTAL_CHAOS_DURATION - value: '60' + value: '60' # in seconds - # Period to wait before and after injection of chaos in sec + # ime period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - ## ENTER THE NUMBER OF CORES OF CPU FOR CPU HOGGING - ## OPTIONAL VALUE IN CASE OF EMPTY VALUE IT WILL TAKE NODE CPU CAPACITY - - name: NODE_CPU_CORE - value: '' - - ## LOAD CPU WITH GIVEN PERCENT LOADING FOR THE CPU STRESS WORKERS. - ## 0 IS EFFECTIVELY A SLEEP (NO LOAD) AND 100 IS FULL LOADING - - name: CPU_LOAD - value: '100' + # it should be litmus + - name: LIB + value: 'litmus' - # ENTER THE COMMA SEPARATED TARGET NODES NAME - - name: TARGET_NODES + # provide the destination ips + # chaos injection will be triggered for these destination ips + - name: DESTINATION_IPS value: '' - - name: NODE_LABEL + # provide the destination hosts + # chaos injection will be triggered for these destination hosts + - name: DESTINATION_HOSTS value: '' - # PROVIDE THE LIB HERE - # ONLY LITMUS SUPPORTED - - name: LIB - value: 'litmus' + # provide network policy type + # support ingress, egress, all values + - name: POLICY_TYPES + value: 'all' - # provide lib image - - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' + # provide labels of the destination pods + - name: POD_SELECTOR + value: '' - ## percentage of total nodes to target - - name: NODES_AFFECTED_PERC + # provide labels the destination namespaces + - name: NAMESPACE_SELECTOR + value: '' + + # provide comma separated ports + - name: PORTS value: '' - ## it defines the sequence of chaos execution for multiple target nodes - ## supported values: serial, parallel - - name: SEQUENCE - value: 'parallel' - labels: - name: node-cpu-hog + name: pod-network-partition app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest @@ -959,19 +993,18 @@ spec: --- apiVersion: litmuschaos.io/v1alpha1 description: - message: | - Restart node + message: "Kills a container belonging to an application pod \n" kind: ChaosExperiment metadata: - name: node-restart + name: container-kill labels: - name: node-restart + name: container-kill app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Cluster + scope: Namespaced permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -981,9 +1014,9 @@ spec: - apiGroups: [""] resources: ["events"] verbs: ["create","get","list","patch","update"] - # Fetch configmaps & secrets details and mount it to the experiment pod (if specified) + # Fetch configmaps details and mount it to the experiment pod (if specified) - apiGroups: [""] - resources: ["configmaps","secrets"] + resources: ["configmaps"] verbs: ["get","list",] # Track and get the runner, experiment, and helper pods log - apiGroups: [""] @@ -993,6 +1026,22 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] + # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) + - apiGroups: ["apps"] + resources: ["deployments","statefulsets","replicasets", "daemonsets"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: ["apps.openshift.io"] + resources: ["deploymentconfigs"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: [""] + resources: ["replicationcontrollers"] + verbs: ["get","list"] + # deriving the parent/owner details of the pod(if parent is argo-rollouts) + - apiGroups: ["argoproj.io"] + resources: ["rollouts"] + verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -1001,74 +1050,89 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name node-restart + - ./experiments -name container-kill command: - /bin/bash env: - - name: SSH_USER - value: 'root' - - name: TOTAL_CHAOS_DURATION - value: '60' + - name: TARGET_CONTAINER + value: '' # Period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - # PROVIDE THE LIB HERE - # ONLY LITMUS SUPPORTED + # lib can be litmus or pumba - name: LIB value: 'litmus' - - # provide lib image - - name: LIB_IMAGE - value: "litmuschaos/go-runner:latest" - - # ENTER THE TARGET NODE NAME - - name: TARGET_NODE + + - name: TARGET_PODS value: '' - - name: NODE_LABEL - value: '' + # provide the chaos interval + - name: CHAOS_INTERVAL + value: '10' - # ENTER THE TARGET NODE IP - - name: TARGET_NODE_IP - value: '' + - name: SIGNAL + value: 'SIGKILL' - labels: - name: node-restart - app.kubernetes.io/part-of: litmus - app.kubernetes.io/component: experiment-job - app.kubernetes.io/version: latest - secrets: - - name: id-rsa - mountPath: /mnt/ + # provide the socket file path + - name: SOCKET_PATH + value: '/var/run/docker.sock' + + # provide the name of container runtime + # for litmus LIB, it supports docker, containerd, crio + # for pumba LIB, it supports docker only + - name: CONTAINER_RUNTIME + value: 'docker' + + # provide the total chaos duration + - name: TOTAL_CHAOS_DURATION + value: '20' + + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: '' + + # To select pods on specific node(s) + - name: NODE_LABEL + value: '' + + - name: LIB_IMAGE + value: 'litmuschaos/go-runner:latest' + + ## it defines the sequence of chaos execution for multiple target pods + ## supported values: serial, parallel + - name: SEQUENCE + value: 'parallel' + + labels: + name: container-kill + app.kubernetes.io/part-of: litmus + app.kubernetes.io/component: experiment-job + app.kubernetes.io/runtime-api-usage: "true" + app.kubernetes.io/version: latest ---- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - IO stress on a app pods belonging to an app deployment + Scale the application replicas and test the node autoscaling on cluster kind: ChaosExperiment metadata: - name: pod-io-stress + name: pod-autoscaler labels: - name: pod-io-stress + name: pod-autoscaler app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Namespaced + scope: Cluster permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -1090,22 +1154,10 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] - # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) + # performs CRUD operations on the deployments and statefulsets - apiGroups: ["apps"] - resources: ["deployments","statefulsets","replicasets", "daemonsets"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: ["apps.openshift.io"] - resources: ["deploymentconfigs"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: [""] - resources: ["replicationcontrollers"] - verbs: ["get","list"] - # deriving the parent/owner details of the pod(if parent is argo-rollouts) - - apiGroups: ["argoproj.io"] - resources: ["rollouts"] - verbs: ["list","get"] + resources: ["deployments","statefulsets"] + verbs: ["list","get","patch","update"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -1118,86 +1170,43 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name pod-io-stress + - ./experiments -name pod-autoscaler command: - /bin/bash env: - - name: TOTAL_CHAOS_DURATION - value: '120' - ## specify the size as percentage of free space on the file system - ## default value 90 (in percentage) - - name: FILESYSTEM_UTILIZATION_PERCENTAGE - value: '10' - - ## we can specify the size in Gigabyte (Gb) also in place of percentage of free space - ## NOTE: for selecting this option FILESYSTEM_UTILIZATION_PERCENTAGE should be empty - - name: FILESYSTEM_UTILIZATION_BYTES - value: '' - - ## Total number of workers default value is 4 - - name: NUMBER_OF_WORKERS - value: '4' - - ## Percentage of total pods to target - - name: PODS_AFFECTED_PERC - value: '' - - # provide volume mount path - - name: VOLUME_MOUNT_PATH - value: '' - - ## specify the comma separated target pods - - name: TARGET_PODS - value: '' - - # To select pods on specific node(s) - - name: NODE_LABEL - value: '' - - # Period to wait before and after injection of chaos in sec - - name: RAMP_TIME - value: '' - - # Provide the LIB here - # support litmus and pumba - - name: LIB - value: 'litmus' - - # provide lib image - - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' - - ## provide the cluster runtime - - name: CONTAINER_RUNTIME - value: 'docker' + - name: TOTAL_CHAOS_DURATION + value: '60' - # provide the socket file path - - name: SOCKET_PATH - value: '/var/run/docker.sock' + # Period to wait before and after injection of chaos in sec + - name: RAMP_TIME + value: '' - ## it defines the sequence of chaos execution for multiple target pods - ## supported values: serial, parallel - - name: SEQUENCE - value: 'parallel' + # Number of replicas to scale + - name: REPLICA_COUNT + value: '5' + # PROVIDE THE LIB HERE + # ONLY LITMUS SUPPORTED + - name: LIB + value: 'litmus' + labels: - name: pod-io-stress + name: pod-autoscaler app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Give a memory hog on a node belonging to a deployment + Kills the kubelet service on the application node to check the resiliency. kind: ChaosExperiment metadata: - name: node-memory-hog + name: kubelet-service-kill labels: - name: node-memory-hog + name: kubelet-service-kill app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -1241,79 +1250,56 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name node-memory-hog + - ./experiments -name kubelet-service-kill command: - /bin/bash env: - + - name: TOTAL_CHAOS_DURATION - value: '120' - - ## Specify the size as percent of total node capacity Ex: '30' - ## NOTE: for selecting this option keep MEMORY_CONSUMPTION_MEBIBYTES empty - - name: MEMORY_CONSUMPTION_PERCENTAGE - value: '' - - ## Specify the amount of memory to be consumed in mebibytes - ## NOTE: for selecting this option keep MEMORY_CONSUMPTION_PERCENTAGE empty - - name: MEMORY_CONSUMPTION_MEBIBYTES - value: '' - - - name: NUMBER_OF_WORKERS - value: '1' - - # ENTER THE COMMA SEPARATED TARGET NODES NAME - - name: TARGET_NODES - value: '' - - - name: NODE_LABEL - value: '' + value: '60' # in seconds # Period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - # Provide the LIB here - # Only litmus supported - name: LIB value: 'litmus' + - name: NODE_LABEL + value: '' + # provide lib image - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' - - ## percentage of total nodes to target - - name: NODES_AFFECTED_PERC + value: 'ubuntu:16.04' + + # provide the target node name + - name: TARGET_NODE value: '' - ## it defines the sequence of chaos execution for multiple target nodes - ## supported values: serial, parallel - - name: SEQUENCE - value: 'parallel' - labels: - name: node-memory-hog + name: kubelet-service-kill app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job + app.kubernetes.io/service-kill: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Fillup Ephemeral Storage of a Resource + Restart node kind: ChaosExperiment metadata: - name: disk-fill + name: node-restart labels: - name: disk-fill + name: node-restart app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Namespaced - permissions: + scope: Cluster + permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] resources: ["pods"] @@ -1322,9 +1308,9 @@ spec: - apiGroups: [""] resources: ["events"] verbs: ["create","get","list","patch","update"] - # Fetch configmaps details and mount it to the experiment pod (if specified) + # Fetch configmaps & secrets details and mount it to the experiment pod (if specified) - apiGroups: [""] - resources: ["configmaps"] + resources: ["configmaps","secrets"] verbs: ["get","list",] # Track and get the runner, experiment, and helper pods log - apiGroups: [""] @@ -1334,22 +1320,6 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] - # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) - - apiGroups: ["apps"] - resources: ["deployments","statefulsets","replicasets", "daemonsets"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: ["apps.openshift.io"] - resources: ["deploymentconfigs"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: [""] - resources: ["replicationcontrollers"] - verbs: ["get","list"] - # deriving the parent/owner details of the pod(if parent is argo-rollouts) - - apiGroups: ["argoproj.io"] - resources: ["rollouts"] - verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -1358,20 +1328,20 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name disk-fill + - ./experiments -name node-restart command: - /bin/bash env: - - - name: TARGET_CONTAINER - value: '' - - - name: FILL_PERCENTAGE - value: '80' + - name: SSH_USER + value: 'root' - name: TOTAL_CHAOS_DURATION value: '60' @@ -1380,66 +1350,51 @@ spec: - name: RAMP_TIME value: '' - # Provide the LIB here - # Only litmus supported + # PROVIDE THE LIB HERE + # ONLY LITMUS SUPPORTED - name: LIB value: 'litmus' - # provide the data block size - # supported unit is KB - - name: DATA_BLOCK_SIZE - value: '256' - - - name: TARGET_PODS - value: '' + # provide lib image + - name: LIB_IMAGE + value: "litmuschaos/go-runner:latest" - - name: EPHEMERAL_STORAGE_MEBIBYTES + # ENTER THE TARGET NODE NAME + - name: TARGET_NODE value: '' - # To select pods on specific node(s) - name: NODE_LABEL value: '' - ## percentage of total pods to target - - name: PODS_AFFECTED_PERC + # ENTER THE TARGET NODE IP + - name: TARGET_NODE_IP value: '' - - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' - - # Provide the container runtime path - # Default set to docker container path - - name: CONTAINER_PATH - value: '/var/lib/docker/containers' - - ## it defines the sequence of chaos execution for multiple target pods - ## supported values: serial, parallel - - name: SEQUENCE - value: 'parallel' - labels: - name: disk-fill + name: node-restart app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/host-path-usage: "true" app.kubernetes.io/version: latest + secrets: + - name: id-rsa + mountPath: /mnt/ --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Kills the kubelet service on the application node to check the resiliency. + Inject network packet corruption into application pod kind: ChaosExperiment metadata: - name: kubelet-service-kill + name: pod-network-corruption labels: - name: kubelet-service-kill + name: pod-network-corruption app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Cluster + scope: Namespaced permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -1461,6 +1416,22 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] + # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) + - apiGroups: ["apps"] + resources: ["deployments","statefulsets","replicasets", "daemonsets"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: ["apps.openshift.io"] + resources: ["deploymentconfigs"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: [""] + resources: ["replicationcontrollers"] + verbs: ["get","list"] + # deriving the parent/owner details of the pod(if parent is argo-rollouts) + - apiGroups: ["argoproj.io"] + resources: ["rollouts"] + verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -1469,63 +1440,101 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name kubelet-service-kill + - ./experiments -name pod-network-corruption command: - /bin/bash env: - + + - name: TARGET_CONTAINER + value: '' + + # provide lib image + - name: LIB_IMAGE + value: 'litmuschaos/go-runner:latest' + + - name: NETWORK_INTERFACE + value: 'eth0' + + - name: TC_IMAGE + value: 'gaiadocker/iproute2' + + - name: NETWORK_PACKET_CORRUPTION_PERCENTAGE + value: '100' #in PERCENTAGE + - name: TOTAL_CHAOS_DURATION value: '60' # in seconds - # Period to wait before and after injection of chaos in sec + # Time period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - + + # lib can be litmus or pumba - name: LIB value: 'litmus' + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: '' + + - name: TARGET_PODS + value: '' + + # To select pods on specific node(s) - name: NODE_LABEL value: '' - # provide lib image - - name: LIB_IMAGE - value: 'ubuntu:16.04' - - # provide the target node name - - name: TARGET_NODE + # provide the name of container runtime + # for litmus LIB, it supports docker, containerd, crio + # for pumba LIB, it supports docker only + - name: CONTAINER_RUNTIME + value: 'docker' + + # provide the destination ips + # chaos injection will be triggered for these destination ips + - name: DESTINATION_IPS + value: '' + + # provide the destination hosts + # chaos injection will be triggered for these destination hosts + - name: DESTINATION_HOSTS value: '' + # provide the socket file path + - name: SOCKET_PATH + value: '/var/run/docker.sock' + + ## it defines the sequence of chaos execution for multiple target pods + ## supported values: serial, parallel + - name: SEQUENCE + value: 'parallel' + labels: - name: kubelet-service-kill + name: pod-network-corruption app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/service-kill: "true" + app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Scale the application replicas and test the node autoscaling on cluster + Deletes a pod belonging to a deployment/statefulset/daemonset kind: ChaosExperiment metadata: - name: pod-autoscaler + name: pod-delete labels: - name: pod-autoscaler + name: pod-delete app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Cluster + scope: Namespaced permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -1547,10 +1556,22 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] - # performs CRUD operations on the deployments and statefulsets + # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) - apiGroups: ["apps"] - resources: ["deployments","statefulsets"] - verbs: ["list","get","patch","update"] + resources: ["deployments","statefulsets","replicasets", "daemonsets"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: ["apps.openshift.io"] + resources: ["deploymentconfigs"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: [""] + resources: ["replicationcontrollers"] + verbs: ["get","list"] + # deriving the parent/owner details of the pod(if parent is argo-rollouts) + - apiGroups: ["argoproj.io"] + resources: ["rollouts"] + verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -1563,43 +1584,60 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name pod-autoscaler + - ./experiments -name pod-delete command: - /bin/bash env: - name: TOTAL_CHAOS_DURATION - value: '60' + value: '15' # Period to wait before and after injection of chaos in sec - name: RAMP_TIME value: '' - # Number of replicas to scale - - name: REPLICA_COUNT + - name: FORCE + value: 'true' + + - name: CHAOS_INTERVAL value: '5' - # PROVIDE THE LIB HERE - # ONLY LITMUS SUPPORTED + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: '' + - name: LIB - value: 'litmus' - + value: 'litmus' + + - name: TARGET_PODS + value: '' + + # To select pods on specific node(s) + - name: NODE_LABEL + value: '' + + ## it defines the sequence of chaos execution for multiple target pods + ## supported values: serial, parallel + - name: SEQUENCE + value: 'parallel' + labels: - name: pod-autoscaler + name: pod-delete app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest +--- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Pod DNS Error injects dns failure/error in target pod containers + Injects memory consumption on pods belonging to an app deployment kind: ChaosExperiment metadata: - name: pod-dns-error + name: pod-memory-hog-exec labels: - name: pod-dns-error + name: pod-memory-hog-exec app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -1652,71 +1690,61 @@ spec: resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] image: "litmuschaos/go-runner:latest" + imagePullPolicy: Always args: - - -c - - ./experiments -name pod-dns-error + - -c + - ./experiments -name pod-memory-hog-exec command: - - /bin/bash + - /bin/bash env: - - name: TARGET_CONTAINER - value: "" - - # provide lib image - - name: LIB_IMAGE - value: "litmuschaos/go-runner:latest" - - name: TOTAL_CHAOS_DURATION - value: "60" # in seconds + value: '60' - # Time period to wait before and after injection of chaos in sec - - name: RAMP_TIME - value: "" + ## enter the amount of memory in megabytes to be consumed by the application pod + - name: MEMORY_CONSUMPTION + value: '500' ## percentage of total pods to target - name: PODS_AFFECTED_PERC - value: "" - - - name: TARGET_PODS - value: "" - - # provide the name of container runtime, it supports docker, containerd, crio - - name: CONTAINER_RUNTIME - value: "docker" + value: '' - # provide the socket file path - - name: SOCKET_PATH - value: "/var/run/docker.sock" + ## Period to wait before and after injection of chaos in sec + - name: RAMP_TIME + value: '' + ## env var that describes the library used to execute the chaos + ## default: litmus. Supported values: litmus + - name: LIB + value: 'litmus' + + # The command to kill the chaos process + - name: CHAOS_KILL_COMMAND + value: "kill $(find /proc -name exe -lname '*/dd' 2>&1 | grep -v 'Permission denied' | awk -F/ '{print $(NF-1)}' | head -n 1)" + ## it defines the sequence of chaos execution for multiple target pods ## supported values: serial, parallel - name: SEQUENCE - value: "parallel" - - # list of the target hostnames or kewywords eg. '["litmuschaos","chaosnative.io"]' . If empty all hostnames are targets - - name: TARGET_HOSTNAMES - value: "" + value: 'parallel' - # can be either exact or substring, determines whether the dns query has to match exactly with one of the targets or can have any of the targets as substring - - name: MATCH_SCHEME - value: "exact" + - name: TARGET_PODS + value: '' labels: - experiment: pod-dns-error + name: pod-memory-hog-exec app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Deletes a pod belonging to a deployment/statefulset/daemonset + Pod DNS Spoof can spoof particular dns requests in target pod container to desired target hostnames kind: ChaosExperiment metadata: - name: k8-pod-delete + name: pod-dns-spoof labels: - name: k8-pod-delete + name: pod-dns-spoof app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -1724,90 +1752,118 @@ spec: definition: scope: Namespaced permissions: - - apiGroups: - - "" - - "apps" - - "batch" - - "litmuschaos.io" - resources: - - "deployments" - - "jobs" - - "pods" - - "configmaps" - - "chaosengines" - - "chaosexperiments" - - "chaosresults" - verbs: - - "create" - - "list" - - "get" - - "patch" - - "update" - - "delete" - - apiGroups: - - "" - resources: - - "nodes" - verbs : - - "get" - - "list" - image: "litmuschaos/py-runner:latest" + # Create and monitor the experiment & helper pods + - apiGroups: [""] + resources: ["pods"] + verbs: ["create","delete","get","list","patch","update", "deletecollection"] + # Performs CRUD operations on the events inside chaosengine and chaosresult + - apiGroups: [""] + resources: ["events"] + verbs: ["create","get","list","patch","update"] + # Fetch configmaps details and mount it to the experiment pod (if specified) + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get","list",] + # Track and get the runner, experiment, and helper pods log + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get","list","watch"] + # for creating and managing to execute comands inside target container + - apiGroups: [""] + resources: ["pods/exec"] + verbs: ["get","list","create"] + # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) + - apiGroups: ["apps"] + resources: ["deployments","statefulsets","replicasets", "daemonsets"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: ["apps.openshift.io"] + resources: ["deploymentconfigs"] + verbs: ["list","get"] + # deriving the parent/owner details of the pod(if parent is deploymentConfig) + - apiGroups: [""] + resources: ["replicationcontrollers"] + verbs: ["get","list"] + # deriving the parent/owner details of the pod(if parent is argo-rollouts) + - apiGroups: ["argoproj.io"] + resources: ["rollouts"] + verbs: ["list","get"] + # for configuring and monitor the experiment job by the chaos-runner pod + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create","list","get","delete","deletecollection"] + # for creation, status polling and deletion of litmus chaos resources used within a chaos workflow + - apiGroups: ["litmuschaos.io"] + resources: ["chaosengines","chaosexperiments","chaosresults"] + verbs: ["create","list","get","patch","update","delete"] + image: "litmuschaos/go-runner:latest" args: - - -c - - python /litmus/byoc/chaostest/chaostest/kubernetes/k8_wrapper.py; exit 0 + - -c + - ./experiments -name pod-dns-spoof command: - - /bin/bash + - /bin/bash env: - - name: CHAOSTOOLKIT_IN_POD - value: 'true' + - name: TARGET_CONTAINER + value: "" - - name: FILE - value: 'pod-app-kill-count.json' + # provide lib image + - name: LIB_IMAGE + value: "litmuschaos/go-runner:latest" - - name: NAME_SPACE - value: '' + - name: TOTAL_CHAOS_DURATION + value: "60" # in seconds - - name: LABEL_NAME - value: '' + # Time period to wait before and after injection of chaos in sec + - name: RAMP_TIME + value: "" - - name: APP_ENDPOINT - value: '' + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: "" - - name: PERCENTAGE - value: '50' + - name: TARGET_PODS + value: "" - - name: REPORT - value: 'true' + # provide the name of container runtime, it supports docker, containerd, crio + - name: CONTAINER_RUNTIME + value: "docker" - - name: REPORT_ENDPOINT - value: 'none' - - - name: TEST_NAMESPACE - value: 'default' + # provide the socket file path + - name: SOCKET_PATH + value: "/var/run/docker.sock" + + ## it defines the sequence of chaos execution for multiple target pods + ## supported values: serial, parallel + - name: SEQUENCE + value: "parallel" + # map of the target hostnames eg. '{"abc.com":"spoofabc.com"}' . If empty no queries will be spoofed + - name: SPOOF_MAP + value: "" labels: - name: k8-pod-delete + experiment: pod-dns-spoof app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job + app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Injects network latency on pods belonging to an app deployment + Give IO disk stress on a node belonging to a deployment kind: ChaosExperiment metadata: - name: pod-network-latency + name: node-io-stress labels: - name: pod-network-latency + name: node-io-stress app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Namespaced + scope: Cluster permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -1829,22 +1885,6 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] - # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) - - apiGroups: ["apps"] - resources: ["deployments","statefulsets","replicasets", "daemonsets"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: ["apps.openshift.io"] - resources: ["deploymentconfigs"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: [""] - resources: ["replicationcontrollers"] - verbs: ["get","list"] - # deriving the parent/owner details of the pod(if parent is argo-rollouts) - - apiGroups: ["argoproj.io"] - resources: ["rollouts"] - verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -1853,98 +1893,89 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name pod-network-latency + - ./experiments -name node-io-stress command: - /bin/bash env: - - - name: TARGET_CONTAINER - value: '' - - - name: NETWORK_INTERFACE - value: 'eth0' - - # provide lib image - - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' - - - name: TC_IMAGE - value: 'gaiadocker/iproute2' - - - name: NETWORK_LATENCY - value: '2000' #in ms - name: TOTAL_CHAOS_DURATION - value: '60' # in seconds + value: '120' - # Time period to wait before and after injection of chaos in sec - - name: RAMP_TIME + ## specify the size as percentage of free space on the file system + ## default value 90 (in percentage) + - name: FILESYSTEM_UTILIZATION_PERCENTAGE + value: '10' + + ## we can specify the size in Gigabyte (Gb) also in place of percentage of free space + ## NOTE: for selecting this option FILESYSTEM_UTILIZATION_PERCENTAGE should be empty + - name: FILESYSTEM_UTILIZATION_BYTES value: '' - - name: JITTER - value: '0' + ## Number of core of CPU + - name: CPU + value: '1' - # lib can be litmus or pumba - - name: LIB - value: 'litmus' + ## Total number of workers default value is 4 + - name: NUMBER_OF_WORKERS + value: '4' - ## percentage of total pods to target - - name: PODS_AFFECTED_PERC - value: '' + ## Total number of vm workers + - name: VM_WORKERS + value: '1' - - name: TARGET_PODS + ## enter the comma separated target nodes name + - name: TARGET_NODES value: '' - # provide the name of container runtime - # for litmus LIB, it supports docker, containerd, crio - # for pumba LIB, it supports docker only - - name: CONTAINER_RUNTIME - value: 'docker' + - name: NODE_LABEL + value: '' - # provide the destination ips - # chaos injection will be triggered for these destination ips - - name: DESTINATION_IPS + # Period to wait before and after injection of chaos in sec + - name: RAMP_TIME value: '' - # provide the destination hosts - # chaos injection will be triggered for these destination hosts - - name: DESTINATION_HOSTS - value: '' + # Provide the LIB here + # Only litmus supported + - name: LIB + value: 'litmus' - # provide the socket file path - - name: SOCKET_PATH - value: '/var/run/docker.sock' + # provide lib image + - name: LIB_IMAGE + value: 'litmuschaos/go-runner:latest' - # To select pods on specific node(s) - - name: NODE_LABEL + ## percentage of total nodes to target + - name: NODES_AFFECTED_PERC value: '' - ## it defines the sequence of chaos execution for multiple target pods + ## it defines the sequence of chaos execution for multiple target nodes ## supported values: serial, parallel - name: SEQUENCE value: 'parallel' - + labels: - name: pod-network-latency + name: node-io-stress app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Inject network packet corruption into application pod + Injects network packet duplication on pods belonging to an app deployment kind: ChaosExperiment metadata: - name: pod-network-corruption + name: pod-network-duplication labels: - name: pod-network-corruption + name: pod-network-duplication app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -2000,49 +2031,46 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name pod-network-corruption + - ./experiments -name pod-network-duplication command: - /bin/bash env: - + - name: TOTAL_CHAOS_DURATION + value: '60' + + - name: RAMP_TIME + value: '' + - name: TARGET_CONTAINER value: '' - # provide lib image - - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' + - name: TC_IMAGE + value: 'gaiadocker/iproute2' - name: NETWORK_INTERFACE value: 'eth0' - - name: TC_IMAGE - value: 'gaiadocker/iproute2' + - name: NETWORK_PACKET_DUPLICATION_PERCENTAGE + value: '100' # in percentage - - name: NETWORK_PACKET_CORRUPTION_PERCENTAGE - value: '100' #in PERCENTAGE - - - name: TOTAL_CHAOS_DURATION - value: '60' # in seconds - - # Time period to wait before and after injection of chaos in sec - - name: RAMP_TIME - value: '' - # lib can be litmus or pumba - name: LIB - value: 'litmus' - - ## percentage of total pods to target - - name: PODS_AFFECTED_PERC - value: '' + value: 'litmus' - name: TARGET_PODS - value: '' + value: '' # To select pods on specific node(s) - name: NODE_LABEL + value: '' + + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC value: '' + - name: LIB_IMAGE + value: 'litmuschaos/go-runner:latest' + # provide the name of container runtime # for litmus LIB, it supports docker, containerd, crio # for pumba LIB, it supports docker only @@ -2067,9 +2095,9 @@ spec: ## supported values: serial, parallel - name: SEQUENCE value: 'parallel' - + labels: - name: pod-network-corruption + name: pod-network-duplication app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/runtime-api-usage: "true" @@ -2082,9 +2110,9 @@ description: Injects cpu consumption on pods belonging to an app deployment kind: ChaosExperiment metadata: - name: pod-cpu-hog + name: pod-cpu-hog-exec labels: - name: pod-cpu-hog + name: pod-cpu-hog-exec app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -2140,7 +2168,7 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name pod-cpu-hog + - ./experiments -name pod-cpu-hog-exec command: - /bin/bash env: @@ -2149,12 +2177,7 @@ spec: ## Number of CPU cores to stress - name: CPU_CORES - value: '1' - - ## LOAD CPU WITH GIVEN PERCENT LOADING FOR THE CPU STRESS WORKERS. - ## 0 IS EFFECTIVELY A SLEEP (NO LOAD) AND 100 IS FULL LOADING - - name: CPU_LOAD - value: '100' + value: '1' ## Percentage of total pods to target - name: PODS_AFFECTED_PERC @@ -2165,61 +2188,44 @@ spec: value: '' ## env var that describes the library used to execute the chaos - ## default: litmus. Supported values: litmus, pumba + ## default: litmus. Supported values: litmus - name: LIB value: 'litmus' - - ## It is used in pumba lib only - - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' - - ## It is used in pumba lib only - - name: STRESS_IMAGE - value: 'alexeiled/stress-ng:latest-ubuntu' - - ## provide the cluster runtime - - name: CONTAINER_RUNTIME - value: 'docker' - - # provide the socket file path - - name: SOCKET_PATH - value: '/var/run/docker.sock' + + # The command to kill the chaos process + - name: CHAOS_KILL_COMMAND + value: "kill $(find /proc -name exe -lname '*/md5sum' 2>&1 | grep -v 'Permission denied' | awk -F/ '{print $(NF-1)}')" - name: TARGET_PODS value: '' - # To select pods on specific node(s) - - name: NODE_LABEL - value: '' - ## it defines the sequence of chaos execution for multiple target pods ## supported values: serial, parallel - name: SEQUENCE value: 'parallel' labels: - name: pod-cpu-hog + name: pod-cpu-hog-exec app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Injects network packet duplication on pods belonging to an app deployment + poweroff node kind: ChaosExperiment metadata: - name: pod-network-duplication + name: node-poweroff labels: - name: pod-network-duplication + name: node-poweroff app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Namespaced + scope: Cluster permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -2229,9 +2235,9 @@ spec: - apiGroups: [""] resources: ["events"] verbs: ["create","get","list","patch","update"] - # Fetch configmaps details and mount it to the experiment pod (if specified) + # Fetch configmaps & secrets details and mount it to the experiment pod (if specified) - apiGroups: [""] - resources: ["configmaps"] + resources: ["configmaps","secrets"] verbs: ["get","list",] # Track and get the runner, experiment, and helper pods log - apiGroups: [""] @@ -2241,22 +2247,6 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] - # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) - - apiGroups: ["apps"] - resources: ["deployments","statefulsets","replicasets", "daemonsets"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: ["apps.openshift.io"] - resources: ["deploymentconfigs"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: [""] - resources: ["replicationcontrollers"] - verbs: ["get","list"] - # deriving the parent/owner details of the pod(if parent is argo-rollouts) - - apiGroups: ["argoproj.io"] - resources: ["rollouts"] - verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -2265,92 +2255,71 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name pod-network-duplication + - ./experiments -name node-restart command: - /bin/bash env: + - name: SSH_USER + value: 'root' + - name: TOTAL_CHAOS_DURATION value: '60' - - name: RAMP_TIME - value: '' + - name: REBOOT_COMMAND + value: '-o ServerAliveInterval=1 -o ServerAliveCountMax=1 "sudo systemctl poweroff --force --force" ; true' - - name: TARGET_CONTAINER + # Period to wait before and after injection of chaos in sec + - name: RAMP_TIME value: '' - - name: TC_IMAGE - value: 'gaiadocker/iproute2' - - - name: NETWORK_INTERFACE - value: 'eth0' - - - name: NETWORK_PACKET_DUPLICATION_PERCENTAGE - value: '100' # in percentage - - # lib can be litmus or pumba + # PROVIDE THE LIB HERE + # ONLY LITMUS SUPPORTED - name: LIB - value: 'litmus' - - - name: TARGET_PODS - value: '' - - # To select pods on specific node(s) - - name: NODE_LABEL - value: '' - - ## percentage of total pods to target - - name: PODS_AFFECTED_PERC - value: '' + value: 'litmus' + # provide lib image - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' - - # provide the name of container runtime - # for litmus LIB, it supports docker, containerd, crio - # for pumba LIB, it supports docker only - - name: CONTAINER_RUNTIME - value: 'docker' + value: "litmuschaos/go-runner:latest" - # provide the destination ips - # chaos injection will be triggered for these destination ips - - name: DESTINATION_IPS + # ENTER THE TARGET NODE NAME + - name: TARGET_NODE value: '' - # provide the destination hosts - # chaos injection will be triggered for these destination hosts - - name: DESTINATION_HOSTS + - name: NODE_LABEL value: '' - # provide the socket file path - - name: SOCKET_PATH - value: '/var/run/docker.sock' - - ## it defines the sequence of chaos execution for multiple target pods - ## supported values: serial, parallel - - name: SEQUENCE - value: 'parallel' + # ENTER THE TARGET NODE IP + - name: TARGET_NODE_IP + value: '' labels: - name: pod-network-duplication + name: node-poweroff app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest + secrets: + - name: id-rsa + mountPath: /mnt/ +--- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Kills the docker service on the application node to check the resiliency. + Drain the node where application pod is scheduled kind: ChaosExperiment metadata: - name: docker-service-kill + name: node-drain labels: - name: docker-service-kill + name: node-drain app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -2376,8 +2345,12 @@ spec: verbs: ["get","list","watch"] # for creating and managing to execute comands inside target container - apiGroups: [""] - resources: ["pods/exec"] + resources: ["pods/exec","pods/eviction"] verbs: ["get","list","create"] + # ignore daemonsets while draining the node + - apiGroups: ["apps"] + resources: ["daemonsets"] + verbs: ["list","get","delete"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -2389,61 +2362,142 @@ spec: # for experiment to perform node status checks - apiGroups: [""] resources: ["nodes"] - verbs: ["get","list"] + verbs: ["get","list","patch"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name docker-service-kill + - ./experiments -name node-drain command: - /bin/bash env: - - name: TOTAL_CHAOS_DURATION - value: '90' # in seconds + - name: TARGET_NODE + value: '' - # Period to wait before injection of chaos in sec - - name: RAMP_TIME + - name: NODE_LABEL value: '' + - name: TOTAL_CHAOS_DURATION + value: '60' + + # Provide the LIB here + # Only litmus supported - name: LIB value: 'litmus' - - name: NODE_LABEL + # Period to wait before and after injection of chaos in sec + - name: RAMP_TIME value: '' + + labels: + name: node-drain + app.kubernetes.io/part-of: litmus + app.kubernetes.io/component: experiment-job + app.kubernetes.io/version: latest - # provide lib image - - name: LIB_IMAGE - value: 'ubuntu:16.04' - - # provide the target node name - - name: TARGET_NODE +--- +apiVersion: litmuschaos.io/v1alpha1 +description: + message: | + Deletes a pod belonging to a deployment/statefulset/daemonset +kind: ChaosExperiment +metadata: + name: k8-pod-delete + labels: + name: k8-pod-delete + app.kubernetes.io/part-of: litmus + app.kubernetes.io/component: chaosexperiment + app.kubernetes.io/version: latest +spec: + definition: + scope: Namespaced + permissions: + - apiGroups: + - "" + - "apps" + - "batch" + - "litmuschaos.io" + resources: + - "deployments" + - "jobs" + - "pods" + - "configmaps" + - "chaosengines" + - "chaosexperiments" + - "chaosresults" + verbs: + - "create" + - "list" + - "get" + - "patch" + - "update" + - "delete" + - apiGroups: + - "" + resources: + - "nodes" + verbs : + - "get" + - "list" + image: "litmuschaos/py-runner:latest" + args: + - -c + - python /litmus/byoc/chaostest/chaostest/kubernetes/k8_wrapper.py; exit 0 + command: + - /bin/bash + env: + - name: CHAOSTOOLKIT_IN_POD + value: 'true' + + - name: FILE + value: 'pod-app-kill-count.json' + + - name: NAME_SPACE + value: '' + + - name: LABEL_NAME + value: '' + + - name: APP_ENDPOINT value: '' + - name: PERCENTAGE + value: '50' + + - name: REPORT + value: 'true' + + - name: REPORT_ENDPOINT + value: 'none' + + - name: TEST_NAMESPACE + value: 'default' + + labels: - name: docker-service-kill + name: k8-pod-delete app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/service-kill: "true" app.kubernetes.io/version: latest - +--- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Deletes a pod belonging to a deployment/statefulset/daemonset + Taint the node where application pod is scheduled kind: ChaosExperiment metadata: - name: pod-delete + name: node-taint labels: - name: pod-delete + name: node-taint app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Namespaced + scope: Cluster permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -2463,24 +2517,12 @@ spec: verbs: ["get","list","watch"] # for creating and managing to execute comands inside target container - apiGroups: [""] - resources: ["pods/exec"] + resources: ["pods/exec","pods/eviction"] verbs: ["get","list","create"] - # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) + # ignore daemonsets while draining the node - apiGroups: ["apps"] - resources: ["deployments","statefulsets","replicasets", "daemonsets"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: ["apps.openshift.io"] - resources: ["deploymentconfigs"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: [""] - resources: ["replicationcontrollers"] - verbs: ["get","list"] - # deriving the parent/owner details of the pod(if parent is argo-rollouts) - - apiGroups: ["argoproj.io"] - resources: ["rollouts"] - verbs: ["list","get"] + resources: ["daemonsets"] + verbs: ["list","get","delete"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -2489,49 +2531,44 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list","patch","update"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name pod-delete + - ./experiments -name node-taint command: - /bin/bash env: - - name: TOTAL_CHAOS_DURATION - value: '15' - - # Period to wait before and after injection of chaos in sec - - name: RAMP_TIME + - name: TARGET_NODE value: '' - - name: FORCE - value: 'true' - - - name: CHAOS_INTERVAL - value: '5' - - ## percentage of total pods to target - - name: PODS_AFFECTED_PERC + - name: NODE_LABEL value: '' + - name: TOTAL_CHAOS_DURATION + value: '60' + + # Provide the LIB here + # Only litmus supported - name: LIB - value: 'litmus' + value: 'litmus' - - name: TARGET_PODS + # Period to wait before and after injection of chaos in sec + - name: RAMP_TIME value: '' - # To select pods on specific node(s) - - name: NODE_LABEL - value: '' + # set taint label & effect + # key=value:effect or key:effect + - name: TAINTS + value: '' - ## it defines the sequence of chaos execution for multiple target pods - ## supported values: serial, parallel - - name: SEQUENCE - value: 'parallel' - labels: - name: pod-delete + name: node-taint app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest @@ -2540,18 +2577,18 @@ spec: apiVersion: litmuschaos.io/v1alpha1 description: message: | - Pod DNS Spoof can spoof particular dns requests in target pod container to desired target hostnames + Give a memory hog on a node belonging to a deployment kind: ChaosExperiment metadata: - name: pod-dns-spoof + name: node-memory-hog labels: - name: pod-dns-spoof + name: node-memory-hog app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Namespaced + scope: Cluster permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -2573,22 +2610,6 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] - # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) - - apiGroups: ["apps"] - resources: ["deployments","statefulsets","replicasets", "daemonsets"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: ["apps.openshift.io"] - resources: ["deploymentconfigs"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: [""] - resources: ["replicationcontrollers"] - verbs: ["get","list"] - # deriving the parent/owner details of the pod(if parent is argo-rollouts) - - apiGroups: ["argoproj.io"] - resources: ["rollouts"] - verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -2597,56 +2618,68 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] image: "litmuschaos/go-runner:latest" + imagePullPolicy: Always args: - - -c - - ./experiments -name pod-dns-spoof + - -c + - ./experiments -name node-memory-hog command: - - /bin/bash + - /bin/bash env: - - name: TARGET_CONTAINER - value: "" - # provide lib image - - name: LIB_IMAGE - value: "litmuschaos/go-runner:latest" + - name: TOTAL_CHAOS_DURATION + value: '120' - - name: TOTAL_CHAOS_DURATION - value: "60" # in seconds + ## Specify the size as percent of total node capacity Ex: '30' + ## NOTE: for selecting this option keep MEMORY_CONSUMPTION_MEBIBYTES empty + - name: MEMORY_CONSUMPTION_PERCENTAGE + value: '' + + ## Specify the amount of memory to be consumed in mebibytes + ## NOTE: for selecting this option keep MEMORY_CONSUMPTION_PERCENTAGE empty + - name: MEMORY_CONSUMPTION_MEBIBYTES + value: '' - # Time period to wait before and after injection of chaos in sec - - name: RAMP_TIME - value: "" + - name: NUMBER_OF_WORKERS + value: '1' - ## percentage of total pods to target - - name: PODS_AFFECTED_PERC - value: "" + # ENTER THE COMMA SEPARATED TARGET NODES NAME + - name: TARGET_NODES + value: '' - - name: TARGET_PODS - value: "" + - name: NODE_LABEL + value: '' - # provide the name of container runtime, it supports docker, containerd, crio - - name: CONTAINER_RUNTIME - value: "docker" + # Period to wait before and after injection of chaos in sec + - name: RAMP_TIME + value: '' - # provide the socket file path - - name: SOCKET_PATH - value: "/var/run/docker.sock" + # Provide the LIB here + # Only litmus supported + - name: LIB + value: 'litmus' - ## it defines the sequence of chaos execution for multiple target pods - ## supported values: serial, parallel - - name: SEQUENCE - value: "parallel" + # provide lib image + - name: LIB_IMAGE + value: 'litmuschaos/go-runner:latest' - # map of the target hostnames eg. '{"abc.com":"spoofabc.com"}' . If empty no queries will be spoofed - - name: SPOOF_MAP - value: "" + ## percentage of total nodes to target + - name: NODES_AFFECTED_PERC + value: '' + ## it defines the sequence of chaos execution for multiple target nodes + ## supported values: serial, parallel + - name: SEQUENCE + value: 'parallel' + labels: - experiment: pod-dns-spoof + name: node-memory-hog app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/runtime-api-usage: "true" app.kubernetes.io/version: latest --- @@ -2654,12 +2687,12 @@ spec: apiVersion: litmuschaos.io/v1alpha1 description: message: | - Injects memory consumption on pods belonging to an app deployment + IO stress on a app pods belonging to an app deployment kind: ChaosExperiment metadata: - name: pod-memory-hog + name: pod-io-stress labels: - name: pod-memory-hog + name: pod-io-stress app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -2715,64 +2748,71 @@ spec: imagePullPolicy: Always args: - -c - - ./experiments -name pod-memory-hog + - ./experiments -name pod-io-stress command: - /bin/bash env: - name: TOTAL_CHAOS_DURATION - value: '60' + value: '120' - ## enter the amount of memory in megabytes to be consumed by the application pod - - name: MEMORY_CONSUMPTION - value: '500' - - ## Number of workers to perform stress + ## specify the size as percentage of free space on the file system + ## default value 90 (in percentage) + - name: FILESYSTEM_UTILIZATION_PERCENTAGE + value: '10' + + ## we can specify the size in Gigabyte (Gb) also in place of percentage of free space + ## NOTE: for selecting this option FILESYSTEM_UTILIZATION_PERCENTAGE should be empty + - name: FILESYSTEM_UTILIZATION_BYTES + value: '' + + ## Total number of workers default value is 4 - name: NUMBER_OF_WORKERS - value: '1' + value: '4' - ## percentage of total pods to target + ## Percentage of total pods to target - name: PODS_AFFECTED_PERC + value: '' + + # provide volume mount path + - name: VOLUME_MOUNT_PATH value: '' - ## Period to wait before and after injection of chaos in sec + ## specify the comma separated target pods + - name: TARGET_PODS + value: '' + + # To select pods on specific node(s) + - name: NODE_LABEL + value: '' + + # Period to wait before and after injection of chaos in sec - name: RAMP_TIME - value: '' + value: '' - ## env var that describes the library used to execute the chaos - ## default: litmus. Supported values: litmus, pumba + # Provide the LIB here + # support litmus and pumba - name: LIB value: 'litmus' - ## It is used in pumba lib only + # provide lib image - name: LIB_IMAGE value: 'litmuschaos/go-runner:latest' - ## It is used in pumba lib only - - name: STRESS_IMAGE - value: 'alexeiled/stress-ng:latest-ubuntu' - ## provide the cluster runtime - name: CONTAINER_RUNTIME value: 'docker' # provide the socket file path - name: SOCKET_PATH - value: '/var/run/docker.sock' - + value: '/var/run/docker.sock' + ## it defines the sequence of chaos execution for multiple target pods ## supported values: serial, parallel - name: SEQUENCE value: 'parallel' - - name: TARGET_PODS - value: '' - - # To select pods on specific node(s) - - name: NODE_LABEL - value: '' - labels: - name: pod-memory-hog + name: pod-io-stress app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/runtime-api-usage: "true" @@ -2781,18 +2821,19 @@ spec: --- apiVersion: litmuschaos.io/v1alpha1 description: - message: "Kills a container belonging to an application pod \n" + message: | + Kills the docker service on the application node to check the resiliency. kind: ChaosExperiment metadata: - name: container-kill + name: docker-service-kill labels: - name: container-kill + name: docker-service-kill app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest spec: definition: - scope: Namespaced + scope: Cluster permissions: # Create and monitor the experiment & helper pods - apiGroups: [""] @@ -2814,22 +2855,6 @@ spec: - apiGroups: [""] resources: ["pods/exec"] verbs: ["get","list","create"] - # deriving the parent/owner details of the pod(if parent is anyof {deployment, statefulset, daemonsets}) - - apiGroups: ["apps"] - resources: ["deployments","statefulsets","replicasets", "daemonsets"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: ["apps.openshift.io"] - resources: ["deploymentconfigs"] - verbs: ["list","get"] - # deriving the parent/owner details of the pod(if parent is deploymentConfig) - - apiGroups: [""] - resources: ["replicationcontrollers"] - verbs: ["get","list"] - # deriving the parent/owner details of the pod(if parent is argo-rollouts) - - apiGroups: ["argoproj.io"] - resources: ["rollouts"] - verbs: ["list","get"] # for configuring and monitor the experiment job by the chaos-runner pod - apiGroups: ["batch"] resources: ["jobs"] @@ -2838,71 +2863,46 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name container-kill + - ./experiments -name docker-service-kill command: - /bin/bash env: + + - name: TOTAL_CHAOS_DURATION + value: '90' # in seconds - - name: TARGET_CONTAINER - value: '' - - # Period to wait before and after injection of chaos in sec + # Period to wait before injection of chaos in sec - name: RAMP_TIME value: '' - # lib can be litmus or pumba - name: LIB value: 'litmus' - - - name: TARGET_PODS - value: '' - - # provide the chaos interval - - name: CHAOS_INTERVAL - value: '10' - - - name: SIGNAL - value: 'SIGKILL' - - # provide the socket file path - - name: SOCKET_PATH - value: '/var/run/docker.sock' - - # provide the name of container runtime - # for litmus LIB, it supports docker, containerd, crio - # for pumba LIB, it supports docker only - - name: CONTAINER_RUNTIME - value: 'docker' - - # provide the total chaos duration - - name: TOTAL_CHAOS_DURATION - value: '20' - - ## percentage of total pods to target - - name: PODS_AFFECTED_PERC - value: '' - # To select pods on specific node(s) - name: NODE_LABEL value: '' - - name: LIB_IMAGE - value: 'litmuschaos/go-runner:latest' - - ## it defines the sequence of chaos execution for multiple target pods - ## supported values: serial, parallel - - name: SEQUENCE - value: 'parallel' + # provide lib image + - name: LIB_IMAGE + value: 'ubuntu:16.04' + + # provide the target node name + - name: TARGET_NODE + value: '' labels: - name: container-kill + name: docker-service-kill app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job - app.kubernetes.io/runtime-api-usage: "true" + app.kubernetes.io/service-kill: "true" app.kubernetes.io/version: latest + --- diff --git a/charts/kube-aws/experiments.yaml b/charts/kube-aws/experiments.yaml index 2a3561cb8..c2ba0ceaf 100644 --- a/charts/kube-aws/experiments.yaml +++ b/charts/kube-aws/experiments.yaml @@ -1,12 +1,12 @@ apiVersion: litmuschaos.io/v1alpha1 description: message: | - Detaching an ebs volume from ec2 instance. + Stopping an EC2 instance identified by tag. kind: ChaosExperiment metadata: - name: ebs-loss-by-id + name: ec2-terminate-by-tag labels: - name: ebs-loss-by-id + name: ec2-terminate-by-tag app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -42,11 +42,15 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] + # for experiment to perform node status checks + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name ebs-loss-by-id + - ./experiments -name ec2-terminate-by-tag command: - /bin/bash env: @@ -60,16 +64,24 @@ spec: - name: RAMP_TIME value: '' - - name: EBS_VOLUME_ID + - name: INSTANCE_TAG value: '' + # enable it if the target instance is a part of self-managed nodegroup. + - name: MANAGED_NODEGROUP + value: 'disable' + - name: REGION value: '' + # Target the percentage of instance filtered from tag + - name: INSTANCE_AFFECTED_PERC + value: '' + - name: SEQUENCE value: 'parallel' - # Provide the path of aws credentials mounted from secret + # Provide the path of aws credentials mounted from secret - name: AWS_SHARED_CREDENTIALS_FILE value: '/tmp/cloud_config.yml' @@ -79,7 +91,7 @@ spec: value: 'litmus' labels: - name: ebs-loss-by-id + name: ec2-terminate-by-tag app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest @@ -191,12 +203,12 @@ spec: apiVersion: litmuschaos.io/v1alpha1 description: message: | - Stopping an EC2 instance identified by tag. + Detaching an ebs volume from ec2 instance. kind: ChaosExperiment metadata: - name: ec2-terminate-by-tag + name: ebs-loss-by-id labels: - name: ec2-terminate-by-tag + name: ebs-loss-by-id app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest @@ -232,15 +244,11 @@ spec: - apiGroups: ["litmuschaos.io"] resources: ["chaosengines","chaosexperiments","chaosresults"] verbs: ["create","list","get","patch","update","delete"] - # for experiment to perform node status checks - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get","list"] image: "litmuschaos/go-runner:latest" imagePullPolicy: Always args: - -c - - ./experiments -name ec2-terminate-by-tag + - ./experiments -name ebs-loss-by-id command: - /bin/bash env: @@ -254,24 +262,16 @@ spec: - name: RAMP_TIME value: '' - - name: INSTANCE_TAG + - name: EBS_VOLUME_ID value: '' - # enable it if the target instance is a part of self-managed nodegroup. - - name: MANAGED_NODEGROUP - value: 'disable' - - name: REGION value: '' - # Target the percentage of instance filtered from tag - - name: INSTANCE_AFFECTED_PERC - value: '' - - name: SEQUENCE value: 'parallel' - # Provide the path of aws credentials mounted from secret + # Provide the path of aws credentials mounted from secret - name: AWS_SHARED_CREDENTIALS_FILE value: '/tmp/cloud_config.yml' @@ -281,7 +281,7 @@ spec: value: 'litmus' labels: - name: ec2-terminate-by-tag + name: ebs-loss-by-id app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest diff --git a/charts/openebs/experiments.yaml b/charts/openebs/experiments.yaml index cdd52fc7f..718e4711a 100644 --- a/charts/openebs/experiments.yaml +++ b/charts/openebs/experiments.yaml @@ -1,46 +1,42 @@ apiVersion: litmuschaos.io/v1alpha1 description: message: | - Network loss to pool pod belonging to a OpenEBS PVC + Network loss to target pod belonging to a OpenEBS PVC kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-pool-network-loss + name: openebs-target-network-loss app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-pool-network-loss + name: openebs-target-network-loss spec: definition: scope: Cluster permissions: - apiGroups: - "" + - "extensions" - "apps" - - "litmuschaos.io" - "batch" - - "extensions" + - "litmuschaos.io" - "storage.k8s.io" - - "openebs.io" resources: + - "jobs" - "pods" + - "events" + - "services" - "pods/log" - "pods/exec" - - "events" - - "jobs" - "configmaps" - - "services" + - "secrets" - "persistentvolumeclaims" - "storageclasses" - - "persistentvolumeclaims" - "persistentvolumes" - "chaosengines" - "chaosexperiments" - "chaosresults" - - "cstorpools" - - "cstorvolumereplicas" - - "replicasets" verbs: - "create" - "get" @@ -52,7 +48,7 @@ spec: imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-pool-network-loss/openebs_pool_network_loss_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-target-network-loss/openebs_target_network_loss_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: @@ -62,14 +58,17 @@ spec: - name: OPENEBS_NAMESPACE value: 'openebs' + - name: APP_PVC + value: '' + + - name: TC_IMAGE + value: 'gaiadocker/iproute2' + # only pumba supported # For pumba image use : gaiaadm/pumba:0.6.5 - name: LIB_IMAGE value: 'gaiaadm/pumba:0.6.5' - - name: TC_IMAGE - value: 'gaiadocker/iproute2' - - name: NETWORK_PACKET_LOSS_PERCENTAGE value: '100' # in percentage @@ -86,26 +85,29 @@ spec: value: '' labels: - name: openebs-pool-network-loss + name: openebs-target-network-loss app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest + #configmaps: + #- name: openebs-target-network-loss + # mountPath: /mnt +--- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Network delay to pool pod belonging to a OpenEBS PVC - This experiment is using pumba lib for network chaos + Kill the OpenEBS NFS provisioner container and check if pods consuming the NFS PVs continue to be available and volumes are writable (RWM mode) kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-pool-network-delay + name: openebs-nfs-provisioner-kill app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-pool-network-delay + name: openebs-nfs-provisioner-kill spec: definition: scope: Cluster @@ -117,89 +119,97 @@ spec: - "batch" - "extensions" - "storage.k8s.io" - - "openebs.io" resources: - "pods" - "pods/exec" - - "jobs" - "pods/log" + - "deployments" - "events" + - "jobs" - "configmaps" - "services" - "persistentvolumeclaims" - "storageclasses" - "persistentvolumes" - - "chaosengines" - "chaosexperiments" - "chaosresults" - - "cstorpools" - - "cstorvolumereplicas" - - "replicasets" + - "chaosengines" verbs: - "create" - - "get" - "list" + - "get" - "patch" - "update" - "delete" + - apiGroups: + - "" + resources: + - "nodes" + verbs: + - "get" + - "list" + image: "litmuschaos/ansible-runner:latest" imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-pool-network-delay/openebs_pool_network_delay_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-nfs-provisioner-kill/openebs_nfs_provisioner_kill_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: - name: ANSIBLE_STDOUT_CALLBACK value: 'default' - - name: OPENEBS_NAMESPACE - value: 'openebs' + # NFS default container + - name: TARGET_CONTAINER + value: 'nfs-provisioner' - # only pumba supported - # For pumba image use : gaiaadm/pumba:0.6.5 + # Period to wait before injection of chaos in sec + - name: RAMP_TIME + value: '' + + # It supports pumba and containerd + - name: LIB + value: 'pumba' + + # LIB_IMAGE can be - gaiaadm/pumba:0.6.5, gprasath/crictl:ci + # For pumba image use: gaiaadm/pumba:0.6.5 + # For containerd image use: gprasath/crictl:ci - name: LIB_IMAGE value: 'gaiaadm/pumba:0.6.5' - # in milliseconds - - name: NETWORK_DELAY - value: '60000' - - - name: TC_IMAGE - value: 'gaiadocker/iproute2' + # provide the chaos interval + - name: CHAOS_INTERVAL + value: '10' + # provide the total chaos duration - name: TOTAL_CHAOS_DURATION - value: '60' # in seconds - - - name: LIVENESS_APP_LABEL - value: '' - - - name: LIVENESS_APP_NAMESPACE - value: '' - - - name: DATA_PERSISTENCE - value: '' + value: '20' labels: - name: openebs-pool-network-delay - app.kubernetes.io/part-of: litmus + name: openebs-nfs-provisioner-kill + app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest + configmaps: + - name: openebs-nfs-provisioner-kill + mountPath: /mnt/ +--- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Network delay to target pod belonging to a deployment/statefulset/daemonset + Kill the cstor target/Jiva controller pod and check if gets created again kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-target-network-delay + name: openebs-target-pod-failure app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-target-network-delay + name: openebs-target-pod-failure spec: definition: scope: Cluster @@ -212,20 +222,21 @@ spec: - "litmuschaos.io" - "storage.k8s.io" resources: + - "deployments" - "jobs" - - "pods" - - "services" - "events" - - "pods/exec" + - "pods" - "pods/log" + - "pods/exec" - "configmaps" - "secrets" - - "persistentvolumeclaims" - - "storageclasses" - - "persistentvolumes" + - "services" - "chaosengines" - "chaosexperiments" - "chaosresults" + - "persistentvolumeclaims" + - "storageclasses" + - "persistentvolumes" verbs: - "create" - "get" @@ -233,11 +244,18 @@ spec: - "list" - "patch" - "update" + - apiGroups: + - "" + resources: + - "nodes" + verbs: + - "get" + - "list" image: "litmuschaos/ansible-runner:latest" imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-target-network-delay/openebs_target_network_delay_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-target-pod-failure/openebs_target_pod_failure_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: @@ -246,24 +264,13 @@ spec: - name: OPENEBS_NAMESPACE value: 'openebs' - + - name: APP_PVC value: '' - - name: TC_IMAGE - value: 'gaiadocker/iproute2' - - # only pumba supported - # For pumba image use : gaiaadm/pumba:0.6.5 - - name: LIB_IMAGE - value: 'gaiaadm/pumba:0.6.5' - - - name: NETWORK_DELAY - value: '60000' # in milliseconds + - name: FORCE + value: 'true' - - name: TOTAL_CHAOS_DURATION - value: '60' # in seconds - - name: LIVENESS_APP_LABEL value: '' @@ -271,15 +278,28 @@ spec: value: '' - name: DATA_PERSISTENCE - value: '' + value: '' + + - name: TOTAL_CHAOS_DURATION + value: '60' + + # provide the kill count + - name: KILL_COUNT + value: '' + + - name: CHAOS_INTERVAL + value: '15' + + - name: DEPLOY_TYPE + value: 'deployment' labels: - name: openebs-target-network-delay + name: openebs-target-pod-failure app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest #configmaps: - #- name: openebs-target-network-delay + #- name: openebs-target-pod-failure # mountPath: /mnt --- @@ -287,16 +307,16 @@ spec: apiVersion: litmuschaos.io/v1alpha1 description: message: | - Kill the pool pod and check if gets scheduled again + Kill the pool container and check if gets scheduled again kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-pool-pod-failure + name: openebs-pool-container-failure app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-pool-pod-failure + name: openebs-pool-container-failure spec: definition: scope: Cluster @@ -310,15 +330,14 @@ spec: - "openebs.io" - "storage.k8s.io" resources: - - "deployments" - "replicasets" - - "jobs" - - "pods/log" - "events" + - "jobs" - "pods" + - "pods/log" + - "pods/exec" - "configmaps" - "secrets" - - "storageclasses" - "persistentvolumeclaims" - "cstorvolumereplicas" - "chaosengines" @@ -331,23 +350,16 @@ spec: - "list" - "patch" - "update" - - apiGroups: - - "" - resources: - - "nodes" - verbs: - - "get" - - "list" image: "litmuschaos/ansible-runner:latest" imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-pool-pod-failure/openebs_pool_pod_failure_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-pool-container-failure/openebs_pool_container_failure_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: - name: ANSIBLE_STDOUT_CALLBACK - value: default + value: 'default' - name: OPENEBS_NS value: 'openebs' @@ -358,44 +370,52 @@ spec: - name: LIVENESS_APP_LABEL value: '' + # only pumba supported + # For pumba image use : gaiaadm/pumba:0.6.5 + - name: LIB_IMAGE + value: 'gaiaadm/pumba:0.6.5' + - name: LIVENESS_APP_NAMESPACE value: '' - - name: CHAOS_ITERATIONS - value: '2' + # provide the chaos interval + - name: CHAOS_INTERVAL + value: '10' - # provide the kill count - - name: KILL_COUNT - value: '' + # provide the total chaos duration + - name: TOTAL_CHAOS_DURATION + value: '20' - name: DATA_PERSISTENCE value: '' + - name: CHAOS_ITERATIONS + value: '2' + labels: - name: openebs-pool-pod-failure + name: openebs-pool-container-failure app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest #configmaps: - #- name: openebs-pool-pod-failure + #- name: openebs-pool-container-failure # mountPath: /mnt - --- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Kill the OpenEBS NFS provisioner container and check if pods consuming the NFS PVs continue to be available and volumes are writable (RWM mode) + Kill the cstor target/Jiva controller container and check if gets created again kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-nfs-provisioner-kill + name: openebs-target-container-failure app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-nfs-provisioner-kill + name: openebs-target-container-failure spec: definition: scope: Cluster @@ -403,25 +423,133 @@ spec: - apiGroups: - "" - "apps" - - "litmuschaos.io" - "batch" - - "extensions" + - "litmuschaos.io" - "storage.k8s.io" resources: + - "jobs" - "pods" + - "events" - "pods/exec" - "pods/log" - - "deployments" - - "events" - - "jobs" - "configmaps" - - "services" + - "secrets" - "persistentvolumeclaims" - "storageclasses" - "persistentvolumes" + - "chaosengines" - "chaosexperiments" - "chaosresults" + verbs: + - "create" + - "delete" + - "get" + - "list" + - "patch" + - "update" + image: "litmuschaos/ansible-runner:latest" + imagePullPolicy: Always + args: + - -c + - ansible-playbook ./experiments/openebs/openebs-target-container-failure/openebs_target_container_failure_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + command: + - /bin/bash + env: + - name: ANSIBLE_STDOUT_CALLBACK + value: 'default' + + - name: OPENEBS_NAMESPACE + value: 'openebs' + + - name: APP_PVC + value: '' + + # LIB_IMAGE can be - gaiaadm/pumba:0.6.5, gprasath/crictl:ci + # For pumba image use : gaiaadm/pumba:0.6.5 + # For containerd image use : gprasath/crictl:ci + - name: LIB_IMAGE + value: 'gaiaadm/pumba:0.6.5' + + # Specify the container runtime used , to pick the relevant chaos util + - name: CONTAINER_RUNTIME + value: 'docker' + + # TARGET_CONTAINER values: cstor-volume-mgmt , cstor-istgt + # For cstor-volume-istgt container kill use : cstor-istgt + # For volume-mgmt-kill container use : cstor-volume-mgmt + + - name: TARGET_CONTAINER + value: 'cstor-volume-mgmt' + + - name: FORCE + value: 'true' + + - name: LIVENESS_APP_LABEL + value: '' + + - name: LIVENESS_APP_NAMESPACE + value: '' + + - name: DATA_PERSISTENCE + value: '' + + - name: DEPLOY_TYPE + value: 'deployment' + + # provide the chaos interval + - name: CHAOS_INTERVAL + value: '10' + + # provide the total chaos duration + - name: TOTAL_CHAOS_DURATION + value: '20' + + - name: SOAK_TIME + value: '60' + + labels: + name: openebs-target-container-failure + app.kubernetes.io/part-of: litmus + app.kubernetes.io/component: experiment-job + app.kubernetes.io/version: latest + #configmaps: + #- name: openebs-target-container-failure + # mountPath: /mnt + +--- +--- +apiVersion: litmuschaos.io/v1alpha1 +description: + message: | + Kill all openebs control plane pod and check if gets scheduled again +kind: ChaosExperiment +metadata: + labels: + litmuschaos.io/name: openebs + name: openebs-control-plane-chaos + app.kubernetes.io/part-of: litmus + app.kubernetes.io/component: chaosexperiment + app.kubernetes.io/version: latest + name: openebs-control-plane-chaos +spec: + definition: + scope: Namespaced + permissions: + - apiGroups: + - "" + - "litmuschaos.io" + - "batch" + - "apps" + resources: + - "pods" + - "pods/log" + - "deployments" + - "events" + - "jobs" + - "configmaps" - "chaosengines" + - "chaosexperiments" + - "chaosresults" verbs: - "create" - "list" @@ -441,63 +569,48 @@ spec: imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-nfs-provisioner-kill/openebs_nfs_provisioner_kill_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-control-plane-chaos/openebs_control_plane_chaos_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: - name: ANSIBLE_STDOUT_CALLBACK value: 'default' - # NFS default container - - name: TARGET_CONTAINER - value: 'nfs-provisioner' + - name: OPENEBS_NAMESPACE + value: 'openebs' - # Period to wait before injection of chaos in sec + ## Period to wait before injection of chaos - name: RAMP_TIME value: '' + + - name: FORCE + value: '' - # It supports pumba and containerd + ## env var that describes the library used to execute the chaos + ## default: litmus. Supported values: litmus, powerfulseal - name: LIB - value: 'pumba' - - # LIB_IMAGE can be - gaiaadm/pumba:0.6.5, gprasath/crictl:ci - # For pumba image use: gaiaadm/pumba:0.6.5 - # For containerd image use: gprasath/crictl:ci - - name: LIB_IMAGE - value: 'gaiaadm/pumba:0.6.5' - - # provide the chaos interval - - name: CHAOS_INTERVAL - value: '10' - - # provide the total chaos duration - - name: TOTAL_CHAOS_DURATION - value: '20' + value: 'litmus' labels: - name: openebs-nfs-provisioner-kill + name: openebs-control-plane-chaos app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest - configmaps: - - name: openebs-nfs-provisioner-kill - mountPath: /mnt/ ---- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Kill the cstor target/Jiva controller container and check if gets created again + Network loss to pool pod belonging to a OpenEBS PVC kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-target-container-failure + name: openebs-pool-network-loss app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-target-container-failure + name: openebs-pool-network-loss spec: definition: scope: Cluster @@ -505,27 +618,33 @@ spec: - apiGroups: - "" - "apps" - - "batch" - "litmuschaos.io" + - "batch" + - "extensions" - "storage.k8s.io" + - "openebs.io" resources: - - "jobs" - "pods" - - "events" - - "pods/exec" - "pods/log" + - "pods/exec" + - "events" + - "jobs" - "configmaps" - - "secrets" + - "services" - "persistentvolumeclaims" - "storageclasses" + - "persistentvolumeclaims" - "persistentvolumes" - "chaosengines" - "chaosexperiments" - "chaosresults" + - "cstorpools" + - "cstorvolumereplicas" + - "replicasets" verbs: - "create" - - "delete" - "get" + - "delete" - "list" - "patch" - "update" @@ -533,7 +652,7 @@ spec: imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-target-container-failure/openebs_target_container_failure_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-pool-network-loss/openebs_pool_network_loss_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: @@ -543,28 +662,19 @@ spec: - name: OPENEBS_NAMESPACE value: 'openebs' - - name: APP_PVC - value: '' - - # LIB_IMAGE can be - gaiaadm/pumba:0.6.5, gprasath/crictl:ci + # only pumba supported # For pumba image use : gaiaadm/pumba:0.6.5 - # For containerd image use : gprasath/crictl:ci - - name: LIB_IMAGE - value: 'gaiaadm/pumba:0.6.5' + - name: LIB_IMAGE + value: 'gaiaadm/pumba:0.6.5' - # Specify the container runtime used , to pick the relevant chaos util - - name: CONTAINER_RUNTIME - value: 'docker' + - name: TC_IMAGE + value: 'gaiadocker/iproute2' - # TARGET_CONTAINER values: cstor-volume-mgmt , cstor-istgt - # For cstor-volume-istgt container kill use : cstor-istgt - # For volume-mgmt-kill container use : cstor-volume-mgmt - - - name: TARGET_CONTAINER - value: 'cstor-volume-mgmt' + - name: NETWORK_PACKET_LOSS_PERCENTAGE + value: '100' # in percentage - - name: FORCE - value: 'true' + - name: TOTAL_CHAOS_DURATION + value: '120' # in seconds - name: LIVENESS_APP_LABEL value: '' @@ -575,28 +685,11 @@ spec: - name: DATA_PERSISTENCE value: '' - - name: DEPLOY_TYPE - value: 'deployment' - - # provide the chaos interval - - name: CHAOS_INTERVAL - value: '10' - - # provide the total chaos duration - - name: TOTAL_CHAOS_DURATION - value: '20' - - - name: SOAK_TIME - value: '60' - labels: - name: openebs-target-container-failure + name: openebs-pool-network-loss app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest - #configmaps: - #- name: openebs-target-container-failure - # mountPath: /mnt --- --- @@ -716,16 +809,16 @@ spec: apiVersion: litmuschaos.io/v1alpha1 description: message: | - Kill the cstor target/Jiva controller pod and check if gets created again + Kill the pool pod and check if gets scheduled again kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-target-pod-failure + name: openebs-pool-pod-failure app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-target-pod-failure + name: openebs-pool-pod-failure spec: definition: scope: Cluster @@ -736,23 +829,23 @@ spec: - "apps" - "batch" - "litmuschaos.io" + - "openebs.io" - "storage.k8s.io" resources: - "deployments" + - "replicasets" - "jobs" + - "pods/log" - "events" - "pods" - - "pods/log" - - "pods/exec" - "configmaps" - "secrets" - - "services" + - "storageclasses" + - "persistentvolumeclaims" + - "cstorvolumereplicas" - "chaosengines" - "chaosexperiments" - "chaosresults" - - "persistentvolumeclaims" - - "storageclasses" - - "persistentvolumes" verbs: - "create" - "get" @@ -771,105 +864,101 @@ spec: imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-target-pod-failure/openebs_target_pod_failure_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-pool-pod-failure/openebs_pool_pod_failure_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: - name: ANSIBLE_STDOUT_CALLBACK - value: 'default' + value: default - - name: OPENEBS_NAMESPACE + - name: OPENEBS_NS value: 'openebs' - + - name: APP_PVC value: '' - - name: FORCE - value: 'true' - - name: LIVENESS_APP_LABEL value: '' - name: LIVENESS_APP_NAMESPACE - value: '' - - - name: DATA_PERSISTENCE - value: '' + value: '' - - name: TOTAL_CHAOS_DURATION - value: '60' + - name: CHAOS_ITERATIONS + value: '2' # provide the kill count - name: KILL_COUNT value: '' - - name: CHAOS_INTERVAL - value: '15' - - - name: DEPLOY_TYPE - value: 'deployment' + - name: DATA_PERSISTENCE + value: '' labels: - name: openebs-target-pod-failure + name: openebs-pool-pod-failure app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest #configmaps: - #- name: openebs-target-pod-failure + #- name: openebs-pool-pod-failure # mountPath: /mnt + --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Network loss to target pod belonging to a OpenEBS PVC + Network delay to pool pod belonging to a OpenEBS PVC + This experiment is using pumba lib for network chaos kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-target-network-loss + name: openebs-pool-network-delay app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-target-network-loss + name: openebs-pool-network-delay spec: definition: scope: Cluster permissions: - apiGroups: - "" - - "extensions" - "apps" - - "batch" - "litmuschaos.io" + - "batch" + - "extensions" - "storage.k8s.io" + - "openebs.io" resources: - - "jobs" - "pods" - - "events" - - "services" - - "pods/log" - "pods/exec" + - "jobs" + - "pods/log" + - "events" - "configmaps" - - "secrets" + - "services" - "persistentvolumeclaims" - "storageclasses" - "persistentvolumes" - "chaosengines" - "chaosexperiments" - "chaosresults" + - "cstorpools" + - "cstorvolumereplicas" + - "replicasets" verbs: - "create" - "get" - - "delete" - "list" - "patch" - "update" + - "delete" image: "litmuschaos/ansible-runner:latest" imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-target-network-loss/openebs_target_network_loss_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-pool-network-delay/openebs_pool_network_delay_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: @@ -879,22 +968,20 @@ spec: - name: OPENEBS_NAMESPACE value: 'openebs' - - name: APP_PVC - value: '' - - - name: TC_IMAGE - value: 'gaiadocker/iproute2' - # only pumba supported # For pumba image use : gaiaadm/pumba:0.6.5 - name: LIB_IMAGE value: 'gaiaadm/pumba:0.6.5' - - name: NETWORK_PACKET_LOSS_PERCENTAGE - value: '100' # in percentage + # in milliseconds + - name: NETWORK_DELAY + value: '60000' + + - name: TC_IMAGE + value: 'gaiadocker/iproute2' - name: TOTAL_CHAOS_DURATION - value: '120' # in seconds + value: '60' # in seconds - name: LIVENESS_APP_LABEL value: '' @@ -906,110 +993,25 @@ spec: value: '' labels: - name: openebs-target-network-loss - app.kubernetes.io/part-of: litmus - app.kubernetes.io/component: experiment-job - app.kubernetes.io/version: latest - #configmaps: - #- name: openebs-target-network-loss - # mountPath: /mnt - ---- ---- -apiVersion: litmuschaos.io/v1alpha1 -description: - message: | - Kill all openebs control plane pod and check if gets scheduled again -kind: ChaosExperiment -metadata: - labels: - litmuschaos.io/name: openebs - name: openebs-control-plane-chaos - app.kubernetes.io/part-of: litmus - app.kubernetes.io/component: chaosexperiment - app.kubernetes.io/version: latest - name: openebs-control-plane-chaos -spec: - definition: - scope: Namespaced - permissions: - - apiGroups: - - "" - - "litmuschaos.io" - - "batch" - - "apps" - resources: - - "pods" - - "pods/log" - - "deployments" - - "events" - - "jobs" - - "configmaps" - - "chaosengines" - - "chaosexperiments" - - "chaosresults" - verbs: - - "create" - - "list" - - "get" - - "patch" - - "update" - - "delete" - - apiGroups: - - "" - resources: - - "nodes" - verbs: - - "get" - - "list" - - image: "litmuschaos/ansible-runner:latest" - imagePullPolicy: Always - args: - - -c - - ansible-playbook ./experiments/openebs/openebs-control-plane-chaos/openebs_control_plane_chaos_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 - command: - - /bin/bash - env: - - name: ANSIBLE_STDOUT_CALLBACK - value: 'default' - - - name: OPENEBS_NAMESPACE - value: 'openebs' - - ## Period to wait before injection of chaos - - name: RAMP_TIME - value: '' - - - name: FORCE - value: '' - - ## env var that describes the library used to execute the chaos - ## default: litmus. Supported values: litmus, powerfulseal - - name: LIB - value: 'litmus' - - labels: - name: openebs-control-plane-chaos - app.kubernetes.io/part-of: litmus + name: openebs-pool-network-delay + app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest ---- --- apiVersion: litmuschaos.io/v1alpha1 description: message: | - Kill the pool container and check if gets scheduled again + Network delay to target pod belonging to a deployment/statefulset/daemonset kind: ChaosExperiment metadata: labels: litmuschaos.io/name: openebs - name: openebs-pool-container-failure + name: openebs-target-network-delay app.kubernetes.io/part-of: litmus app.kubernetes.io/component: chaosexperiment app.kubernetes.io/version: latest - name: openebs-pool-container-failure + name: openebs-target-network-delay spec: definition: scope: Cluster @@ -1020,19 +1022,19 @@ spec: - "apps" - "batch" - "litmuschaos.io" - - "openebs.io" - "storage.k8s.io" resources: - - "replicasets" - - "events" - "jobs" - "pods" - - "pods/log" + - "services" + - "events" - "pods/exec" + - "pods/log" - "configmaps" - "secrets" - "persistentvolumeclaims" - - "cstorvolumereplicas" + - "storageclasses" + - "persistentvolumes" - "chaosengines" - "chaosexperiments" - "chaosresults" @@ -1047,51 +1049,49 @@ spec: imagePullPolicy: Always args: - -c - - ansible-playbook ./experiments/openebs/openebs-pool-container-failure/openebs_pool_container_failure_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 + - ansible-playbook ./experiments/openebs/openebs-target-network-delay/openebs_target_network_delay_ansible_logic.yml -i /etc/ansible/hosts -vv; exit 0 command: - /bin/bash env: - name: ANSIBLE_STDOUT_CALLBACK value: 'default' - - name: OPENEBS_NS + - name: OPENEBS_NAMESPACE value: 'openebs' - name: APP_PVC value: '' - - name: LIVENESS_APP_LABEL - value: '' + - name: TC_IMAGE + value: 'gaiadocker/iproute2' # only pumba supported # For pumba image use : gaiaadm/pumba:0.6.5 - - name: LIB_IMAGE + - name: LIB_IMAGE value: 'gaiaadm/pumba:0.6.5' - - name: LIVENESS_APP_NAMESPACE - value: '' - - # provide the chaos interval - - name: CHAOS_INTERVAL - value: '10' + - name: NETWORK_DELAY + value: '60000' # in milliseconds - # provide the total chaos duration - name: TOTAL_CHAOS_DURATION - value: '20' + value: '60' # in seconds + + - name: LIVENESS_APP_LABEL + value: '' - - name: DATA_PERSISTENCE + - name: LIVENESS_APP_NAMESPACE value: '' - - name: CHAOS_ITERATIONS - value: '2' + - name: DATA_PERSISTENCE + value: '' labels: - name: openebs-pool-container-failure + name: openebs-target-network-delay app.kubernetes.io/part-of: litmus app.kubernetes.io/component: experiment-job app.kubernetes.io/version: latest #configmaps: - #- name: openebs-pool-container-failure + #- name: openebs-target-network-delay # mountPath: /mnt ---