diff --git a/reliability-v2/config/reliability-ivs.yaml b/reliability-v2/config/reliability-ivs.yaml
new file mode 100644
index 00000000000..4a1dcb1ce8e
--- /dev/null
+++ b/reliability-v2/config/reliability-ivs.yaml
@@ -0,0 +1,223 @@
+reliability:
+  kubeconfig:
+    users:
+      - admin_file:
+      - user_file:
+
+  appTemplates:
+    - template: cakephp-mysql-persistent
+    - template: nodejs-postgresql-persistent # N/A for FIPS
+    - template: django-psql-persistent # N/A for FIPS
+    - template: rails-pgsql-persistent
+    #- template: dancer-mysql-persistent
+    # For tests without pvc
+    # - template: cakephp-mysql-example
+    # - template: dancer-mysql-example
+    # - template: django-psql-example
+    # - template: nodejs-postgresql-example
+
+  # The max number of projects is limited by the size of the cluster.
+  # For a 3 node m5.xlarge cluster, 25 to 30 projects are recommended.
+  # For a 5 node m5.xlarge cluster, 60 projects are recommended.
+  # The max number of projects = groups x users x new_project
+  groups:
+    - name: admin
+      # For a cluster created by the Jenkins Flexy-install job, the admin user_name is kubeadmin.
+      # For a rosa cluster created in Prow, the admin user_name is rosa-admin.
+      user_name: # admin username, kubeadmin or rosa-admin.
+      loops: forever # run the group for this number of loops. integer > 0 or 'forever', default is 1.
+      trigger: 600 # wait trigger seconds between each loop.
+      jitter: 60 # randomly start the users in this group within jitter seconds. Default is 0.
+      interval: 10 # wait interval seconds between tasks.
+      # If the admin network policy function is planned in the test, keep the following pre_tasks; otherwise comment them out.
+      pre_tasks:
+        - func apply_nonamespace -p "/network-policy/00_banp_deny-triffic-reliability.yaml"
+        - func apply_nonamespace -p "/network-policy/01_anp_allow-common-service.yaml"
+        - func apply_nonamespace -p "/network-policy/02_anp_allow-traffic-egress-cidr-cluster-network-p20.yaml"
+        - func apply_nonamespace -p "/network-policy/04_anp_allow-traffic-dev-test-p30.yaml"
+        - func apply_nonamespace -p "/network-policy/03_anp_allow-traffic-dev-prod-p31.yaml"
+      tasks:
+        - func check_operators
+        - oc get project -l purpose=reliability
+        - func check_nodes
+        - kubectl get pods -A -o wide | egrep -v "Completed|Running"
+        # Run test cases as scripts. The KUBECONFIG of the current user is set as an env variable by reliability-v2.
+        #- . /create-delete-pod-ensure-service.sh
+
+    - name: dev-test
+      user_name: testuser- # if user_start and user_end exist, this is used as the username prefix.
+      # For a cluster created by the Jenkins Flexy-install job, the users start from testuser-0.
+      # For a cluster created in Prow that used the following step to create test users, the users start from testuser-1.
+      # https://github.com/openshift/release/blob/master/ci-operator/step-registry/osd-ccs/conf/idp/htpasswd/multi-users/osd-ccs-conf-idp-htpasswd-multi-users-ref.yaml
+      user_start: 1 # user_start is inclusive, start with testuser-1 in the users file.
+      user_end: 6 # user_end is exclusive, end with testuser-5 in the users file.
+      loops: forever
+      trigger: 60
+      jitter: 600 # randomly start the users in this group within 10 minutes.
+      interval: 10
+      tasks:
+        - func delete_all_projects # clear all projects
+        - func verify_project_deletion -n 2 # verify project deletion in 2 namespaces
+        - func new_project -n 2 # create 2 new projects
+        # If network policy is planned in the test, uncomment the following line.
+        #- func apply -n 2 -p "/network-policy/allow-same-namespace.yaml" # Apply network policy to 2 projects
+        - func check_all_projects # check all projects under this user
+        - func apply -n 2 -p "/nginx-deploy.yaml"
+        - func apply -n 2 -p "/nginx-service.yaml"
+        - func apply -n 2 -p "/route-http.yaml"
+        - func load_app -n 2 -p 10 # load apps in 2 namespaces with 10 clients each
+        - func check_pods -n 2 # check pods in 2 namespaces
+        - func delete_project -n 2 # delete projects in 2 namespaces
+        - func verify_project_deletion -n 2 # verify project deletion in 2 namespaces
+
+    - name: dev-test-build
+      user_name: testuser- # if user_start and user_end exist, this is used as the username prefix.
+      # For a cluster created by the Jenkins Flexy-install job, the users start from testuser-0.
+      # For a cluster created in Prow that used the following step to create test users, the users start from testuser-1.
+      # https://github.com/openshift/release/blob/master/ci-operator/step-registry/osd-ccs/conf/idp/htpasswd/multi-users/osd-ccs-conf-idp-htpasswd-multi-users-ref.yaml
+      user_start: 21 # user_start is inclusive, start with testuser-21 in the users file.
+      user_end: 26 # user_end is exclusive, end with testuser-25 in the users file.
+      loops: forever
+      trigger: 60
+      jitter: 600 # randomly start the users in this group within 10 minutes.
+      interval: 10
+      tasks:
+        - func delete_all_projects # clear all projects
+        - func verify_project_deletion -n 2 # verify project deletion in 2 namespaces
+        - func new_project -n 2 # create 2 new projects
+        # If network policy is planned in the test, uncomment the following line.
+        #- func apply -n 2 -p "/network-policy/allow-same-namespace.yaml" # Apply network policy to 2 projects
+        - func check_all_projects # check all projects under this user
+        - func new_app -n 2 # new app in 2 namespaces
+        - func load_app -n 2 -p 10 # load apps in 2 namespaces with 10 clients each
+        - func build -n 1 # build app in 1 namespace
+        - func check_pods -n 2 # check pods in 2 namespaces
+        - func delete_project -n 2 # delete projects in 2 namespaces
+        - func verify_project_deletion -n 2 # verify project deletion in 2 namespaces
+
+    - name: dev-prod
+      user_name: testuser-
+      user_start: 31
+      user_end: 36
+      loops: forever
+      trigger: 600
+      jitter: 3600
+      interval: 600
+      pre_tasks:
+        - func delete_all_projects
+        - func verify_project_deletion -n 8
+        - func new_project -n 8
+        # If network policy is planned in the test, uncomment the following line.
+        #- func apply -n 8 -p "/network-policy/allow-same-namespace.yaml" # Apply network policy to 8 projects
+        - func apply -n 8 -p "/nginx-deploy.yaml"
+        - func apply -n 8 -p "/nginx-service.yaml"
+        - func apply -n 8 -p "/route-http.yaml"
+      tasks:
+        - func load_app -n 8 -p 50
+        - func scale_deployment -n 8 -p 2 # scale deployments in 8 namespaces to 2 replicas
+        - func scale_deployment -n 8 -p 1 # scale deployments in 8 namespaces to 1 replica
+      post_tasks:
+        - func delete_project -n 8
+
+    - name: dev-cronjob
+      user_name: testuser- # if user_start and user_end exist, this is used as the username prefix.
+      user_start: 41
+      user_end: 42
+      trigger: 600
+      jitter: 1200
+      loops: forever
+      pre_tasks:
+        - func delete_all_projects
+        - func verify_project_deletion -n 1
+        - func new_project -n 1
+        - /cronjob.sh -n 10
+      tasks:
+        - /cronjob.sh -c
+      post_tasks:
+        - /cronjob.sh -d
+        - func delete_project -n 1
+        - func verify_project_deletion -n 1
+
+    - name: ivs-llama
+      user_name:
+      trigger: 3600
+      jitter: 600
+      interval: 10
+      loops: forever
+      pre_tasks:
+        - oc new-project ivs-llama
+        - oc label ns ivs-llama purpose=reliability
+        - oc create sa llama-sa -n ivs-llama
+        - oc adm policy add-scc-to-user anyuid -z llama-sa -n ivs-llama
+        - oc apply -f "/ivs/ivs-ai-llama.yaml" -n ivs-llama
+        - sleep 300
+        - oc get po -n ivs-llama
+      tasks:
+        - oc delete po -l app=ivs-llama -n ivs-llama # delete the llama pod; the deployment recreates it
+        - oc get po -n ivs-llama
+        - sleep 60
+        - oc get po -n ivs-llama
+
+    - name: ivs-nginx
+      user_name:
+      trigger: 600
+      jitter: 600
+      interval: 10
+      loops: forever
+      pre_tasks:
+        - oc new-project ivs-nginx
+        - oc label ns ivs-nginx purpose=reliability
+      tasks:
+        - oc apply -f "/ivs/ivs-nginx.yaml" -n ivs-nginx
+        - sleep 60
+        - oc get po -n ivs-nginx
+        - oc delete deploy ivs-nginx -n ivs-nginx # delete the deployment; it is re-applied on the next loop
+
+  cerberusIntegration:
+    # Start cerberus (https://github.com/cloud-bulldozer/cerberus) before starting the reliability test.
+    cerberus_enable: False
+    # If cerberus_enable is False, the following 2 items are ignored.
+    cerberus_api: "http://0.0.0.0:8080"
+    # Action to take when the cerberus status is False. Valid values: pause/halt/continue.
+    cerberus_fail_action: pause
+
+  slackIntegration:
+    slack_enable: False
+    # The ID in the example is the id of the slack channel #ocp-qe-reliability-monitoring.
+    slack_channel: C0266JJ4XM5
+    # slack_member is optional. If provided, the notification message will @ you.
+    # You must be a member of the slack channel to receive the notification.
+    slack_member:
+
+  krakenIntegration:
+    kraken_enable: False
+    # Supported Kraken scenarios: https://github.com/cloud-bulldozer/kraken-hub/blob/main/README.md#supported-chaos-scenarios
+    # pod-scenarios, container-scenarios, node-scenarios, zone-outages, time-scenarios,
+    # node-cpu-hog, node-memory-hog, node-io-hog
+    # Please specify the parameters for each scenario. e.g. for pod-scenarios,
+    # refer to https://github.com/cloud-bulldozer/kraken-hub/blob/main/docs/pod-scenarios.md#supported-parameters
+    kraken_scenarios:
+      - name: pod-scenarios_etcd
+        scenario: "pod-scenarios"
+        interval_unit: minutes # weeks,days,hours,minutes,seconds
+        interval_number: 8
+        # start_date: "2021-10-28 17:36:00" # Optional. format: 2021-10-20 10:00:00.
+        # end_date: "2021-10-28 17:50:00" # Optional.
+        # timezone: "Asia/Shanghai" # Optional. e.g. US/Eastern. Default is "UTC".
Default is "UTC + - name: pod-scenarios_monitoring + scenario: "pod-scenarios" + interval_unit: minutes # weeks,days,hours,minutes,seconds + interval_number: 10 + parameters: + NAMESPACE: openshift-monitoring + POD_LABEL: app.kubernetes.io/component=prometheus + EXPECTED_POD_COUNT: 2 + - name: node-scenarios_workerstopstart + scenario: "node-scenarios" + interval_unit: minutes + interval_number: 12 + parameters: + AWS_DEFAULT_REGION: us-east-2 + AWS_ACCESS_KEY_ID: xxxx + AWS_SECRET_ACCESS_KEY: xxxx + CLOUD_TYPE: aws diff --git a/reliability-v2/content/ivs/ivs-ai-llama.yaml b/reliability-v2/content/ivs/ivs-ai-llama.yaml new file mode 100644 index 00000000000..bb11f80d694 --- /dev/null +++ b/reliability-v2/content/ivs/ivs-ai-llama.yaml @@ -0,0 +1,27 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ivs-llama +spec: + replicas: 1 + selector: + matchLabels: + app: ivs-llama + template: + metadata: + labels: + app: ivs-llama + spec: + containers: + - name: llamacpp + image: quay.io/ai/llamacpp:latest + args: ["--model", "/models/llama.bin"] + volumeMounts: + - name: volume + mountPath: /models + subPath: dir + volumes: + - name: volume + image: + reference: quay.io/crio/artifact:v1 + pullPolicy: IfNotPresent diff --git a/reliability-v2/content/ivs/ivs-nginx.yaml b/reliability-v2/content/ivs/ivs-nginx.yaml new file mode 100644 index 00000000000..bd8169636dd --- /dev/null +++ b/reliability-v2/content/ivs/ivs-nginx.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ivs-nginx +spec: + replicas: 3 + selector: + matchLabels: + app: ivs-nginx + template: + metadata: + labels: + app: ivs-nginx + spec: + containers: + - name: nginx + image: quay.io/bitnami/nginx:latest + volumeMounts: + - name: volume + mountPath: /usr/share/nginx/html/models + subPath: dir + volumes: + - name: volume + image: + reference: quay.io/crio/artifact:v1 + pullPolicy: IfNotPresent