Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI: BATS: Make k8s/helm-install-rancher pass on macOS #7069

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ gcs
GENERALIZEDTIME
getwindowid
ghp
gitjob
gitmodules
gitrepo
gke
Expand Down
12 changes: 6 additions & 6 deletions bats/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ SC_EXCLUDES ?= SC1091,SC2034,SC2154
lint:
find tests -name '*.bash' | xargs ./scripts/bats-lint.pl
find tests -name '*.bats' | xargs ./scripts/bats-lint.pl
find tests -name '*.bash' | xargs shellcheck -s bash -e $(SC_EXCLUDES)
find tests -name '*.bats' | xargs shellcheck -s bash -e $(SC_EXCLUDES)
find scripts -name '*.sh' | xargs shellcheck -s bash -e $(SC_EXCLUDES)
find tests -name '*.bash' | xargs shfmt -s -d
find tests -name '*.bats' | xargs shfmt -s -d
find scripts -name '*.sh' | xargs shfmt -s -d
find tests -name '*.bash' | xargs shellcheck --shell=bash --exclude=$(SC_EXCLUDES)
find tests -name '*.bats' | xargs shellcheck --shell=bash --exclude=$(SC_EXCLUDES)
find scripts -name '*.sh' | xargs shellcheck --shell=bash --exclude=$(SC_EXCLUDES)
find tests -name '*.bash' | xargs shfmt --simplify --diff --language-dialect bats --indent 4
find tests -name '*.bats' | xargs shfmt --simplify --diff --language-dialect bats --indent 4
find scripts -name '*.sh' | xargs shfmt --simplify --diff

DEPS = bin/darwin/jq bin/linux/jq

Expand Down
4 changes: 3 additions & 1 deletion bats/tests/helpers/utils.bash
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,9 @@ capture_logs() {
cp -LR "${PATH_LOGS}/" "$logdir"
echo "${BATS_TEST_DESCRIPTION:-teardown}" >"${logdir}/test_description"
# Capture settings.json
cp "$PATH_CONFIG_FILE" "$logdir"
if [[ -f $PATH_CONFIG_FILE ]]; then
cp "$PATH_CONFIG_FILE" "$logdir"
fi
foreach_profile export_profile "$logdir"
fi
}
Expand Down
148 changes: 133 additions & 15 deletions bats/tests/k8s/helm-install-rancher.bats
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# Test case 11 & 12

load '../helpers/load'
RD_FILE_RAMDISK_SIZE=12 # We need more disk to run the Rancher image.

# Set RD_USE_RAMDISK to "false" for this test file.
# NOTE(review): the variable is presumably consumed by the shared helpers
# pulled in through '../helpers/load' -- confirm there.
local_setup_file() {
    RD_USE_RAMDISK="false"
}

local_setup() {
needs_port 443
Expand Down Expand Up @@ -84,6 +87,65 @@ determine_chart_version() {
fail || return
}

# Run the given command and assert that it succeeds and prints a non-empty
# JSON list.
#
# Arguments: the command (with its arguments) to execute.
# Returns:   0 when the command succeeded and `jq_output length` reported
#            a value other than 0; non-zero otherwise.
assert_not_empty_list() {
    run "$@"
    assert_success || return

    # NOTE(review): jq_output presumably applies the jq filter to $output, so
    # after this step $output holds the length of the list -- confirm.
    run jq_output length
    assert_success || return

    # A command that printed nothing at all (e.g. a jsonpath query for a field
    # that is not set yet) leaves nothing to count; an empty result must not
    # be mistaken for a non-empty list just because it differs from "0".
    assert_output || return
    refute_output 0 || return
}

# Run the given command, keeping stderr apart from stdout, and assert that it
# succeeds and that its output is exactly "True", "true" or "1".
#
# Arguments: the command (with its arguments) to execute.
assert_true() {
    local truthy_regex='^([Tt]rue|1)$'

    run --separate-stderr "$@"
    assert_success || return
    assert_output --regexp "$truthy_regex" || return
}

# Given namespace and app name, assert that the log of the first matching pod
# contains the given string.
#
# Arguments: $1    - namespace of the pod
#            $2    - value of the pod's "app" label
#            $3... - expected text; several words are joined with "$*"
# Returns:   0 when the text was found in the pod log; non-zero when the pod
#            lookup failed, no pod name came back, fetching the log failed,
#            or the text was not found.
assert_pod_log_line() {
    local namespace="$1"
    local selector="app=$2"
    shift 2
    local expect="$*"

    run kubectl get pod --namespace "$namespace" --selector "$selector" --output=jsonpath='{.items[0].metadata.name}'
    # Stop right away when the lookup failed. This function runs via `try`,
    # where a failed assert does not abort the function on its own; without
    # the explicit return, $output (an error message) would be used as the
    # pod name below.
    assert_success || return
    assert_output || return
    local name="$output"

    run kubectl logs --namespace "$namespace" "$name"
    assert_success || return
    assert_output --partial "$expect" || return
}

# Pull down the image manually first so we are less likely to time out when
# deploying rancher.
#
# Restores the chart version with `load_var rancher_chart_version` and pulls
# "rancher/rancher:v<version>" with ctrctl through `try`.
# Returns non-zero when the version cannot be restored or the pull fails.
pull_rancher_image() {
    local rancher_chart_version
    if ! load_var rancher_chart_version; then
        # Return explicitly: when errexit is not in effect we would otherwise
        # carry on and try to pull "rancher/rancher:v" with an empty version.
        fail "Could not restore Rancher chart version" || return
    fi
    # NOTE(review): presumably read by ctrctl to select the containerd
    # namespace -- confirm against the helper.
    local CONTAINERD_NAMESPACE=k8s.io
    try ctrctl pull --quiet "rancher/rancher:v$rancher_chart_version"
}

wait_for_rancher_pod() {
try assert_pod_log_line cattle-system rancher Listening on :443
try assert_pod_log_line cattle-system rancher Starting catalog controller
Comment on lines +132 to +133
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since wait_for_rancher_pod is called via try (which calls it via run), we need to add || return to each step that can fail.

I also think the lines would be easier to read if the expected string was quoted:

Suggested change
try assert_pod_log_line cattle-system rancher Listening on :443
try assert_pod_log_line cattle-system rancher Starting catalog controller
try assert_pod_log_line cattle-system rancher "Listening on :443" || return
try assert_pod_log_line cattle-system rancher "Starting catalog controller" || return

But given that so many asserts all target the same namespace and app, I wonder if they shouldn't be selected by global variables as well:

Suggested change
try assert_pod_log_line cattle-system rancher Listening on :443
try assert_pod_log_line cattle-system rancher Starting catalog controller
local NAMESPACE=cattle-system
local APP=rancher
try assert_pod_log_line "Listening on :443" || return
try assert_pod_log_line "Starting catalog controller" || return

try --max 60 --delay 10 assert_pod_log_line cattle-system rancher Watching metadata for rke-machine-config.cattle.io/v1
try --max 60 --delay 10 assert_pod_log_line cattle-system rancher 'Creating clusterRole for roleTemplate Cluster Owner (cluster-owner).'
try assert_pod_log_line cattle-system rancher Rancher startup complete
try assert_pod_log_line cattle-system rancher Created machine for node
}

# Wait until the log of the rancher-webhook pod in the cattle-system namespace
# contains each of the expected messages, checked one after the other.
#
# Returns non-zero as soon as one step fails. The explicit `|| return` is
# needed because this function is itself invoked through `try`, where a
# failing step would otherwise be ignored and only the last step's status
# would be reported.
wait_for_webhook_pod() {
    try assert_pod_log_line cattle-system rancher-webhook "Rancher-webhook version" || return
    try assert_pod_log_line cattle-system rancher-webhook "Listening on :9443" || return
    # Depending on version, this is either "cattle-webhook-tls" or "cattle-system/cattle-webhook-tls"
    try assert_pod_log_line cattle-system rancher-webhook "Creating new TLS secret for cattle-" || return
    try assert_pod_log_line cattle-system rancher-webhook "Active TLS secret cattle-" || return
    try assert_pod_log_line cattle-system rancher-webhook "Sleeping for 15 seconds then applying webhook config" || return
}

deploy_rancher() {
# TODO remove `skip_unless_host_ip` once `traefik_hostname` no longer needs it
if is_windows; then
Expand All @@ -98,22 +160,70 @@ deploy_rancher() {
helm upgrade \
--install cert-manager jetstack/cert-manager \
--namespace cert-manager \
--set installCRDs=true \
--set crds.enabled=true \
--set crds.keep=true \
--set prometheus.enabled=false \
--set "extraArgs[0]=--enable-certificate-owner-ref=true" \
--create-namespace
try assert_not_empty_list helm list --namespace cert-manager --deployed --output json --selector name=cert-manager
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an empty line before?

wait_for_kube_deployment_available --namespace cert-manager cert-manager

local host
host=$(traefik_hostname) || return
jandubois marked this conversation as resolved.
Show resolved Hide resolved

comment "Installing rancher $rancher_chart_version"
# The helm install can take a long time, especially on CI. Therefore we
# avoid using --wait / --timeout, and instead check for forward progress
# at each step.
helm upgrade \
--install rancher rancher-latest/rancher \
--version "$rancher_chart_version" \
--namespace cattle-system \
--set hostname="$host" \
--wait \
--timeout=10m \
--set replicas=1 \
--create-namespace

try assert_not_empty_list helm list --all --output json --namespace cattle-system --selector name=rancher
jandubois marked this conversation as resolved.
Show resolved Hide resolved
try assert_not_empty_list helm list --deployed --output json --namespace cattle-system --selector name=rancher
try kubectl get ingress --namespace cattle-system rancher
try assert_not_empty_list kubectl get ingress --namespace cattle-system rancher --output jsonpath='{.status.loadBalancer.ingress}'

try --max 60 --delay 10 kubectl get namespace fleet-local
try --max 60 --delay 10 kubectl get namespace local
try --max 60 --delay 10 kubectl get namespace cattle-global-data
try --max 60 --delay 10 kubectl get namespace fleet-default

try assert_not_empty_list kubectl get pods --namespace cattle-system --selector app=rancher --output jsonpath='{.items}'

# Unfortunately, the Rancher pod could get restarted; this may cause the
# wait steps to fail, in which case we need to start again from the top.
try wait_for_rancher_pod

try assert_true kubectl get APIServices v3.project.cattle.io --output=jsonpath='{.status.conditions[?(@.type=="Available")].status}'

try kubectl get namespace cattle-fleet-system
try kubectl get namespace cattle-system

try --max 48 kubectl get deployment --namespace cattle-fleet-system fleet-controller
try assert_kube_deployment_available --namespace cattle-fleet-system gitjob
try assert_kube_deployment_available --namespace cattle-fleet-system fleet-controller

try --max 60 --delay 10 assert_not_empty_list kubectl get pods --namespace cattle-system --selector app=rancher-webhook --output jsonpath='{.items}'

# Unfortunately, the webhook pod might restart too :(
try wait_for_webhook_pod

try --max 120 assert_kube_deployment_available --namespace cattle-system rancher
try --max 120 assert_kube_deployment_available --namespace cattle-fleet-local-system fleet-agent
try --max 60 assert_kube_deployment_available --namespace cattle-system rancher-webhook

# The rancher pod sometimes falls over on its own; retry in a loop to
# detect flapping.
local i
for i in {1..10}; do
sleep 1
try --max 60 --delay 10 assert_kube_deployment_available --namespace cattle-system rancher
done
}

verify_rancher() {
Expand All @@ -122,24 +232,32 @@ verify_rancher() {
skip_unless_host_ip
fi

# List all deployments and pods, if possible, before things fail
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment doesn't match the following commands; they don't fetch logs; they just list deployments and pods. I assume to get the information into the BATS output.

This comment seems to belong on top of the next block instead. I guess it became confusing because I asked you to put in more newlines. 😄

kubectl get deployments --all-namespaces || :
kubectl get pods --all-namespaces || :

local name
jandubois marked this conversation as resolved.
Show resolved Hide resolved
name="$(kubectl get pod -n cattle-system --selector app=rancher --output=jsonpath='{.items[].metadata.name}' || echo '')"
if [[ -n $name ]]; then
kubectl logs -n cattle-system "$name" || :
fi

name="$(kubectl get pod -n cattle-system --selector app=rancher-webhook --output=jsonpath='{.items[].metadata.name}' || echo '')"
if [[ -n $name ]]; then
kubectl logs -n cattle-system "$name" || :
fi

local host
host=$(traefik_hostname) || return

run try --max 9 --delay 10 curl --insecure --silent --show-error "https://${host}/dashboard/auth/login"
run try --max 9 --delay 10 curl --insecure --show-error "https://${host}/dashboard/auth/login"
assert_success
assert_output --partial 'href="/dashboard/'
run kubectl get secret --namespace cattle-system bootstrap-secret -o json
run try kubectl get secret --namespace cattle-system bootstrap-secret -o json
assert_success
assert_output --partial "bootstrapPassword"
}

# Uninstall the rancher release (namespace cattle-system) and then the
# cert-manager release (namespace cert-manager), passing --wait to helm.
# The result of each uninstall is not checked.
# NOTE(review): assert_nothing is presumably a deliberate no-op assertion
# following `run` -- confirm in the helpers.
uninstall_rancher() {
    local target
    # Each entry is "<release>:<namespace>".
    for target in rancher:cattle-system cert-manager:cert-manager; do
        run helm uninstall "${target%%:*}" --namespace "${target#*:}" --wait
        assert_nothing
    done
}

@test 'add helm repo' {
helm repo add jetstack https://charts.jetstack.io
helm repo add rancher-latest https://releases.rancher.com/server-charts/latest
Expand All @@ -152,6 +270,6 @@ foreach_k3s_version \
start_kubernetes \
wait_for_kubelet \
wait_for_traefik \
pull_rancher_image \
deploy_rancher \
verify_rancher \
uninstall_rancher
verify_rancher
Loading