From 4b5186415a0e23cec33f001178238d9cd7df1d2e Mon Sep 17 00:00:00 2001
From: Martin Linkhorst
Date: Fri, 8 Nov 2024 11:14:02 +0100
Subject: [PATCH 1/7] register generic admitter for write protection behind a feature flag

---
 cluster/config-defaults.yaml                |  5 +++
 .../01-admission-control/config.yaml        |  2 +
 .../01-admission-control/teapot.yaml        | 38 +++++++++++++++++++
 cluster/manifests/prometheus/rbac.yaml      |  4 ++
 .../node-pools/master-default/userdata.yaml |  2 +-
 5 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/cluster/config-defaults.yaml b/cluster/config-defaults.yaml
index c14797507b..a20827fc6b 100644
--- a/cluster/config-defaults.yaml
+++ b/cluster/config-defaults.yaml
@@ -677,6 +677,11 @@ teapot_admission_controller_configmap_deletion_protection_factories_enabled: "tr
 # enable the rolebinding admission-controller webhook which validates rolebindings and clusterrolebindings
 teapot_admission_controller_enable_rolebinding_webhook: "true"
 
+# enable the generic admission-controller webhook which catches all resources
+teapot_admission_controller_enable_generic_webhook: "false"
+# prevent write operations for non-admin users in protected namespaces
+teapot_admission_controller_prevent_write_operations: "false"
+
 # Enable and configure Pod Security Policy rules implemented in admission-controller.
 teapot_admission_controller_pod_security_policy_enabled: "true"
 
diff --git a/cluster/manifests/01-admission-control/config.yaml b/cluster/manifests/01-admission-control/config.yaml
index b07b8ef4a0..2ff375b52c 100644
--- a/cluster/manifests/01-admission-control/config.yaml
+++ b/cluster/manifests/01-admission-control/config.yaml
@@ -12,6 +12,8 @@ data:
 
   dns.default.subdomain-max-length: "{{ .Cluster.ConfigItems.subdomain_max_length }}"
 
+  generic.prevent-write-operations.enable: "{{ .Cluster.ConfigItems.teapot_admission_controller_prevent_write_operations }}"
+
   pod.container-resource-control.min-memory-request: "25Mi"
   pod.container-resource-control.default-cpu-request: "{{ .Cluster.ConfigItems.teapot_admission_controller_default_cpu_request }}"
   pod.container-resource-control.default-memory-request: "{{ .Cluster.ConfigItems.teapot_admission_controller_default_memory_request }}"
diff --git a/cluster/manifests/01-admission-control/teapot.yaml b/cluster/manifests/01-admission-control/teapot.yaml
index 315d147034..f30663d2e7 100644
--- a/cluster/manifests/01-admission-control/teapot.yaml
+++ b/cluster/manifests/01-admission-control/teapot.yaml
@@ -267,3 +267,41 @@ webhooks:
         apiVersions: ["v1"]
         resources: ["rolebindings", "clusterrolebindings"]
 {{- end }}
+{{- if eq .Cluster.ConfigItems.teapot_admission_controller_enable_generic_webhook "true" }}
+  - name: generic-namespaced-admitter.teapot.zalan.do
+    clientConfig:
+      url: "https://localhost:8085/generic"
+      caBundle: "{{ .Cluster.ConfigItems.ca_cert_decompressed }}"
+    admissionReviewVersions: ["v1beta1"]
+    failurePolicy: Fail
+    sideEffects: "NoneOnDryRun"
+    matchPolicy: Equivalent
+    namespaceSelector:
+      matchExpressions:
+      - key: kubernetes.io/metadata.name
+        operator: In
+        values: [ "kube-system", "visibility", "kubenurse" ]
+    rules:
+      - operations: [ "*" ]
+        apiGroups: ["*"]
+        apiVersions: ["*"]
+        resources: ["*/*"]
+        scope: "Namespaced"
+  - name: generic-cluster-admitter.teapot.zalan.do
+    clientConfig:
+      url: "https://localhost:8085/generic"
+      caBundle: "{{ .Cluster.ConfigItems.ca_cert_decompressed }}"
+    admissionReviewVersions: ["v1beta1"]
+    failurePolicy: Fail
+    sideEffects: "NoneOnDryRun"
+    matchPolicy: Equivalent
+    objectSelector:
+      matchLabels:
+        admission.zalando.org/infrastructure-component: "true"
+    rules:
+      - operations: [ "*" ]
+        apiGroups: ["*"]
+        apiVersions: ["*"]
+        resources: ["*/*"]
+        scope: "Cluster"
+{{- end }}
diff --git a/cluster/manifests/prometheus/rbac.yaml b/cluster/manifests/prometheus/rbac.yaml
index fafa4ff1ef..140cc57c3c 100644
--- a/cluster/manifests/prometheus/rbac.yaml
+++ b/cluster/manifests/prometheus/rbac.yaml
@@ -8,6 +8,8 @@ apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   name: prometheus
+  labels:
+    admission.zalando.org/infrastructure-component: "true"
 rules:
 - apiGroups: [""]
   resources:
@@ -37,6 +39,8 @@ apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
   name: prometheus
+  labels:
+    admission.zalando.org/infrastructure-component: "true"
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
diff --git a/cluster/node-pools/master-default/userdata.yaml b/cluster/node-pools/master-default/userdata.yaml
index f70b8fae7e..23eef646f4 100644
--- a/cluster/node-pools/master-default/userdata.yaml
+++ b/cluster/node-pools/master-default/userdata.yaml
@@ -206,7 +206,7 @@ write_files:
             limits:
              memory: {{ .Values.InstanceInfo.MemoryFraction (parseInt64 .Cluster.ConfigItems.apiserver_memory_limit_percent)}}
 {{- end }}
-      - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/admission-controller:master-222
+      - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/admission-controller:master-224
        name: admission-controller
        lifecycle:
          preStop:
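Note on patch 1: the two new config items are separate switches. teapot_admission_controller_enable_generic_webhook only registers the two generic admitters, while teapot_admission_controller_prevent_write_operations feeds the generic.prevent-write-operations.enable key in the admission-control ConfigMap; both default to "false", so clusters opt in explicitly. The cluster-scoped admitter only matches objects carrying the infrastructure-component label, which is why the Prometheus ClusterRole and ClusterRoleBinding are labelled in this patch. A minimal sketch of bringing another cluster-scoped infrastructure resource under the same admitter (the resource itself is hypothetical, only the label is taken from the patch):

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
      name: example-infrastructure-role   # hypothetical resource, for illustration only
      labels:
        # matched by the objectSelector of generic-cluster-admitter.teapot.zalan.do
        admission.zalando.org/infrastructure-component: "true"
    rules: []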
From 03da97b2e0dc9baac2a9da7b5675ba4ed66c8c06 Mon Sep 17 00:00:00 2001
From: Noor Malik
Date: Wed, 6 Nov 2024 11:49:02 +0100
Subject: [PATCH 2/7] update deployment-service

---
 .../manifests/deployment-service/controller-statefulset.yaml    | 2 +-
 .../manifests/deployment-service/status-service-deployment.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cluster/manifests/deployment-service/controller-statefulset.yaml b/cluster/manifests/deployment-service/controller-statefulset.yaml
index 5e49e82aff..dffb941438 100644
--- a/cluster/manifests/deployment-service/controller-statefulset.yaml
+++ b/cluster/manifests/deployment-service/controller-statefulset.yaml
@@ -29,7 +29,7 @@ spec:
       terminationGracePeriodSeconds: 300
       containers:
         - name: "deployment-service-controller"
-          image: "container-registry.zalando.net/teapot/deployment-controller:master-230"
+          image: "container-registry.zalando.net/teapot/deployment-controller:master-232"
          args:
            - "--config-namespace=kube-system"
            - "--decrypt-kms-alias-arn=arn:aws:kms:{{ .Cluster.Region }}:{{ .Cluster.InfrastructureAccount | getAWSAccountID }}:alias/deployment-secret"
diff --git a/cluster/manifests/deployment-service/status-service-deployment.yaml b/cluster/manifests/deployment-service/status-service-deployment.yaml
index 206779593a..d56ba2efaf 100644
--- a/cluster/manifests/deployment-service/status-service-deployment.yaml
+++ b/cluster/manifests/deployment-service/status-service-deployment.yaml
@@ -1,4 +1,4 @@
-# {{ $image := "container-registry.zalando.net/teapot/deployment-status-service:master-230" }}
+# {{ $image := "container-registry.zalando.net/teapot/deployment-status-service:master-232" }}
 # {{ $version := index (split $image ":") 1 }}
 
 apiVersion: apps/v1

From 1664dbcd8d5ec342df9756b949f80a39377c0cd9 Mon Sep 17 00:00:00 2001
From: Noor Malik
Date: Fri, 8 Nov 2024 15:42:11 +0100
Subject: [PATCH 3/7] make kube-janitor opt-in for production clusters

---
 cluster/config-defaults.yaml                     |  8 +++++++-
 cluster/manifests/deletions.yaml                 | 18 ++++++++++++++++++
 cluster/manifests/kube-janitor/deployment.yaml   |  2 +-
 cluster/manifests/kube-janitor/rbac.yaml         |  2 +-
 .../manifests/kube-janitor/rules-config.yaml     |  2 +-
 cluster/manifests/kube-janitor/vpa.yaml          |  2 +-
 6 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/cluster/config-defaults.yaml b/cluster/config-defaults.yaml
index a20827fc6b..9929d5bc25 100644
--- a/cluster/config-defaults.yaml
+++ b/cluster/config-defaults.yaml
@@ -1170,7 +1170,13 @@ control_plane_load_balancer_internal: "none"
 #   fs.inotify.max_user_watches = 100000
 sysctl_settings: ""
 
-
+# kube-janitor configuration
+{{if eq .Cluster.Environment "production"}}
+# This makes kube-janitor opt-in for production clusters
+kube_janitor_enabled: "false"
+{{else}}
+kube_janitor_enabled: "true"
+{{end}}
 
 # scheduling_controls
 teapot_admission_controller_scheduling_controls_enabled: "false"
diff --git a/cluster/manifests/deletions.yaml b/cluster/manifests/deletions.yaml
index d8e89844b3..31a5b09023 100644
--- a/cluster/manifests/deletions.yaml
+++ b/cluster/manifests/deletions.yaml
@@ -330,3 +330,21 @@ post_apply:
   kind: ServiceAccount
   namespace: kube-system
 {{- end }}
+{{- if ne .Cluster.ConfigItems.kube_janitor_enabled "true" }}
+- name: kube-janitor
+  kind: Deployment
+  namespace: kube-system
+- name: kube-janitor
+  kind: ConfigMap
+  namespace: kube-system
+- name: kube-janitor
+  kind: VerticalPodAutoscaler
+  namespace: kube-system
+- name: kube-janitor
+  kind: ServiceAccount
+  namespace: kube-system
+- name: kube-janitor
+  kind: ClusterRole
+- name: kube-janitor
+  kind: ClusterRoleBinding
+{{- end }}
diff --git a/cluster/manifests/kube-janitor/deployment.yaml b/cluster/manifests/kube-janitor/deployment.yaml
index 8b7c3859a4..904002c842 100644
--- a/cluster/manifests/kube-janitor/deployment.yaml
+++ b/cluster/manifests/kube-janitor/deployment.yaml
@@ -1,4 +1,4 @@
-# {{ if ne .Cluster.Environment "production" }}
+# {{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}
 # {{ $image := "container-registry.zalando.net/teapot/kube-janitor:23.7.0-main-2" }}
 # {{ $version := index (split (index (split $image ":") 1) "-") 0 }}
 apiVersion: apps/v1
diff --git a/cluster/manifests/kube-janitor/rbac.yaml b/cluster/manifests/kube-janitor/rbac.yaml
index f9a1964d86..973930bb65 100644
--- a/cluster/manifests/kube-janitor/rbac.yaml
+++ b/cluster/manifests/kube-janitor/rbac.yaml
@@ -1,4 +1,4 @@
-{{ if ne .Cluster.Environment "production" }}
+{{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}
 apiVersion: v1
 kind: ServiceAccount
 metadata:
diff --git a/cluster/manifests/kube-janitor/rules-config.yaml b/cluster/manifests/kube-janitor/rules-config.yaml
index 961b660980..55f97908f1 100644
--- a/cluster/manifests/kube-janitor/rules-config.yaml
+++ b/cluster/manifests/kube-janitor/rules-config.yaml
@@ -1,4 +1,4 @@
-# {{ if ne .Cluster.Environment "production" }}
+# {{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/cluster/manifests/kube-janitor/vpa.yaml b/cluster/manifests/kube-janitor/vpa.yaml
index b55d9a934f..81bffc5cc9 100644
--- a/cluster/manifests/kube-janitor/vpa.yaml
+++ b/cluster/manifests/kube-janitor/vpa.yaml
@@ -1,4 +1,4 @@
-{{ if ne .Cluster.Environment "production" }}
+{{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}
 apiVersion: autoscaling.k8s.io/v1
 kind: VerticalPodAutoscaler
 metadata:
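Note on patch 3: the template block renders the default to kube_janitor_enabled: "false" on production clusters and "true" everywhere else, and the new deletions.yaml entries garbage-collect the kube-janitor Deployment, ConfigMap, VerticalPodAutoscaler, ServiceAccount, ClusterRole and ClusterRoleBinding from any cluster where the item does not resolve to "true". A production cluster that still wants kube-janitor opts in by overriding the config item for that cluster; the override mechanism itself is outside this series, so the snippet below only illustrates the intended value:

    # rendered default per environment (from the template above):
    #   production:      kube_janitor_enabled: "false"   (opt-in)
    #   everything else: kube_janitor_enabled: "true"
    #
    # hypothetical per-cluster override for a production cluster that wants the janitor;
    # where and how config items are overridden is assumed, not shown in this series:
    kube_janitor_enabled: "true"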
From c009bbe410a24c361d4d80212be3a07e4b5918a1 Mon Sep 17 00:00:00 2001
From: Noor Malik
Date: Fri, 8 Nov 2024 17:05:20 +0100
Subject: [PATCH 4/7] fix templating

---
 cluster/manifests/kube-janitor/deployment.yaml   | 2 +-
 cluster/manifests/kube-janitor/rbac.yaml         | 2 +-
 cluster/manifests/kube-janitor/rules-config.yaml | 2 +-
 cluster/manifests/kube-janitor/vpa.yaml          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cluster/manifests/kube-janitor/deployment.yaml b/cluster/manifests/kube-janitor/deployment.yaml
index 904002c842..2e4eb75632 100644
--- a/cluster/manifests/kube-janitor/deployment.yaml
+++ b/cluster/manifests/kube-janitor/deployment.yaml
@@ -1,4 +1,4 @@
-# {{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}
+# {{ if eq .Cluster.ConfigItems.kube_janitor_enabled "true" }}
 # {{ $image := "container-registry.zalando.net/teapot/kube-janitor:23.7.0-main-2" }}
 # {{ $version := index (split (index (split $image ":") 1) "-") 0 }}
 apiVersion: apps/v1
diff --git a/cluster/manifests/kube-janitor/rbac.yaml b/cluster/manifests/kube-janitor/rbac.yaml
index 973930bb65..c4064b6563 100644
--- a/cluster/manifests/kube-janitor/rbac.yaml
+++ b/cluster/manifests/kube-janitor/rbac.yaml
@@ -1,4 +1,4 @@
-{{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}
+{{ if eq .Cluster.ConfigItems.kube_janitor_enabled "true" }}
 apiVersion: v1
 kind: ServiceAccount
 metadata:
diff --git a/cluster/manifests/kube-janitor/rules-config.yaml b/cluster/manifests/kube-janitor/rules-config.yaml
index 55f97908f1..9d2d562f7a 100644
--- a/cluster/manifests/kube-janitor/rules-config.yaml
+++ b/cluster/manifests/kube-janitor/rules-config.yaml
@@ -1,4 +1,4 @@
-# {{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}
+# {{ if eq .Cluster.ConfigItems.kube_janitor_enabled "true" }}
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/cluster/manifests/kube-janitor/vpa.yaml b/cluster/manifests/kube-janitor/vpa.yaml
index 81bffc5cc9..be4c207d3f 100644
--- a/cluster/manifests/kube-janitor/vpa.yaml
+++ b/cluster/manifests/kube-janitor/vpa.yaml
@@ -1,4 +1,4 @@
-{{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}
+{{ if eq .Cluster.ConfigItems.kube_janitor_enabled "true" }}
 apiVersion: autoscaling.k8s.io/v1
 kind: VerticalPodAutoscaler
 metadata:
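Note on patch 4: .Cluster.ConfigItems is (presumably) a map of config item names to values, so the form introduced in the previous patch hands the whole map to eq as its first operand; Go's text/template cannot compare a map against a string, so rendering fails with a comparison error rather than quietly evaluating to false. Looking up the single key and comparing string to string is the working form:

    # broken (patch 3): eq receives (map, "kube_janitor_enabled", "true") and errors at render time
    {{ if eq .Cluster.ConfigItems "kube_janitor_enabled" "true" }}

    # fixed (patch 4): index the map by key, then compare string to string
    {{ if eq .Cluster.ConfigItems.kube_janitor_enabled "true" }}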
From 60eb9d8003123b1963704431afbb952f7141236b Mon Sep 17 00:00:00 2001
From: Noor Malik
Date: Mon, 11 Nov 2024 11:24:25 +0100
Subject: [PATCH 5/7] add note of caution on the config-item that enables kube-janitor in production

---
 cluster/config-defaults.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/cluster/config-defaults.yaml b/cluster/config-defaults.yaml
index 9929d5bc25..b9440555a5 100644
--- a/cluster/config-defaults.yaml
+++ b/cluster/config-defaults.yaml
@@ -1173,6 +1173,17 @@ sysctl_settings: ""
 # kube-janitor configuration
 {{if eq .Cluster.Environment "production"}}
 # This makes kube-janitor opt-in for production clusters
+
+# IMPORTANT:
+# Before enabling kube-janitor for a production cluster, you must ensure that
+# no existing resources are annotated with a TTL. This can happen when a test
+# deployment is deployed to production as is. Currently this is a no-op, since
+# kube-janitor doesn't run in production.
+#
+# This is needed until we implement namespace prefix matching to reduce the
+# scope of kube-janitor to a set of namespace names that aren't known at the
+# time of enabling kube-janitor. Once that feature is in place, it will be
+# easier to limit the scope.
 kube_janitor_enabled: "false"
 {{else}}
 kube_janitor_enabled: "true"
 {{end}}
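Note on patch 5: the check the note asks for concerns objects that already carry a TTL or expiry annotation. Assuming the upstream kube-janitor annotation names (janitor/ttl and janitor/expires; the exact set honoured by this deployment is an assumption), "annotated with a TTL" means something like the sketch below, and any such object would be deleted once the janitor starts running in production. Scanning all namespaces for these annotations before flipping kube_janitor_enabled on a production cluster is the practical way to satisfy the note.

    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: load-test             # hypothetical object copied from a test cluster
      namespace: some-namespace   # hypothetical
      annotations:
        janitor/ttl: "24h"        # assumed upstream kube-janitor annotation name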
kube_janitor_enabled: "false" {{else}} kube_janitor_enabled: "true" From 58f691a6df751fea4b3a704f6c3d3a2995554312 Mon Sep 17 00:00:00 2001 From: Noor Malik Date: Mon, 11 Nov 2024 16:34:09 +0100 Subject: [PATCH 6/7] update role-sync-controller and use CLI parameters for subjects --- .../manifests/role-sync-controller/cronjob.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/cluster/manifests/role-sync-controller/cronjob.yaml b/cluster/manifests/role-sync-controller/cronjob.yaml index bf982724e0..e96cef782d 100644 --- a/cluster/manifests/role-sync-controller/cronjob.yaml +++ b/cluster/manifests/role-sync-controller/cronjob.yaml @@ -26,5 +26,18 @@ spec: restartPolicy: OnFailure containers: - name: role-sync-controller - image: container-registry.zalando.net/teapot/role-sync-controller:main-1 + image: container-registry-test.zalando.net/teapot/role-sync-controller:main-2 + args: + - --subject-group=PowerUser + - --subject-group=Manual + - --subject-group=Emergency + - --subject-group=okta:common/engineer + - --subject-serviceaccount=default/cdp + - --subject-user=zalando-iam:zalando:service:k8sapi-local_deployment-service-executor + {{- if eq .Cluster.Environment "test"}} + - --subject-group=CollaboratorPowerUser + {{- end}} + {{- if eq .Cluster.Provider "zalando-eks"}} + - --subject-serviceaccount=kube-system/deployment-service-controller + {{- end}} {{ end }} From 6bf4d6c081aaf9cfe0b5acbb9394b9e366fefde1 Mon Sep 17 00:00:00 2001 From: Martin Linkhorst Date: Mon, 11 Nov 2024 16:49:34 +0100 Subject: [PATCH 7/7] use correct image from production registry --- cluster/manifests/role-sync-controller/cronjob.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster/manifests/role-sync-controller/cronjob.yaml b/cluster/manifests/role-sync-controller/cronjob.yaml index e96cef782d..d8d96b24d7 100644 --- a/cluster/manifests/role-sync-controller/cronjob.yaml +++ b/cluster/manifests/role-sync-controller/cronjob.yaml @@ -26,7 +26,7 @@ spec: restartPolicy: OnFailure containers: - name: role-sync-controller - image: container-registry-test.zalando.net/teapot/role-sync-controller:main-2 + image: container-registry.zalando.net/teapot/role-sync-controller:main-2 args: - --subject-group=PowerUser - --subject-group=Manual