diff --git a/jsonnet/components/cluster-monitoring-operator.libsonnet b/jsonnet/components/cluster-monitoring-operator.libsonnet index c9d2b9b8f5..da209e5643 100644 --- a/jsonnet/components/cluster-monitoring-operator.libsonnet +++ b/jsonnet/components/cluster-monitoring-operator.libsonnet @@ -247,6 +247,11 @@ function(params) { resources: ['featuregates'], verbs: ['get', 'list', 'watch'], }, + { + apiGroups: ['config.openshift.io'], + resources: ['clustermonitorings'], + verbs: ['get', 'list', 'watch'], + }, { apiGroups: ['certificates.k8s.io'], resources: ['certificatesigningrequests'], diff --git a/manifests/0000_50_cluster-monitoring-operator_02-role.yaml b/manifests/0000_50_cluster-monitoring-operator_02-role.yaml index 58afb2eaee..6cb285a7e7 100644 --- a/manifests/0000_50_cluster-monitoring-operator_02-role.yaml +++ b/manifests/0000_50_cluster-monitoring-operator_02-role.yaml @@ -135,6 +135,14 @@ rules: - get - list - watch +- apiGroups: + - config.openshift.io + resources: + - clustermonitorings + verbs: + - get + - list + - watch - apiGroups: - certificates.k8s.io resources: diff --git a/manifests/0000_50_cluster-monitoring-operator_06-clusteroperator.yaml b/manifests/0000_50_cluster-monitoring-operator_06-clusteroperator.yaml index d76a1747f6..5c3da8a36d 100644 --- a/manifests/0000_50_cluster-monitoring-operator_06-clusteroperator.yaml +++ b/manifests/0000_50_cluster-monitoring-operator_06-clusteroperator.yaml @@ -40,3 +40,6 @@ status: - group: monitoring.coreos.com name: '' resource: alertmanagerconfigs + - group: config.openshift.io + name: cluster + resource: clustermonitorings diff --git a/pkg/client/client.go b/pkg/client/client.go index e9058276f3..806edae140 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -27,6 +27,7 @@ import ( "github.com/imdario/mergo" configv1 "github.com/openshift/api/config/v1" + configv1alpha1 "github.com/openshift/api/config/v1alpha1" consolev1 "github.com/openshift/api/console/v1" osmv1 "github.com/openshift/api/monitoring/v1" routev1 "github.com/openshift/api/route/v1" @@ -417,6 +418,15 @@ func (c *Client) ClusterOperatorListWatch(ctx context.Context, name string) *cac } } +func (c *Client) ClusterMonitoringListWatch() *cache.ListWatch { + return cache.NewListWatchFromClient( + c.oscclient.ConfigV1alpha1().RESTClient(), + "clustermonitorings", + "", + fields.Everything(), + ) +} + func (c *Client) HasRouteCapability(ctx context.Context) (bool, error) { _, err := c.oscclient.ConfigV1().ClusterOperators().Get(ctx, "ingress", metav1.GetOptions{}) if apierrors.IsNotFound(err) { @@ -595,6 +605,10 @@ func (c *Client) GetConsoleConfig(ctx context.Context, name string) (*configv1.C return c.oscclient.ConfigV1().Consoles().Get(ctx, name, metav1.GetOptions{}) } +func (c *Client) GetClusterMonitoring(ctx context.Context, name string) (*configv1alpha1.ClusterMonitoring, error) { + return c.oscclient.ConfigV1alpha1().ClusterMonitorings().Get(ctx, name, metav1.GetOptions{}) +} + func (c *Client) GetConfigmap(ctx context.Context, namespace, name string) (*v1.ConfigMap, error) { return c.kclient.CoreV1().ConfigMaps(namespace).Get(ctx, name, metav1.GetOptions{}) } diff --git a/pkg/clustermonitoring/controller.go b/pkg/clustermonitoring/controller.go new file mode 100644 index 0000000000..028668e5eb --- /dev/null +++ b/pkg/clustermonitoring/controller.go @@ -0,0 +1,165 @@ +// Copyright 2025 The Cluster Monitoring Operator Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clustermonitoring + +import ( + "context" + "fmt" + "time" + + configv1alpha1 "github.com/openshift/api/config/v1alpha1" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + "github.com/openshift/cluster-monitoring-operator/pkg/client" +) + +const ( + controllerName = "cluster-monitoring" + resyncPeriod = 15 * time.Minute + queueBaseDelay = 50 * time.Millisecond + queueMaxDelay = 3 * time.Minute +) + +// Controller is a controller for ClusterMonitoring resources. +type Controller struct { + client *client.Client + queue workqueue.TypedRateLimitingInterface[string] + informer cache.SharedIndexInformer + triggerReconcile func() +} + +// NewController returns a new ClusterMonitoring controller. +func NewController(ctx context.Context, client *client.Client, version string, triggerReconcile func()) (*Controller, error) { + informer := cache.NewSharedIndexInformer( + client.ClusterMonitoringListWatch(), + &configv1alpha1.ClusterMonitoring{}, + resyncPeriod, + cache.Indexers{}, + ) + + queue := workqueue.NewTypedRateLimitingQueueWithConfig[string]( + workqueue.NewTypedItemExponentialFailureRateLimiter[string](queueBaseDelay, queueMaxDelay), + workqueue.TypedRateLimitingQueueConfig[string]{Name: controllerName}, + ) + + controller := &Controller{ + client: client, + queue: queue, + informer: informer, + triggerReconcile: triggerReconcile, + } + + _, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: controller.handleAdd, + UpdateFunc: controller.handleUpdate, + DeleteFunc: controller.handleDelete, + }) + if err != nil { + return nil, err + } + + return controller, nil +} + +// Run starts the controller. +func (c *Controller) Run(ctx context.Context, workers int) { + klog.Info("Starting ClusterMonitoring controller") + defer c.queue.ShutDown() + + go c.informer.Run(ctx.Done()) + + if !cache.WaitForNamedCacheSync("ClusterMonitoring controller", ctx.Done(), c.informer.HasSynced) { + klog.Error("Failed to sync ClusterMonitoring controller cache") + return + } + + go c.worker(ctx) + + klog.Info("ClusterMonitoring controller started") + <-ctx.Done() + klog.Info("ClusterMonitoring controller stopped") +} + +func (c *Controller) worker(ctx context.Context) { + for c.processNextWorkItem(ctx) { + } +} + +func (c *Controller) processNextWorkItem(ctx context.Context) bool { + key, quit := c.queue.Get() + if quit { + return false + } + defer c.queue.Done(key) + + if err := c.sync(ctx, key); err != nil { + utilruntime.HandleError(fmt.Errorf("error syncing ClusterMonitoring (%s): %w", key, err)) + c.queue.AddRateLimited(key) + return true + } + + klog.V(4).Infof("ClusterMonitoring successfully synced: %s", key) + c.queue.Forget(key) + return true +} + +func (c *Controller) sync(ctx context.Context, key string) error { + klog.V(4).Infof("ClusterMonitoring controller processing: %s", key) + + if c.triggerReconcile != nil { + c.triggerReconcile() + } + + return nil +} + +func (c *Controller) handleAdd(obj interface{}) { + key, ok := c.keyFunc(obj) + if !ok { + return + } + klog.Infof("ClusterMonitoring added: %s", key) + c.queue.Add(key) +} + +func (c *Controller) handleUpdate(oldObj, newObj interface{}) { + key, ok := c.keyFunc(newObj) + if !ok { + return + } + klog.Infof("ClusterMonitoring updated: %s", key) + c.queue.Add(key) +} + +func (c *Controller) handleDelete(obj interface{}) { + key, ok := c.keyFunc(obj) + if !ok { + return + } + klog.Infof("ClusterMonitoring deleted: %s", key) + c.queue.Add(key) +} + +func (c *Controller) keyFunc(obj interface{}) (string, bool) { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) + if err != nil { + klog.Errorf("Creating key for ClusterMonitoring object failed: %v", err) + return key, false + } + return key, true +} diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 91f8e6ecc7..113be6d6b9 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -24,6 +24,7 @@ import ( "github.com/blang/semver/v4" configv1 "github.com/openshift/api/config/v1" + configv1alpha1 "github.com/openshift/api/config/v1alpha1" "github.com/openshift/api/features" configv1client "github.com/openshift/client-go/config/clientset/versioned" configv1informers "github.com/openshift/client-go/config/informers/externalversions" @@ -31,6 +32,7 @@ import ( "github.com/openshift/library-go/pkg/operator/configobserver/featuregates" "github.com/openshift/library-go/pkg/operator/csr" "github.com/openshift/library-go/pkg/operator/events" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" certapiv1 "k8s.io/api/certificates/v1" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -45,9 +47,11 @@ import ( "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" "k8s.io/utils/clock" + "k8s.io/utils/ptr" "github.com/openshift/cluster-monitoring-operator/pkg/alert" "github.com/openshift/cluster-monitoring-operator/pkg/client" + "github.com/openshift/cluster-monitoring-operator/pkg/clustermonitoring" "github.com/openshift/cluster-monitoring-operator/pkg/manifests" "github.com/openshift/cluster-monitoring-operator/pkg/metrics" "github.com/openshift/cluster-monitoring-operator/pkg/tasks" @@ -177,12 +181,13 @@ const ( type Operator struct { namespace, namespaceUserWorkload string - configMapName string - userWorkloadConfigMapName string - images map[string]string - telemetryMatches []string - remoteWrite bool - CollectionProfilesEnabled bool + configMapName string + userWorkloadConfigMapName string + images map[string]string + telemetryMatches []string + remoteWrite bool + CollectionProfilesEnabled bool + ClusterMonitoringConfigEnabled bool lastKnowInfrastructureConfig *InfrastructureConfig lastKnowProxyConfig *ProxyConfig @@ -202,8 +207,9 @@ type Operator struct { assets *manifests.Assets - ruleController *alert.RuleController - relabelController *alert.RelabelConfigController + ruleController *alert.RuleController + relabelController *alert.RelabelConfigController + clusterMonitoringController *clustermonitoring.Controller } func New( @@ -452,10 +458,23 @@ func New( return nil, err } o.CollectionProfilesEnabled = featureGates.Enabled(features.FeatureGateMetricsCollectionProfiles) + o.ClusterMonitoringConfigEnabled = featureGates.Enabled(features.FeatureGateClusterMonitoringConfig) case <-time.After(1 * time.Minute): return nil, fmt.Errorf("timed out waiting for FeatureGate detection") } + // Only create the ClusterMonitoring controller if the feature gate is enabled + if o.ClusterMonitoringConfigEnabled { + clusterMonitoringController, err := clustermonitoring.NewController(ctx, c, version, func() { + key := o.namespace + "/" + o.configMapName + o.enqueue(key) + }) + if err != nil { + return nil, fmt.Errorf("failed to create cluster monitoring controller: %w", err) + } + o.clusterMonitoringController = clusterMonitoringController + } + // csrController runs a controller that requests a client TLS certificate // for Prometheus k8s. This certificate is used to authenticate against the // /metrics endpoint of the targets. @@ -534,6 +553,10 @@ func New( o.relabelController.Run, ) + if o.clusterMonitoringController != nil { + o.controllersToRunFunc = append(o.controllersToRunFunc, o.clusterMonitoringController.Run) + } + return o, nil } @@ -964,6 +987,96 @@ func (o *Operator) loadUserWorkloadConfig(ctx context.Context) (*manifests.UserW return manifests.NewUserWorkloadConfigFromConfigMap(userCM) } +func (o *Operator) mergeClusterMonitoringCRD(ctx context.Context, c *manifests.Config) error { + if !o.ClusterMonitoringConfigEnabled { + return nil + } + + crd, err := o.client.GetClusterMonitoring(ctx, clusterResourceName) + if err != nil { + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to get ClusterMonitoring CRD: %w", err) + } + + if err := o.mergeAlertmanagerConfig(c, &crd.Spec.AlertmanagerConfig); err != nil { + return fmt.Errorf("failed to merge AlertmanagerConfig: %w", err) + } + + return nil +} + +func (o *Operator) mergeAlertmanagerConfig(c *manifests.Config, amConfig *configv1alpha1.AlertmanagerConfig) error { + if c.ClusterMonitoringConfiguration.AlertmanagerMainConfig == nil { + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig = &manifests.AlertmanagerMainConfig{} + } + + switch amConfig.DeploymentMode { + case configv1alpha1.AlertManagerDeployModeDisabled: + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Enabled = ptr.To(false) + + case configv1alpha1.AlertManagerDeployModeDefaultConfig: + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Enabled = ptr.To(true) + + case configv1alpha1.AlertManagerDeployModeCustomConfig: + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Enabled = ptr.To(true) + + cfg := &amConfig.CustomConfig + + if cfg.LogLevel != "" { + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.LogLevel = strings.ToLower(string(cfg.LogLevel)) + } + + if len(cfg.NodeSelector) > 0 { + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.NodeSelector = cfg.NodeSelector + } + + if len(cfg.Resources) > 0 { + resources := &v1.ResourceRequirements{ + Requests: v1.ResourceList{}, + Limits: v1.ResourceList{}, + } + for _, res := range cfg.Resources { + if !res.Request.IsZero() { + resources.Requests[v1.ResourceName(res.Name)] = res.Request + } + if !res.Limit.IsZero() { + resources.Limits[v1.ResourceName(res.Name)] = res.Limit + } + } + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Resources = resources + } + + if len(cfg.Secrets) > 0 { + secrets := make([]string, len(cfg.Secrets)) + for i, s := range cfg.Secrets { + secrets[i] = string(s) + } + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Secrets = secrets + } + + if len(cfg.Tolerations) > 0 { + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Tolerations = cfg.Tolerations + } + + if len(cfg.TopologySpreadConstraints) > 0 { + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.TopologySpreadConstraints = cfg.TopologySpreadConstraints + } + + if cfg.VolumeClaimTemplate != nil { + c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.VolumeClaimTemplate = &monv1.EmbeddedPersistentVolumeClaim{ + Spec: cfg.VolumeClaimTemplate.Spec, + } + } + + default: + return fmt.Errorf("unknown DeploymentMode: %s", amConfig.DeploymentMode) + } + + return nil +} + func (o *Operator) loadConfig(key string) (*manifests.Config, error) { obj, found, err := o.cmapInf.GetStore().GetByKey(key) if err != nil { @@ -985,6 +1098,11 @@ func (o *Operator) Config(ctx context.Context, key string) (*manifests.Config, e return nil, err } + err = o.mergeClusterMonitoringCRD(ctx, c) + if err != nil { + klog.Warningf("failed to merge ClusterMonitoring CRD configuration: %v", err) + } + err = c.Precheck() if err != nil { return nil, err diff --git a/pkg/operator/operator_test.go b/pkg/operator/operator_test.go index 2bb994ca96..a9de9861fb 100644 --- a/pkg/operator/operator_test.go +++ b/pkg/operator/operator_test.go @@ -21,7 +21,9 @@ import ( "testing" configv1 "github.com/openshift/api/config/v1" + configv1alpha1 "github.com/openshift/api/config/v1alpha1" "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/api/resource" apiutilerrors "k8s.io/apimachinery/pkg/util/errors" "github.com/openshift/cluster-monitoring-operator/pkg/client" @@ -558,3 +560,143 @@ func TestGenerateRunReportFromTaskErrors(t *testing.T) { }) } } + +func TestMergeAlertmanagerConfig(t *testing.T) { + for _, tc := range []struct { + name string + baseConfig *manifests.Config + crdConfig *configv1alpha1.AlertmanagerConfig + expectError bool + validate func(*testing.T, *manifests.Config) + }{ + { + name: "disabled mode", + baseConfig: &manifests.Config{ + ClusterMonitoringConfiguration: &manifests.ClusterMonitoringConfiguration{ + AlertmanagerMainConfig: &manifests.AlertmanagerMainConfig{}, + }, + }, + crdConfig: &configv1alpha1.AlertmanagerConfig{ + DeploymentMode: configv1alpha1.AlertManagerDeployModeDisabled, + }, + validate: func(t *testing.T, c *manifests.Config) { + if c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Enabled == nil { + t.Error("expected Enabled to be set") + return + } + if *c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Enabled { + t.Error("expected Enabled to be false") + } + }, + }, + { + name: "default config mode", + baseConfig: &manifests.Config{ + ClusterMonitoringConfiguration: &manifests.ClusterMonitoringConfiguration{ + AlertmanagerMainConfig: &manifests.AlertmanagerMainConfig{}, + }, + }, + crdConfig: &configv1alpha1.AlertmanagerConfig{ + DeploymentMode: configv1alpha1.AlertManagerDeployModeDefaultConfig, + }, + validate: func(t *testing.T, c *manifests.Config) { + if c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Enabled == nil { + t.Error("expected Enabled to be set") + return + } + if !*c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Enabled { + t.Error("expected Enabled to be true") + } + }, + }, + { + name: "custom config with log level", + baseConfig: &manifests.Config{ + ClusterMonitoringConfiguration: &manifests.ClusterMonitoringConfiguration{ + AlertmanagerMainConfig: &manifests.AlertmanagerMainConfig{}, + }, + }, + crdConfig: &configv1alpha1.AlertmanagerConfig{ + DeploymentMode: configv1alpha1.AlertManagerDeployModeCustomConfig, + CustomConfig: configv1alpha1.AlertmanagerCustomConfig{ + LogLevel: configv1alpha1.LogLevelDebug, + }, + }, + validate: func(t *testing.T, c *manifests.Config) { + if c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.LogLevel != "debug" { + t.Errorf("expected LogLevel debug, got %s", c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.LogLevel) + } + }, + }, + { + name: "custom config with resources", + baseConfig: &manifests.Config{ + ClusterMonitoringConfiguration: &manifests.ClusterMonitoringConfiguration{ + AlertmanagerMainConfig: &manifests.AlertmanagerMainConfig{}, + }, + }, + crdConfig: &configv1alpha1.AlertmanagerConfig{ + DeploymentMode: configv1alpha1.AlertManagerDeployModeCustomConfig, + CustomConfig: configv1alpha1.AlertmanagerCustomConfig{ + Resources: []configv1alpha1.ContainerResource{ + { + Name: "cpu", + Request: resource.MustParse("100m"), + }, + }, + }, + }, + validate: func(t *testing.T, c *manifests.Config) { + if c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Resources == nil { + t.Error("expected Resources to be set") + return + } + if cpu := c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.Resources.Requests.Cpu().String(); cpu != "100m" { + t.Errorf("expected CPU request 100m, got %s", cpu) + } + }, + }, + { + name: "custom config overrides configmap", + baseConfig: &manifests.Config{ + ClusterMonitoringConfiguration: &manifests.ClusterMonitoringConfiguration{ + AlertmanagerMainConfig: &manifests.AlertmanagerMainConfig{ + LogLevel: "info", + }, + }, + }, + crdConfig: &configv1alpha1.AlertmanagerConfig{ + DeploymentMode: configv1alpha1.AlertManagerDeployModeCustomConfig, + CustomConfig: configv1alpha1.AlertmanagerCustomConfig{ + LogLevel: configv1alpha1.LogLevelDebug, + }, + }, + validate: func(t *testing.T, c *manifests.Config) { + if c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.LogLevel != "debug" { + t.Errorf("expected CRD to override ConfigMap, got %s", c.ClusterMonitoringConfiguration.AlertmanagerMainConfig.LogLevel) + } + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + o := &Operator{} + err := o.mergeAlertmanagerConfig(tc.baseConfig, tc.crdConfig) + + if tc.expectError { + if err == nil { + t.Error("expected error, got nil") + } + return + } + + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + + if tc.validate != nil { + tc.validate(t, tc.baseConfig) + } + }) + } +} diff --git a/test/e2e/cluster_monitoring_test.go b/test/e2e/cluster_monitoring_test.go new file mode 100644 index 0000000000..3b5206ad87 --- /dev/null +++ b/test/e2e/cluster_monitoring_test.go @@ -0,0 +1,154 @@ +// Copyright 2025 The Cluster Monitoring Operator Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "testing" + "time" + + configv1alpha1 "github.com/openshift/api/config/v1alpha1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" +) + +const ( + clusterMonitoringName = "cluster" +) + +func TestClusterMonitoring(t *testing.T) { + ctx := context.Background() + clusterMonitorings := f.OpenShiftConfigClient.ConfigV1alpha1().ClusterMonitorings() + + _, err := clusterMonitorings.List(ctx, metav1.ListOptions{Limit: 1}) + if err != nil { + if apierrors.IsNotFound(err) { + t.Skip("ClusterMonitoring CRD not available - ClusterMonitoringConfig feature gate may not be enabled") + return + } + t.Fatalf("unexpected error checking ClusterMonitoring CRD availability: %v", err) + } + + cm := &configv1alpha1.ClusterMonitoring{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterMonitoringName, + }, + Spec: configv1alpha1.ClusterMonitoringSpec{ + AlertmanagerConfig: configv1alpha1.AlertmanagerConfig{ + DeploymentMode: configv1alpha1.AlertManagerDeployModeDefaultConfig, + }, + }, + } + + createdCM, err := clusterMonitorings.Create(ctx, cm, metav1.CreateOptions{}) + if err != nil { + if !apierrors.IsAlreadyExists(err) { + t.Fatalf("failed to create ClusterMonitoring: %v", err) + } + existingCM, err := clusterMonitorings.Get(ctx, clusterMonitoringName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("failed to get existing ClusterMonitoring: %v", err) + } + existingCM.Spec = cm.Spec + createdCM, err = clusterMonitorings.Update(ctx, existingCM, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("failed to update ClusterMonitoring: %v", err) + } + } + + t.Logf("configured ClusterMonitoring resource: %s", createdCM.Name) + + err = wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) { + retrievedCM, err := clusterMonitorings.Get(ctx, clusterMonitoringName, metav1.GetOptions{}) + if err != nil { + t.Logf("waiting for ClusterMonitoring: %v", err) + return false, nil + } + + if retrievedCM.Spec.AlertmanagerConfig.DeploymentMode != configv1alpha1.AlertManagerDeployModeDefaultConfig { + t.Logf("waiting for correct DeploymentMode, got: %s", retrievedCM.Spec.AlertmanagerConfig.DeploymentMode) + return false, nil + } + + return true, nil + }) + + if err != nil { + t.Fatalf("ClusterMonitoring resource not properly created: %v", err) + } + + t.Log("updating ClusterMonitoring resource") + err = wait.PollImmediate(2*time.Second, 30*time.Second, func() (bool, error) { + currentCM, err := clusterMonitorings.Get(ctx, clusterMonitoringName, metav1.GetOptions{}) + if err != nil { + return false, err + } + + currentCM.Spec.AlertmanagerConfig.DeploymentMode = configv1alpha1.AlertManagerDeployModeCustomConfig + currentCM.Spec.AlertmanagerConfig.CustomConfig = configv1alpha1.AlertmanagerCustomConfig{ + LogLevel: configv1alpha1.LogLevelInfo, + } + + _, err = clusterMonitorings.Update(ctx, currentCM, metav1.UpdateOptions{}) + if err != nil { + t.Logf("Retrying update due to: %v", err) + return false, nil // Retry on conflict + } + + return true, nil + }) + + if err != nil { + t.Fatalf("Failed to update ClusterMonitoring: %v", err) + } + + updatedCM, err := clusterMonitorings.Get(ctx, clusterMonitoringName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Failed to get updated ClusterMonitoring: %v", err) + } + + if updatedCM.Spec.AlertmanagerConfig.DeploymentMode != configv1alpha1.AlertManagerDeployModeCustomConfig { + t.Errorf("Expected DeploymentMode to be CustomConfig, got: %s", updatedCM.Spec.AlertmanagerConfig.DeploymentMode) + } + + if updatedCM.Spec.AlertmanagerConfig.CustomConfig.LogLevel != configv1alpha1.LogLevelInfo { + t.Errorf("expected LogLevel Info, got: %s", updatedCM.Spec.AlertmanagerConfig.CustomConfig.LogLevel) + } + + t.Log("verifying alertmanager deployment reflects CRD configuration") + err = wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) { + am, err := f.MonitoringClient.Alertmanagers(f.Ns).Get(ctx, "main", metav1.GetOptions{}) + if err != nil { + t.Logf("waiting for alertmanager: %v", err) + return false, nil + } + + if am.Status.UpdatedReplicas < 1 { + t.Logf("waiting for alertmanager replicas, current: %d", am.Status.UpdatedReplicas) + return false, nil + } + + if am.Spec.LogLevel != "info" { + t.Logf("unexpected log level: %s", am.Spec.LogLevel) + } + + return true, nil + }) + + if err != nil { + t.Fatalf("alertmanager not properly configured from CRD: %v", err) + } +}