Skip to content

Commit 6323f53

Browse files
committed
operator: implement ClusterMonitoring CRD AlertmanagerConfig support
Adds a ClusterMonitoring controller that watches the CRD and triggers reconciliation. Implements merge logic to apply AlertmanagerConfig settings from the CRD over the existing ConfigMap configuration. Supports three deployment modes (Disabled, DefaultConfig, CustomConfig) with fields for pod scheduling, resources, secrets, volumeClaimTemplate and logLevel.
1 parent 84c349e commit 6323f53

File tree

8 files changed

+617
-8
lines changed

8 files changed

+617
-8
lines changed

jsonnet/components/cluster-monitoring-operator.libsonnet

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,11 @@ function(params) {
247247
resources: ['featuregates'],
248248
verbs: ['get', 'list', 'watch'],
249249
},
250+
{
251+
apiGroups: ['config.openshift.io'],
252+
resources: ['clustermonitorings'],
253+
verbs: ['get', 'list', 'watch'],
254+
},
250255
{
251256
apiGroups: ['certificates.k8s.io'],
252257
resources: ['certificatesigningrequests'],

manifests/0000_50_cluster-monitoring-operator_02-role.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,14 @@ rules:
135135
- get
136136
- list
137137
- watch
138+
- apiGroups:
139+
- config.openshift.io
140+
resources:
141+
- clustermonitorings
142+
verbs:
143+
- get
144+
- list
145+
- watch
138146
- apiGroups:
139147
- certificates.k8s.io
140148
resources:

manifests/0000_50_cluster-monitoring-operator_06-clusteroperator.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,6 @@ status:
4040
- group: monitoring.coreos.com
4141
name: ''
4242
resource: alertmanagerconfigs
43+
- group: config.openshift.io
44+
name: cluster
45+
resource: clustermonitorings

pkg/client/client.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/imdario/mergo"
2929
configv1 "github.com/openshift/api/config/v1"
30+
configv1alpha1 "github.com/openshift/api/config/v1alpha1"
3031
consolev1 "github.com/openshift/api/console/v1"
3132
osmv1 "github.com/openshift/api/monitoring/v1"
3233
routev1 "github.com/openshift/api/route/v1"
@@ -417,6 +418,15 @@ func (c *Client) ClusterOperatorListWatch(ctx context.Context, name string) *cac
417418
}
418419
}
419420

421+
func (c *Client) ClusterMonitoringListWatch() *cache.ListWatch {
422+
return cache.NewListWatchFromClient(
423+
c.oscclient.ConfigV1alpha1().RESTClient(),
424+
"clustermonitorings",
425+
"",
426+
fields.Everything(),
427+
)
428+
}
429+
420430
func (c *Client) HasRouteCapability(ctx context.Context) (bool, error) {
421431
_, err := c.oscclient.ConfigV1().ClusterOperators().Get(ctx, "ingress", metav1.GetOptions{})
422432
if apierrors.IsNotFound(err) {
@@ -595,6 +605,10 @@ func (c *Client) GetConsoleConfig(ctx context.Context, name string) (*configv1.C
595605
return c.oscclient.ConfigV1().Consoles().Get(ctx, name, metav1.GetOptions{})
596606
}
597607

608+
func (c *Client) GetClusterMonitoring(ctx context.Context, name string) (*configv1alpha1.ClusterMonitoring, error) {
609+
return c.oscclient.ConfigV1alpha1().ClusterMonitorings().Get(ctx, name, metav1.GetOptions{})
610+
}
611+
598612
// GetConfigmap fetches the ConfigMap called name from namespace via the core
// v1 client, using default GetOptions. The result and error of the underlying
// Get call are returned unchanged.
func (c *Client) GetConfigmap(ctx context.Context, namespace, name string) (*v1.ConfigMap, error) {
	configMaps := c.kclient.CoreV1().ConfigMaps(namespace)

	return configMaps.Get(ctx, name, metav1.GetOptions{})
}
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
// Copyright 2025 The Cluster Monitoring Operator Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package clustermonitoring
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"time"
21+
22+
configv1alpha1 "github.com/openshift/api/config/v1alpha1"
23+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
24+
"k8s.io/client-go/tools/cache"
25+
"k8s.io/client-go/util/workqueue"
26+
"k8s.io/klog/v2"
27+
28+
"github.com/openshift/cluster-monitoring-operator/pkg/client"
29+
)
30+
31+
// Tunables for the ClusterMonitoring controller's informer and work queue.
const (
32+
// controllerName is used as the Name of the controller's rate-limited work queue.
controllerName = "cluster-monitoring"
33+
// resyncPeriod is the resync interval passed to the shared informer.
resyncPeriod = 15 * time.Minute
34+
// queueBaseDelay is the base delay of the per-item exponential failure rate limiter.
queueBaseDelay = 50 * time.Millisecond
35+
// queueMaxDelay is the maximum delay of the per-item exponential failure rate limiter.
queueMaxDelay = 3 * time.Minute
36+
)
37+
38+
// Controller watches ClusterMonitoring resources through a shared informer,
// queues the key of every added, updated or deleted object, and invokes
// triggerReconcile for each key it processes.
39+
type Controller struct {
40+
// client is the API client whose ClusterMonitoringListWatch backs the informer.
client *client.Client
41+
// queue holds the string keys of ClusterMonitoring objects awaiting processing.
queue workqueue.TypedRateLimitingInterface[string]
42+
// informer delivers add/update/delete events for ClusterMonitoring objects.
informer cache.SharedIndexInformer
43+
// triggerReconcile is called once per processed key; a nil value is skipped.
triggerReconcile func()
44+
}
45+
46+
// NewController returns a new ClusterMonitoring controller.
47+
func NewController(ctx context.Context, client *client.Client, version string, triggerReconcile func()) (*Controller, error) {
48+
informer := cache.NewSharedIndexInformer(
49+
client.ClusterMonitoringListWatch(),
50+
&configv1alpha1.ClusterMonitoring{},
51+
resyncPeriod,
52+
cache.Indexers{},
53+
)
54+
55+
queue := workqueue.NewTypedRateLimitingQueueWithConfig[string](
56+
workqueue.NewTypedItemExponentialFailureRateLimiter[string](queueBaseDelay, queueMaxDelay),
57+
workqueue.TypedRateLimitingQueueConfig[string]{Name: controllerName},
58+
)
59+
60+
controller := &Controller{
61+
client: client,
62+
queue: queue,
63+
informer: informer,
64+
triggerReconcile: triggerReconcile,
65+
}
66+
67+
_, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
68+
AddFunc: controller.handleAdd,
69+
UpdateFunc: controller.handleUpdate,
70+
DeleteFunc: controller.handleDelete,
71+
})
72+
if err != nil {
73+
return nil, err
74+
}
75+
76+
return controller, nil
77+
}
78+
79+
// Run starts the controller.
80+
func (c *Controller) Run(ctx context.Context, workers int) {
81+
klog.Info("Starting ClusterMonitoring controller")
82+
defer c.queue.ShutDown()
83+
84+
go c.informer.Run(ctx.Done())
85+
86+
if !cache.WaitForNamedCacheSync("ClusterMonitoring controller", ctx.Done(), c.informer.HasSynced) {
87+
klog.Error("Failed to sync ClusterMonitoring controller cache")
88+
return
89+
}
90+
91+
go c.worker(ctx)
92+
93+
klog.Info("ClusterMonitoring controller started")
94+
<-ctx.Done()
95+
klog.Info("ClusterMonitoring controller stopped")
96+
}
97+
98+
func (c *Controller) worker(ctx context.Context) {
99+
for c.processNextWorkItem(ctx) {
100+
}
101+
}
102+
103+
func (c *Controller) processNextWorkItem(ctx context.Context) bool {
104+
key, quit := c.queue.Get()
105+
if quit {
106+
return false
107+
}
108+
defer c.queue.Done(key)
109+
110+
if err := c.sync(ctx, key); err != nil {
111+
utilruntime.HandleError(fmt.Errorf("error syncing ClusterMonitoring (%s): %w", key, err))
112+
c.queue.AddRateLimited(key)
113+
return true
114+
}
115+
116+
klog.V(4).Infof("ClusterMonitoring successfully synced: %s", key)
117+
c.queue.Forget(key)
118+
return true
119+
}
120+
121+
func (c *Controller) sync(ctx context.Context, key string) error {
122+
klog.V(4).Infof("ClusterMonitoring controller processing: %s", key)
123+
124+
if c.triggerReconcile != nil {
125+
c.triggerReconcile()
126+
}
127+
128+
return nil
129+
}
130+
131+
func (c *Controller) handleAdd(obj interface{}) {
132+
key, ok := c.keyFunc(obj)
133+
if !ok {
134+
return
135+
}
136+
klog.Infof("ClusterMonitoring added: %s", key)
137+
c.queue.Add(key)
138+
}
139+
140+
func (c *Controller) handleUpdate(oldObj, newObj interface{}) {
141+
key, ok := c.keyFunc(newObj)
142+
if !ok {
143+
return
144+
}
145+
klog.Infof("ClusterMonitoring updated: %s", key)
146+
c.queue.Add(key)
147+
}
148+
149+
func (c *Controller) handleDelete(obj interface{}) {
150+
key, ok := c.keyFunc(obj)
151+
if !ok {
152+
return
153+
}
154+
klog.Infof("ClusterMonitoring deleted: %s", key)
155+
c.queue.Add(key)
156+
}
157+
158+
func (c *Controller) keyFunc(obj interface{}) (string, bool) {
159+
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
160+
if err != nil {
161+
klog.Errorf("Creating key for ClusterMonitoring object failed: %v", err)
162+
return key, false
163+
}
164+
return key, true
165+
}

0 commit comments

Comments
 (0)