diff --git a/api/config.go b/api/config.go index e0b2c796d..8e3befcdc 100644 --- a/api/config.go +++ b/api/config.go @@ -3,6 +3,8 @@ package api import ( "net/http" "time" + + "github.com/moira-alert/moira" ) // WebContact is container for web ui contact validation. @@ -30,21 +32,27 @@ type Sentry struct { // Config for api configuration variables. type Config struct { - EnableCORS bool - Listen string - GraphiteLocalMetricTTL time.Duration - GraphiteRemoteMetricTTL time.Duration - PrometheusRemoteMetricTTL time.Duration - Flags FeatureFlags + EnableCORS bool + Listen string + MetricsTTL map[moira.ClusterKey]time.Duration + Flags FeatureFlags } // WebConfig is container for web ui configuration parameters. type WebConfig struct { - SupportEmail string `json:"supportEmail,omitempty" example:"opensource@skbkontur.com"` - RemoteAllowed bool `json:"remoteAllowed" example:"true"` - Contacts []WebContact `json:"contacts"` - FeatureFlags FeatureFlags `json:"featureFlags"` - Sentry Sentry `json:"sentry"` + SupportEmail string `json:"supportEmail,omitempty" example:"opensource@skbkontur.com"` + RemoteAllowed bool `json:"remoteAllowed" example:"true"` + MetricSourceClusters []MetricSourceCluster `json:"metric_source_clusters"` + Contacts []WebContact `json:"contacts"` + FeatureFlags FeatureFlags `json:"featureFlags"` + Sentry Sentry `json:"sentry"` +} + +// MetricSourceCluster contains data about supported metric source cluster +type MetricSourceCluster struct { + TriggerSource moira.TriggerSource `json:"trigger_source" example:"graphite_remote"` + ClusterId moira.ClusterId `json:"cluster_id" example:"default"` + ClusterName string `json:"cluster_name" example:"Graphite Remote Prod"` } func (WebConfig) Render(w http.ResponseWriter, r *http.Request) error { diff --git a/api/controller/trigger.go b/api/controller/trigger.go index b4e6c8cb7..2e035f757 100644 --- a/api/controller/trigger.go +++ b/api/controller/trigger.go @@ -56,7 +56,7 @@ func saveTrigger(dataBase 
moira.Database, trigger *moira.Trigger, triggerID stri lastCheck.UpdateScore() } - if err = dataBase.SetTriggerLastCheck(triggerID, &lastCheck, trigger.TriggerSource); err != nil { + if err = dataBase.SetTriggerLastCheck(triggerID, &lastCheck, trigger.ClusterKey()); err != nil { return nil, api.ErrorInternalServer(err) } diff --git a/api/controller/trigger_metrics.go b/api/controller/trigger_metrics.go index 5a2007dfa..7efd2cccf 100644 --- a/api/controller/trigger_metrics.go +++ b/api/controller/trigger_metrics.go @@ -116,7 +116,7 @@ func deleteTriggerMetrics(dataBase moira.Database, metricName string, triggerID return api.ErrorInternalServer(err) } - if err = dataBase.SetTriggerLastCheck(triggerID, &lastCheck, trigger.TriggerSource); err != nil { + if err = dataBase.SetTriggerLastCheck(triggerID, &lastCheck, trigger.ClusterKey()); err != nil { return api.ErrorInternalServer(err) } diff --git a/api/controller/trigger_metrics_test.go b/api/controller/trigger_metrics_test.go index 92442c07d..a3d7736cc 100644 --- a/api/controller/trigger_metrics_test.go +++ b/api/controller/trigger_metrics_test.go @@ -40,7 +40,7 @@ func TestDeleteTriggerMetric(t *testing.T) { dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(expectedLastCheck, nil) dataBase.EXPECT().RemovePatternsMetrics(trigger.Patterns).Return(nil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.TriggerSource) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.ClusterKey()) err := DeleteTriggerMetric(dataBase, "super.metric1", triggerID) So(err, ShouldBeNil) So(expectedLastCheck, ShouldResemble, emptyLastCheck) @@ -53,7 +53,7 @@ func TestDeleteTriggerMetric(t *testing.T) { dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(expectedLastCheck, nil) dataBase.EXPECT().RemovePatternsMetrics(trigger.Patterns).Return(nil) - 
dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.TriggerSource) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.ClusterKey()) err := DeleteTriggerMetric(dataBase, "super.metric1", triggerID) So(err, ShouldBeNil) So(expectedLastCheck, ShouldResemble, emptyLastCheck) @@ -117,7 +117,7 @@ func TestDeleteTriggerMetric(t *testing.T) { dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(lastCheck, nil) dataBase.EXPECT().RemovePatternsMetrics(trigger.Patterns).Return(nil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.TriggerSource).Return(expected) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.ClusterKey()).Return(expected) err := DeleteTriggerMetric(dataBase, "super.metric1", triggerID) So(err, ShouldResemble, api.ErrorInternalServer(expected)) }) @@ -175,7 +175,7 @@ func TestDeleteTriggerNodataMetrics(t *testing.T) { dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(expectedLastCheck, nil) dataBase.EXPECT().RemovePatternsMetrics(trigger.Patterns).Return(nil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.TriggerSource) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.ClusterKey()) err := DeleteTriggerNodataMetrics(dataBase, triggerID) So(err, ShouldBeNil) So(expectedLastCheck, ShouldResemble, emptyLastCheck) @@ -188,7 +188,7 @@ func TestDeleteTriggerNodataMetrics(t *testing.T) { dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(expectedLastCheck, nil) dataBase.EXPECT().RemovePatternsMetrics(trigger.Patterns).Return(nil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.TriggerSource) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.ClusterKey()) err := 
DeleteTriggerNodataMetrics(dataBase, triggerID) So(err, ShouldBeNil) So(expectedLastCheck, ShouldResemble, emptyLastCheck) @@ -201,7 +201,7 @@ func TestDeleteTriggerNodataMetrics(t *testing.T) { dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(expectedLastCheck, nil) dataBase.EXPECT().RemovePatternsMetrics(trigger.Patterns).Return(nil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheckWithoutNodata, trigger.TriggerSource) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheckWithoutNodata, trigger.ClusterKey()) err := DeleteTriggerNodataMetrics(dataBase, triggerID) So(err, ShouldBeNil) So(expectedLastCheck, ShouldResemble, lastCheckWithoutNodata) @@ -214,7 +214,7 @@ func TestDeleteTriggerNodataMetrics(t *testing.T) { dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(expectedLastCheck, nil) dataBase.EXPECT().RemovePatternsMetrics(trigger.Patterns).Return(nil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.TriggerSource) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &expectedLastCheck, trigger.ClusterKey()) err := DeleteTriggerNodataMetrics(dataBase, triggerID) So(err, ShouldBeNil) So(expectedLastCheck, ShouldResemble, emptyLastCheck) @@ -269,7 +269,7 @@ func TestGetTriggerMetrics(t *testing.T) { localSource := mock_metric_source.NewMockMetricSource(mockCtrl) remoteSource := mock_metric_source.NewMockMetricSource(mockCtrl) fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl) - sourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, nil) + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, remoteSource, nil) pattern := "super.puper.pattern" metric := "super.puper.metric" @@ -277,27 +277,16 @@ func TestGetTriggerMetrics(t *testing.T) { var until int64 = 67 var retention int64 = 10 - Convey("Trigger is remote but remote is not configured", 
t, func() { - dataBase.EXPECT().GetTrigger(triggerID).Return(moira.Trigger{ - ID: triggerID, - Targets: []string{pattern}, - TriggerSource: moira.GraphiteRemote, - }, nil) - remoteSource.EXPECT().IsConfigured().Return(false, nil) - triggerMetrics, err := GetTriggerMetrics(dataBase, sourceProvider, from, until, triggerID) - So(err, ShouldResemble, api.ErrorInternalServer(metricSource.ErrMetricSourceIsNotConfigured)) - So(triggerMetrics, ShouldBeNil) - }) - - Convey("Trigger is remote but remote has bad config", t, func() { - dataBase.EXPECT().GetTrigger(triggerID).Return(moira.Trigger{ + Convey("Trigger is prometheus remote but prometheus remote is not in a registered source", t, func() { + trigger := moira.Trigger{ ID: triggerID, Targets: []string{pattern}, - TriggerSource: moira.GraphiteRemote, - }, nil) - remoteSource.EXPECT().IsConfigured().Return(false, remote.ErrRemoteStorageDisabled) + TriggerSource: moira.PrometheusRemote, + ClusterId: moira.DefaultCluster, + } + dataBase.EXPECT().GetTrigger(triggerID).Return(trigger, nil) triggerMetrics, err := GetTriggerMetrics(dataBase, sourceProvider, from, until, triggerID) - So(err, ShouldResemble, api.ErrorInternalServer(remote.ErrRemoteStorageDisabled)) + So(err, ShouldResemble, api.ErrorInternalServer(fmt.Errorf("unknown metric source with cluster key `%s`", trigger.ClusterKey().String()))) So(triggerMetrics, ShouldBeNil) }) @@ -306,8 +295,8 @@ func TestGetTriggerMetrics(t *testing.T) { ID: triggerID, Targets: []string{pattern}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, nil) - localSource.EXPECT().IsConfigured().Return(true, nil) localSource.EXPECT().Fetch(pattern, from, until, false).Return(fetchResult, nil) fetchResult.EXPECT().GetMetricsData().Return([]metricSource.MetricData{*metricSource.MakeMetricData(metric, []float64{0, 1, 2, 3, 4}, retention, from)}) triggerMetrics, err := GetTriggerMetrics(dataBase, sourceProvider, from, until, triggerID) @@ -339,8 +328,8 @@ func 
TestGetTriggerMetrics(t *testing.T) { ID: triggerID, Targets: []string{pattern}, TriggerSource: moira.GraphiteRemote, + ClusterId: moira.DefaultCluster, }, nil) - remoteSource.EXPECT().IsConfigured().Return(true, nil) remoteSource.EXPECT().Fetch(pattern, from, until, false).Return(nil, expectedError) triggerMetrics, err := GetTriggerMetrics(dataBase, sourceProvider, from, until, triggerID) diff --git a/api/controller/trigger_test.go b/api/controller/trigger_test.go index e6f9deb4c..29b6939e3 100644 --- a/api/controller/trigger_test.go +++ b/api/controller/trigger_test.go @@ -27,7 +27,7 @@ func TestUpdateTrigger(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(gomock.Any(), 30) dataBase.EXPECT().DeleteTriggerCheckLock(gomock.Any()) dataBase.EXPECT().GetTriggerLastCheck(gomock.Any()).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(gomock.Any(), gomock.Any(), trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(gomock.Any(), gomock.Any(), trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(gomock.Any(), trigger).Return(nil) resp, err := UpdateTrigger(dataBase, &triggerModel, triggerModel.ID, make(map[string]bool)) So(err, ShouldBeNil) @@ -77,7 +77,7 @@ func TestSaveTrigger(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, gomock.Any(), trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, gomock.Any(), trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(nil) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldBeNil) @@ -88,7 +88,7 @@ func TestSaveTrigger(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) 
dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(actualLastCheck, nil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &emptyLastCheck, trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &emptyLastCheck, trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(nil) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldBeNil) @@ -101,7 +101,7 @@ func TestSaveTrigger(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, gomock.Any(), trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, gomock.Any(), trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(nil) resp, err := saveTrigger(dataBase, &trigger, triggerID, map[string]bool{"super.metric1": true, "super.metric2": true}) So(err, ShouldBeNil) @@ -133,7 +133,7 @@ func TestSaveTrigger(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, gomock.Any(), trigger.TriggerSource).Return(expected) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, gomock.Any(), trigger.ClusterKey()).Return(expected) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldResemble, api.ErrorInternalServer(expected)) So(resp, ShouldBeNil) @@ -144,7 +144,7 @@ func TestSaveTrigger(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) 
dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, gomock.Any(), trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, gomock.Any(), trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(expected) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldResemble, api.ErrorInternalServer(expected)) @@ -175,7 +175,7 @@ func TestVariousTtlState(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(nil) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldBeNil) @@ -190,7 +190,7 @@ func TestVariousTtlState(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(nil) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldBeNil) @@ -205,7 +205,7 @@ func TestVariousTtlState(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) 
- dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(nil) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldBeNil) @@ -220,7 +220,7 @@ func TestVariousTtlState(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(nil) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldBeNil) @@ -235,7 +235,7 @@ func TestVariousTtlState(t *testing.T) { dataBase.EXPECT().AcquireTriggerCheckLock(triggerID, 30) dataBase.EXPECT().DeleteTriggerCheckLock(triggerID) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.TriggerSource).Return(nil) + dataBase.EXPECT().SetTriggerLastCheck(triggerID, &lastCheck, trigger.ClusterKey()).Return(nil) dataBase.EXPECT().SaveTrigger(triggerID, &trigger).Return(nil) resp, err := saveTrigger(dataBase, &trigger, triggerID, make(map[string]bool)) So(err, ShouldBeNil) diff --git a/api/dto/triggers.go b/api/dto/triggers.go index 7e8a0d0d6..90cd1714b 100644 --- a/api/dto/triggers.go +++ b/api/dto/triggers.go @@ -71,8 +71,10 @@ type TriggerModel struct { // // Deprecated: Use TriggerSource field instead IsRemote bool `json:"is_remote" example:"false"` - // Shows the source from where the metrics are fetched + // Shows the type of source from where the metrics are 
fetched TriggerSource moira.TriggerSource `json:"trigger_source" example:"graphite_local"` + // Shows the exact cluster from where the metrics are fetched + ClusterId moira.ClusterId `json:"cluster_id" example:"default"` // If true, first event NODATA → OK will be omitted MuteNewMetrics bool `json:"mute_new_metrics" example:"false"` // A list of targets that have only alone metrics @@ -87,6 +89,11 @@ type TriggerModel struct { UpdatedBy string `json:"updated_by"` } +// ClusterKey returns cluster key composed of trigger source and cluster id associated with the trigger +func (trigger *TriggerModel) ClusterKey() moira.ClusterKey { + return moira.MakeClusterKey(trigger.TriggerSource, trigger.ClusterId) +} + // ToMoiraTrigger transforms TriggerModel to moira.Trigger func (model *TriggerModel) ToMoiraTrigger() *moira.Trigger { return &moira.Trigger{ @@ -104,6 +111,7 @@ func (model *TriggerModel) ToMoiraTrigger() *moira.Trigger { Expression: &model.Expression, Patterns: model.Patterns, TriggerSource: model.TriggerSource, + ClusterId: model.ClusterId, MuteNewMetrics: model.MuteNewMetrics, AloneMetrics: model.AloneMetrics, UpdatedBy: model.UpdatedBy, @@ -128,6 +136,7 @@ func CreateTriggerModel(trigger *moira.Trigger) TriggerModel { Patterns: trigger.Patterns, IsRemote: trigger.TriggerSource == moira.GraphiteRemote, TriggerSource: trigger.TriggerSource, + ClusterId: trigger.ClusterId, MuteNewMetrics: trigger.MuteNewMetrics, AloneMetrics: trigger.AloneMetrics, CreatedAt: getDateTime(trigger.CreatedAt), @@ -185,9 +194,10 @@ func (trigger *Trigger) Bind(request *http.Request) error { } trigger.TriggerSource = trigger.TriggerSource.FillInIfNotSet(trigger.IsRemote) + trigger.ClusterId = trigger.ClusterId.FillInIfNotSet() metricsSourceProvider := middleware.GetTriggerTargetsSourceProvider(request) - metricsSource, err := metricsSourceProvider.GetMetricSource(trigger.TriggerSource) + metricsSource, err := metricsSourceProvider.GetMetricSource(trigger.ClusterKey()) if err != nil { 
return err } diff --git a/api/dto/triggers_test.go b/api/dto/triggers_test.go index ddd710437..6b4d317aa 100644 --- a/api/dto/triggers_test.go +++ b/api/dto/triggers_test.go @@ -25,7 +25,7 @@ func TestTriggerValidation(t *testing.T) { localSource := mock_metric_source.NewMockMetricSource(mockCtrl) remoteSource := mock_metric_source.NewMockMetricSource(mockCtrl) fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl) - sourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, nil) + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, remoteSource, nil) request, _ := http.NewRequest("PUT", "/api/trigger", nil) request.Header.Set("Content-Type", "application/json") @@ -47,11 +47,11 @@ func TestTriggerValidation(t *testing.T) { TTLState: &moira.TTLStateNODATA, TTL: 600, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, MuteNewMetrics: false, } Convey("Test FallingTrigger", func() { - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() fetchResult.EXPECT().GetPatterns().Return(make([]string, 0), nil).AnyTimes() @@ -94,7 +94,6 @@ func TestTriggerValidation(t *testing.T) { }) }) Convey("Test RisingTrigger", func() { - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() fetchResult.EXPECT().GetPatterns().Return(make([]string, 0), nil).AnyTimes() @@ -137,7 +136,6 @@ func TestTriggerValidation(t *testing.T) { }) }) Convey("Test ExpressionTrigger", func() { - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() 
localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() fetchResult.EXPECT().GetPatterns().Return(make([]string, 0), nil).AnyTimes() @@ -173,7 +171,6 @@ func TestTriggerValidation(t *testing.T) { }) Convey("Test alone metrics", func() { - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() fetchResult.EXPECT().GetPatterns().Return(make([]string, 0), nil).AnyTimes() @@ -216,7 +213,6 @@ func TestTriggerValidation(t *testing.T) { }) Convey("Test patterns", func() { - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() fetchResult.EXPECT().GetMetricsData().Return([]metricSource.MetricData{*metricSource.MakeMetricData("", []float64{}, 0, 0)}).AnyTimes() @@ -272,6 +268,7 @@ func TestTriggerModel_ToMoiraTrigger(t *testing.T) { Expression: expression, Patterns: []string{"pattern-1", "pattern-2"}, TriggerSource: moira.GraphiteRemote, + ClusterId: moira.DefaultCluster, MuteNewMetrics: true, AloneMetrics: map[string]bool{ "t1": true, @@ -305,6 +302,7 @@ func TestTriggerModel_ToMoiraTrigger(t *testing.T) { Expression: &expression, Patterns: []string{"pattern-1", "pattern-2"}, TriggerSource: moira.GraphiteRemote, + ClusterId: moira.DefaultCluster, MuteNewMetrics: true, AloneMetrics: map[string]bool{ "t1": true, @@ -346,6 +344,7 @@ func TestCreateTriggerModel(t *testing.T) { Expression: &expression, Patterns: []string{"pattern-1", "pattern-2"}, TriggerSource: moira.GraphiteRemote, + ClusterId: moira.DefaultCluster, MuteNewMetrics: true, 
AloneMetrics: map[string]bool{ "t1": true, @@ -377,6 +376,7 @@ func TestCreateTriggerModel(t *testing.T) { Expression: expression, Patterns: []string{"pattern-1", "pattern-2"}, TriggerSource: moira.GraphiteRemote, + ClusterId: moira.DefaultCluster, IsRemote: true, MuteNewMetrics: true, AloneMetrics: map[string]bool{ diff --git a/api/handler/handler.go b/api/handler/handler.go index d62f4d1ab..0a16d7224 100644 --- a/api/handler/handler.go +++ b/api/handler/handler.go @@ -98,9 +98,7 @@ func NewHandler( router.Get("/config", getWebConfig(webConfig)) router.Route("/user", user) router.With(moiramiddle.Triggers( - apiConfig.GraphiteLocalMetricTTL, - apiConfig.GraphiteRemoteMetricTTL, - apiConfig.PrometheusRemoteMetricTTL, + apiConfig.MetricsTTL, )).Route("/trigger", triggers(metricSourceProvider, searchIndex)) router.Route("/tag", tag) router.Route("/pattern", pattern) diff --git a/api/handler/trigger.go b/api/handler/trigger.go index a02e59d89..1aa662908 100644 --- a/api/handler/trigger.go +++ b/api/handler/trigger.go @@ -95,7 +95,11 @@ func needValidate(request *http.Request) bool { // validateTargets checks targets of trigger. // Returns tree of problems if there is any invalid child, else returns nil. 
func validateTargets(request *http.Request, trigger *dto.Trigger) ([]dto.TreeOfProblems, *api.ErrorResponse) { - ttl := getMetricTTLByTrigger(request, trigger) + ttl, err := getMetricTTLByTrigger(request, trigger) + if err != nil { + return nil, api.ErrorInvalidRequest(err) + } + treesOfProblems, err := dto.TargetVerification(trigger.Targets, ttl, trigger.TriggerSource) if err != nil { diff --git a/api/handler/trigger_render_test.go b/api/handler/trigger_render_test.go index 02efbd345..a23927e7e 100644 --- a/api/handler/trigger_render_test.go +++ b/api/handler/trigger_render_test.go @@ -22,7 +22,7 @@ func TestRenderTrigger(t *testing.T) { localSource := mock_metric_source.NewMockMetricSource(mockCtrl) remoteSource := mock_metric_source.NewMockMetricSource(mockCtrl) - sourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, nil) + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, remoteSource, nil) responseWriter := httptest.NewRecorder() mockDb := mock_moira_alert.NewMockDatabase(mockCtrl) @@ -53,8 +53,8 @@ func TestRenderTrigger(t *testing.T) { ID: "triggerID-0000000000001", Targets: []string{"t1"}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, nil).Times(1) - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes().Times(1) fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl) fetchResult.EXPECT().GetMetricsData().Return([]metricSource.MetricData{*metricSource.MakeMetricData("", []float64{}, 0, 0)}).Times(1) localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).Times(1) @@ -86,8 +86,8 @@ func TestRenderTrigger(t *testing.T) { ID: "triggerID-0000000000001", Targets: []string{"t1"}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, nil).Times(1) - localSource.EXPECT().IsConfigured().Return(true, nil).Times(1) fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl) 
fetchResult.EXPECT().GetMetricsData().Return([]metricSource.MetricData{*metricSource.MakeMetricData("", []float64{}, 0, 0)}).Times(1) localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).Times(1) diff --git a/api/handler/trigger_test.go b/api/handler/trigger_test.go index d0ce8b2b9..be0fe57fb 100644 --- a/api/handler/trigger_test.go +++ b/api/handler/trigger_test.go @@ -35,6 +35,7 @@ func TestGetTrigger(t *testing.T) { mockDb.EXPECT().GetTrigger("triggerID-0000000000001").Return(moira.Trigger{ ID: "triggerID-0000000000001", TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, nil) mockDb.EXPECT().GetTriggerThrottling("triggerID-0000000000001").Return(throttlingTime, throttlingTime) database = mockDb @@ -51,7 +52,7 @@ func TestGetTrigger(t *testing.T) { contentBytes, _ := io.ReadAll(response.Body) contents := string(contentBytes) - expected := "{\"id\":\"triggerID-0000000000001\",\"name\":\"\",\"targets\":null,\"warn_value\":null,\"error_value\":null,\"trigger_type\":\"\",\"tags\":null,\"expression\":\"\",\"patterns\":null,\"is_remote\":false,\"trigger_source\":\"graphite_local\",\"mute_new_metrics\":false,\"alone_metrics\":null,\"created_at\":null,\"updated_at\":null,\"created_by\":\"\",\"updated_by\":\"\",\"throttling\":0}\n" + expected := "{\"id\":\"triggerID-0000000000001\",\"name\":\"\",\"targets\":null,\"warn_value\":null,\"error_value\":null,\"trigger_type\":\"\",\"tags\":null,\"expression\":\"\",\"patterns\":null,\"is_remote\":false,\"trigger_source\":\"graphite_local\",\"cluster_id\":\"default\",\"mute_new_metrics\":false,\"alone_metrics\":null,\"created_at\":null,\"updated_at\":null,\"created_by\":\"\",\"updated_by\":\"\",\"throttling\":0}\n" So(contents, ShouldEqual, expected) }) @@ -64,6 +65,7 @@ func TestGetTrigger(t *testing.T) { ID: "triggerID-0000000000001", CreatedAt: &triggerTime, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, UpdatedAt: 
&triggerTime, }, nil) @@ -82,7 +84,7 @@ func TestGetTrigger(t *testing.T) { contentBytes, _ := io.ReadAll(response.Body) contents := string(contentBytes) - expected := "{\"id\":\"triggerID-0000000000001\",\"name\":\"\",\"targets\":null,\"warn_value\":null,\"error_value\":null,\"trigger_type\":\"\",\"tags\":null,\"expression\":\"\",\"patterns\":null,\"is_remote\":false,\"trigger_source\":\"graphite_local\",\"mute_new_metrics\":false,\"alone_metrics\":null,\"created_at\":\"2022-06-07T10:00:00Z\",\"updated_at\":\"2022-06-07T10:00:00Z\",\"created_by\":\"\",\"updated_by\":\"\",\"throttling\":0}\n" + expected := "{\"id\":\"triggerID-0000000000001\",\"name\":\"\",\"targets\":null,\"warn_value\":null,\"error_value\":null,\"trigger_type\":\"\",\"tags\":null,\"expression\":\"\",\"patterns\":null,\"is_remote\":false,\"trigger_source\":\"graphite_local\",\"cluster_id\":\"default\",\"mute_new_metrics\":false,\"alone_metrics\":null,\"created_at\":\"2022-06-07T10:00:00Z\",\"updated_at\":\"2022-06-07T10:00:00Z\",\"created_by\":\"\",\"updated_by\":\"\",\"throttling\":0}\n" So(contents, ShouldEqual, expected) }) @@ -113,9 +115,8 @@ func TestUpdateTrigger(t *testing.T) { localSource := mock_metric_source.NewMockMetricSource(mockCtrl) remoteSource := mock_metric_source.NewMockMetricSource(mockCtrl) - sourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, nil) + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, remoteSource, nil) - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl) localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() @@ -152,6 +153,7 @@ func TestUpdateTrigger(t *testing.T) { ErrorValue: &triggerErrorValue, Targets: []string{"my.metric"}, TriggerSource: moira.GraphiteLocal, + ClusterId: 
moira.DefaultCluster, } mockDb.EXPECT().GetTrigger(gomock.Any()).Return(trigger, nil) @@ -159,7 +161,7 @@ func TestUpdateTrigger(t *testing.T) { testRequest := httptest.NewRequest("", url, bytes.NewBuffer(jsonTrigger)) testRequest.Header.Add("content-type", "application/json") testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "metricSourceProvider", sourceProvider)) - testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "localMetricTTL", to.Duration("65m"))) + testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "clustersMetricTTL", MakeTestTTLs())) testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), triggerIDKey, triggerID)) @@ -193,6 +195,7 @@ func TestUpdateTrigger(t *testing.T) { ErrorValue: &triggerErrorValue, Targets: []string{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, } @@ -200,7 +203,7 @@ func TestUpdateTrigger(t *testing.T) { request := httptest.NewRequest("", url, bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) request = request.WithContext(middleware.SetContextValueForTest(request.Context(), triggerIDKey, triggerID)) responseWriter := httptest.NewRecorder() @@ -239,7 +242,7 @@ func TestUpdateTrigger(t *testing.T) { request := httptest.NewRequest("", "/", bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", 
sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) request = request.WithContext(middleware.SetContextValueForTest(request.Context(), triggerIDKey, triggerID)) responseWriter := httptest.NewRecorder() @@ -264,7 +267,7 @@ func TestUpdateTrigger(t *testing.T) { request := httptest.NewRequest("", fmt.Sprintf("/trigger?%s", validateFlag), bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) request = request.WithContext(middleware.SetContextValueForTest(request.Context(), triggerIDKey, triggerID)) responseWriter := httptest.NewRecorder() @@ -327,7 +330,7 @@ func TestUpdateTrigger(t *testing.T) { request := httptest.NewRequest("", "/", bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) request = request.WithContext(middleware.SetContextValueForTest(request.Context(), triggerIDKey, triggerID)) responseWriter := httptest.NewRecorder() @@ -345,7 +348,7 @@ func TestUpdateTrigger(t *testing.T) { request := httptest.NewRequest("", fmt.Sprintf("/trigger?%s", validateFlag), 
bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) request = request.WithContext(middleware.SetContextValueForTest(request.Context(), triggerIDKey, triggerID)) responseWriter := httptest.NewRecorder() @@ -540,3 +543,10 @@ func isTriggerUpdated(response *http.Response) bool { return actual.Message == expected } + +func MakeTestTTLs() map[moira.ClusterKey]time.Duration { + return map[moira.ClusterKey]time.Duration{ + moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster): to.Duration("65m"), + moira.DefaultGraphiteRemoteCluster: to.Duration("168h"), + } +} diff --git a/api/handler/triggers.go b/api/handler/triggers.go index 1a60d8758..0cfdd5478 100644 --- a/api/handler/triggers.go +++ b/api/handler/triggers.go @@ -177,21 +177,16 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo } // getMetricTTLByTrigger gets metric ttl duration time from request context for local or remote trigger. 
-func getMetricTTLByTrigger(request *http.Request, trigger *dto.Trigger) time.Duration { - var ttl time.Duration +func getMetricTTLByTrigger(request *http.Request, trigger *dto.Trigger) (time.Duration, error) { + metricTTLs := middleware.GetMetricTTL(request) + key := trigger.ClusterKey() - switch trigger.TriggerSource { - case moira.GraphiteLocal: - ttl = middleware.GetLocalMetricTTL(request) - - case moira.GraphiteRemote: - ttl = middleware.GetRemoteMetricTTL(request) - - case moira.PrometheusRemote: - ttl = middleware.GetPrometheusMetricTTL(request) + ttl, ok := metricTTLs[key] + if !ok { + return 0, fmt.Errorf("can't get ttl: unknown cluster %s", key.String()) } - return ttl + return ttl, nil } // nolint: gofmt,goimports @@ -221,7 +216,11 @@ func triggerCheck(writer http.ResponseWriter, request *http.Request) { } } - ttl := getMetricTTLByTrigger(request, trigger) + ttl, err := getMetricTTLByTrigger(request, trigger) + if err != nil { + render.Render(writer, request, api.ErrorInvalidRequest(err)) //nolint + return + } if len(trigger.Targets) > 0 { var err error diff --git a/api/handler/triggers_test.go b/api/handler/triggers_test.go index 61d3680f7..f77c48a8b 100644 --- a/api/handler/triggers_test.go +++ b/api/handler/triggers_test.go @@ -24,7 +24,6 @@ import ( "github.com/moira-alert/moira/api/dto" "github.com/moira-alert/moira/api/middleware" . 
"github.com/smartystreets/goconvey/convey" - "github.com/xiam/to" ) func TestGetSearchRequestString(t *testing.T) { @@ -55,9 +54,8 @@ func TestGetTriggerFromRequest(t *testing.T) { localSource := mock_metric_source.NewMockMetricSource(mockCtrl) remoteSource := mock_metric_source.NewMockMetricSource(mockCtrl) fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl) - sourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, nil) + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, remoteSource, nil) - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() fetchResult.EXPECT().GetPatterns().Return(make([]string, 0), nil).AnyTimes() @@ -83,6 +81,7 @@ func TestGetTriggerFromRequest(t *testing.T) { Expression: "", Patterns: []string{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, MuteNewMetrics: false, AloneMetrics: map[string]bool{}, CreatedAt: &time.Time{}, @@ -141,26 +140,31 @@ func TestGetTriggerFromRequest(t *testing.T) { func TestGetMetricTTLByTrigger(t *testing.T) { request := httptest.NewRequest("", "/", strings.NewReader("")) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "remoteMetricTTL", to.Duration("168h"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) Convey("Given a local trigger", t, func() { trigger := dto.Trigger{TriggerModel: dto.TriggerModel{ TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }} Convey("It's metric ttl should be equal to local", func() { - So(getMetricTTLByTrigger(request, &trigger), ShouldEqual, 
65*time.Minute) + ttl, err := getMetricTTLByTrigger(request, &trigger) + So(err, ShouldBeNil) + So(ttl, ShouldEqual, 65*time.Minute) }) }) Convey("Given a remote trigger", t, func() { trigger := dto.Trigger{TriggerModel: dto.TriggerModel{ TriggerSource: moira.GraphiteRemote, + ClusterId: moira.DefaultCluster, }} Convey("It's metric ttl should be equal to remote", func() { - So(getMetricTTLByTrigger(request, &trigger), ShouldEqual, 168*time.Hour) + ttl, err := getMetricTTLByTrigger(request, &trigger) + So(err, ShouldBeNil) + So(ttl, ShouldEqual, 168*time.Hour) }) }) } @@ -174,15 +178,13 @@ func TestTriggerCheckHandler(t *testing.T) { localSource := mock_metric_source.NewMockMetricSource(mockCtrl) remoteSource := mock_metric_source.NewMockMetricSource(mockCtrl) fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl) - sourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, nil) + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, remoteSource, nil) - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() fetchResult.EXPECT().GetPatterns().Return(make([]string, 0), nil).AnyTimes() fetchResult.EXPECT().GetMetricsData().Return([]metricSource.MetricData{*metricSource.MakeMetricData("", []float64{}, 0, 0)}).AnyTimes() - remoteSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() remoteSource.EXPECT().GetMetricsTTLSeconds().Return(int64(604800)).AnyTimes() remoteSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() @@ -245,8 +247,7 @@ func TestTriggerCheckHandler(t *testing.T) { testRequest := httptest.NewRequest("", "/", bytes.NewBuffer(jsonTrigger)) testRequest.Header.Add("content-type", "application/json") testRequest = 
testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "metricSourceProvider", sourceProvider)) - testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "localMetricTTL", to.Duration("65m"))) - testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "remoteMetricTTL", to.Duration("168h"))) + testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "clustersMetricTTL", MakeTestTTLs())) triggerCheck(responseWriter, testRequest) @@ -268,9 +269,8 @@ func TestCreateTriggerHandler(t *testing.T) { localSource := mock_metric_source.NewMockMetricSource(mockCtrl) remoteSource := mock_metric_source.NewMockMetricSource(mockCtrl) - sourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, nil) + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, remoteSource, nil) - localSource.EXPECT().IsConfigured().Return(true, nil).AnyTimes() localSource.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)).AnyTimes() fetchResult := mock_metric_source.NewMockFetchResult(mockCtrl) localSource.EXPECT().Fetch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fetchResult, nil).AnyTimes() @@ -311,7 +311,7 @@ func TestCreateTriggerHandler(t *testing.T) { testRequest := httptest.NewRequest("", url, bytes.NewBuffer(jsonTrigger)) testRequest.Header.Add("content-type", "application/json") testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "metricSourceProvider", sourceProvider)) - testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "localMetricTTL", to.Duration("65m"))) + testRequest = testRequest.WithContext(middleware.SetContextValueForTest(testRequest.Context(), "clustersMetricTTL", MakeTestTTLs())) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, testRequest) @@ -348,7 +348,7 @@ func 
TestCreateTriggerHandler(t *testing.T) { request := httptest.NewRequest("", url, bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -386,7 +386,7 @@ func TestCreateTriggerHandler(t *testing.T) { request := httptest.NewRequest("", "/", bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -409,7 +409,7 @@ func TestCreateTriggerHandler(t *testing.T) { request := httptest.NewRequest("", fmt.Sprintf("/trigger?%s", validateFlag), bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -471,7 +471,7 @@ func TestCreateTriggerHandler(t 
*testing.T) { request := httptest.NewRequest("", "/", bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -488,7 +488,7 @@ func TestCreateTriggerHandler(t *testing.T) { request := httptest.NewRequest("", fmt.Sprintf("/trigger?%s", validateFlag), bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -531,11 +531,12 @@ func TestCreateTriggerHandler(t *testing.T) { func TestTriggersCreatedWithTriggerSource(t *testing.T) { mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() localSource := mock_metric_source.NewMockMetricSource(mockCtrl) remoteSource := mock_metric_source.NewMockMetricSource(mockCtrl) prometheusSource := mock_metric_source.NewMockMetricSource(mockCtrl) - sourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, prometheusSource) + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, remoteSource, prometheusSource) db := mock_moira_alert.NewMockDatabase(mockCtrl) database = db @@ -549,7 +550,7 @@ func TestTriggersCreatedWithTriggerSource(t 
*testing.T) { request := newTriggerCreateRequest(sourceProvider, triggerId, jsonTrigger) Convey("Expect trigger to be graphite local", func() { - setupExpectationsForCreateTrigger(localSource, db, target, triggerId, moira.GraphiteLocal) + setupExpectationsForCreateTrigger(localSource, db, target, triggerId, moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster)) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -563,7 +564,7 @@ func TestTriggersCreatedWithTriggerSource(t *testing.T) { request := newTriggerCreateRequest(sourceProvider, triggerId, jsonTrigger) Convey("Expect trigger to be graphite remote", func() { - setupExpectationsForCreateTrigger(remoteSource, db, target, triggerId, moira.GraphiteRemote) + setupExpectationsForCreateTrigger(remoteSource, db, target, triggerId, moira.DefaultGraphiteRemoteCluster) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -577,7 +578,7 @@ func TestTriggersCreatedWithTriggerSource(t *testing.T) { request := newTriggerCreateRequest(sourceProvider, triggerId, jsonTrigger) Convey("Expect trigger to be graphite local", func() { - setupExpectationsForCreateTrigger(localSource, db, target, triggerId, moira.GraphiteLocal) + setupExpectationsForCreateTrigger(localSource, db, target, triggerId, moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster)) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -591,7 +592,7 @@ func TestTriggersCreatedWithTriggerSource(t *testing.T) { request := newTriggerCreateRequest(sourceProvider, triggerId, jsonTrigger) Convey("Expect trigger to be graphite remote", func() { - setupExpectationsForCreateTrigger(remoteSource, db, target, triggerId, moira.GraphiteRemote) + setupExpectationsForCreateTrigger(remoteSource, db, target, triggerId, moira.DefaultGraphiteRemoteCluster) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -605,7 +606,7 @@ func 
TestTriggersCreatedWithTriggerSource(t *testing.T) { request := newTriggerCreateRequest(sourceProvider, triggerId, jsonTrigger) Convey("Expect trigger to be prometheus remote", func() { - setupExpectationsForCreateTrigger(prometheusSource, db, target, triggerId, moira.PrometheusRemote) + setupExpectationsForCreateTrigger(prometheusSource, db, target, triggerId, moira.MakeClusterKey(moira.PrometheusRemote, moira.DefaultCluster)) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -619,7 +620,41 @@ func TestTriggersCreatedWithTriggerSource(t *testing.T) { request := newTriggerCreateRequest(sourceProvider, triggerId, jsonTrigger) Convey("Expect trigger to be graphite local", func() { - setupExpectationsForCreateTrigger(localSource, db, target, triggerId, moira.GraphiteLocal) + setupExpectationsForCreateTrigger(localSource, db, target, triggerId, moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster)) + + responseWriter := httptest.NewRecorder() + createTrigger(responseWriter, request) + + So(responseWriter.Code, ShouldEqual, 200) + }) + }) +} + +func TestTriggersCreatedWithNonDefaultClusterId(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + localSource := mock_metric_source.NewMockMetricSource(mockCtrl) + + remoteStagingCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.ClusterId("staging")) + remoteStagingSource := mock_metric_source.NewMockMetricSource(mockCtrl) + + sourceProvider := metricSource.CreateTestMetricSourceProvider(localSource, nil, nil) + sourceProvider.RegisterSource(remoteStagingCluster, remoteStagingSource) + + db := mock_moira_alert.NewMockDatabase(mockCtrl) + database = db + defer func() { database = nil }() + + triggerId := "test" + target := `test_target_value` + + Convey("Given cluster_id is set", t, func() { + jsonTrigger := makeTestTriggerJson(target, triggerId, `"trigger_source": "graphite_local", "cluster_id": "staging"`) + request := 
newTriggerCreateRequest(sourceProvider, triggerId, jsonTrigger) + + Convey("Expect trigger have non default cluster id", func() { + setupExpectationsForCreateTrigger(remoteStagingSource, db, target, triggerId, remoteStagingCluster) responseWriter := httptest.NewRecorder() createTrigger(responseWriter, request) @@ -650,9 +685,8 @@ func setupExpectationsForCreateTrigger( source *mock_metric_source.MockMetricSource, db *mock_moira_alert.MockDatabase, target, triggerId string, - triggerSource moira.TriggerSource, + clusterKey moira.ClusterKey, ) { - source.EXPECT().IsConfigured().Return(true, nil) source.EXPECT().GetMetricsTTLSeconds().Return(int64(3600)) source.EXPECT().Fetch(target, gomock.Any(), gomock.Any(), gomock.Any()).Return(&local.FetchResult{}, nil) @@ -660,7 +694,7 @@ func setupExpectationsForCreateTrigger( db.EXPECT().AcquireTriggerCheckLock(triggerId, gomock.Any()).Return(nil) db.EXPECT().DeleteTriggerCheckLock(triggerId).Return(nil) db.EXPECT().GetTriggerLastCheck(triggerId).Return(moira.CheckData{}, dataBase.ErrNil) - db.EXPECT().SetTriggerLastCheck(triggerId, gomock.Any(), triggerSource).Return(nil) + db.EXPECT().SetTriggerLastCheck(triggerId, gomock.Any(), clusterKey).Return(nil) db.EXPECT().SaveTrigger(triggerId, gomock.Any()).Return(nil) } @@ -672,7 +706,7 @@ func newTriggerCreateRequest( request := httptest.NewRequest("PUT", "/trigger", bytes.NewBuffer(jsonTrigger)) request.Header.Add("content-type", "application/json") request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "metricSourceProvider", sourceProvider)) - request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "localMetricTTL", to.Duration("65m"))) + request = request.WithContext(middleware.SetContextValueForTest(request.Context(), "clustersMetricTTL", MakeTestTTLs())) request = request.WithContext(middleware.SetContextValueForTest(request.Context(), triggerIDKey, triggerId)) return request diff --git a/api/middleware/context.go 
b/api/middleware/context.go index e0020fbd0..1921bffcd 100644 --- a/api/middleware/context.go +++ b/api/middleware/context.go @@ -182,14 +182,12 @@ func Populate(defaultPopulated bool) func(next http.Handler) http.Handler { } // Triggers gets string value target from URI query and set it to request context. If query has not values sets given values -func Triggers(localMetricTTL, remoteMetricTTL, prometheusMetricTTL time.Duration) func(next http.Handler) http.Handler { +func Triggers(metricTTL map[moira.ClusterKey]time.Duration) func(next http.Handler) http.Handler { return func(next http.Handler) http.Handler { return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { ctx := request.Context() - ctx = context.WithValue(ctx, localMetricTTLKey, localMetricTTL) - ctx = context.WithValue(ctx, remoteMetricTTLKey, remoteMetricTTL) - ctx = context.WithValue(ctx, prometheusMetricTTLKey, prometheusMetricTTL) + ctx = context.WithValue(ctx, clustersMetricTTLKey, metricTTL) next.ServeHTTP(writer, request.WithContext(ctx)) }) diff --git a/api/middleware/middleware.go b/api/middleware/middleware.go index b6b088674..2903df403 100644 --- a/api/middleware/middleware.go +++ b/api/middleware/middleware.go @@ -17,29 +17,27 @@ func (key ContextKey) String() string { } var ( - databaseKey ContextKey = "database" - searcherKey ContextKey = "searcher" - triggerIDKey ContextKey = "triggerID" - localMetricTTLKey ContextKey = "localMetricTTL" - remoteMetricTTLKey ContextKey = "remoteMetricTTL" - prometheusMetricTTLKey ContextKey = "prometheusMetricTTL" - populateKey ContextKey = "populated" - contactIDKey ContextKey = "contactID" - tagKey ContextKey = "tag" - subscriptionIDKey ContextKey = "subscriptionID" - pageKey ContextKey = "page" - sizeKey ContextKey = "size" - pagerIDKey ContextKey = "pagerID" - createPagerKey ContextKey = "createPager" - fromKey ContextKey = "from" - toKey ContextKey = "to" - loginKey ContextKey = "login" - timeSeriesNamesKey ContextKey = 
"timeSeriesNames" - metricSourceProvider ContextKey = "metricSourceProvider" - targetNameKey ContextKey = "target" - teamIDKey ContextKey = "teamID" - teamUserIDKey ContextKey = "teamUserIDKey" - anonymousUser = "anonymous" + databaseKey ContextKey = "database" + searcherKey ContextKey = "searcher" + triggerIDKey ContextKey = "triggerID" + clustersMetricTTLKey ContextKey = "clustersMetricTTL" + populateKey ContextKey = "populated" + contactIDKey ContextKey = "contactID" + tagKey ContextKey = "tag" + subscriptionIDKey ContextKey = "subscriptionID" + pageKey ContextKey = "page" + sizeKey ContextKey = "size" + pagerIDKey ContextKey = "pagerID" + createPagerKey ContextKey = "createPager" + fromKey ContextKey = "from" + toKey ContextKey = "to" + loginKey ContextKey = "login" + timeSeriesNamesKey ContextKey = "timeSeriesNames" + metricSourceProvider ContextKey = "metricSourceProvider" + targetNameKey ContextKey = "target" + teamIDKey ContextKey = "teamID" + teamUserIDKey ContextKey = "teamUserIDKey" + anonymousUser = "anonymous" ) // GetDatabase gets moira.Database realization from request context @@ -64,18 +62,8 @@ func GetTriggerID(request *http.Request) string { } // GetLocalMetricTTL gets local metric ttl duration time from request context, which was sets in TriggerContext middleware -func GetLocalMetricTTL(request *http.Request) time.Duration { - return request.Context().Value(localMetricTTLKey).(time.Duration) -} - -// GetRemoteMetricTTL gets remote metric ttl duration time from request context, which was sets in TriggerContext middleware -func GetRemoteMetricTTL(request *http.Request) time.Duration { - return request.Context().Value(remoteMetricTTLKey).(time.Duration) -} - -// GetRemoteMetricTTL gets remote metric ttl duration time from request context, which was sets in TriggerContext middleware -func GetPrometheusMetricTTL(request *http.Request) time.Duration { - return request.Context().Value(prometheusMetricTTLKey).(time.Duration) +func GetMetricTTL(request 
*http.Request) map[moira.ClusterKey]time.Duration { + return request.Context().Value(clustersMetricTTLKey).(map[moira.ClusterKey]time.Duration) } // GetPopulated get populate bool from request context, which was sets in TriggerContext middleware diff --git a/checker/check.go b/checker/check.go index 25090f8b7..c60494d62 100644 --- a/checker/check.go +++ b/checker/check.go @@ -59,7 +59,7 @@ func (triggerChecker *TriggerChecker) Check() error { return triggerChecker.database.SetTriggerLastCheck( triggerChecker.triggerID, &checkData, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ) } @@ -105,7 +105,7 @@ func (triggerChecker *TriggerChecker) handlePrepareError(checkData moira.CheckDa err = triggerChecker.database.SetTriggerLastCheck( triggerChecker.triggerID, &checkData, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ) return MustStopCheck, checkData, err @@ -130,7 +130,7 @@ func (triggerChecker *TriggerChecker) handleFetchError(checkData moira.CheckData return triggerChecker.database.SetTriggerLastCheck( triggerChecker.triggerID, &checkData, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ) } case remote.ErrRemoteTriggerResponse: @@ -158,7 +158,7 @@ func (triggerChecker *TriggerChecker) handleFetchError(checkData moira.CheckData return triggerChecker.database.SetTriggerLastCheck( triggerChecker.triggerID, &checkData, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ) } @@ -182,7 +182,7 @@ func (triggerChecker *TriggerChecker) handleUndefinedError(checkData moira.Check return triggerChecker.database.SetTriggerLastCheck( triggerChecker.triggerID, &checkData, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ) } diff --git a/checker/check_test.go b/checker/check_test.go index 747513ac2..d63f8f0d5 100644 --- a/checker/check_test.go +++ b/checker/check_test.go @@ -20,14 +20,18 @@ import ( . 
"github.com/smartystreets/goconvey/convey" ) +var defaultLocalClusterKey = moira.MakeClusterKey(moira.GraphiteLocal, "default") + func TestGetMetricDataState(t *testing.T) { logger, _ := logging.GetLogger("Test") var warnValue float64 = 10 var errValue float64 = 20 - checkerMetrics := metrics.ConfigureCheckerMetrics(metrics.NewDummyRegistry(), false, false) + checkerMetrics, _ := metrics. + ConfigureCheckerMetrics(metrics.NewDummyRegistry(), []moira.ClusterKey{defaultLocalClusterKey}). + GetCheckMetricsBySource(defaultLocalClusterKey) triggerChecker := TriggerChecker{ logger: logger, - metrics: checkerMetrics.LocalMetrics, + metrics: checkerMetrics, until: 67, from: 17, trigger: &moira.Trigger{ @@ -535,9 +539,11 @@ func TestCheckForNODATA(t *testing.T) { var ttl int64 = 600 - checkerMetrics := metrics.ConfigureCheckerMetrics(metrics.NewDummyRegistry(), false, false) + checkerMetrics, _ := metrics. + ConfigureCheckerMetrics(metrics.NewDummyRegistry(), []moira.ClusterKey{defaultLocalClusterKey}). + GetCheckMetricsBySource(defaultLocalClusterKey) triggerChecker := TriggerChecker{ - metrics: checkerMetrics.LocalMetrics, + metrics: checkerMetrics, logger: logger, ttl: ttl, lastCheck: &moira.CheckData{ @@ -656,14 +662,16 @@ func TestCheck(t *testing.T) { var ttl int64 = 30 - checkerMetrics := metrics.ConfigureCheckerMetrics(metrics.NewDummyRegistry(), false, false) + checkerMetrics, _ := metrics. + ConfigureCheckerMetrics(metrics.NewDummyRegistry(), []moira.ClusterKey{defaultLocalClusterKey}). 
+ GetCheckMetricsBySource(defaultLocalClusterKey) triggerChecker := TriggerChecker{ triggerID: "SuperId", database: dataBase, source: source, logger: logger, config: &Config{}, - metrics: checkerMetrics.LocalMetrics, + metrics: checkerMetrics, from: 17, until: 67, ttl: ttl, @@ -713,7 +721,7 @@ func TestCheck(t *testing.T) { dataBase.EXPECT().SetTriggerLastCheck( triggerChecker.triggerID, &lastCheck, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ).Return(nil), ) err := triggerChecker.Check() @@ -748,7 +756,7 @@ func TestCheck(t *testing.T) { dataBase.EXPECT().SetTriggerLastCheck( triggerChecker.triggerID, &lastCheck, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ).Return(nil), ) err := triggerChecker.Check() @@ -797,7 +805,7 @@ func TestCheck(t *testing.T) { dataBase.EXPECT().SetTriggerLastCheck( triggerChecker.triggerID, &lastCheck, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ).Return(nil), ) err := triggerChecker.Check() @@ -845,7 +853,7 @@ func TestCheck(t *testing.T) { dataBase.EXPECT().SetTriggerLastCheck( triggerChecker.triggerID, &lastCheck, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ).Return(nil), ) err := triggerChecker.Check() @@ -891,7 +899,7 @@ func TestCheck(t *testing.T) { dataBase.EXPECT().SetTriggerLastCheck( triggerChecker.triggerID, &lastCheck, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ).Return(nil) err := triggerChecker.Check() So(err, ShouldBeNil) @@ -965,7 +973,7 @@ func TestCheck(t *testing.T) { dataBase.EXPECT().SetTriggerLastCheck( triggerChecker.triggerID, &lastCheck, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ).Return(nil), ) err := triggerChecker.Check() @@ -1499,13 +1507,16 @@ func TestTriggerChecker_Check(t *testing.T) { var ttl int64 = 30 + checkerMetrics, _ := metrics. 
+ ConfigureCheckerMetrics(metrics.NewDummyRegistry(), []moira.ClusterKey{defaultLocalClusterKey}). + GetCheckMetricsBySource(defaultLocalClusterKey) triggerChecker := TriggerChecker{ triggerID: "SuperId", database: dataBase, source: source, logger: logger, config: &Config{}, - metrics: metrics.ConfigureCheckerMetrics(metrics.NewDummyRegistry(), false, false).LocalMetrics, + metrics: checkerMetrics, from: 17, until: 67, ttl: ttl, @@ -1559,7 +1570,7 @@ func TestTriggerChecker_Check(t *testing.T) { dataBase.EXPECT().SetTriggerLastCheck( triggerChecker.triggerID, &lastCheck, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ).Return(nil) _ = triggerChecker.Check() } @@ -1582,13 +1593,16 @@ func BenchmarkTriggerChecker_Check(b *testing.B) { var ttl int64 = 30 + checkerMetrics, _ := metrics. + ConfigureCheckerMetrics(metrics.NewDummyRegistry(), []moira.ClusterKey{defaultLocalClusterKey}). + GetCheckMetricsBySource(defaultLocalClusterKey) triggerChecker := TriggerChecker{ triggerID: "SuperId", database: dataBase, source: source, logger: logger, config: &Config{}, - metrics: metrics.ConfigureCheckerMetrics(metrics.NewDummyRegistry(), false, false).LocalMetrics, + metrics: checkerMetrics, from: 17, until: 67, ttl: ttl, @@ -1642,7 +1656,7 @@ func BenchmarkTriggerChecker_Check(b *testing.B) { dataBase.EXPECT().SetTriggerLastCheck( triggerChecker.triggerID, &lastCheck, - triggerChecker.trigger.TriggerSource, + triggerChecker.trigger.ClusterKey(), ).Return(nil).AnyTimes() for n := 0; n < b.N; n++ { @@ -1786,6 +1800,7 @@ func TestTriggerChecker_handlePrepareError(t *testing.T) { trigger := &moira.Trigger{ TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, } triggerChecker := TriggerChecker{ triggerID: "test trigger", @@ -1829,7 +1844,7 @@ func TestTriggerChecker_handlePrepareError(t *testing.T) { Metric: triggerChecker.trigger.Name, MessageEventInfo: nil, }, true) - dataBase.EXPECT().SetTriggerLastCheck("test trigger", 
&expectedCheckData, moira.GraphiteLocal) + dataBase.EXPECT().SetTriggerLastCheck("test trigger", &expectedCheckData, trigger.ClusterKey()) pass, checkDataReturn, errReturn := triggerChecker.handlePrepareError(checkData, err) So(errReturn, ShouldBeNil) So(pass, ShouldEqual, MustStopCheck) @@ -1847,7 +1862,7 @@ func TestTriggerChecker_handlePrepareError(t *testing.T) { State: moira.StateNODATA, EventTimestamp: 10, } - dataBase.EXPECT().SetTriggerLastCheck("test trigger", &expectedCheckData, moira.GraphiteLocal) + dataBase.EXPECT().SetTriggerLastCheck("test trigger", &expectedCheckData, trigger.ClusterKey()) pass, checkDataReturn, errReturn := triggerChecker.handlePrepareError(checkData, err) So(errReturn, ShouldBeNil) So(pass, ShouldEqual, MustStopCheck) diff --git a/checker/config.go b/checker/config.go index a8ae75cdf..8c9675e78 100644 --- a/checker/config.go +++ b/checker/config.go @@ -2,21 +2,26 @@ package checker import ( "time" + + "github.com/moira-alert/moira" ) // Config represent checker config type Config struct { Enabled bool NoDataCheckInterval time.Duration - CheckInterval time.Duration LazyTriggersCheckInterval time.Duration + SourceCheckConfigs map[moira.ClusterKey]SourceCheckConfig StopCheckingIntervalSeconds int64 - MaxParallelLocalChecks int - MaxParallelRemoteChecks int - MaxParallelPrometheusChecks int LogFile string LogLevel string LogTriggersToLevel map[string]string MetricEventPopBatchSize int64 MetricEventPopDelay time.Duration } + +// SourceCheckConfig represents check parameters for a single metric source +type SourceCheckConfig struct { + CheckInterval time.Duration + MaxParallelChecks int +} diff --git a/checker/errors.go b/checker/errors.go index ad6fe5b52..59ae265be 100644 --- a/checker/errors.go +++ b/checker/errors.go @@ -48,5 +48,5 @@ type ErrTriggerHasEmptyTargets struct { // ErrTriggerHasEmptyTargets implementation with error message func (err ErrTriggerHasEmptyTargets) Error() string { - return fmt.Sprintf("target t%v has no 
metrics", strings.Join(err.targets, ", ")) + return fmt.Sprintf("target %v has no metrics", strings.Join(err.targets, ", ")) } diff --git a/checker/trigger_checker.go b/checker/trigger_checker.go index 3763d67bd..80a310274 100644 --- a/checker/trigger_checker.go +++ b/checker/trigger_checker.go @@ -68,11 +68,16 @@ func MakeTriggerChecker( } } + triggerMetrics, err := metrics.GetCheckMetrics(&trigger) + if err != nil { + return nil, err + } + triggerChecker := &TriggerChecker{ database: dataBase, logger: triggerLogger, config: config, - metrics: metrics.GetCheckMetrics(&trigger), + metrics: triggerMetrics, source: source, from: calculateFrom(lastCheck.Timestamp, trigger.TTL), diff --git a/checker/trigger_checker_test.go b/checker/trigger_checker_test.go index 91273ad0d..647d5a6c2 100644 --- a/checker/trigger_checker_test.go +++ b/checker/trigger_checker_test.go @@ -22,6 +22,10 @@ func TestInitTriggerChecker(t *testing.T) { dataBase := mock_moira_alert.NewMockDatabase(mockCtrl) localSource := local.Create(dataBase) triggerID := "superId" + checkerMetrics := metrics.ConfigureCheckerMetrics( + metrics.NewDummyRegistry(), + []moira.ClusterKey{moira.DefaultLocalCluster}, + ) defer mockCtrl.Finish() Convey("Test errors", t, func() { @@ -29,8 +33,9 @@ func TestInitTriggerChecker(t *testing.T) { getTriggerError := fmt.Errorf("Oppps! 
Can't read trigger") dataBase.EXPECT().GetTrigger(triggerID).Return(moira.Trigger{ TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, getTriggerError) - _, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateMetricSourceProvider(localSource, nil, nil), &metrics.CheckerMetrics{}) + _, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateTestMetricSourceProvider(localSource, nil, nil), checkerMetrics) So(err, ShouldBeError) So(err, ShouldResemble, getTriggerError) }) @@ -38,8 +43,9 @@ func TestInitTriggerChecker(t *testing.T) { Convey("No trigger error", func() { dataBase.EXPECT().GetTrigger(triggerID).Return(moira.Trigger{ TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, database.ErrNil) - _, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateMetricSourceProvider(localSource, nil, nil), &metrics.CheckerMetrics{}) + _, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateTestMetricSourceProvider(localSource, nil, nil), checkerMetrics) So(err, ShouldBeError) So(err, ShouldResemble, ErrTriggerNotExists) }) @@ -49,9 +55,10 @@ func TestInitTriggerChecker(t *testing.T) { dataBase.EXPECT().GetTrigger(triggerID).Return(moira.Trigger{ TriggerType: moira.RisingTrigger, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, nil) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, readLastCheckError) - _, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateMetricSourceProvider(localSource, nil, nil), &metrics.CheckerMetrics{}) + _, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateTestMetricSourceProvider(localSource, nil, nil), checkerMetrics) So(err, ShouldBeError) So(err, ShouldResemble, readLastCheckError) }) @@ -73,8 +80,11 @@ func TestInitTriggerChecker(t *testing.T) { Patterns: 
[]string{"Egais.elasticsearch.*.*.jvm.gc.collection.time"}, TTL: ttl, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, } + metrics, _ := checkerMetrics.GetCheckMetrics(&trigger) + lastCheck := moira.CheckData{ Timestamp: 1502694487, State: moira.StateOK, @@ -107,7 +117,7 @@ func TestInitTriggerChecker(t *testing.T) { Convey("Test trigger checker with lastCheck", t, func() { dataBase.EXPECT().GetTrigger(triggerID).Return(trigger, nil) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(lastCheck, nil) - actual, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateMetricSourceProvider(localSource, nil, nil), &metrics.CheckerMetrics{}) + actual, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateTestMetricSourceProvider(localSource, nil, nil), checkerMetrics) So(err, ShouldBeNil) expected := TriggerChecker{ @@ -122,14 +132,15 @@ func TestInitTriggerChecker(t *testing.T) { lastCheck: &lastCheck, from: lastCheck.Timestamp - ttl, until: actual.until, + metrics: metrics, } - So(*actual, ShouldResemble, expected) + So(actual, ShouldResemble, &expected) }) Convey("Test trigger checker without lastCheck", t, func() { dataBase.EXPECT().GetTrigger(triggerID).Return(trigger, nil) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - actual, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateMetricSourceProvider(localSource, nil, nil), &metrics.CheckerMetrics{}) + actual, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateTestMetricSourceProvider(localSource, nil, nil), checkerMetrics) So(err, ShouldBeNil) expected := TriggerChecker{ @@ -146,8 +157,9 @@ func TestInitTriggerChecker(t *testing.T) { State: moira.StateOK, Timestamp: actual.until - 3600, }, - from: actual.until - 3600 - ttl, - until: actual.until, + from: actual.until - 3600 - ttl, + until: actual.until, + metrics: metrics, } 
So(*actual, ShouldResemble, expected) }) @@ -158,7 +170,7 @@ func TestInitTriggerChecker(t *testing.T) { Convey("Test trigger checker without lastCheck and ttl", t, func() { dataBase.EXPECT().GetTrigger(triggerID).Return(trigger, nil) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(moira.CheckData{}, database.ErrNil) - actual, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateMetricSourceProvider(localSource, nil, nil), &metrics.CheckerMetrics{}) + actual, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateTestMetricSourceProvider(localSource, nil, nil), checkerMetrics) So(err, ShouldBeNil) expected := TriggerChecker{ @@ -175,8 +187,9 @@ func TestInitTriggerChecker(t *testing.T) { State: moira.StateOK, Timestamp: actual.until - 3600, }, - from: actual.until - 3600 - 600, - until: actual.until, + from: actual.until - 3600 - 600, + until: actual.until, + metrics: metrics, } So(*actual, ShouldResemble, expected) }) @@ -184,7 +197,7 @@ func TestInitTriggerChecker(t *testing.T) { Convey("Test trigger checker with lastCheck and without ttl", t, func() { dataBase.EXPECT().GetTrigger(triggerID).Return(trigger, nil) dataBase.EXPECT().GetTriggerLastCheck(triggerID).Return(lastCheck, nil) - actual, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateMetricSourceProvider(localSource, nil, nil), &metrics.CheckerMetrics{}) + actual, err := MakeTriggerChecker(triggerID, dataBase, logger, config, metricSource.CreateTestMetricSourceProvider(localSource, nil, nil), checkerMetrics) So(err, ShouldBeNil) @@ -200,6 +213,7 @@ func TestInitTriggerChecker(t *testing.T) { lastCheck: &lastCheck, from: lastCheck.Timestamp - 600, until: actual.until, + metrics: metrics, } So(*actual, ShouldResemble, expected) }) diff --git a/checker/worker/lazy_triggers.go b/checker/worker/lazy_triggers.go index 2104ae66f..515cd4c36 100644 --- a/checker/worker/lazy_triggers.go +++ 
b/checker/worker/lazy_triggers.go @@ -1,38 +1,46 @@ package worker import ( + "fmt" "math/rand" "time" + + "github.com/moira-alert/moira" ) const ( lazyTriggersWorkerTicker = time.Second * 10 ) -func (check *Checker) lazyTriggersWorker() error { - if check.Config.LazyTriggersCheckInterval <= check.Config.CheckInterval { - check.Logger.Info(). - Interface("lazy_triggers_check_interval", check.Config.LazyTriggersCheckInterval). - Interface("check_interval", check.Config.CheckInterval). +func (manager *WorkerManager) lazyTriggersWorker() error { + localConfig, ok := manager.Config.SourceCheckConfigs[moira.DefaultLocalCluster] + if !ok { + return fmt.Errorf("can not initialize lazyTriggersWorker: default local source is not configured") + } + + if manager.Config.LazyTriggersCheckInterval <= localConfig.CheckInterval { + manager.Logger.Info(). + Interface("lazy_triggers_check_interval", manager.Config.LazyTriggersCheckInterval). + Interface("check_interval", localConfig.CheckInterval). Msg("Lazy triggers worker won't start because lazy triggers interval is less or equal to check interval") return nil } checkTicker := time.NewTicker(lazyTriggersWorkerTicker) - check.Logger.Info(). - Interface("lazy_triggers_check_interval", check.Config.LazyTriggersCheckInterval). + manager.Logger.Info(). + Interface("lazy_triggers_check_interval", manager.Config.LazyTriggersCheckInterval). Interface("update_lazy_triggers_every", lazyTriggersWorkerTicker). Msg("Start lazy triggers worker") for { select { - case <-check.tomb.Dying(): + case <-manager.tomb.Dying(): checkTicker.Stop() - check.Logger.Info().Msg("Lazy triggers worker stopped") + manager.Logger.Info().Msg("Lazy triggers worker stopped") return nil case <-checkTicker.C: - err := check.fillLazyTriggerIDs() + err := manager.fillLazyTriggerIDs() if err != nil { - check.Logger.Error(). + manager.Logger.Error(). Error(err). 
Msg("Failed to get lazy triggers") } @@ -40,8 +48,8 @@ func (check *Checker) lazyTriggersWorker() error { } } -func (check *Checker) fillLazyTriggerIDs() error { - triggerIDs, err := check.Database.GetUnusedTriggerIDs() +func (manager *WorkerManager) fillLazyTriggerIDs() error { + triggerIDs, err := manager.Database.GetUnusedTriggerIDs() if err != nil { return err } @@ -49,13 +57,13 @@ func (check *Checker) fillLazyTriggerIDs() error { for _, triggerID := range triggerIDs { newLazyTriggerIDs[triggerID] = true } - check.lazyTriggerIDs.Store(newLazyTriggerIDs) - check.Metrics.UnusedTriggersCount.Update(int64(len(newLazyTriggerIDs))) + manager.lazyTriggerIDs.Store(newLazyTriggerIDs) + manager.Metrics.UnusedTriggersCount.Update(int64(len(newLazyTriggerIDs))) return nil } -func (check *Checker) getRandomLazyCacheDuration() time.Duration { - maxLazyCacheSeconds := check.Config.LazyTriggersCheckInterval.Seconds() +func (manager *WorkerManager) getRandomLazyCacheDuration() time.Duration { + maxLazyCacheSeconds := manager.Config.LazyTriggersCheckInterval.Seconds() min := maxLazyCacheSeconds / 2 //nolint i := rand.Float64()*min + min return time.Duration(i) * time.Second diff --git a/checker/worker/local.go b/checker/worker/local.go deleted file mode 100644 index 4febcb5b1..000000000 --- a/checker/worker/local.go +++ /dev/null @@ -1,103 +0,0 @@ -package worker - -import ( - "time" - - "github.com/moira-alert/moira/metrics" - w "github.com/moira-alert/moira/worker" -) - -const ( - nodataCheckerLockName = "moira-nodata-checker" - nodataCheckerLockTTL = time.Second * 15 - nodataWorkerName = "NODATA checker" -) - -type localChecker struct { - check *Checker -} - -func newLocalChecker(check *Checker) checkerWorker { - return &localChecker{ - check: check, - } -} - -func (ch *localChecker) Name() string { - return "Local" -} - -func (ch *localChecker) IsEnabled() bool { - return true -} - -func (ch *localChecker) MaxParallelChecks() int { - return 
ch.check.Config.MaxParallelLocalChecks -} - -func (ch *localChecker) Metrics() *metrics.CheckMetrics { - return ch.check.Metrics.LocalMetrics -} - -// localTriggerGetter starts NODATA checker and manages its subscription in Redis -// to make sure there is always only one working checker -func (ch *localChecker) StartTriggerGetter() error { - w.NewWorker( - nodataWorkerName, - ch.check.Logger, - ch.check.Database.NewLock(nodataCheckerLockName, nodataCheckerLockTTL), - ch.localChecker, - ).Run(ch.check.tomb.Dying()) - - return nil -} - -func (ch *localChecker) GetTriggersToCheck(count int) ([]string, error) { - return ch.check.Database.GetLocalTriggersToCheck(count) -} - -func (ch *localChecker) localChecker(stop <-chan struct{}) error { - checkTicker := time.NewTicker(ch.check.Config.NoDataCheckInterval) - ch.check.Logger.Info().Msg("Local checker started") - for { - select { - case <-stop: - ch.check.Logger.Info().Msg("Local checker stopped") - checkTicker.Stop() - return nil - - case <-checkTicker.C: - if err := ch.addLocalTriggersToCheckQueue(); err != nil { - ch.check.Logger.Error(). - Error(err). - Msg("Local check failed") - } - } - } -} - -func (ch *localChecker) addLocalTriggersToCheckQueue() error { - now := time.Now().UTC().Unix() - if ch.check.lastData+ch.check.Config.StopCheckingIntervalSeconds < now { - ch.check.Logger.Info(). - Int64("no_metrics_for_sec", now-ch.check.lastData). - Msg("Checking Local disabled. 
No metrics for some seconds") - return nil - } - - ch.check.Logger.Info().Msg("Checking Local") - triggerIds, err := ch.check.Database.GetLocalTriggerIDs() - if err != nil { - return err - } - ch.check.addLocalTriggerIDsIfNeeded(triggerIds) - - return nil -} - -func (check *Checker) addLocalTriggerIDsIfNeeded(triggerIDs []string) { - needToCheckTriggerIDs := check.getTriggerIDsToCheck(triggerIDs) - if len(needToCheckTriggerIDs) > 0 { - check.Database.AddLocalTriggersToCheck(needToCheckTriggerIDs) //nolint - } -} diff --git a/checker/worker/metric_events.go b/checker/worker/metric_events.go new file mode 100644 index 000000000..67c686115 --- /dev/null +++ b/checker/worker/metric_events.go @@ -0,0 +1,121 @@ +package worker + +import ( + "errors" + "fmt" + "time" + + "github.com/moira-alert/moira" + "github.com/patrickmn/go-cache" +) + +const defaultMetricEventPopBatchSize = 100 + +func (manager *WorkerManager) startLocalMetricEvents() error { + if manager.Config.MetricEventPopBatchSize < 0 { + return errors.New("MetricEventPopBatchSize param was less than zero") + } + + manager.Logger.Info().Msg("Starting local metric event handler") + + if manager.Config.MetricEventPopBatchSize == 0 { + manager.Config.MetricEventPopBatchSize = defaultMetricEventPopBatchSize + } + + subscribeMetricEventsParams := moira.SubscribeMetricEventsParams{ + BatchSize: manager.Config.MetricEventPopBatchSize, + Delay: manager.Config.MetricEventPopDelay, + } + + metricEventsChannel, err := manager.Database.SubscribeMetricEvents(&manager.tomb, &subscribeMetricEventsParams) + if err != nil { + return err + } + + defaultLocalKey := moira.DefaultLocalCluster + localConfig, ok := manager.Config.SourceCheckConfigs[defaultLocalKey] + if !ok { + return fmt.Errorf("can not initialize localMetricEvents: default local source is not configured") + } + + for i := 0; i < localConfig.MaxParallelChecks; i++ { + manager.tomb.Go(func() error { + return manager.newMetricsHandler(metricEventsChannel) + }) + } + + 
manager.tomb.Go(func() error { + return manager.checkMetricEventsChannelLen(metricEventsChannel) + }) + + manager.Logger.Info().Msg("Checking new events started") + + go func() { + <-manager.tomb.Dying() + manager.Logger.Info().Msg("Checking for new events stopped") + }() + + return nil +} + +func (manager *WorkerManager) newMetricsHandler(metricEventsChannel <-chan *moira.MetricEvent) error { + for { + metricEvent, ok := <-metricEventsChannel + if !ok { + return nil + } + pattern := metricEvent.Pattern + if manager.needHandlePattern(pattern) { + if err := manager.handleMetricEvent(pattern); err != nil { + manager.Logger.Error(). + Error(err). + Msg("Failed to handle metricEvent") + } + } + } +} + +func (manager *WorkerManager) needHandlePattern(pattern string) bool { + err := manager.PatternCache.Add(pattern, true, cache.DefaultExpiration) + return err == nil +} + +func (manager *WorkerManager) handleMetricEvent(pattern string) error { + start := time.Now() + defer manager.Metrics.MetricEventsHandleTime.UpdateSince(start) + + manager.lastData = time.Now().UTC().Unix() + triggerIds, err := manager.Database.GetPatternTriggerIDs(pattern) + if err != nil { + return err + } + + // Cleanup pattern and its metrics if this pattern doesn't match to any trigger + if len(triggerIds) == 0 { + if err := manager.Database.RemovePatternWithMetrics(pattern); err != nil { + return err + } + } + + manager.scheduleLocalTriggerIDsIfNeeded(triggerIds) + return nil +} + +func (manager *WorkerManager) scheduleLocalTriggerIDsIfNeeded(triggerIDs []string) { + needToCheckTriggerIDs := manager.filterOutLazyTriggerIDs(triggerIDs) + if len(needToCheckTriggerIDs) > 0 { + manager.Database.AddTriggersToCheck(moira.DefaultLocalCluster, needToCheckTriggerIDs) //nolint + } +} + +func (manager *WorkerManager) checkMetricEventsChannelLen(ch <-chan *moira.MetricEvent) error { + checkTicker := time.NewTicker(time.Millisecond * 100) //nolint + for { + select { + case <-manager.tomb.Dying(): + return nil 
+ case <-checkTicker.C: + manager.Metrics.MetricEventsChannelLen.Update(int64(len(ch))) + } + } +} diff --git a/checker/worker/metrics.go b/checker/worker/metrics.go deleted file mode 100644 index 086bf674d..000000000 --- a/checker/worker/metrics.go +++ /dev/null @@ -1,51 +0,0 @@ -package worker - -import ( - "time" - - "github.com/moira-alert/moira" - "github.com/patrickmn/go-cache" -) - -func (check *Checker) newMetricsHandler(metricEventsChannel <-chan *moira.MetricEvent) error { - for { - metricEvent, ok := <-metricEventsChannel - if !ok { - return nil - } - pattern := metricEvent.Pattern - if check.needHandlePattern(pattern) { - if err := check.handleMetricEvent(pattern); err != nil { - check.Logger.Error(). - Error(err). - Msg("Failed to handle metricEvent") - } - } - } -} - -func (check *Checker) needHandlePattern(pattern string) bool { - err := check.PatternCache.Add(pattern, true, cache.DefaultExpiration) - return err == nil -} - -func (check *Checker) handleMetricEvent(pattern string) error { - start := time.Now() - defer check.Metrics.MetricEventsHandleTime.UpdateSince(start) - - check.lastData = time.Now().UTC().Unix() - triggerIds, err := check.Database.GetPatternTriggerIDs(pattern) - if err != nil { - return err - } - - // Cleanup pattern and its metrics if this pattern doesn't match to any trigger - if len(triggerIds) == 0 { - if err := check.Database.RemovePatternWithMetrics(pattern); err != nil { - return err - } - } - - check.addLocalTriggerIDsIfNeeded(triggerIds) - return nil -} diff --git a/checker/worker/prometheus.go b/checker/worker/prometheus.go deleted file mode 100644 index b8fdca815..000000000 --- a/checker/worker/prometheus.go +++ /dev/null @@ -1,106 +0,0 @@ -package worker - -import ( - "time" - - "github.com/moira-alert/moira/metrics" - w "github.com/moira-alert/moira/worker" -) - -const ( - prometheusTriggerLockName = "moira-prometheus-checker" - prometheusTriggerName = "Prometheus checker" -) - -type prometheusChecker struct { - 
check *Checker -} - -func newPrometheusChecker(check *Checker) checkerWorker { - return &prometheusChecker{ - check: check, - } -} - -func (ch *prometheusChecker) Name() string { - return "Prometheus" -} - -func (ch *prometheusChecker) IsEnabled() bool { - return ch.check.PrometheusConfig.Enabled -} - -func (ch *prometheusChecker) MaxParallelChecks() int { - return ch.check.Config.MaxParallelPrometheusChecks -} - -func (ch *prometheusChecker) Metrics() *metrics.CheckMetrics { - return ch.check.Metrics.PrometheusMetrics -} - -func (ch *prometheusChecker) StartTriggerGetter() error { - w.NewWorker( - remoteTriggerName, - ch.check.Logger, - ch.check.Database.NewLock(prometheusTriggerLockName, nodataCheckerLockTTL), - ch.prometheusTriggerChecker, - ).Run(ch.check.tomb.Dying()) - - return nil -} - -func (ch *prometheusChecker) GetTriggersToCheck(count int) ([]string, error) { - return ch.check.Database.GetPrometheusTriggersToCheck(count) -} - -func (ch *prometheusChecker) prometheusTriggerChecker(stop <-chan struct{}) error { - checkTicker := time.NewTicker(ch.check.PrometheusConfig.CheckInterval) - ch.check.Logger.Info().Msg(prometheusTriggerName + " started") - for { - select { - case <-stop: - ch.check.Logger.Info().Msg(prometheusTriggerName + " stopped") - checkTicker.Stop() - return nil - case <-checkTicker.C: - if err := ch.checkPrometheus(); err != nil { - ch.check.Logger.Error(). - Error(err). - Msg("Prometheus trigger failed") - } - } - } -} - -func (ch *prometheusChecker) checkPrometheus() error { - source, err := ch.check.SourceProvider.GetPrometheus() - if err != nil { - return err - } - - available, err := source.IsAvailable() - if !available { - ch.check.Logger.Info(). - Error(err). - Msg("Prometheus API is unavailable. 
Stop checking prometheus triggers") - return nil - } - - ch.check.Logger.Debug().Msg("Checking prometheus triggers") - triggerIds, err := ch.check.Database.GetPrometheusTriggerIDs() - - if err != nil { - return err - } - - ch.addPrometheusTriggerIDsIfNeeded(triggerIds) - - return nil -} - -func (ch *prometheusChecker) addPrometheusTriggerIDsIfNeeded(triggerIDs []string) { - needToCheckPrometheusTriggerIDs := ch.check.getTriggerIDsToCheck(triggerIDs) - if len(needToCheckPrometheusTriggerIDs) > 0 { - ch.check.Database.AddPrometheusTriggersToCheck(needToCheckPrometheusTriggerIDs) //nolint - } -} diff --git a/checker/worker/remote.go b/checker/worker/remote.go deleted file mode 100644 index f7f59f07e..000000000 --- a/checker/worker/remote.go +++ /dev/null @@ -1,106 +0,0 @@ -package worker - -import ( - "time" - - "github.com/moira-alert/moira/metrics" - w "github.com/moira-alert/moira/worker" -) - -const ( - remoteTriggerLockName = "moira-remote-checker" - remoteTriggerName = "Remote checker" -) - -type remoteChecker struct { - check *Checker -} - -func newRemoteChecker(check *Checker) checkerWorker { - return &remoteChecker{ - check: check, - } -} - -func (ch *remoteChecker) Name() string { - return "Remote" -} - -func (ch *remoteChecker) IsEnabled() bool { - return ch.check.RemoteConfig.Enabled -} - -func (ch *remoteChecker) MaxParallelChecks() int { - return ch.check.Config.MaxParallelRemoteChecks -} - -func (ch *remoteChecker) Metrics() *metrics.CheckMetrics { - return ch.check.Metrics.RemoteMetrics -} - -func (ch *remoteChecker) StartTriggerGetter() error { - w.NewWorker( - remoteTriggerName, - ch.check.Logger, - ch.check.Database.NewLock(remoteTriggerLockName, nodataCheckerLockTTL), - ch.remoteTriggerChecker, - ).Run(ch.check.tomb.Dying()) - - return nil -} - -func (ch *remoteChecker) GetTriggersToCheck(count int) ([]string, error) { - return ch.check.Database.GetRemoteTriggersToCheck(count) -} - -func (ch *remoteChecker) remoteTriggerChecker(stop <-chan 
struct{}) error { - checkTicker := time.NewTicker(ch.check.RemoteConfig.CheckInterval) - ch.check.Logger.Info().Msg(remoteTriggerName + " started") - for { - select { - case <-stop: - ch.check.Logger.Info().Msg(remoteTriggerName + " stopped") - checkTicker.Stop() - return nil - - case <-checkTicker.C: - if err := ch.checkRemote(); err != nil { - ch.check.Logger.Error(). - Error(err). - Msg("Remote trigger failed") - } - } - } -} - -func (ch *remoteChecker) checkRemote() error { - source, err := ch.check.SourceProvider.GetRemote() - if err != nil { - return err - } - - available, err := source.IsAvailable() - if !available { - ch.check.Logger.Info(). - Error(err). - Msg("Remote API is unavailable. Stop checking remote triggers") - return nil - } - - ch.check.Logger.Debug().Msg("Checking remote triggers") - - triggerIds, err := ch.check.Database.GetRemoteTriggerIDs() - if err != nil { - return err - } - ch.addRemoteTriggerIDsIfNeeded(triggerIds) - - return nil -} - -func (ch *remoteChecker) addRemoteTriggerIDsIfNeeded(triggerIDs []string) { - needToCheckRemoteTriggerIDs := ch.check.getTriggerIDsToCheck(triggerIDs) - if len(needToCheckRemoteTriggerIDs) > 0 { - ch.check.Database.AddRemoteTriggersToCheck(needToCheckRemoteTriggerIDs) //nolint - } -} diff --git a/checker/worker/scheduler.go b/checker/worker/scheduler.go new file mode 100644 index 000000000..77eb14b98 --- /dev/null +++ b/checker/worker/scheduler.go @@ -0,0 +1,117 @@ +package worker + +import ( + "time" + + "github.com/moira-alert/moira" + "github.com/moira-alert/moira/checker" + "github.com/moira-alert/moira/metrics" + w "github.com/moira-alert/moira/worker" +) + +const checkerLockTTL = time.Second * 15 + +type scheduler struct { + manager *WorkerManager + clusterKey moira.ClusterKey + sourceCheckConfig checker.SourceCheckConfig + name string + lockName string + validateSource func() error + metrics *metrics.CheckMetrics +} + +func newScheduler(manager *WorkerManager, clusterKey moira.ClusterKey, 
validateSource func() error) (*scheduler, error) { + metrics, err := manager.Metrics.GetCheckMetricsBySource(clusterKey) + if err != nil { + return nil, err + } + + name := clusterKey.String() + lockName := "moira-" + name + "-lock" + + return &scheduler{ + manager: manager, + sourceCheckConfig: manager.Config.SourceCheckConfigs[clusterKey], + metrics: metrics, + name: name, + lockName: lockName, + clusterKey: clusterKey, + validateSource: validateSource, + }, nil +} + +func (ch *scheduler) getMaxParallelChecks() int { + return ch.sourceCheckConfig.MaxParallelChecks +} + +func (ch *scheduler) startTriggerScheduler() error { + w.NewWorker( + ch.name, + ch.manager.Logger, + ch.manager.Database.NewLock(ch.lockName, checkerLockTTL), + ch.triggerScheduler, + ).Run(ch.manager.tomb.Dying()) + + return nil +} + +func (ch *scheduler) getTriggersToCheck(count int) ([]string, error) { + return ch.manager.Database.GetTriggersToCheck(ch.clusterKey, count) +} + +func (ch *scheduler) triggerScheduler(stop <-chan struct{}) error { + checkTicker := time.NewTicker(ch.sourceCheckConfig.CheckInterval) + + ch.manager.Logger.Info().Msg(ch.name + " started") + for { + select { + case <-stop: + ch.manager.Logger.Info().Msg(ch.name + " stopped") + checkTicker.Stop() + return nil + + case <-checkTicker.C: + if err := ch.scheduleTriggersToCheck(); err != nil { + ch.manager.Logger.Error(). + Error(err). + Msg(ch.name + " trigger failed") + } + } + } +} + +func (ch *scheduler) scheduleTriggersToCheck() error { + err := ch.validateSource() + if err != nil { + ch.manager.Logger.Info(). + Error(err). + String("cluster_key", ch.clusterKey.String()). + Msg("Source is invalid. Stop scheduling trigger checks") + return nil + } + + ch.manager.Logger.Debug(). + String("cluster_key", ch.clusterKey.String()). 
+ Msg("Scheduling triggers") + + triggerIds, err := ch.manager.Database.GetTriggerIDs(ch.clusterKey) + if err != nil { + return err + } + + err = ch.scheduleTriggerIDsIfNeeded(ch.clusterKey, triggerIds) + if err != nil { + return err + } + + return nil +} + +func (ch *scheduler) scheduleTriggerIDsIfNeeded(clusterKey moira.ClusterKey, triggerIDs []string) error { + needToCheckTriggerIDs := ch.manager.filterOutLazyTriggerIDs(triggerIDs) + if len(needToCheckTriggerIDs) > 0 { + return ch.manager.Database.AddTriggersToCheck(clusterKey, needToCheckTriggerIDs) + } + return nil +} diff --git a/checker/worker/handler.go b/checker/worker/trigger_handler.go similarity index 56% rename from checker/worker/handler.go rename to checker/worker/trigger_handler.go index ac688d27b..1b2e617ac 100644 --- a/checker/worker/handler.go +++ b/checker/worker/trigger_handler.go @@ -14,18 +14,18 @@ import ( const sleepAfterCheckingError = time.Second * 2 // startTriggerHandler is a blocking func -func (check *Checker) startTriggerHandler(triggerIDsToCheck <-chan string, metrics *metrics.CheckMetrics) error { +func (manager *WorkerManager) startTriggerHandler(triggerIDsToCheck <-chan string, metrics *metrics.CheckMetrics) error { for { triggerID, ok := <-triggerIDsToCheck if !ok { return nil } - err := check.handleTrigger(triggerID, metrics) + err := manager.handleTrigger(triggerID, metrics) if err != nil { metrics.HandleError.Mark(1) - check.Logger.Error(). + manager.Logger.Error(). String(moira.LogFieldNameTriggerID, triggerID). Error(err). 
Msg("Failed to handle trigger") @@ -35,18 +35,20 @@ func (check *Checker) startTriggerHandler(triggerIDsToCheck <-chan string, metri } } -func (check *Checker) handleTrigger(triggerID string, metrics *metrics.CheckMetrics) (err error) { +func (manager *WorkerManager) handleTrigger(triggerID string, metrics *metrics.CheckMetrics) (err error) { defer func() { if r := recover(); r != nil { err = fmt.Errorf("panic: '%s' stack: %s", r, debug.Stack()) } }() - err = check.handleTriggerInLock(triggerID, metrics) + err = manager.handleTriggerInLock(triggerID, metrics) return err } -func (check *Checker) handleTriggerInLock(triggerID string, metrics *metrics.CheckMetrics) error { - acquired, err := check.Database.SetTriggerCheckLock(triggerID) +func (manager *WorkerManager) handleTriggerInLock(triggerID string, metrics *metrics.CheckMetrics) error { + acquired, err := manager.Database.SetTriggerCheckLock(triggerID) + defer manager.Database.DeleteTriggerCheckLock(triggerID) //nolint + if err != nil { return err } @@ -58,20 +60,18 @@ func (check *Checker) handleTriggerInLock(triggerID string, metrics *metrics.Che startedAt := time.Now() defer metrics.TriggersCheckTime.UpdateSince(startedAt) - err = check.checkTrigger(triggerID) + err = manager.checkTrigger(triggerID) return err } -func (check *Checker) checkTrigger(triggerID string) error { - defer check.Database.DeleteTriggerCheckLock(triggerID) //nolint - +func (manager *WorkerManager) checkTrigger(triggerID string) error { triggerChecker, err := checker.MakeTriggerChecker( triggerID, - check.Database, - check.Logger, - check.Config, - check.SourceProvider, - check.Metrics, + manager.Database, + manager.Logger, + manager.Config, + manager.SourceProvider, + manager.Metrics, ) if errors.Is(err, checker.ErrTriggerNotExists) { diff --git a/checker/worker/trigger_queue_pipe.go b/checker/worker/trigger_queue_pipe.go new file mode 100644 index 000000000..16c5d0e31 --- /dev/null +++ b/checker/worker/trigger_queue_pipe.go @@ -0,0 
+1,81 @@ +package worker + +import ( + "time" + + "github.com/patrickmn/go-cache" +) + +const ( + sleepAfterGetTriggerIDError = time.Second * 1 + sleepWhenNoTriggerToCheck = time.Millisecond * 500 +) + +func (manager *WorkerManager) pipeTriggerToCheckQueue(fetch func(int) ([]string, error), batchSize int) <-chan string { + triggerIDsToCheck := make(chan string, batchSize*2) //nolint + manager.tomb.Go(func() error { + return manager.pipeTriggerToCheckQueueToChan(fetch, batchSize, triggerIDsToCheck) + }) + return triggerIDsToCheck +} + +func (manager *WorkerManager) pipeTriggerToCheckQueueToChan(fetch func(int) ([]string, error), batchSize int, triggerIDsToCheck chan<- string) error { + var fetchDelay time.Duration + for { + startFetch := time.After(fetchDelay) + + select { + case <-manager.tomb.Dying(): + close(triggerIDsToCheck) + return nil + + case <-startFetch: + triggerIDs, err := fetch(batchSize) + fetchDelay = manager.handleFetchResponse(triggerIDs, err, triggerIDsToCheck) + } + } +} + +func (manager *WorkerManager) handleFetchResponse(triggerIDs []string, fetchError error, triggerIDsToCheck chan<- string) time.Duration { + if fetchError != nil { + manager.Logger.Error(). + Error(fetchError). 
+ Msg("Failed to handle trigger loop") + + return sleepAfterGetTriggerIDError + } + + if len(triggerIDs) == 0 { + return sleepWhenNoTriggerToCheck + } + + for _, triggerID := range triggerIDs { + triggerIDsToCheck <- triggerID + } + + return time.Duration(0) +} + +func (manager *WorkerManager) filterOutLazyTriggerIDs(triggerIDs []string) []string { + triggerIDsToCheck := make([]string, 0, len(triggerIDs)) + + lazyTriggerIDs := manager.lazyTriggerIDs.Load().(map[string]bool) + + for _, triggerID := range triggerIDs { + if _, ok := lazyTriggerIDs[triggerID]; ok { + randomDuration := manager.getRandomLazyCacheDuration() + cacheContainsIdErr := manager.LazyTriggersCache.Add(triggerID, true, randomDuration) + + if cacheContainsIdErr != nil { + continue + } + } + + cacheContainsIdErr := manager.TriggerCache.Add(triggerID, true, cache.DefaultExpiration) + if cacheContainsIdErr == nil { + triggerIDsToCheck = append(triggerIDsToCheck, triggerID) + } + } + + return triggerIDsToCheck +} diff --git a/checker/worker/trigger_to_check.go b/checker/worker/trigger_to_check.go deleted file mode 100644 index fc8dbe501..000000000 --- a/checker/worker/trigger_to_check.go +++ /dev/null @@ -1,72 +0,0 @@ -package worker - -import ( - "time" - - "github.com/patrickmn/go-cache" -) - -const sleepAfterGetTriggerIDError = time.Second * 1 -const sleepWhenNoTriggerToCheck = time.Millisecond * 500 - -func (check *Checker) startTriggerToCheckGetter(fetch func(int) ([]string, error), batchSize int) <-chan string { - triggerIDsToCheck := make(chan string, batchSize*2) //nolint - check.tomb.Go(func() error { - return check.triggerToCheckGetter(fetch, batchSize, triggerIDsToCheck) - }) - return triggerIDsToCheck -} - -func (check *Checker) triggerToCheckGetter(fetch func(int) ([]string, error), batchSize int, triggerIDsToCheck chan<- string) error { - var fetchDelay time.Duration - for { - startFetch := time.After(fetchDelay) - select { - case <-check.tomb.Dying(): - close(triggerIDsToCheck) - return 
nil - case <-startFetch: - triggerIDs, err := fetch(batchSize) - fetchDelay = check.handleFetchResponse(triggerIDs, err, triggerIDsToCheck) - } - } -} - -func (check *Checker) handleFetchResponse(triggerIDs []string, fetchError error, triggerIDsToCheck chan<- string) time.Duration { - if fetchError != nil { - check.Logger.Error(). - Error(fetchError). - Msg("Failed to handle trigger loop") - return sleepAfterGetTriggerIDError - } - if len(triggerIDs) == 0 { - return sleepWhenNoTriggerToCheck - } - for _, triggerID := range triggerIDs { - triggerIDsToCheck <- triggerID - } - return time.Duration(0) -} - -func (check *Checker) getTriggerIDsToCheck(triggerIDs []string) []string { - triggerIDsToCheck := make([]string, 0, len(triggerIDs)) - - lazyTriggerIDs := check.lazyTriggerIDs.Load().(map[string]bool) - for _, triggerID := range triggerIDs { - if _, ok := lazyTriggerIDs[triggerID]; ok { - randomDuration := check.getRandomLazyCacheDuration() - cacheContainsIdErr := check.LazyTriggersCache.Add(triggerID, true, randomDuration) - - if cacheContainsIdErr != nil { - continue - } - } - - cacheContainsIdErr := check.TriggerCache.Add(triggerID, true, cache.DefaultExpiration) - if cacheContainsIdErr == nil { - triggerIDsToCheck = append(triggerIDsToCheck, triggerID) - } - } - - return triggerIDsToCheck -} diff --git a/checker/worker/worker.go b/checker/worker/worker.go deleted file mode 100644 index fc9f58a3f..000000000 --- a/checker/worker/worker.go +++ /dev/null @@ -1,204 +0,0 @@ -package worker - -import ( - "errors" - "sync/atomic" - "time" - - "github.com/moira-alert/moira/metrics" - - metricSource "github.com/moira-alert/moira/metric_source" - "github.com/moira-alert/moira/metric_source/prometheus" - "github.com/moira-alert/moira/metric_source/remote" - "github.com/patrickmn/go-cache" - "gopkg.in/tomb.v2" - - "github.com/moira-alert/moira" - "github.com/moira-alert/moira/checker" -) - -// Checker represents workers for periodically triggers checking based by new events 
-type Checker struct { - Logger moira.Logger - Database moira.Database - Config *checker.Config - RemoteConfig *remote.Config - PrometheusConfig *prometheus.Config - SourceProvider *metricSource.SourceProvider - Metrics *metrics.CheckerMetrics - TriggerCache *cache.Cache - LazyTriggersCache *cache.Cache - PatternCache *cache.Cache - lazyTriggerIDs atomic.Value - lastData int64 - tomb tomb.Tomb -} - -// Start start schedule new MetricEvents and check for NODATA triggers -func (check *Checker) Start() error { - var err error - - err = check.startLazyTriggers() - if err != nil { - return err - } - - err = check.startLocalMetricEvents() - if err != nil { - return err - } - - err = check.startCheckerWorker(newRemoteChecker(check)) - if err != nil { - return err - } - - err = check.startCheckerWorker(newPrometheusChecker(check)) - if err != nil { - return err - } - - err = check.startCheckerWorker(newLocalChecker(check)) - if err != nil { - return err - } - - return nil -} - -func (check *Checker) startLocalMetricEvents() error { - if check.Config.MetricEventPopBatchSize < 0 { - return errors.New("MetricEventPopBatchSize param was less than zero") - } - - if check.Config.MetricEventPopBatchSize == 0 { - check.Config.MetricEventPopBatchSize = 100 - } - - subscribeMetricEventsParams := moira.SubscribeMetricEventsParams{ - BatchSize: check.Config.MetricEventPopBatchSize, - Delay: check.Config.MetricEventPopDelay, - } - - metricEventsChannel, err := check.Database.SubscribeMetricEvents(&check.tomb, &subscribeMetricEventsParams) - if err != nil { - return err - } - - for i := 0; i < check.Config.MaxParallelLocalChecks; i++ { - check.tomb.Go(func() error { - return check.newMetricsHandler(metricEventsChannel) - }) - } - - check.tomb.Go(func() error { - return check.checkMetricEventsChannelLen(metricEventsChannel) - }) - - check.Logger.Info().Msg("Checking new events started") - - go func() { - <-check.tomb.Dying() - check.Logger.Info().Msg("Checking for new events stopped") - 
}() - - return nil -} - -type checkerWorker interface { - // Returns the name of the worker for logging - Name() string - // Returns true if worker is enabled, false otherwise - IsEnabled() bool - // Returns the max number of parallel checks for this worker - MaxParallelChecks() int - // Returns the metrics for this worker - Metrics() *metrics.CheckMetrics - // Starts separate goroutine that fetches triggers for this worker from database and adds them to the check queue - StartTriggerGetter() error - // Fetches triggers from the queue - GetTriggersToCheck(count int) ([]string, error) -} - -func (check *Checker) startCheckerWorker(w checkerWorker) error { - if !w.IsEnabled() { - check.Logger.Info().Msg(w.Name() + " checker disabled") - return nil - } - - const maxParallelChecksMaxValue = 1024 * 8 - if w.MaxParallelChecks() > maxParallelChecksMaxValue { - return errors.New("MaxParallel" + w.Name() + "Checks value is too large") - } - - check.tomb.Go(w.StartTriggerGetter) - check.Logger.Info().Msg(w.Name() + "checker started") - - triggerIdsToCheckChan := check.startTriggerToCheckGetter( - w.GetTriggersToCheck, - w.MaxParallelChecks(), - ) - - for i := 0; i < w.MaxParallelChecks(); i++ { - check.tomb.Go(func() error { - return check.startTriggerHandler( - triggerIdsToCheckChan, - w.Metrics(), - ) - }) - } - - return nil -} - -func (check *Checker) startLazyTriggers() error { - check.lastData = time.Now().UTC().Unix() - - check.lazyTriggerIDs.Store(make(map[string]bool)) - check.tomb.Go(check.lazyTriggersWorker) - - check.tomb.Go(check.checkTriggersToCheckCount) - - return nil -} - -func (check *Checker) checkTriggersToCheckCount() error { - checkTicker := time.NewTicker(time.Millisecond * 100) //nolint - var triggersToCheckCount, remoteTriggersToCheckCount int64 - var err error - for { - select { - case <-check.tomb.Dying(): - return nil - case <-checkTicker.C: - triggersToCheckCount, err = check.Database.GetLocalTriggersToCheckCount() - if err == nil { - 
check.Metrics.LocalMetrics.TriggersToCheckCount.Update(triggersToCheckCount) - } - if check.RemoteConfig.Enabled { - remoteTriggersToCheckCount, err = check.Database.GetRemoteTriggersToCheckCount() - if err == nil { - check.Metrics.RemoteMetrics.TriggersToCheckCount.Update(remoteTriggersToCheckCount) - } - } - } - } -} - -func (check *Checker) checkMetricEventsChannelLen(ch <-chan *moira.MetricEvent) error { - checkTicker := time.NewTicker(time.Millisecond * 100) //nolint - for { - select { - case <-check.tomb.Dying(): - return nil - case <-checkTicker.C: - check.Metrics.MetricEventsChannelLen.Update(int64(len(ch))) - } - } -} - -// Stop stops checks triggers -func (check *Checker) Stop() error { - check.tomb.Kill(nil) - return check.tomb.Wait() -} diff --git a/checker/worker/worker_manager.go b/checker/worker/worker_manager.go new file mode 100644 index 000000000..02cec4676 --- /dev/null +++ b/checker/worker/worker_manager.go @@ -0,0 +1,168 @@ +package worker + +import ( + "errors" + "fmt" + "sync/atomic" + "time" + + "github.com/moira-alert/moira/metrics" + + metricSource "github.com/moira-alert/moira/metric_source" + "github.com/patrickmn/go-cache" + "gopkg.in/tomb.v2" + + "github.com/moira-alert/moira" + "github.com/moira-alert/moira/checker" +) + +// WorkerManager represents workers for periodically triggers checking based by new events +type WorkerManager struct { + Logger moira.Logger + Database moira.Database + + Config *checker.Config + SourceProvider *metricSource.SourceProvider + Metrics *metrics.CheckerMetrics + + TriggerCache *cache.Cache + LazyTriggersCache *cache.Cache + PatternCache *cache.Cache + lazyTriggerIDs atomic.Value + lastData int64 + tomb tomb.Tomb +} + +// StartWorkers start schedule new MetricEvents and check for NODATA triggers +func (manager *WorkerManager) StartWorkers() error { + var err error + + err = manager.startLazyTriggers() + if err != nil { + return err + } + + err = manager.startLocalMetricEvents() + if err != nil { + return 
err + } + + for clusterKey := range manager.Config.SourceCheckConfigs { + validator, err := manager.makeSourceValidator(clusterKey) + if err != nil { + return err + } + + checker, err := newScheduler(manager, clusterKey, validator) + if err != nil { + return err + } + err = manager.startCheckerWorker(checker) + if err != nil { + return err + } + } + + return nil +} + +func (manager *WorkerManager) makeSourceValidator(clusterKey moira.ClusterKey) (func() error, error) { + if clusterKey.TriggerSource == moira.GraphiteLocal { + return manager.validateGraphiteLocal, nil + } + + source, err := manager.SourceProvider.GetMetricSource(clusterKey) + if err != nil { + return nil, err + } + + return func() error { + if available, err := source.IsAvailable(); !available { + return fmt.Errorf("source is not available: %w", err) + } + return nil + }, nil +} + +func (manager *WorkerManager) validateGraphiteLocal() error { + now := time.Now().UTC().Unix() + + if manager.lastData+manager.Config.StopCheckingIntervalSeconds >= now { + return nil + } + + return fmt.Errorf("graphite local source invalid: no metrics for %d second", manager.Config.StopCheckingIntervalSeconds) +} + +func (manager *WorkerManager) startCheckerWorker(w *scheduler) error { + const maxParallelChecksMaxValue = 1024 * 8 + if w.getMaxParallelChecks() > maxParallelChecksMaxValue { + return errors.New("MaxParallel" + w.name + "Checks value is too large") + } + + manager.tomb.Go(w.startTriggerScheduler) + manager.Logger.Info().Msg(w.name + " scheduler started") + + triggerIdsToCheckChan := manager.pipeTriggerToCheckQueue( + w.getTriggersToCheck, + w.getMaxParallelChecks(), + ) + + for i := 0; i < w.getMaxParallelChecks(); i++ { + manager.tomb.Go(func() error { + return manager.startTriggerHandler( + triggerIdsToCheckChan, + w.metrics, + ) + }) + } + + return nil +} + +func (manager *WorkerManager) startLazyTriggers() error { + manager.lastData = time.Now().UTC().Unix() + + 
manager.lazyTriggerIDs.Store(make(map[string]bool)) + manager.tomb.Go(manager.lazyTriggersWorker) + + manager.tomb.Go(manager.checkTriggersToCheckCount) + + return nil +} + +func (manager *WorkerManager) checkTriggersToCheckCount() error { + checkTicker := time.NewTicker(time.Millisecond * 100) //nolint + defer checkTicker.Stop() + + for { + select { + case <-manager.tomb.Dying(): + return nil + + case <-checkTicker.C: + for clusterKey := range manager.Config.SourceCheckConfigs { + metrics, err := manager.Metrics.GetCheckMetricsBySource(clusterKey) + if err != nil { + continue + } + + triggersToCheck, err := getTriggersToCheck(manager.Database, clusterKey) + if err != nil { + continue + } + metrics.TriggersToCheckCount.Update(triggersToCheck) + } + } + } +} + +func getTriggersToCheck(database moira.Database, clusterKey moira.ClusterKey) (int64, error) { + return database.GetTriggersToCheckCount(clusterKey) +} + +// Stop stops checks triggers +func (manager *WorkerManager) Stop() error { + manager.tomb.Kill(nil) + return manager.tomb.Wait() +} diff --git a/cmd/api/config.go b/cmd/api/config.go index e55e9fb1c..28343331f 100644 --- a/cmd/api/config.go +++ b/cmd/api/config.go @@ -1,6 +1,9 @@ package main import ( + "time" + + "github.com/moira-alert/moira" "github.com/moira-alert/moira/notifier" "github.com/xiam/to" @@ -15,11 +18,29 @@ type config struct { API apiConfig `yaml:"api"` Web webConfig `yaml:"web"` Telemetry cmd.TelemetryConfig `yaml:"telemetry"` - Remote cmd.RemoteConfig `yaml:"remote"` - Prometheus cmd.PrometheusConfig `yaml:"prometheus"` + Remotes cmd.RemotesConfig `yaml:",inline"` NotificationHistory cmd.NotificationHistoryConfig `yaml:"notification_history"` } +// ClustersMetricTTL parses TTLs of all clusters provided in config +func (config *config) ClustersMetricTTL() map[moira.ClusterKey]time.Duration { + result := make(map[moira.ClusterKey]time.Duration) + + result[moira.DefaultLocalCluster] = to.Duration(config.Redis.MetricsTTL) + + for _, remote 
:= range config.Remotes.Graphite { + key := moira.MakeClusterKey(moira.GraphiteRemote, remote.ClusterId) + result[key] = to.Duration(remote.MetricsTTL) + } + + for _, remote := range config.Remotes.Prometheus { + key := moira.MakeClusterKey(moira.PrometheusRemote, remote.ClusterId) + result[key] = to.Duration(remote.MetricsTTL) + } + + return result +} + type apiConfig struct { // Api local network address. Default is ':8081' so api will be available at http://moira.company.com:8081/api. Listen string `yaml:"listen"` @@ -74,19 +95,18 @@ type featureFlags struct { } func (config *apiConfig) getSettings( - localMetricTTL, remoteMetricTTL string, + metricsTTL map[moira.ClusterKey]time.Duration, flags api.FeatureFlags, ) *api.Config { return &api.Config{ - EnableCORS: config.EnableCORS, - Listen: config.Listen, - GraphiteLocalMetricTTL: to.Duration(localMetricTTL), - GraphiteRemoteMetricTTL: to.Duration(remoteMetricTTL), - Flags: flags, + EnableCORS: config.EnableCORS, + Listen: config.Listen, + MetricsTTL: metricsTTL, + Flags: flags, } } -func (config *webConfig) getSettings(isRemoteEnabled bool) *api.WebConfig { +func (config *webConfig) getSettings(isRemoteEnabled bool, remotes cmd.RemotesConfig) *api.WebConfig { webContacts := make([]api.WebContact, 0, len(config.Contacts)) for _, configContact := range config.Contacts { contact := api.WebContact{ @@ -99,12 +119,37 @@ func (config *webConfig) getSettings(isRemoteEnabled bool) *api.WebConfig { webContacts = append(webContacts, contact) } + clusters := []api.MetricSourceCluster{{ + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, + ClusterName: "Graphite Local", + }} + + for _, remote := range remotes.Graphite { + cluster := api.MetricSourceCluster{ + TriggerSource: moira.GraphiteRemote, + ClusterId: remote.ClusterId, + ClusterName: remote.ClusterName, + } + clusters = append(clusters, cluster) + } + + for _, remote := range remotes.Prometheus { + cluster := api.MetricSourceCluster{ + 
TriggerSource: moira.PrometheusRemote, + ClusterId: remote.ClusterId, + ClusterName: remote.ClusterName, + } + clusters = append(clusters, cluster) + } + return &api.WebConfig{ - SupportEmail: config.SupportEmail, - RemoteAllowed: isRemoteEnabled, - Contacts: webContacts, - FeatureFlags: config.getFeatureFlags(), - Sentry: config.Sentry.getSettings(), + SupportEmail: config.SupportEmail, + RemoteAllowed: isRemoteEnabled, + MetricSourceClusters: clusters, + Contacts: webContacts, + FeatureFlags: config.getFeatureFlags(), + Sentry: config.Sentry.getSettings(), } } @@ -157,15 +202,6 @@ func getDefault() config { }, Pprof: cmd.ProfilerConfig{Enabled: false}, }, - Remote: cmd.RemoteConfig{ - Timeout: "60s", - MetricsTTL: "7d", - }, - Prometheus: cmd.PrometheusConfig{ - Timeout: "60s", - MetricsTTL: "7d", - Retries: 1, - RetryTimeout: "10s", - }, + Remotes: cmd.RemotesConfig{}, } } diff --git a/cmd/api/config_test.go b/cmd/api/config_test.go index 5c459a85c..4516910a5 100644 --- a/cmd/api/config_test.go +++ b/cmd/api/config_test.go @@ -4,6 +4,7 @@ import ( "testing" "time" + "github.com/moira-alert/moira" "github.com/moira-alert/moira/cmd" "github.com/moira-alert/moira/api" @@ -13,20 +14,24 @@ import ( func Test_apiConfig_getSettings(t *testing.T) { Convey("Settings successfully filled", t, func() { + metricTTLs := map[moira.ClusterKey]time.Duration{ + moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster): time.Hour, + moira.DefaultGraphiteRemoteCluster: 24 * time.Hour, + } + apiConf := apiConfig{ Listen: "0000", EnableCORS: true, } expectedResult := &api.Config{ - EnableCORS: true, - Listen: "0000", - GraphiteLocalMetricTTL: time.Hour, - GraphiteRemoteMetricTTL: 24 * time.Hour, - Flags: api.FeatureFlags{IsReadonlyEnabled: true}, + EnableCORS: true, + Listen: "0000", + MetricsTTL: metricTTLs, + Flags: api.FeatureFlags{IsReadonlyEnabled: true}, } - result := apiConf.getSettings("1h", "24h", api.FeatureFlags{IsReadonlyEnabled: true}) + result := 
apiConf.getSettings(metricTTLs, api.FeatureFlags{IsReadonlyEnabled: true}) So(result, ShouldResemble, expectedResult) }) } @@ -89,16 +94,7 @@ func Test_webConfig_getDefault(t *testing.T) { }, Pprof: cmd.ProfilerConfig{Enabled: false}, }, - Remote: cmd.RemoteConfig{ - Timeout: "60s", - MetricsTTL: "7d", - }, - Prometheus: cmd.PrometheusConfig{ - Timeout: "60s", - MetricsTTL: "7d", - Retries: 1, - RetryTimeout: "10s", - }, + Remotes: cmd.RemotesConfig{}, NotificationHistory: cmd.NotificationHistoryConfig{ NotificationHistoryTTL: "48h", NotificationHistoryQueryLimit: -1, @@ -111,20 +107,28 @@ func Test_webConfig_getDefault(t *testing.T) { } func Test_webConfig_getSettings(t *testing.T) { + metricSourceClustersDefault := []api.MetricSourceCluster{{ + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, + ClusterName: "Graphite Local", + }} + remotesDefault := cmd.RemotesConfig{} + Convey("Empty config, fill it", t, func() { config := webConfig{} - settings := config.getSettings(true) + settings := config.getSettings(true, remotesDefault) So(settings, ShouldResemble, &api.WebConfig{ - RemoteAllowed: true, - Contacts: []api.WebContact{}, + RemoteAllowed: true, + Contacts: []api.WebContact{}, + MetricSourceClusters: metricSourceClustersDefault, }) }) Convey("Default config, fill it", t, func() { config := getDefault() - settings := config.Web.getSettings(true) + settings := config.Web.getSettings(true, remotesDefault) So(settings, ShouldResemble, &api.WebConfig{ RemoteAllowed: true, Contacts: []api.WebContact{}, @@ -133,6 +137,7 @@ func Test_webConfig_getSettings(t *testing.T) { IsPlottingAvailable: true, IsSubscriptionToAllTagsAvailable: true, }, + MetricSourceClusters: metricSourceClustersDefault, }) }) @@ -161,7 +166,7 @@ func Test_webConfig_getSettings(t *testing.T) { }, } - settings := config.getSettings(true) + settings := config.getSettings(true, remotesDefault) So(settings, ShouldResemble, &api.WebConfig{ SupportEmail: "lalal@mail.la", 
RemoteAllowed: true, @@ -183,6 +188,48 @@ func Test_webConfig_getSettings(t *testing.T) { DSN: "test dsn", Platform: "dev", }, + MetricSourceClusters: metricSourceClustersDefault, + }) + }) + + Convey("Empty config, non default cluster list", t, func() { + config := webConfig{} + remotes := cmd.RemotesConfig{ + Graphite: []cmd.GraphiteRemoteConfig{{ + RemoteCommonConfig: cmd.RemoteCommonConfig{ + ClusterId: "default", + ClusterName: "Graphite Remote 123", + }, + }}, + Prometheus: []cmd.PrometheusRemoteConfig{{ + RemoteCommonConfig: cmd.RemoteCommonConfig{ + ClusterId: "default", + ClusterName: "Prometheus Remote 888", + }, + }}, + } + + settings := config.getSettings(true, remotes) + So(settings, ShouldResemble, &api.WebConfig{ + RemoteAllowed: true, + Contacts: []api.WebContact{}, + MetricSourceClusters: []api.MetricSourceCluster{ + { + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, + ClusterName: "Graphite Local", + }, + { + TriggerSource: moira.GraphiteRemote, + ClusterId: moira.DefaultCluster, + ClusterName: "Graphite Remote 123", + }, + { + TriggerSource: moira.PrometheusRemote, + ClusterId: moira.DefaultCluster, + ClusterName: "Prometheus Remote 888", + }, + }, }) }) } diff --git a/cmd/api/main.go b/cmd/api/main.go index 8d282177d..c62c5478c 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -17,10 +17,6 @@ import ( "github.com/moira-alert/moira/database/redis" "github.com/moira-alert/moira/index" logging "github.com/moira-alert/moira/logging/zerolog_adapter" - metricSource "github.com/moira-alert/moira/metric_source" - "github.com/moira-alert/moira/metric_source/local" - "github.com/moira-alert/moira/metric_source/prometheus" - "github.com/moira-alert/moira/metric_source/remote" _ "go.uber.org/automaxprocs" ) @@ -62,8 +58,7 @@ func main() { } apiConfig := applicationConfig.API.getSettings( - applicationConfig.Redis.MetricsTTL, - applicationConfig.Remote.MetricsTTL, + applicationConfig.ClustersMetricTTL(), 
applicationConfig.Web.getFeatureFlags(), ) @@ -103,10 +98,6 @@ func main() { } defer searchIndex.Stop() //nolint - stats := newTriggerStats(logger, database, telemetry.Metrics) - stats.Start() - defer stats.Stop() //nolint - if !searchIndex.IsReady() { logger.Fatal().Msg("Search index is not ready, exit") } @@ -123,25 +114,18 @@ func main() { String("listen_address", apiConfig.Listen). Msg("Start listening") - remoteConfig := applicationConfig.Remote.GetRemoteSourceSettings() - prometheusConfig := applicationConfig.Prometheus.GetPrometheusSourceSettings() - - localSource := local.Create(database) - remoteSource := remote.Create(remoteConfig) - prometheusSource, err := prometheus.Create(prometheusConfig, logger) + metricSourceProvider, err := cmd.InitMetricSources(applicationConfig.Remotes, database, logger) if err != nil { logger.Fatal(). Error(err). - Msg("Failed to initialize prometheus metric source") + Msg("Failed to initialize metric sources") } - metricSourceProvider := metricSource.CreateMetricSourceProvider( - localSource, - remoteSource, - prometheusSource, - ) + stats := newTriggerStats(metricSourceProvider.GetClusterList(), logger, database, telemetry.Metrics) + stats.start() + defer stats.stop() //nolint - webConfig := applicationConfig.Web.getSettings(remoteConfig.Enabled || prometheusConfig.Enabled) + webConfig := applicationConfig.Web.getSettings(len(metricSourceProvider.GetAllSources()) > 0, applicationConfig.Remotes) httpHandler := handler.NewHandler( database, diff --git a/cmd/api/trigger_stats.go b/cmd/api/trigger_stats.go index 79de6a3f8..e73d3f5ae 100644 --- a/cmd/api/trigger_stats.go +++ b/cmd/api/trigger_stats.go @@ -1,7 +1,6 @@ package main import ( - "fmt" "time" "github.com/moira-alert/moira" @@ -12,11 +11,13 @@ import ( type triggerStats struct { tomb tomb.Tomb metrics *metrics.TriggersMetrics + clusters []moira.ClusterKey database moira.Database logger moira.Logger } func newTriggerStats( + clusters []moira.ClusterKey, logger 
moira.Logger, database moira.Database, metricsRegistry metrics.Registry, @@ -24,11 +25,12 @@ func newTriggerStats( return &triggerStats{ logger: logger, database: database, - metrics: metrics.NewTriggersMetrics(metricsRegistry), + metrics: metrics.NewTriggersMetrics(metricsRegistry, clusters), + clusters: clusters, } } -func (stats *triggerStats) Start() { +func (stats *triggerStats) start() { stats.tomb.Go(stats.startCheckingTriggerCount) } @@ -45,13 +47,13 @@ func (stats *triggerStats) startCheckingTriggerCount() error { } } -func (stats *triggerStats) Stop() error { +func (stats *triggerStats) stop() error { stats.tomb.Kill(nil) return stats.tomb.Wait() } func (stats *triggerStats) checkTriggerCount() { - triggersCount, err := stats.database.GetTriggerCount() + triggersCount, err := stats.database.GetTriggerCount(stats.clusters) if err != nil { stats.logger.Warning(). Error(err). @@ -61,6 +63,5 @@ func (stats *triggerStats) checkTriggerCount() { for source, count := range triggersCount { stats.metrics.Mark(source, count) - stats.logger.Debug().Msg(fmt.Sprintf("source: %s, count: %d", string(source), count)) } } diff --git a/cmd/api/trigger_stats_test.go b/cmd/api/trigger_stats_test.go index 2186bd006..636141b4c 100644 --- a/cmd/api/trigger_stats_test.go +++ b/cmd/api/trigger_stats_test.go @@ -20,29 +20,34 @@ func TestTriggerStatsCheckTriggerCount(t *testing.T) { graphiteLocalCount := int64(12) graphiteRemoteCount := int64(24) - promethteusRemoteCount := int64(42) + prometheusRemoteCount := int64(42) graphiteLocalMeter := mock_metrics.NewMockMeter(mockCtrl) graphiteRemoteMeter := mock_metrics.NewMockMeter(mockCtrl) - promethteusRemoteMeter := mock_metrics.NewMockMeter(mockCtrl) + prometheusRemoteMeter := mock_metrics.NewMockMeter(mockCtrl) - registry.EXPECT().NewMeter("triggers", string(moira.GraphiteLocal), "count").Return(graphiteLocalMeter) - registry.EXPECT().NewMeter("triggers", string(moira.GraphiteRemote), "count").Return(graphiteRemoteMeter) - 
registry.EXPECT().NewMeter("triggers", string(moira.PrometheusRemote), "count").Return(promethteusRemoteMeter) + registry.EXPECT().NewMeter("triggers", moira.GraphiteLocal.String(), moira.DefaultCluster.String()).Return(graphiteLocalMeter) + registry.EXPECT().NewMeter("triggers", moira.GraphiteRemote.String(), moira.DefaultCluster.String()).Return(graphiteRemoteMeter) + registry.EXPECT().NewMeter("triggers", moira.PrometheusRemote.String(), moira.DefaultCluster.String()).Return(prometheusRemoteMeter) dataBase := mock_moira_alert.NewMockDatabase(mockCtrl) - dataBase.EXPECT().GetTriggerCount().Return(map[moira.TriggerSource]int64{ - moira.GraphiteLocal: graphiteLocalCount, - moira.GraphiteRemote: graphiteRemoteCount, - moira.PrometheusRemote: promethteusRemoteCount, + dataBase.EXPECT().GetTriggerCount(gomock.Any()).Return(map[moira.ClusterKey]int64{ + moira.DefaultLocalCluster: graphiteLocalCount, + moira.DefaultGraphiteRemoteCluster: graphiteRemoteCount, + moira.DefaultPrometheusRemoteCluster: prometheusRemoteCount, }, nil) graphiteLocalMeter.EXPECT().Mark(graphiteLocalCount) graphiteRemoteMeter.EXPECT().Mark(graphiteRemoteCount) - promethteusRemoteMeter.EXPECT().Mark(promethteusRemoteCount) + prometheusRemoteMeter.EXPECT().Mark(prometheusRemoteCount) logger, _ := zerolog_adapter.GetLogger("Test") - triggerStats := newTriggerStats(logger, dataBase, registry) + clusters := []moira.ClusterKey{ + moira.DefaultLocalCluster, + moira.DefaultGraphiteRemoteCluster, + moira.DefaultPrometheusRemoteCluster, + } + triggerStats := newTriggerStats(clusters, logger, dataBase, registry) triggerStats.checkTriggerCount() }) diff --git a/cmd/checker/config.go b/cmd/checker/config.go index 73904b9a6..d05be0f71 100644 --- a/cmd/checker/config.go +++ b/cmd/checker/config.go @@ -10,12 +10,12 @@ import ( ) type config struct { - Redis cmd.RedisConfig `yaml:"redis"` - Logger cmd.LoggerConfig `yaml:"log"` - Checker checkerConfig `yaml:"checker"` - Telemetry cmd.TelemetryConfig 
`yaml:"telemetry"` - Remote cmd.RemoteConfig `yaml:"remote"` - Prometheus cmd.PrometheusConfig `yaml:"prometheus"` + Redis cmd.RedisConfig `yaml:"redis"` + Logger cmd.LoggerConfig `yaml:"log"` + Checker checkerConfig `yaml:"checker"` + Telemetry cmd.TelemetryConfig `yaml:"telemetry"` + Local localCheckConfig `yaml:"local"` + Remotes cmd.RemotesConfig `yaml:",inline"` } type triggerLogConfig struct { @@ -27,22 +27,19 @@ type triggersLogConfig struct { TriggersToLevel []triggerLogConfig `yaml:"triggers"` } +type localCheckConfig struct { + CheckInterval string `yaml:"check_interval"` + MaxParallelChecks int `yaml:"max_parallel_checks"` +} + type checkerConfig struct { // Period for every trigger to perform forced check on NoDataCheckInterval string `yaml:"nodata_check_interval"` // Period for every trigger to cancel forced check (earlier than 'NoDataCheckInterval') if no metrics were received StopCheckingInterval string `yaml:"stop_checking_interval"` - // Min period to perform triggers re-check. Note: Reducing of this value leads to increasing of CPU and memory usage values - CheckInterval string `yaml:"check_interval"` // Max period to perform lazy triggers re-check. Note: lazy triggers are triggers which has no subscription for it. Moira will check its state less frequently. // Delay for check lazy trigger is random between LazyTriggersCheckInterval/2 and LazyTriggersCheckInterval. LazyTriggersCheckInterval string `yaml:"lazy_triggers_check_interval"` - // Max concurrent checkers to run. Equals to the number of processor cores found on Moira host by default or when variable is defined as 0. - MaxParallelChecks int `yaml:"max_parallel_checks"` - // Max concurrent remote checkers to run. Equals to the number of processor cores found on Moira host by default or when variable is defined as 0. - MaxParallelRemoteChecks int `yaml:"max_parallel_remote_checks"` - // Max concurrent prometheus checkers to run. 
Equals to the number of processor cores found on Moira host by default or when variable is defined as 0. - MaxParallelPrometheusChecks int `yaml:"max_parallel_prometheus_checks"` // Specify log level by entities SetLogLevel triggersLogConfig `yaml:"set_log_level"` // Metric event pop operation batch size @@ -60,44 +57,68 @@ func handleParallelChecks(parallelChecks *int) bool { return false } -func (config *checkerConfig) getSettings(logger moira.Logger) *checker.Config { +func (config *config) getSettings(logger moira.Logger) *checker.Config { logTriggersToLevel := make(map[string]string) - for _, v := range config.SetLogLevel.TriggersToLevel { + for _, v := range config.Checker.SetLogLevel.TriggersToLevel { logTriggersToLevel[v.ID] = v.Level } logger.Info(). Int("number_of_triggers", len(logTriggersToLevel)). Msg("Found dynamic log rules in config for some triggers") - if handleParallelChecks(&config.MaxParallelChecks) { + sourceCheckConfigs := make(map[moira.ClusterKey]checker.SourceCheckConfig) + + localCheckConfig := checker.SourceCheckConfig{ + CheckInterval: to.Duration(config.Local.CheckInterval), + MaxParallelChecks: config.Local.MaxParallelChecks, + } + if handleParallelChecks(&localCheckConfig.MaxParallelChecks) { logger.Info(). - Int("number_of_cpu", config.MaxParallelChecks). + Int("number_of_cpu", localCheckConfig.MaxParallelChecks). + String("trigger_source", moira.GraphiteLocal.String()). + String("cluster_id", "default"). Msg("MaxParallelChecks is not configured, set it to the number of CPU") } + sourceCheckConfigs[moira.DefaultLocalCluster] = localCheckConfig - if handleParallelChecks(&config.MaxParallelRemoteChecks) { - logger.Info(). - Int("number_of_cpu", config.MaxParallelRemoteChecks). 
- Msg("MaxParallelRemoteChecks is not configured, set it to the number of CPU") + for _, remote := range config.Remotes.Graphite { + checkConfig := checker.SourceCheckConfig{ + CheckInterval: to.Duration(remote.CheckInterval), + MaxParallelChecks: remote.MaxParallelChecks, + } + if handleParallelChecks(&checkConfig.MaxParallelChecks) { + logger.Info(). + Int("number_of_cpu", checkConfig.MaxParallelChecks). + String("trigger_source", moira.GraphiteRemote.String()). + String("cluster_id", remote.ClusterId.String()). + Msg("MaxParallelChecks is not configured, set it to the number of CPU") + } + sourceCheckConfigs[moira.MakeClusterKey(moira.GraphiteRemote, remote.ClusterId)] = checkConfig } - if handleParallelChecks(&config.MaxParallelPrometheusChecks) { - logger.Info(). - Int("number_of_cpu", config.MaxParallelPrometheusChecks). - Msg("MaxParallelPrometheusChecks is not configured, set it to the number of CPU") + for _, remote := range config.Remotes.Prometheus { + checkConfig := checker.SourceCheckConfig{ + CheckInterval: to.Duration(remote.CheckInterval), + MaxParallelChecks: remote.MaxParallelChecks, + } + if handleParallelChecks(&checkConfig.MaxParallelChecks) { + logger.Info(). + Int("number_of_cpu", checkConfig.MaxParallelChecks). + String("trigger_source", moira.PrometheusRemote.String()). + String("cluster_id", remote.ClusterId.String()). 
+ Msg("MaxParallelChecks is not configured, set it to the number of CPU") + } + sourceCheckConfigs[moira.MakeClusterKey(moira.PrometheusRemote, remote.ClusterId)] = checkConfig } return &checker.Config{ - CheckInterval: to.Duration(config.CheckInterval), - LazyTriggersCheckInterval: to.Duration(config.LazyTriggersCheckInterval), - NoDataCheckInterval: to.Duration(config.NoDataCheckInterval), - StopCheckingIntervalSeconds: int64(to.Duration(config.StopCheckingInterval).Seconds()), - MaxParallelLocalChecks: config.MaxParallelChecks, - MaxParallelRemoteChecks: config.MaxParallelRemoteChecks, - MaxParallelPrometheusChecks: config.MaxParallelPrometheusChecks, + SourceCheckConfigs: sourceCheckConfigs, + LazyTriggersCheckInterval: to.Duration(config.Checker.LazyTriggersCheckInterval), + NoDataCheckInterval: to.Duration(config.Checker.NoDataCheckInterval), + StopCheckingIntervalSeconds: int64(to.Duration(config.Checker.StopCheckingInterval).Seconds()), LogTriggersToLevel: logTriggersToLevel, - MetricEventPopBatchSize: int64(config.MetricEventPopBatchSize), - MetricEventPopDelay: to.Duration(config.MetricEventPopDelay), + MetricEventPopBatchSize: int64(config.Checker.MetricEventPopBatchSize), + MetricEventPopDelay: to.Duration(config.Checker.MetricEventPopDelay), } } @@ -115,11 +136,8 @@ func getDefault() config { }, Checker: checkerConfig{ NoDataCheckInterval: "60s", - CheckInterval: "5s", LazyTriggersCheckInterval: "10m", StopCheckingInterval: "30s", - MaxParallelChecks: 0, - MaxParallelRemoteChecks: 0, }, Telemetry: cmd.TelemetryConfig{ Listen: ":8092", @@ -132,17 +150,9 @@ func getDefault() config { }, Pprof: cmd.ProfilerConfig{Enabled: false}, }, - Remote: cmd.RemoteConfig{ - CheckInterval: "60s", - Timeout: "60s", - MetricsTTL: "7d", - }, - Prometheus: cmd.PrometheusConfig{ + Local: localCheckConfig{ CheckInterval: "60s", - Timeout: "60s", - MetricsTTL: "7d", - Retries: 1, - RetryTimeout: "10s", }, + Remotes: cmd.RemotesConfig{}, } } diff --git a/cmd/checker/main.go 
b/cmd/checker/main.go index 29b85735c..ac993d86c 100644 --- a/cmd/checker/main.go +++ b/cmd/checker/main.go @@ -10,9 +10,6 @@ import ( "github.com/moira-alert/moira/checker/worker" metricSource "github.com/moira-alert/moira/metric_source" - "github.com/moira-alert/moira/metric_source/local" - "github.com/moira-alert/moira/metric_source/prometheus" - "github.com/moira-alert/moira/metric_source/remote" "github.com/patrickmn/go-cache" "github.com/moira-alert/moira" @@ -85,54 +82,40 @@ func main() { databaseSettings := config.Redis.GetSettings() database := redis.NewDatabase(logger, databaseSettings, redis.NotificationHistoryConfig{}, redis.NotificationConfig{}, redis.Checker) - remoteConfig := config.Remote.GetRemoteSourceSettings() - prometheusConfig := config.Prometheus.GetPrometheusSourceSettings() - - localSource := local.Create(database) - remoteSource := remote.Create(remoteConfig) - prometheusSource, err := prometheus.Create(prometheusConfig, logger) + metricSourceProvider, err := cmd.InitMetricSources(config.Remotes, database, logger) if err != nil { logger.Fatal(). Error(err). 
- Msg("Failed to initialize prometheus metric source") + Msg("Failed to initialize metric sources") } - // TODO: Abstractions over sources, so that they all are handled the same way - metricSourceProvider := metricSource.CreateMetricSourceProvider( - localSource, - remoteSource, - prometheusSource, - ) - - remoteConfigured, _ := remoteSource.IsConfigured() - prometheusConfigured, _ := prometheusSource.IsConfigured() - - checkerMetrics := metrics.ConfigureCheckerMetrics(telemetry.Metrics, remoteConfigured, prometheusConfigured) - checkerSettings := config.Checker.getSettings(logger) + checkerMetrics := metrics.ConfigureCheckerMetrics(telemetry.Metrics, clusterKeyList(metricSourceProvider)) + checkerSettings := config.getSettings(logger) if triggerID != nil && *triggerID != "" { checkSingleTrigger(database, checkerMetrics, checkerSettings, metricSourceProvider) } - checkerWorker := &worker.Checker{ + cacheDefaultExpiration := checkerSettings. + SourceCheckConfigs[moira.DefaultLocalCluster]. + CheckInterval + checkerWorkerManager := &worker.WorkerManager{ Logger: logger, Database: database, Config: checkerSettings, - RemoteConfig: remoteConfig, - PrometheusConfig: prometheusConfig, SourceProvider: metricSourceProvider, Metrics: checkerMetrics, - TriggerCache: cache.New(checkerSettings.CheckInterval, time.Minute*60), //nolint - LazyTriggersCache: cache.New(time.Minute*10, time.Minute*60), //nolint - PatternCache: cache.New(checkerSettings.CheckInterval, time.Minute*60), //nolint + TriggerCache: cache.New(cacheDefaultExpiration, time.Minute*60), //nolint + LazyTriggersCache: cache.New(time.Minute*10, time.Minute*60), //nolint + PatternCache: cache.New(cacheDefaultExpiration, time.Minute*60), //nolint } - err = checkerWorker.Start() + err = checkerWorkerManager.StartWorkers() if err != nil { logger.Fatal(). Error(err). Msg("Failed to start worker check") } - defer stopChecker(checkerWorker) + defer stopChecker(checkerWorkerManager) logger.Info(). 
String("moira_version", MoiraVersion). @@ -165,10 +148,18 @@ func checkSingleTrigger(database moira.Database, metrics *metrics.CheckerMetrics os.Exit(0) } -func stopChecker(service *worker.Checker) { +func stopChecker(service *worker.WorkerManager) { if err := service.Stop(); err != nil { logger.Error(). Error(err). Msg("Failed to Stop Moira Checker") } } + +func clusterKeyList(provider *metricSource.SourceProvider) []moira.ClusterKey { + keys := make([]moira.ClusterKey, 0, len(provider.GetAllSources())) + for ck := range provider.GetAllSources() { + keys = append(keys, ck) + } + return keys +} diff --git a/cmd/cli/main.go b/cmd/cli/main.go index c559ec17b..f45ff6286 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -316,7 +316,7 @@ func main() { //nolint database, dump.Trigger.ID, &dump.LastCheck, - dump.Trigger.TriggerSource, + dump.Trigger.ClusterKey(), ); err != nil { logger.Fatal(). Error(err). diff --git a/cmd/config.go b/cmd/config.go index 69d2827cf..04328237e 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -1,15 +1,17 @@ package cmd import ( + "errors" "fmt" "os" "strings" + "github.com/moira-alert/moira" "github.com/moira-alert/moira/metrics" "github.com/moira-alert/moira/image_store/s3" - "github.com/moira-alert/moira/metric_source/prometheus" - remoteSource "github.com/moira-alert/moira/metric_source/remote" + prometheusRemoteSource "github.com/moira-alert/moira/metric_source/prometheus" + graphiteRemoteSource "github.com/moira-alert/moira/metric_source/remote" "github.com/xiam/to" "gopkg.in/yaml.v2" @@ -151,47 +153,103 @@ type ProfilerConfig struct { Enabled bool `yaml:"enabled"` } -// RemoteConfig is remote graphite settings structure -type RemoteConfig struct { +// RemotesConfig is designed to be embedded in config files to configure all remote sources +type RemotesConfig struct { + Graphite []GraphiteRemoteConfig `yaml:"graphite_remote"` + Prometheus []PrometheusRemoteConfig `yaml:"prometheus_remote"` +} + +// Validate returns nil if config is 
valid, or error if it is malformed +func (remotes *RemotesConfig) Validate() error { + errs := make([]error, 0) + + errs = append(errs, validateRemotes[GraphiteRemoteConfig](remotes.Graphite)...) + errs = append(errs, validateRemotes[PrometheusRemoteConfig](remotes.Prometheus)...) + + if len(errs) == 0 { + return nil + } + return errors.Join(errs...) +} + +func validateRemotes[T remoteCommon](remotes []T) []error { + errs := make([]error, 0) + + keys := make(map[moira.ClusterId]int) + for _, remote := range remotes { + common := remote.getRemoteCommon() + if common.ClusterId == moira.ClusterNotSet { + err := fmt.Errorf("cluster id must be set for remote source (name: `%s`, url: `%s`)", + common.ClusterName, common.URL, + ) + errs = append(errs, err) + } + keys[common.ClusterId]++ + } + + for key, count := range keys { + if count > 1 { + err := fmt.Errorf("cluster id must be unique, non unique cluster id found: %s", key.String()) + errs = append(errs, err) + } + } + + return errs +} + +// RemoteCommonConfig is designed to be embedded in remote configs. It contains fields that are similar for all remotes +type RemoteCommonConfig struct { + // Unique id of the cluster + ClusterId moira.ClusterId `yaml:"cluster_id"` + // Human-readable name of the cluster + ClusterName string `yaml:"cluster_name"` // graphite url e.g http://graphite/render URL string `yaml:"url"` // Min period to perform triggers re-check. Note: Reducing of this value leads to increasing of CPU and memory usage values CheckInterval string `yaml:"check_interval"` + // Number of checks that can be run in parallel + // If empty will be set to number of cpu cores + MaxParallelChecks int `yaml:"max_parallel_checks"` // Moira won't fetch metrics older than this value from remote storage. Note that Moira doesn't delete old data from // remote storage. Large values will lead to OOM problems in checker.
// See https://github.com/moira-alert/moira/pull/519 MetricsTTL string `yaml:"metrics_ttl"` +} + +type remoteCommon interface { + getRemoteCommon() *RemoteCommonConfig +} + +// GraphiteRemoteConfig is remote graphite settings structure +type GraphiteRemoteConfig struct { + RemoteCommonConfig `yaml:",inline"` // Timeout for remote requests Timeout string `yaml:"timeout"` // Username for basic auth User string `yaml:"user"` // Password for basic auth Password string `yaml:"password"` - // If true, remote worker will be enabled. - Enabled bool `yaml:"enabled"` +} + +func (config GraphiteRemoteConfig) getRemoteCommon() *RemoteCommonConfig { + return &config.RemoteCommonConfig } // GetRemoteSourceSettings returns remote config parsed from moira config files -func (config *RemoteConfig) GetRemoteSourceSettings() *remoteSource.Config { - return &remoteSource.Config{ +func (config *GraphiteRemoteConfig) GetRemoteSourceSettings() *graphiteRemoteSource.Config { + return &graphiteRemoteSource.Config{ URL: config.URL, CheckInterval: to.Duration(config.CheckInterval), MetricsTTL: to.Duration(config.MetricsTTL), Timeout: to.Duration(config.Timeout), User: config.User, Password: config.Password, - Enabled: config.Enabled, } } -type PrometheusConfig struct { - // Url of prometheus API - URL string `yaml:"url"` - // Min period to perform triggers re-check - CheckInterval string `yaml:"check_interval"` - // Moira won't fetch metrics older than this value from prometheus remote storage. - // Large values will lead to OOM problems in checker. 
- MetricsTTL string `yaml:"metrics_ttl"` +// PrometheusRemoteConfig is remote prometheus settings structure +type PrometheusRemoteConfig struct { + RemoteCommonConfig `yaml:",inline"` // Timeout for prometheus api requests Timeout string `yaml:"timeout"` // Number of retries for prometheus api requests @@ -202,14 +260,15 @@ type PrometheusConfig struct { User string `yaml:"user"` // Password for basic auth Password string `yaml:"password"` - // If true, prometheus remote worker will be enabled. - Enabled bool `yaml:"enabled"` +} + +func (config PrometheusRemoteConfig) getRemoteCommon() *RemoteCommonConfig { + return &config.RemoteCommonConfig } // GetRemoteSourceSettings returns remote config parsed from moira config files -func (config *PrometheusConfig) GetPrometheusSourceSettings() *prometheus.Config { - return &prometheus.Config{ - Enabled: config.Enabled, +func (config *PrometheusRemoteConfig) GetPrometheusSourceSettings() *prometheusRemoteSource.Config { + return &prometheusRemoteSource.Config{ URL: config.URL, CheckInterval: to.Duration(config.CheckInterval), MetricsTTL: to.Duration(config.MetricsTTL), diff --git a/cmd/notifier/config.go b/cmd/notifier/config.go index 74a5631e8..8a210e686 100644 --- a/cmd/notifier/config.go +++ b/cmd/notifier/config.go @@ -17,8 +17,7 @@ type config struct { Logger cmd.LoggerConfig `yaml:"log"` Notifier notifierConfig `yaml:"notifier"` Telemetry cmd.TelemetryConfig `yaml:"telemetry"` - Remote cmd.RemoteConfig `yaml:"remote"` - Prometheus cmd.PrometheusConfig `yaml:"prometheus"` + Remotes cmd.RemotesConfig `yaml:",inline"` ImageStores cmd.ImageStoreConfig `yaml:"image_store"` NotificationHistory cmd.NotificationHistoryConfig `yaml:"notification_history"` Notification cmd.NotificationConfig `yaml:"notification"` @@ -127,16 +126,7 @@ func getDefault() config { }, Pprof: cmd.ProfilerConfig{Enabled: false}, }, - Remote: cmd.RemoteConfig{ - Timeout: "60s", - MetricsTTL: "24h", - }, - Prometheus: cmd.PrometheusConfig{ - Timeout:
"60s", - MetricsTTL: "7d", - Retries: 1, - RetryTimeout: "10s", - }, + Remotes: cmd.RemotesConfig{}, ImageStores: cmd.ImageStoreConfig{}, } } diff --git a/cmd/notifier/main.go b/cmd/notifier/main.go index 22c8293fe..8c8d51959 100644 --- a/cmd/notifier/main.go +++ b/cmd/notifier/main.go @@ -7,11 +7,6 @@ import ( "os/signal" "syscall" - metricSource "github.com/moira-alert/moira/metric_source" - "github.com/moira-alert/moira/metric_source/local" - "github.com/moira-alert/moira/metric_source/prometheus" - "github.com/moira-alert/moira/metric_source/remote" - "github.com/moira-alert/moira" "github.com/moira-alert/moira/cmd" "github.com/moira-alert/moira/database/redis" @@ -84,20 +79,13 @@ func main() { notificationSettings := config.Notification.GetSettings() database := redis.NewDatabase(logger, databaseSettings, notificationHistorySettings, notificationSettings, redis.Notifier) - remoteConfig := config.Remote.GetRemoteSourceSettings() - prometheusConfig := config.Prometheus.GetPrometheusSourceSettings() - - localSource := local.Create(database) - remoteSource := remote.Create(remoteConfig) - prometheusSource, err := prometheus.Create(prometheusConfig, logger) + metricSourceProvider, err := cmd.InitMetricSources(config.Remotes, database, logger) if err != nil { logger.Fatal(). Error(err). 
- Msg("Failed to initialize prometheus metric source") + Msg("Failed to initialize metric sources") } - metricSourceProvider := metricSource.CreateMetricSourceProvider(localSource, remoteSource, prometheusSource) - // Initialize the image store imageStoreMap := cmd.InitImageStores(config.ImageStores, logger) diff --git a/cmd/source_provider.go b/cmd/source_provider.go new file mode 100644 index 000000000..5149dc914 --- /dev/null +++ b/cmd/source_provider.go @@ -0,0 +1,42 @@ +package cmd + +import ( + "fmt" + + "github.com/moira-alert/moira" + metricSource "github.com/moira-alert/moira/metric_source" + "github.com/moira-alert/moira/metric_source/local" + "github.com/moira-alert/moira/metric_source/prometheus" + "github.com/moira-alert/moira/metric_source/remote" +) + +// InitMetricSources initializes SourceProvider from given remote source configs +func InitMetricSources(remotes RemotesConfig, database moira.Database, logger moira.Logger) (*metricSource.SourceProvider, error) { + err := remotes.Validate() + if err != nil { + return nil, fmt.Errorf("remotes config validation failed: %w", err) + } + + provider := metricSource.CreateMetricSourceProvider() + provider.RegisterSource(moira.DefaultLocalCluster, local.Create(database)) + + for _, graphite := range remotes.Graphite { + config := graphite.GetRemoteSourceSettings() + source, err := remote.Create(config) + if err != nil { + return nil, err + } + provider.RegisterSource(moira.MakeClusterKey(moira.GraphiteRemote, graphite.ClusterId), source) + } + + for _, prom := range remotes.Prometheus { + config := prom.GetPrometheusSourceSettings() + source, err := prometheus.Create(config, logger) + if err != nil { + return nil, err + } + provider.RegisterSource(moira.MakeClusterKey(moira.PrometheusRemote, prom.ClusterId), source) + } + + return provider, nil +} diff --git a/database/redis/last_check.go b/database/redis/last_check.go index e014e90e7..231f2b9d5 100644 --- a/database/redis/last_check.go +++ 
b/database/redis/last_check.go @@ -29,8 +29,8 @@ func (connector *DbConnector) GetTriggerLastCheck(triggerID string) (moira.Check } // SetTriggerLastCheck sets trigger last check data -func (connector *DbConnector) SetTriggerLastCheck(triggerID string, checkData *moira.CheckData, triggerSource moira.TriggerSource) error { - selfStateCheckCountKey := connector.getSelfStateCheckCountKey(triggerSource) +func (connector *DbConnector) SetTriggerLastCheck(triggerID string, checkData *moira.CheckData, clusterKey moira.ClusterKey) error { + selfStateCheckCountKey := connector.getSelfStateCheckCountKey(clusterKey) bytes, err := reply.GetCheckBytes(*checkData) if err != nil { return err @@ -66,23 +66,32 @@ func (connector *DbConnector) SetTriggerLastCheck(triggerID string, checkData *m return nil } -func (connector *DbConnector) getSelfStateCheckCountKey(triggerSource moira.TriggerSource) string { +func (connector *DbConnector) getSelfStateCheckCountKey(clusterKey moira.ClusterKey) string { if connector.source != Checker { return "" } - switch triggerSource { + + var key string + + switch clusterKey.TriggerSource { case moira.GraphiteLocal: - return selfStateChecksCounterKey + key = selfStateChecksCounterKey case moira.GraphiteRemote: - return selfStateRemoteChecksCounterKey + key = selfStateRemoteChecksCounterKey case moira.PrometheusRemote: - return selfStatePrometheusChecksCounterKey + key = selfStatePrometheusChecksCounterKey default: return "" } + + if clusterKey.ClusterId != moira.DefaultCluster { + key = key + ":" + clusterKey.ClusterId.String() + } + + return key } func appendRemoveTriggerLastCheckToRedisPipeline(ctx context.Context, pipe redis.Pipeliner, triggerID string) redis.Pipeliner { diff --git a/database/redis/last_check_test.go b/database/redis/last_check_test.go index f4fc17e05..6f1fe94c8 100644 --- a/database/redis/last_check_test.go +++ b/database/redis/last_check_test.go @@ -19,10 +19,11 @@ func TestLastCheck(t *testing.T) { defer dataBase.Flush() var 
triggerMaintenanceTS int64 + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) Convey("LastCheck manipulation", t, func() { Convey("Test read write delete", func() { triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) actual, err := dataBase.GetTriggerLastCheck(triggerID) @@ -54,7 +55,7 @@ func TestLastCheck(t *testing.T) { Convey("While no metrics", func() { triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, defaultLocalCluster) So(err, ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric1": 1, "metric5": 5}, nil, "", 0) @@ -67,7 +68,7 @@ func TestLastCheck(t *testing.T) { Convey("While no metrics to change", func() { triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric11": 1, "metric55": 5}, nil, "", 0) @@ -81,7 +82,7 @@ func TestLastCheck(t *testing.T) { Convey("Has metrics to change", func() { checkData := lastCheckTest triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &checkData, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &checkData, defaultLocalCluster) So(err, ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric1": 1, "metric5": 5}, nil, "", 0) @@ -108,7 +109,7 @@ func TestLastCheck(t *testing.T) { Convey("Set metrics maintenance while no metrics", func() { triggerID := 
uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, defaultLocalCluster) So(err, ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric1": 1, "metric5": 5}, nil, "", 0) @@ -121,7 +122,7 @@ func TestLastCheck(t *testing.T) { Convey("Set trigger maintenance while no metrics", func() { triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, defaultLocalCluster) So(err, ShouldBeNil) triggerMaintenanceTS = 1000 @@ -136,7 +137,7 @@ func TestLastCheck(t *testing.T) { Convey("Set metrics maintenance while no metrics to change", func() { triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric11": 1, "metric55": 5}, nil, "", 0) @@ -151,7 +152,7 @@ func TestLastCheck(t *testing.T) { newLastCheckTest := lastCheckTest newLastCheckTest.Maintenance = 1000 triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) triggerMaintenanceTS = 1000 @@ -166,7 +167,7 @@ func TestLastCheck(t *testing.T) { Convey("Set metrics maintenance while has metrics to change", func() { checkData := lastCheckTest triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &checkData, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &checkData, defaultLocalCluster) So(err, 
ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric1": 1, "metric5": 5}, nil, "", 0) @@ -186,7 +187,7 @@ func TestLastCheck(t *testing.T) { Convey("Set trigger and metrics maintenance while has metrics to change", func() { checkData := lastCheckTest triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &checkData, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &checkData, defaultLocalCluster) So(err, ShouldBeNil) triggerMaintenanceTS = 1000 @@ -208,7 +209,7 @@ func TestLastCheck(t *testing.T) { Convey("Set trigger maintenance to 0 and metrics maintenance", func() { checkData := lastCheckTest triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &checkData, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &checkData, defaultLocalCluster) So(err, ShouldBeNil) triggerMaintenanceTS = 0 @@ -230,7 +231,7 @@ func TestLastCheck(t *testing.T) { So(dataBase.checkDataScoreChanged(triggerID, &lastCheckWithNoMetrics), ShouldBeTrue) // set new last check. 
Should add a trigger to a reindex set - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, defaultLocalCluster) So(err, ShouldBeNil) So(dataBase.checkDataScoreChanged(triggerID, &lastCheckWithNoMetrics), ShouldBeFalse) @@ -243,7 +244,7 @@ func TestLastCheck(t *testing.T) { time.Sleep(time.Second) - err = dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, moira.GraphiteLocal) + err = dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) actual, err = dataBase.FetchTriggersToReindex(time.Now().Unix() - 10) @@ -282,7 +283,7 @@ func TestLastCheck(t *testing.T) { Value: &value, }, }, - }, moira.GraphiteLocal) + }, defaultLocalCluster) So(err, ShouldBeNil) actual, err := dataBase.GetTriggerLastCheck(triggerID) @@ -312,27 +313,34 @@ func TestCleanUpAbandonedTriggerLastCheck(t *testing.T) { dataBase := NewTestDatabase(logger) dataBase.Flush() defer dataBase.Flush() + + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) Convey("Test clean up abandoned trigger last check", t, func() { Convey("Given trigger with last check", func() { trigger := moira.Trigger{ - ID: "triggerID-0000000000001", - Name: "test trigger 1 v1.0", - Targets: []string{"test.target.1"}, - Tags: []string{"test-tag-1"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, - TTLState: &moira.TTLStateNODATA, - AloneMetrics: map[string]bool{}, + ID: "triggerID-0000000000001", + Name: "test trigger 1 v1.0", + Targets: []string{"test.target.1"}, + Tags: []string{"test-tag-1"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TTLState: &moira.TTLStateNODATA, + AloneMetrics: map[string]bool{}, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, } - _ = dataBase.SaveTrigger(trigger.ID, &trigger) + err := dataBase.SaveTrigger(trigger.ID, 
&trigger) + So(err, ShouldBeNil) + + err = dataBase.SetTriggerLastCheck(trigger.ID, &lastCheckTest, defaultLocalCluster) + So(err, ShouldBeNil) - _ = dataBase.SetTriggerLastCheck(trigger.ID, &lastCheckTest, moira.GraphiteLocal) - _, err := dataBase.GetTriggerLastCheck(trigger.ID) + _, err = dataBase.GetTriggerLastCheck(trigger.ID) So(err, ShouldBeNil) Convey("Given abandoned last check (without saved trigger)", func() { removedTriggerID := uuid.Must(uuid.NewV4()).String() - err = dataBase.SetTriggerLastCheck(removedTriggerID, &lastCheckTest, moira.GraphiteLocal) + err = dataBase.SetTriggerLastCheck(removedTriggerID, &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) _, err = dataBase.GetTriggerLastCheck(removedTriggerID) @@ -360,11 +368,12 @@ func TestRemoteLastCheck(t *testing.T) { dataBase := NewTestDatabase(logger) dataBase.Flush() defer dataBase.Flush() + defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster Convey("LastCheck manipulation", t, func() { Convey("Test read write delete", func() { triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, moira.GraphiteRemote) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultRemoteCluster) So(err, ShouldBeNil) actual, err := dataBase.GetTriggerLastCheck(triggerID) @@ -396,7 +405,7 @@ func TestRemoteLastCheck(t *testing.T) { Convey("While no metrics", func() { triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, moira.GraphiteRemote) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckWithNoMetrics, defaultRemoteCluster) So(err, ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric1": 1, "metric5": 5}, nil, "", 0) @@ -409,7 +418,7 @@ func TestRemoteLastCheck(t *testing.T) { Convey("While no metrics to change", func() { triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, 
&lastCheckTest, moira.GraphiteRemote) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultRemoteCluster) So(err, ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric11": 1, "metric55": 5}, nil, "", 0) @@ -423,7 +432,7 @@ func TestRemoteLastCheck(t *testing.T) { Convey("Has metrics to change", func() { checkData := lastCheckTest triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &checkData, moira.GraphiteRemote) + err := dataBase.SetTriggerLastCheck(triggerID, &checkData, defaultRemoteCluster) So(err, ShouldBeNil) err = dataBase.SetTriggerCheckMaintenance(triggerID, map[string]int64{"metric1": 1, "metric5": 5}, nil, "", 0) @@ -448,12 +457,14 @@ func TestLastCheckErrorConnection(t *testing.T) { dataBase := NewTestDatabaseWithIncorrectConfig(logger) dataBase.Flush() defer dataBase.Flush() + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) + Convey("Should throw error when no connection", t, func() { actual1, err := dataBase.GetTriggerLastCheck("123") So(actual1, ShouldResemble, moira.CheckData{}) So(err, ShouldNotBeNil) - err = dataBase.SetTriggerLastCheck("123", &lastCheckTest, moira.GraphiteLocal) + err = dataBase.SetTriggerLastCheck("123", &lastCheckTest, defaultLocalCluster) So(err, ShouldNotBeNil) err = dataBase.RemoveTriggerLastCheck("123") @@ -475,17 +486,19 @@ func TestGetTriggersLastCheck(t *testing.T) { dataBase.Flush() defer dataBase.Flush() + defaultSourceNotSetCluster := moira.MakeClusterKey(moira.TriggerSourceNotSet, moira.DefaultCluster) + _ = dataBase.SetTriggerLastCheck("test1", &moira.CheckData{ Timestamp: 1, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) _ = dataBase.SetTriggerLastCheck("test2", &moira.CheckData{ Timestamp: 2, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) _ = dataBase.SetTriggerLastCheck("test3", &moira.CheckData{ Timestamp: 3, - }, moira.TriggerSourceNotSet) 
+ }, defaultSourceNotSetCluster) Convey("getTriggersLastCheck manipulations", t, func() { Convey("Test with nil id array", func() { @@ -518,7 +531,7 @@ func TestGetTriggersLastCheck(t *testing.T) { defer func() { _ = dataBase.SetTriggerLastCheck("test2", &moira.CheckData{ Timestamp: 2, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) }() actual, err := dataBase.getTriggersLastCheck([]string{"test1", "test2", "test3"}) @@ -576,6 +589,7 @@ func TestMaintenanceUserSave(t *testing.T) { dataBase.Flush() defer dataBase.Flush() var triggerMaintenanceTS int64 + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) Convey("Check user saving for trigger maintenance", t, func() { userLogin := "test" @@ -587,7 +601,7 @@ func TestMaintenanceUserSave(t *testing.T) { newLastCheckTest.MaintenanceInfo.StartUser = &userLogin newLastCheckTest.MaintenanceInfo.StartTime = &startTime triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) triggerMaintenanceTS = 1000 @@ -605,7 +619,7 @@ func TestMaintenanceUserSave(t *testing.T) { newLastCheckTest.MaintenanceInfo.StopUser = &userLogin newLastCheckTest.MaintenanceInfo.StopTime = &startTime triggerID := uuid.Must(uuid.NewV4()).String() - err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) triggerMaintenanceTS = 1000 @@ -624,7 +638,7 @@ func TestMaintenanceUserSave(t *testing.T) { checkData.MaintenanceInfo = moira.MaintenanceInfo{} userLogin := "test" var timeCallMaintenance = int64(3) - err := dataBase.SetTriggerLastCheck(triggerID, &checkData, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck(triggerID, &checkData, defaultLocalCluster) So(err, ShouldBeNil) 
triggerMaintenanceTS = 1000 diff --git a/database/redis/notification_test.go b/database/redis/notification_test.go index fbbcfd907..06048b813 100644 --- a/database/redis/notification_test.go +++ b/database/redis/notification_test.go @@ -497,6 +497,7 @@ func TestFilterNotificationsByState(t *testing.T) { database := NewTestDatabase(logger) database.Flush() defer database.Flush() + defaultSourceNotSetCluster := moira.MakeClusterKey(moira.TriggerSourceNotSet, moira.DefaultCluster) notificationOld := &moira.ScheduledNotification{ Trigger: moira.TriggerData{ @@ -532,13 +533,13 @@ func TestFilterNotificationsByState(t *testing.T) { CreatedAt: now, } - _ = database.SetTriggerLastCheck("test1", &moira.CheckData{}, moira.TriggerSourceNotSet) + _ = database.SetTriggerLastCheck("test1", &moira.CheckData{}, defaultSourceNotSetCluster) _ = database.SetTriggerLastCheck("test2", &moira.CheckData{ Metrics: map[string]moira.MetricState{ "test": {}, }, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) Convey("Test filter notifications by state", t, func() { Convey("With empty notifications", func() { @@ -562,7 +563,7 @@ func TestFilterNotificationsByState(t *testing.T) { Convey("With removed check data", func() { database.RemoveTriggerLastCheck("test1") //nolint defer func() { - _ = database.SetTriggerLastCheck("test1", &moira.CheckData{}, moira.TriggerSourceNotSet) + _ = database.SetTriggerLastCheck("test1", &moira.CheckData{}, defaultSourceNotSetCluster) }() types, err := database.filterNotificationsByState([]*moira.ScheduledNotification{notificationOld, notification, notificationNew}) @@ -612,6 +613,7 @@ func TestHandleNotifications(t *testing.T) { database := NewTestDatabase(logger) database.Flush() defer database.Flush() + defaultSourceNotSetCluster := moira.MakeClusterKey(moira.TriggerSourceNotSet, moira.DefaultCluster) notificationOld := &moira.ScheduledNotification{ Trigger: moira.TriggerData{ @@ -673,13 +675,13 @@ func TestHandleNotifications(t *testing.T) { 
CreatedAt: now, } - _ = database.SetTriggerLastCheck("test1", &moira.CheckData{}, moira.TriggerSourceNotSet) + _ = database.SetTriggerLastCheck("test1", &moira.CheckData{}, defaultSourceNotSetCluster) _ = database.SetTriggerLastCheck("test2", &moira.CheckData{ Metrics: map[string]moira.MetricState{ "test": {}, }, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) Convey("Test handle notifications", t, func() { Convey("Without delayed notifications", func() { @@ -704,7 +706,7 @@ func TestHandleNotifications(t *testing.T) { Convey("With both delayed and not delayed notifications and removed check data", func() { database.RemoveTriggerLastCheck("test1") //nolint defer func() { - _ = database.SetTriggerLastCheck("test1", &moira.CheckData{}, moira.TriggerSourceNotSet) + _ = database.SetTriggerLastCheck("test1", &moira.CheckData{}, defaultSourceNotSetCluster) }() types, err := database.handleNotifications([]*moira.ScheduledNotification{notificationOld, notificationOld2, notification, notificationNew, notificationNew2, notificationNew3}) @@ -823,18 +825,20 @@ func TestFetchNotificationsDo(t *testing.T) { var limit int64 + defaultSourceNotSetCluster := moira.MakeClusterKey(moira.TriggerSourceNotSet, moira.DefaultCluster) + _ = database.SetTriggerLastCheck("test1", &moira.CheckData{ Metrics: map[string]moira.MetricState{ "test1": {}, }, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) _ = database.SetTriggerLastCheck("test2", &moira.CheckData{ Metrics: map[string]moira.MetricState{ "test1": {}, "test2": {}, }, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) now := time.Now().Unix() notificationOld := moira.ScheduledNotification{ @@ -990,7 +994,7 @@ func TestFetchNotificationsDo(t *testing.T) { Metrics: map[string]moira.MetricState{ "test1": {}, }, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) }() Convey("With big limit", func() { @@ -1209,13 +1213,14 @@ func TestGetNotificationsTriggerChecks(t *testing.T) { 
database := NewTestDatabase(logger) database.Flush() defer database.Flush() + defaultSourceNotSetCluster := moira.MakeClusterKey(moira.TriggerSourceNotSet, moira.DefaultCluster) _ = database.SetTriggerLastCheck("test1", &moira.CheckData{ Timestamp: 1, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) _ = database.SetTriggerLastCheck("test2", &moira.CheckData{ Timestamp: 2, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) Convey("getNotificationsTriggerChecks manipulations", t, func() { notification1 := &moira.ScheduledNotification{ @@ -1272,7 +1277,7 @@ func TestGetNotificationsTriggerChecks(t *testing.T) { defer func() { _ = database.SetTriggerLastCheck("test1", &moira.CheckData{ Timestamp: 1, - }, moira.TriggerSourceNotSet) + }, defaultSourceNotSetCluster) }() notifications := []*moira.ScheduledNotification{notification1, notification2, notification3} diff --git a/database/redis/reply/trigger.go b/database/redis/reply/trigger.go index 14f54ccb3..5f1547291 100644 --- a/database/redis/reply/trigger.go +++ b/database/redis/reply/trigger.go @@ -29,6 +29,7 @@ type triggerStorageElement struct { TTL string `json:"ttl,omitempty"` IsRemote bool `json:"is_remote"` TriggerSource moira.TriggerSource `json:"trigger_source,omitempty"` + ClusterId moira.ClusterId `json:"cluster_id,omitempty"` MuteNewMetrics bool `json:"mute_new_metrics,omitempty"` AloneMetrics map[string]bool `json:"alone_metrics"` CreatedAt *int64 `json:"created_at"` @@ -50,6 +51,7 @@ func (storageElement *triggerStorageElement) toTrigger() moira.Trigger { // TODO(litleleprikon): END remove in moira v2.8.0. 
Compatibility with moira < v2.6.0 triggerSource := storageElement.TriggerSource.FillInIfNotSet(storageElement.IsRemote) + clusterId := storageElement.ClusterId.FillInIfNotSet() return moira.Trigger{ ID: storageElement.ID, Name: storageElement.Name, @@ -66,6 +68,7 @@ func (storageElement *triggerStorageElement) toTrigger() moira.Trigger { Patterns: storageElement.Patterns, TTL: getTriggerTTL(storageElement.TTL), TriggerSource: triggerSource, + ClusterId: clusterId, MuteNewMetrics: storageElement.MuteNewMetrics, AloneMetrics: storageElement.AloneMetrics, CreatedAt: storageElement.CreatedAt, @@ -93,6 +96,7 @@ func toTriggerStorageElement(trigger *moira.Trigger, triggerID string) *triggerS TTL: getTriggerTTLString(trigger.TTL), IsRemote: trigger.TriggerSource == moira.GraphiteRemote, TriggerSource: trigger.TriggerSource, + ClusterId: trigger.ClusterId, MuteNewMetrics: trigger.MuteNewMetrics, AloneMetrics: trigger.AloneMetrics, CreatedAt: trigger.CreatedAt, diff --git a/database/redis/selfstate_test.go b/database/redis/selfstate_test.go index 191ab2a62..3b7dae737 100644 --- a/database/redis/selfstate_test.go +++ b/database/redis/selfstate_test.go @@ -15,6 +15,9 @@ func TestSelfCheckWithWritesInChecker(t *testing.T) { dataBase.source = Checker dataBase.Flush() defer dataBase.Flush() + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) + defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster + Convey("Self state triggers manipulation", t, func() { Convey("Empty config", func() { count, err := dataBase.GetMetricsUpdatesCount() @@ -40,14 +43,14 @@ func TestSelfCheckWithWritesInChecker(t *testing.T) { }) Convey("Update metrics checks updates count", func() { - err := dataBase.SetTriggerLastCheck("123", &lastCheckTest, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck("123", &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) count, err := dataBase.GetChecksUpdatesCount() So(count, ShouldEqual, 1) So(err, ShouldBeNil) - 
err = dataBase.SetTriggerLastCheck("12345", &lastCheckTest, moira.GraphiteRemote) + err = dataBase.SetTriggerLastCheck("12345", &lastCheckTest, defaultRemoteCluster) So(err, ShouldBeNil) count, err = dataBase.GetRemoteChecksUpdatesCount() @@ -70,16 +73,19 @@ func testSelfCheckWithWritesInDBSource(t *testing.T, dbSource DBSource) { dataBase.source = dbSource dataBase.Flush() defer dataBase.Flush() + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) + defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster + Convey(fmt.Sprintf("Self state triggers manipulation in %s", dbSource), t, func() { Convey("Update metrics checks updates count", func() { - err := dataBase.SetTriggerLastCheck("123", &lastCheckTest, moira.GraphiteLocal) + err := dataBase.SetTriggerLastCheck("123", &lastCheckTest, defaultLocalCluster) So(err, ShouldBeNil) count, err := dataBase.GetChecksUpdatesCount() So(count, ShouldEqual, 0) So(err, ShouldBeNil) - err = dataBase.SetTriggerLastCheck("12345", &lastCheckTest, moira.GraphiteRemote) + err = dataBase.SetTriggerLastCheck("12345", &lastCheckTest, defaultRemoteCluster) So(err, ShouldBeNil) count, err = dataBase.GetRemoteChecksUpdatesCount() diff --git a/database/redis/trigger.go b/database/redis/trigger.go index 59c50b8e4..befc7bb6f 100644 --- a/database/redis/trigger.go +++ b/database/redis/trigger.go @@ -17,72 +17,57 @@ import ( // GetAllTriggerIDs gets all moira triggerIDs func (connector *DbConnector) GetAllTriggerIDs() ([]string, error) { c := *connector.client - triggerIds, err := c.SMembers(connector.context, triggersListKey).Result() + triggerIds, err := c.SMembers(connector.context, allTriggersListKey).Result() if err != nil { return nil, fmt.Errorf("failed to get all triggers-list: %s", err.Error()) } return triggerIds, nil } -// GetLocalTriggerIDs gets moira local triggerIDs -func (connector *DbConnector) GetLocalTriggerIDs() ([]string, error) { +// GetTriggerIDs returns list of ids of triggers with 
given cluster key +func (connector *DbConnector) GetTriggerIDs(clusterKey moira.ClusterKey) ([]string, error) { c := *connector.client - triggerIds, err := c.SMembers(connector.context, localTriggersListKey).Result() + key, err := makeTriggerListKey(clusterKey) if err != nil { - return nil, fmt.Errorf("failed to get local triggers-list: %s", err.Error()) + return nil, fmt.Errorf("failed to get triggers-list: %w", err) } - return triggerIds, nil -} - -// GetRemoteTriggerIDs gets moira remote triggerIDs -func (connector *DbConnector) GetRemoteTriggerIDs() ([]string, error) { - c := *connector.client - triggerIds, err := c.SMembers(connector.context, remoteTriggersListKey).Result() - if err != nil { - return nil, fmt.Errorf("failed to get remote triggers-list: %s", err.Error()) - } - return triggerIds, nil -} -func (connector *DbConnector) GetPrometheusTriggerIDs() ([]string, error) { - c := *connector.client - triggerIds, err := c.SMembers(connector.context, prometheusTriggersListKey).Result() + triggerIds, err := c.SMembers(connector.context, key).Result() if err != nil { - return nil, fmt.Errorf("failed to get prometheus triggers-list: %s", err.Error()) + return nil, fmt.Errorf("failed to get triggers-list: %w", err) } return triggerIds, nil } -func (connector *DbConnector) GetTriggerCount() (map[moira.TriggerSource]int64, error) { +func (connector *DbConnector) GetTriggerCount(clusterKeys []moira.ClusterKey) (map[moira.ClusterKey]int64, error) { pipe := (*connector.client).TxPipeline() - local := pipe.SCard(connector.context, localTriggersListKey) - remote := pipe.SCard(connector.context, remoteTriggersListKey) - prometheus := pipe.SCard(connector.context, prometheusTriggersListKey) + cmds := make(map[moira.ClusterKey]*redis.IntCmd, len(clusterKeys)) + for _, key := range clusterKeys { + redisKey, err := makeTriggerListKey(key) + if err != nil { + return nil, err + } - _, err := pipe.Exec(connector.context) - if err != nil { - return nil, err + cmds[key] = 
pipe.SCard(connector.context, redisKey) } - localCount, err := local.Result() - if err != nil { - return nil, err - } - remoteCount, err := remote.Result() + _, err := pipe.Exec(connector.context) if err != nil { return nil, err } - prometheusCount, err := prometheus.Result() - if err != nil { - return nil, err + + res := make(map[moira.ClusterKey]int64, len(clusterKeys)) + for key, cmd := range cmds { + value, err := cmd.Result() + if err != nil { + return nil, err + } + + res[key] = value } - return map[moira.TriggerSource]int64{ - moira.GraphiteLocal: localCount, - moira.GraphiteRemote: remoteCount, - moira.PrometheusRemote: prometheusCount, - }, nil + return res, nil } // GetTrigger gets trigger and trigger tags by given ID and return it in merged object @@ -168,7 +153,7 @@ func (connector *DbConnector) SaveTrigger(triggerID string, trigger *moira.Trigg err := connector.updateTrigger(triggerID, trigger, oldTrigger) if err != nil { - return fmt.Errorf("failed to update trigger: %s", err.Error()) + return fmt.Errorf("failed to update trigger: %w", err) } hasSubscriptions, err := connector.triggerHasSubscriptions(trigger) @@ -225,31 +210,24 @@ func (connector *DbConnector) updateTrigger(triggerID string, newTrigger *moira. 
pipe.SRem(connector.context, tagTriggersKey(tag), triggerID) } - if newTrigger.TriggerSource != oldTrigger.TriggerSource { - switch oldTrigger.TriggerSource { - case moira.GraphiteLocal: - pipe.SRem(connector.context, localTriggersListKey, triggerID) - - case moira.GraphiteRemote: - pipe.SRem(connector.context, remoteTriggersListKey, triggerID) - - case moira.PrometheusRemote: - pipe.SRem(connector.context, prometheusTriggersListKey, triggerID) + if newTrigger.ClusterKey() != oldTrigger.ClusterKey() { + var oldTriggersListKey string + oldTriggersListKey, err = makeTriggerListKey(oldTrigger.ClusterKey()) + if err != nil { + return fmt.Errorf("could not update trigger: %w", err) } + pipe.SRem(connector.context, oldTriggersListKey, triggerID) } } pipe.Set(connector.context, triggerKey(triggerID), bytes, redis.KeepTTL) - pipe.SAdd(connector.context, triggersListKey, triggerID) - - switch newTrigger.TriggerSource { - case moira.GraphiteRemote: - pipe.SAdd(connector.context, remoteTriggersListKey, triggerID) + pipe.SAdd(connector.context, allTriggersListKey, triggerID) - case moira.PrometheusRemote: - pipe.SAdd(connector.context, prometheusTriggersListKey, triggerID) - - case moira.GraphiteLocal: - pipe.SAdd(connector.context, localTriggersListKey, triggerID) + newTriggersListKey, err := makeTriggerListKey(newTrigger.ClusterKey()) + if err != nil { + return fmt.Errorf("could not update trigger: %w", err) + } + pipe.SAdd(connector.context, newTriggersListKey, triggerID) + if newTrigger.TriggerSource == moira.GraphiteLocal { for _, pattern := range newTrigger.Patterns { pipe.SAdd(connector.context, patternsListKey, pattern) pipe.SAdd(connector.context, patternTriggersKey(pattern), triggerID) @@ -311,18 +289,13 @@ func (connector *DbConnector) removeTrigger(triggerID string, trigger *moira.Tri pipe.Del(connector.context, triggerKey(triggerID)) pipe.Del(connector.context, triggerTagsKey(triggerID)) pipe.Del(connector.context, triggerEventsKey(triggerID)) - 
pipe.SRem(connector.context, triggersListKey, triggerID) - - switch trigger.TriggerSource { - case moira.GraphiteLocal: - pipe.SRem(connector.context, localTriggersListKey, triggerID) + pipe.SRem(connector.context, allTriggersListKey, triggerID) - case moira.GraphiteRemote: - pipe.SRem(connector.context, remoteTriggersListKey, triggerID) - - case moira.PrometheusRemote: - pipe.SRem(connector.context, prometheusTriggersListKey, triggerID) + triggerListKey, err := makeTriggerListKey(trigger.ClusterKey()) + if err != nil { + return fmt.Errorf("could not remove trigger: %w", err) } + pipe.SRem(connector.context, triggerListKey, triggerID) pipe.SRem(connector.context, unusedTriggersKey, triggerID) for _, tag := range trigger.Tags { @@ -450,11 +423,35 @@ func (connector *DbConnector) triggerHasSubscriptions(trigger *moira.Trigger) (b return false, nil } -var triggersListKey = "{moira-triggers-list}:moira-triggers-list" +var allTriggersListKey = "{moira-triggers-list}:moira-triggers-list" var localTriggersListKey = "{moira-triggers-list}:moira-local-triggers-list" var remoteTriggersListKey = "{moira-triggers-list}:moira-remote-triggers-list" var prometheusTriggersListKey = "{moira-triggers-list}:moira-prometheus-triggers-list" +func makeTriggerListKey(clusterKey moira.ClusterKey) (string, error) { + var key string + + switch clusterKey.TriggerSource { + case moira.GraphiteLocal: + key = localTriggersListKey + + case moira.GraphiteRemote: + key = remoteTriggersListKey + + case moira.PrometheusRemote: + key = prometheusTriggersListKey + + default: + return "", fmt.Errorf("unknown trigger source %s", clusterKey.TriggerSource) + } + + if clusterKey.ClusterId != moira.DefaultCluster { + key = key + ":" + clusterKey.ClusterId.String() + } + + return key, nil +} + func triggerKey(triggerID string) string { return "moira-trigger:" + triggerID } diff --git a/database/redis/trigger_test.go b/database/redis/trigger_test.go index c68ecb8f4..edd75651c 100644 --- 
a/database/redis/trigger_test.go +++ b/database/redis/trigger_test.go @@ -71,7 +71,7 @@ func TestTriggerStoring(t *testing.T) { So(err, ShouldBeNil) So(actual, ShouldResemble, *trigger) - ids, err := dataBase.GetLocalTriggerIDs() + ids, err := dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{trigger.ID}) @@ -192,7 +192,7 @@ func TestTriggerStoring(t *testing.T) { So(err, ShouldResemble, database.ErrNil) So(actual, ShouldResemble, moira.Trigger{}) - ids, err = dataBase.GetLocalTriggerIDs() + ids, err = dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldBeNil) So(ids, ShouldBeEmpty) @@ -248,7 +248,7 @@ func TestTriggerStoring(t *testing.T) { So(actualTriggerChecks, ShouldResemble, []*moira.TriggerCheck{triggerCheck}) //Add check data - err = dataBase.SetTriggerLastCheck(trigger.ID, &lastCheckTest, moira.GraphiteLocal) + err = dataBase.SetTriggerLastCheck(trigger.ID, &lastCheckTest, moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster)) So(err, ShouldBeNil) triggerCheck.LastCheck = lastCheckTest @@ -312,6 +312,7 @@ func TestTriggerStoring(t *testing.T) { Patterns: []string{pattern1}, TriggerType: moira.RisingTrigger, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, AloneMetrics: map[string]bool{}, } @@ -323,6 +324,7 @@ func TestTriggerStoring(t *testing.T) { Patterns: []string{pattern2}, TriggerType: moira.RisingTrigger, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, AloneMetrics: map[string]bool{}, } @@ -352,7 +354,7 @@ func TestTriggerStoring(t *testing.T) { So(err, ShouldBeNil) So(actual, ShouldResemble, *triggerVer1) - ids, err := dataBase.GetLocalTriggerIDs() + ids, err := dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{triggerVer1.ID}) @@ -398,7 +400,7 @@ func TestTriggerStoring(t *testing.T) { So(err, ShouldBeNil) So(actual, ShouldResemble, *triggerVer2) - ids, err = 
dataBase.GetLocalTriggerIDs() + ids, err = dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{triggerVer2.ID}) @@ -448,7 +450,7 @@ func TestTriggerStoring(t *testing.T) { So(err, ShouldResemble, database.ErrNil) So(actual, ShouldResemble, moira.Trigger{}) - ids, err = dataBase.GetLocalTriggerIDs() + ids, err = dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{}) @@ -541,6 +543,7 @@ func TestRemoteTrigger(t *testing.T) { Targets: []string{"test.target.remote1"}, Patterns: []string{pattern}, TriggerSource: moira.GraphiteRemote, + ClusterId: moira.DefaultCluster, TriggerType: moira.RisingTrigger, AloneMetrics: map[string]bool{}, } @@ -568,12 +571,12 @@ func TestRemoteTrigger(t *testing.T) { So(valueStoredAtKey, ShouldResemble, []string{trigger.ID}) }) Convey("Trigger should not be added to local triggers collection", func() { - ids, err := dataBase.GetLocalTriggerIDs() + ids, err := dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{}) }) Convey("Trigger should be added to remote triggers collection", func() { - ids, err := dataBase.GetRemoteTriggerIDs() + ids, err := dataBase.GetTriggerIDs(moira.DefaultGraphiteRemoteCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{trigger.ID}) valueStoredAtKey := client.SMembers(dataBase.context, "{moira-triggers-list}:moira-remote-triggers-list").Val() @@ -606,7 +609,7 @@ func TestRemoteTrigger(t *testing.T) { So(*actual.CreatedAt, ShouldResemble, time.Date(2022, time.June, 7, 10, 0, 0, 0, time.UTC).Unix()) }) Convey("Trigger should be added to triggers collection", func() { - ids, err := dataBase.GetLocalTriggerIDs() + ids, err := dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{trigger.ID}) }) @@ -616,7 +619,7 @@ func TestRemoteTrigger(t *testing.T) { So(ids, ShouldResemble, []string{trigger.ID}) }) 
Convey("Trigger shouldn't be added to remote triggers collection", func() { - ids, err := dataBase.GetRemoteTriggerIDs() + ids, err := dataBase.GetTriggerIDs(moira.DefaultGraphiteRemoteCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{}) }) @@ -649,7 +652,7 @@ func TestRemoteTrigger(t *testing.T) { So(*actual.UpdatedAt, ShouldResemble, time.Date(2022, time.June, 7, 10, 0, 0, 0, time.UTC).Unix()) }) Convey("Trigger should be deleted from local triggers collection", func() { - ids, err := dataBase.GetLocalTriggerIDs() + ids, err := dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{}) }) @@ -659,7 +662,7 @@ func TestRemoteTrigger(t *testing.T) { So(ids, ShouldResemble, []string{trigger.ID}) }) Convey("Trigger should be added to remote triggers collection", func() { - ids, err := dataBase.GetRemoteTriggerIDs() + ids, err := dataBase.GetTriggerIDs(moira.DefaultGraphiteRemoteCluster) So(err, ShouldBeNil) So(ids, ShouldResemble, []string{trigger.ID}) }) @@ -689,7 +692,7 @@ func TestTriggerErrorConnection(t *testing.T) { }) Convey("Should throw error when no connection", t, func() { - actual, err := dataBase.GetLocalTriggerIDs() + actual, err := dataBase.GetTriggerIDs(moira.DefaultLocalCluster) So(err, ShouldNotBeNil) So(actual, ShouldBeNil) @@ -805,13 +808,19 @@ func TestDbConnector_GetTriggerIDsStartWith(t *testing.T) { Convey("Given 3 triggers in DB", t, func() { const prefix = "prefix" var triggerWithPrefix1 = moira.Trigger{ - ID: prefix + "1", + ID: prefix + "1", + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.ClusterNotSet, } var triggerWithPrefix2 = moira.Trigger{ - ID: prefix + "2", + ID: prefix + "2", + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.ClusterNotSet, } var triggerWithoutPrefix = moira.Trigger{ - ID: "without-prefix", + ID: "without-prefix", + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.ClusterNotSet, } var triggers = []moira.Trigger{ triggerWithPrefix1, @@ 
-852,6 +861,7 @@ var testTriggers = []moira.Trigger{ AloneMetrics: map[string]bool{}, // TODO: Test that empty TriggerSource is filled on getting vale from db TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { ID: "triggerID-0000000000001", @@ -862,6 +872,7 @@ var testTriggers = []moira.Trigger{ TriggerType: moira.RisingTrigger, AloneMetrics: map[string]bool{"t2": true}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { ID: "triggerID-0000000000001", @@ -872,6 +883,7 @@ var testTriggers = []moira.Trigger{ TriggerType: moira.RisingTrigger, AloneMetrics: map[string]bool{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { ID: "triggerID-0000000000004", @@ -881,6 +893,7 @@ var testTriggers = []moira.Trigger{ TriggerType: moira.RisingTrigger, AloneMetrics: map[string]bool{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { ID: "triggerID-0000000000005", @@ -890,6 +903,7 @@ var testTriggers = []moira.Trigger{ TriggerType: moira.RisingTrigger, AloneMetrics: map[string]bool{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { ID: "triggerID-0000000000006", @@ -899,6 +913,7 @@ var testTriggers = []moira.Trigger{ TriggerType: moira.RisingTrigger, AloneMetrics: map[string]bool{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { ID: "triggerID-0000000000007", @@ -908,6 +923,7 @@ var testTriggers = []moira.Trigger{ TriggerType: moira.RisingTrigger, AloneMetrics: map[string]bool{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { ID: "triggerID-0000000000008", @@ -917,6 +933,7 @@ var testTriggers = []moira.Trigger{ TriggerType: moira.RisingTrigger, AloneMetrics: map[string]bool{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { ID: "triggerID-0000000000009", @@ -926,5 +943,6 @@ var testTriggers = []moira.Trigger{ TriggerType: moira.RisingTrigger, AloneMetrics: 
map[string]bool{}, TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, } diff --git a/database/redis/triggers_to_check.go b/database/redis/triggers_to_check.go index cbb7f5d30..315aa241a 100644 --- a/database/redis/triggers_to_check.go +++ b/database/redis/triggers_to_check.go @@ -5,9 +5,34 @@ import ( "fmt" "github.com/go-redis/redis/v8" + "github.com/moira-alert/moira" "github.com/moira-alert/moira/database" ) +func (connector *DbConnector) AddTriggersToCheck(clusterKey moira.ClusterKey, triggerIDs []string) error { + key, err := makeTriggersToCheckKey(clusterKey) + if err != nil { + return err + } + return connector.addTriggersToCheck(key, triggerIDs) +} + +func (connector *DbConnector) GetTriggersToCheck(clusterKey moira.ClusterKey, count int) ([]string, error) { + key, err := makeTriggersToCheckKey(clusterKey) + if err != nil { + return nil, err + } + return connector.getTriggersToCheck(key, count) +} + +func (connector *DbConnector) GetTriggersToCheckCount(clusterKey moira.ClusterKey) (int64, error) { + key, err := makeTriggersToCheckKey(clusterKey) + if err != nil { + return 0, err + } + return connector.getTriggersToCheckCount(key) +} + // AddLocalTriggersToCheck gets trigger IDs and save it to Redis Set func (connector *DbConnector) AddLocalTriggersToCheck(triggerIDs []string) error { return connector.addTriggersToCheck(localTriggersToCheckKey, triggerIDs) @@ -93,6 +118,32 @@ func (connector *DbConnector) getTriggersToCheckCount(key string) (int64, error) return triggersToCheckCount, nil } -var remoteTriggersToCheckKey = "moira-remote-triggers-to-check" -var prometheusTriggersToCheckKey = "moira-prometheus-triggers-to-check" -var localTriggersToCheckKey = "moira-triggers-to-check" +const ( + remoteTriggersToCheckKey = "moira-remote-triggers-to-check" + prometheusTriggersToCheckKey = "moira-prometheus-triggers-to-check" + localTriggersToCheckKey = "moira-triggers-to-check" +) + +func makeTriggersToCheckKey(clusterKey moira.ClusterKey) 
(string, error) { + var key string + + switch clusterKey.TriggerSource { + case moira.GraphiteLocal: + key = localTriggersToCheckKey + + case moira.GraphiteRemote: + key = remoteTriggersToCheckKey + + case moira.PrometheusRemote: + key = prometheusTriggersToCheckKey + + default: + return "", fmt.Errorf("unknown trigger source `%s`", clusterKey.TriggerSource.String()) + } + + if clusterKey.ClusterId != moira.DefaultCluster { + key = key + ":" + clusterKey.ClusterId.String() + } + + return key, nil +} diff --git a/database/redis/unused_triggers_test.go b/database/redis/unused_triggers_test.go index bb0c117a9..dbed460f1 100644 --- a/database/redis/unused_triggers_test.go +++ b/database/redis/unused_triggers_test.go @@ -86,30 +86,36 @@ func TestUnusedTriggers(t *testing.T) { Convey("Check triggers are marked used and unused properly", t, func() { trigger1Ver1 := &moira.Trigger{ - ID: "triggerID-0000000000001", - Name: "test trigger 1 v1.0", - Targets: []string{"test.target.1"}, - Tags: []string{"test-tag-1"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "triggerID-0000000000001", + Name: "test trigger 1 v1.0", + Targets: []string{"test.target.1"}, + Tags: []string{"test-tag-1"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, } trigger1Ver2 := &moira.Trigger{ - ID: "triggerID-0000000000001", - Name: "test trigger 1 v2.0", - Targets: []string{"test.target.1"}, - Tags: []string{"test-tag-2", "test-tag-1"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "triggerID-0000000000001", + Name: "test trigger 1 v2.0", + Targets: []string{"test.target.1"}, + Tags: []string{"test-tag-2", "test-tag-1"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, } trigger1Ver3 := &moira.Trigger{ - ID: 
"triggerID-0000000000001", - Name: "test trigger 1 v3.0", - Targets: []string{"test.target.1"}, - Tags: []string{"test-tag-2", "test-tag-3"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "triggerID-0000000000001", + Name: "test trigger 1 v3.0", + Targets: []string{"test.target.1"}, + Tags: []string{"test-tag-2", "test-tag-3"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, } subscription1Ver1 := &moira.SubscriptionData{ @@ -180,52 +186,64 @@ func TestUnusedTriggers(t *testing.T) { triggers := []*moira.Trigger{ { - ID: "new-trigger-1", - Name: "Very New trigger 1", - Targets: []string{"new.target.1"}, - Tags: []string{"new-tag-1"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "new-trigger-1", + Name: "Very New trigger 1", + Targets: []string{"new.target.1"}, + Tags: []string{"new-tag-1"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { - ID: "new-trigger-2", - Name: "Very New trigger 2", - Targets: []string{"new.target.2"}, - Tags: []string{"new-tag-2"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "new-trigger-2", + Name: "Very New trigger 2", + Targets: []string{"new.target.2"}, + Tags: []string{"new-tag-2"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { - ID: "new-trigger-3", - Name: "Very New trigger 3", - Targets: []string{"new.target.3"}, - Tags: []string{"new-tag-3"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "new-trigger-3", + Name: "Very New trigger 3", + Targets: []string{"new.target.3"}, + Tags: []string{"new-tag-3"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + 
TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { - ID: "new-trigger-4", - Name: "Very New trigger 4", - Targets: []string{"new.target.4"}, - Tags: []string{"new-tag-1", "new-tag-2", "new-tag-3"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "new-trigger-4", + Name: "Very New trigger 4", + Targets: []string{"new.target.4"}, + Tags: []string{"new-tag-1", "new-tag-2", "new-tag-3"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { - ID: "new-trigger-5", - Name: "Very New trigger 5", - Targets: []string{"new.target.5"}, - Tags: []string{"new-tag-1", "new-tag-2"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "new-trigger-5", + Name: "Very New trigger 5", + Targets: []string{"new.target.5"}, + Tags: []string{"new-tag-1", "new-tag-2"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, { - ID: "new-trigger-6", - Name: "Very New trigger 6", - Targets: []string{"new.target.6"}, - Tags: []string{"new-tag-5", "new-tag-6"}, - Patterns: []string{"test.pattern.1"}, - TriggerType: moira.RisingTrigger, + ID: "new-trigger-6", + Name: "Very New trigger 6", + Targets: []string{"new.target.6"}, + Tags: []string{"new-tag-5", "new-tag-6"}, + Patterns: []string{"test.pattern.1"}, + TriggerType: moira.RisingTrigger, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, }, } subscriptions := []*moira.SubscriptionData{ diff --git a/datatypes.go b/datatypes.go index 833fa2815..0c6e63c46 100644 --- a/datatypes.go +++ b/datatypes.go @@ -166,9 +166,11 @@ type TriggerData struct { ErrorValue float64 `json:"error_value" example:"1000"` IsRemote bool `json:"is_remote" example:"false"` TriggerSource TriggerSource `json:"trigger_source,omitempty" example:"graphite_local"` + 
ClusterId ClusterId `json:"cluster_id,omitempty" example:"default"` Tags []string `json:"__notifier_trigger_tags" example:"server,disk"` } +// GetTriggerSource returns trigger source associated with the trigger func (trigger TriggerData) GetTriggerSource() TriggerSource { return trigger.TriggerSource.FillInIfNotSet(trigger.IsRemote) } @@ -332,6 +334,7 @@ type Trigger struct { PythonExpression *string `json:"python_expression,omitempty" extensions:"x-nullable"` Patterns []string `json:"patterns" example:""` TriggerSource TriggerSource `json:"trigger_source,omitempty" example:"graphite_local"` + ClusterId ClusterId `json:"cluster_id,omitempty" example:"default"` MuteNewMetrics bool `json:"mute_new_metrics" example:"false"` AloneMetrics map[string]bool `json:"alone_metrics" example:"t1:true"` CreatedAt *int64 `json:"created_at" format:"int64" extensions:"x-nullable"` @@ -340,6 +343,12 @@ type Trigger struct { UpdatedBy string `json:"updated_by"` } +// ClusterKey returns cluster key composed of trigger source and cluster id associated with the trigger +func (trigger *Trigger) ClusterKey() ClusterKey { + return MakeClusterKey(trigger.TriggerSource, trigger.ClusterId) +} + +// TriggerSource is a enum which values correspond to types of moira's metric sources type TriggerSource string var ( @@ -365,10 +374,10 @@ func (s *TriggerSource) UnmarshalJSON(data []byte) error { return nil } -// Neede for backwards compatibility with moira versions that used oly isRemote flag -func (triggerSource TriggerSource) FillInIfNotSet(isRempte bool) TriggerSource { +// Needed for backwards compatibility with moira versions that used only isRemote flag +func (triggerSource TriggerSource) FillInIfNotSet(isRemote bool) TriggerSource { if triggerSource == TriggerSourceNotSet { - if isRempte { + if isRemote { return GraphiteRemote } else { return GraphiteLocal @@ -377,6 +386,52 @@ func (triggerSource TriggerSource) FillInIfNotSet(isRempte bool) TriggerSource { return triggerSource } +func 
(triggerSource TriggerSource) String() string { + return string(triggerSource) +} + +// ClusterId represent the unique id for each cluster with the same TriggerSource +type ClusterId string + +var ( + ClusterNotSet ClusterId = "" + DefaultCluster ClusterId = "default" +) + +// FillInIfNotSet returns new ClusterId with value set to default if it was empty +func (clusterId ClusterId) FillInIfNotSet() ClusterId { + if clusterId == ClusterNotSet { + return DefaultCluster + } + return clusterId +} + +func (clusterId ClusterId) String() string { + return string(clusterId) +} + +// ClusterKey represents unique key of a metric source +type ClusterKey struct { + TriggerSource TriggerSource + ClusterId ClusterId +} + +var DefaultLocalCluster = MakeClusterKey(GraphiteLocal, DefaultCluster) +var DefaultGraphiteRemoteCluster = MakeClusterKey(GraphiteRemote, DefaultCluster) +var DefaultPrometheusRemoteCluster = MakeClusterKey(PrometheusRemote, DefaultCluster) + +// MakeClusterKey creates new cluster key with given trigger source and cluster id +func MakeClusterKey(triggerSource TriggerSource, clusterId ClusterId) ClusterKey { + return ClusterKey{ + TriggerSource: triggerSource, + ClusterId: clusterId, + } +} + +func (clusterKey ClusterKey) String() string { + return fmt.Sprintf("%s.%s", clusterKey.TriggerSource, clusterKey.ClusterId) +} + // TriggerCheck represents trigger data with last check data and check timestamp type TriggerCheck struct { Trigger diff --git a/docker-compose.yml b/docker-compose.yml index f8b54a9f8..488244e01 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,6 +24,15 @@ services: - "9080:9090" restart: always + prometheus_2: + image: prom/prometheus + volumes: + - ./local/prometheus.yml:/etc/prometheus/prometheus.yml + command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus" + ports: + - "9081:9090" + restart: always + filter: build: context: . 
diff --git a/integration_tests/notifier/notifier_test.go b/integration_tests/notifier/notifier_test.go index 51bc894a9..2961398c9 100644 --- a/integration_tests/notifier/notifier_test.go +++ b/integration_tests/notifier/notifier_test.go @@ -55,10 +55,12 @@ var subscription = moira.SubscriptionData{ } var trigger = moira.Trigger{ - ID: "triggerID-0000000000001", - Name: "test trigger 1", - Targets: []string{"test.target.1"}, - Tags: []string{"test-tag-1"}, + ID: "triggerID-0000000000001", + Name: "test trigger 1", + Targets: []string{"test.target.1"}, + Tags: []string{"test-tag-1"}, + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, } var triggerData = moira.TriggerData{ @@ -81,12 +83,36 @@ func TestNotifier(t *testing.T) { defer mockCtrl.Finish() database := redis.NewTestDatabase(logger) - database.SaveContact(&contact) //nolint - database.SaveSubscription(&subscription) //nolint - database.SaveTrigger(trigger.ID, &trigger) //nolint - database.PushNotificationEvent(&event, true) //nolint - metricsSourceProvider := metricSource.CreateMetricSourceProvider(local.Create(database), nil, nil) + err := database.SaveContact(&contact) + if err != nil { + t.Fail() + fmt.Printf("Error occurred: %s\n", err.Error()) + return + } + + err = database.SaveSubscription(&subscription) + if err != nil { + t.Fail() + fmt.Printf("Error occurred: %s\n", err.Error()) + return + } + + err = database.SaveTrigger(trigger.ID, &trigger) + if err != nil { + t.Fail() + fmt.Printf("Error occurred: %s\n", err.Error()) + return + } + + err = database.PushNotificationEvent(&event, true) + if err != nil { + t.Fail() + fmt.Printf("Error occurred: %s\n", err.Error()) + return + } + + metricsSourceProvider := metricSource.CreateTestMetricSourceProvider(local.Create(database), nil, nil) notifierInstance := notifier.NewNotifier( database, diff --git a/interfaces.go b/interfaces.go index 8b5ffbd1f..ff9f10224 100644 --- a/interfaces.go +++ b/interfaces.go @@ -27,18 +27,16 @@ type 
Database interface { // LastCheck storing GetTriggerLastCheck(triggerID string) (CheckData, error) - SetTriggerLastCheck(triggerID string, checkData *CheckData, triggerSource TriggerSource) error + SetTriggerLastCheck(triggerID string, checkData *CheckData, clusterKey ClusterKey) error RemoveTriggerLastCheck(triggerID string) error SetTriggerCheckMaintenance(triggerID string, metrics map[string]int64, triggerMaintenance *int64, userLogin string, timeCallMaintenance int64) error CleanUpAbandonedTriggerLastCheck() error // Trigger storing GetAllTriggerIDs() ([]string, error) - GetLocalTriggerIDs() ([]string, error) - GetRemoteTriggerIDs() ([]string, error) - GetPrometheusTriggerIDs() ([]string, error) + GetTriggerIDs(clusterKey ClusterKey) ([]string, error) - GetTriggerCount() (map[TriggerSource]int64, error) + GetTriggerCount(clusterKeys []ClusterKey) (map[ClusterKey]int64, error) GetTrigger(triggerID string) (Trigger, error) GetTriggers(triggerIDs []string) ([]*Trigger, error) @@ -113,17 +111,9 @@ type Database interface { RemoveMetricsValues(metrics []string, toTime int64) error GetMetricsTTLSeconds() int64 - AddLocalTriggersToCheck(triggerIDs []string) error - GetLocalTriggersToCheck(count int) ([]string, error) - GetLocalTriggersToCheckCount() (int64, error) - - AddRemoteTriggersToCheck(triggerIDs []string) error - GetRemoteTriggersToCheck(count int) ([]string, error) - GetRemoteTriggersToCheckCount() (int64, error) - - AddPrometheusTriggersToCheck(triggerIDs []string) error - GetPrometheusTriggersToCheck(count int) ([]string, error) - GetPrometheusTriggersToCheckCount() (int64, error) + AddTriggersToCheck(clusterKey ClusterKey, triggerIDs []string) error + GetTriggersToCheck(clusterKey ClusterKey, count int) ([]string, error) + GetTriggersToCheckCount(clusterKey ClusterKey) (int64, error) // TriggerCheckLock storing AcquireTriggerCheckLock(triggerID string, maxAttemptsCount int) error diff --git a/local/api.yml b/local/api.yml index 4368b8d70..9217effa8 100644 
--- a/local/api.yml +++ b/local/api.yml @@ -12,18 +12,28 @@ telemetry: pprof: enabled: true listen: ":8091" -remote: - enabled: true - url: "http://graphite:80/render" - check_interval: 60s - timeout: 60s - metrics_ttl: 168h -prometheus: - url: "http://prometheus:9090" - enabled: true - check_interval: 60s - timeout: 60s - metrics_ttl: 168h +graphite_remote: + - cluster_id: default + cluster_name: Graphite 1 + url: "http://graphite:80/render" + check_interval: 60s + timeout: 60s + metrics_ttl: 168h +prometheus_remote: + - cluster_id: default + cluster_name: Prometheus 1 + url: "http://prometheus:9090" + check_interval: 60s + timeout: 60s + metrics_ttl: 168h + - cluster_id: staging + cluster_name: Prometheus 2 + url: "http://prometheus_2:9090" + check_interval: 60s + timeout: 60s + metrics_ttl: 168h + retries: 5 + retry_timeout: 15s api: listen: ":8081" enable_cors: false diff --git a/local/checker.yml b/local/checker.yml index eacd43a5c..fcf973465 100644 --- a/local/checker.yml +++ b/local/checker.yml @@ -12,22 +12,35 @@ telemetry: pprof: enabled: true listen: ":8092" -remote: - enabled: true - url: "http://graphite:80/render" +local: check_interval: 60s - timeout: 60s - metrics_ttl: 168h -prometheus: - url: "http://prometheus:9090" - enabled: true - check_interval: 60s - timeout: 60s - metrics_ttl: 168h +graphite_remote: + - cluster_id: default + cluster_name: Graphite 1 + url: "http://graphite:80/render" + check_interval: 60s + timeout: 60s + metrics_ttl: 168h +prometheus_remote: + - cluster_id: default + cluster_name: Prometheus 1 + url: "http://prometheus:9090" + check_interval: 60s + timeout: 60s + metrics_ttl: 168h + - cluster_id: staging + cluster_name: Prometheus 2 + url: "http://prometheus_2:9090" + check_interval: 60s + timeout: 60s + metrics_ttl: 168h + retries: 5 + retry_timeout: 15s checker: nodata_check_interval: 60s check_interval: 10s stop_checking_interval: 30s + lazy_triggers_check_interval: 60s log: log_file: stdout log_level: debug diff --git 
a/local/notifier.yml b/local/notifier.yml index ad402f23f..63aa9cf9d 100644 --- a/local/notifier.yml +++ b/local/notifier.yml @@ -12,18 +12,20 @@ telemetry: pprof: enabled: true listen: ":8093" -remote: - enabled: true - url: "http://graphite:80/render" - check_interval: 60s - timeout: 60s - metrics_ttl: 168h -prometheus: - url: "http://prometheus:9090" - enabled: true - check_interval: 60s - timeout: 60s - metrics_ttl: 168h +graphite_remote: + - cluster_id: default + cluster_name: Graphite 1 + url: "http://graphite:80/render" + check_interval: 60s + timeout: 60s + metrics_ttl: 168h +prometheus_remote: + - cluster_id: default + cluster_name: Prometheus 1 + url: "http://prometheus:9090" + check_interval: 60s + timeout: 60s + metrics_ttl: 168h notifier: sender_timeout: 10s resending_timeout: "1:00" diff --git a/metric_source/local/local_test.go b/metric_source/local/local_test.go index 88403d72b..fd5f7a94b 100644 --- a/metric_source/local/local_test.go +++ b/metric_source/local/local_test.go @@ -489,19 +489,6 @@ func TestLocalMetricsTTL(t *testing.T) { }) } -func TestLocal_IsConfigured(t *testing.T) { - mockCtrl := gomock.NewController(t) - defer mockCtrl.Finish() - dataBase := mock_moira_alert.NewMockDatabase(mockCtrl) - localSource := Create(dataBase) - - Convey("Always true", t, func() { - actual, err := localSource.IsConfigured() - So(err, ShouldBeNil) - So(actual, ShouldBeTrue) - }) -} - func TestLocal_evalExpr(t *testing.T) { Convey("When everything is correct, we don't return any error", t, func() { ctx := evalCtx{from: time.Now().Add(-1 * time.Hour).Unix(), until: time.Now().Unix()} diff --git a/metric_source/prometheus/convert.go b/metric_source/prometheus/convert.go index 0792a1375..368f8dd1b 100644 --- a/metric_source/prometheus/convert.go +++ b/metric_source/prometheus/convert.go @@ -14,14 +14,14 @@ func convertToFetchResult(mat model.Matrix, from, until int64, allowRealTimeAler } for _, res := range mat { - resValues := TrimValuesIfNescesary(res.Values, 
allowRealTimeAlerting) + resValues := trimValuesIfNecessary(res.Values, allowRealTimeAlerting) values := make([]float64, 0, len(resValues)) for _, v := range resValues { values = append(values, float64(v.Value)) } - start, stop := StartStopFromValues(resValues, from, until) + start, stop := startStopFromValues(resValues, from, until) data := metricSource.MetricData{ Name: targetFromTags(res.Metric), StartTime: start, @@ -36,7 +36,7 @@ func convertToFetchResult(mat model.Matrix, from, until int64, allowRealTimeAler return &result } -func StartStopFromValues(values []model.SamplePair, from, until int64) (int64, int64) { +func startStopFromValues(values []model.SamplePair, from, until int64) (int64, int64) { start, stop := from, until if len(values) != 0 { start = values[0].Timestamp.Unix() @@ -45,7 +45,7 @@ func StartStopFromValues(values []model.SamplePair, from, until int64) (int64, i return start, stop } -func TrimValuesIfNescesary(values []model.SamplePair, allowRealTimeAlerting bool) []model.SamplePair { +func trimValuesIfNecessary(values []model.SamplePair, allowRealTimeAlerting bool) []model.SamplePair { if allowRealTimeAlerting || len(values) == 0 { return values } diff --git a/metric_source/prometheus/prometheus.go b/metric_source/prometheus/prometheus.go index cc80dcebc..d554c68db 100644 --- a/metric_source/prometheus/prometheus.go +++ b/metric_source/prometheus/prometheus.go @@ -13,7 +13,6 @@ const StepTimeSeconds int64 = 60 var ErrPrometheusStorageDisabled = fmt.Errorf("remote prometheus storage is not enabled") type Config struct { - Enabled bool CheckInterval time.Duration MetricsTTL time.Duration RequestTimeout time.Duration @@ -44,10 +43,7 @@ func (prometheus *Prometheus) GetMetricsTTLSeconds() int64 { } func (prometheus *Prometheus) IsConfigured() (bool, error) { - if prometheus.config.Enabled { - return prometheus.config.Enabled, nil - } - return false, ErrPrometheusStorageDisabled + return true, nil } func (prometheus *Prometheus) IsAvailable() 
(bool, error) { diff --git a/metric_source/prometheus/prometheus_test.go b/metric_source/prometheus/prometheus_test.go deleted file mode 100644 index 10744fbe1..000000000 --- a/metric_source/prometheus/prometheus_test.go +++ /dev/null @@ -1,23 +0,0 @@ -package prometheus - -import ( - "testing" - - . "github.com/smartystreets/goconvey/convey" -) - -func TestIsConfigured(t *testing.T) { - Convey("Metric source is not configured", t, func() { - source, _ := Create(&Config{URL: "", Enabled: false}, nil) - isConfigured, err := source.IsConfigured() - So(isConfigured, ShouldBeFalse) - So(err, ShouldResemble, ErrPrometheusStorageDisabled) - }) - - Convey("Metric source is configured", t, func() { - source, _ := Create(&Config{URL: "http://host", Enabled: true}, nil) - isConfigured, err := source.IsConfigured() - So(isConfigured, ShouldBeTrue) - So(err, ShouldBeEmpty) - }) -} diff --git a/metric_source/provider.go b/metric_source/provider.go index fed85b087..820ce3804 100644 --- a/metric_source/provider.go +++ b/metric_source/provider.go @@ -6,65 +6,66 @@ import ( "github.com/moira-alert/moira" ) -// ErrMetricSourceIsNotConfigured is used then metric source return false on IsConfigured method call with nil error -var ErrMetricSourceIsNotConfigured = fmt.Errorf("metric source is not configured") - // SourceProvider is a provider for all known metrics sources type SourceProvider struct { - local MetricSource - remote MetricSource - prometheus MetricSource + sources map[moira.ClusterKey]MetricSource } // CreateMetricSourceProvider just creates SourceProvider with all known metrics sources -func CreateMetricSourceProvider(graphiteLocal, graphiteRemote, prometheusRemote MetricSource) *SourceProvider { +func CreateMetricSourceProvider() *SourceProvider { return &SourceProvider{ - remote: graphiteRemote, - local: graphiteLocal, - prometheus: prometheusRemote, + sources: make(map[moira.ClusterKey]MetricSource), } } -// GetLocal gets local metric source. 
If it not configured returns not empty error -func (provider *SourceProvider) GetLocal() (MetricSource, error) { - return returnSource(provider.local) -} +// CreateTestMetricSourceProvider creates source provider and registers default clusters for each trigger source if given +func CreateTestMetricSourceProvider(local, graphiteRemote, prometheusRemote MetricSource) *SourceProvider { + provider := CreateMetricSourceProvider() -// GetRemote gets remote metric source. If it not configured returns not empty error -func (provider *SourceProvider) GetRemote() (MetricSource, error) { - return returnSource(provider.remote) -} + if local != nil { + provider.RegisterSource(moira.DefaultLocalCluster, local) + } + if graphiteRemote != nil { + provider.RegisterSource(moira.DefaultGraphiteRemoteCluster, graphiteRemote) + } + if prometheusRemote != nil { + provider.RegisterSource(moira.DefaultPrometheusRemoteCluster, prometheusRemote) + } -// GetRemote gets remote metric source. If it not configured returns not empty error -func (provider *SourceProvider) GetPrometheus() (MetricSource, error) { - return returnSource(provider.prometheus) + return provider } -// GetTriggerMetricSource get metrics source by given trigger. If it not configured returns not empty error -func (provider *SourceProvider) GetTriggerMetricSource(trigger *moira.Trigger) (MetricSource, error) { - return provider.GetMetricSource(trigger.TriggerSource) +// RegisterSource adds given metric source with given cluster key to pool of available trigger sources +func (provider *SourceProvider) RegisterSource(clusterKey moira.ClusterKey, source MetricSource) { + provider.sources[clusterKey] = source } -// GetMetricSource return metric source depending on trigger flag: is remote trigger or not. GetLocal if not. 
-func (provider *SourceProvider) GetMetricSource(triggerSource moira.TriggerSource) (MetricSource, error) { - switch triggerSource { - case moira.GraphiteLocal: - return provider.GetLocal() +// GetAllSources returns all registered cluster keys mapped to corresponding sources +func (provider *SourceProvider) GetAllSources() map[moira.ClusterKey]MetricSource { + return provider.sources +} - case moira.GraphiteRemote: - return provider.GetRemote() +// GetClusterList returns a list of all registered cluster keys +func (provider *SourceProvider) GetClusterList() []moira.ClusterKey { + result := make([]moira.ClusterKey, 0, len(provider.sources)) - case moira.PrometheusRemote: - return provider.GetPrometheus() + for key := range provider.sources { + result = append(result, key) } - return nil, fmt.Errorf("unknown metric source") + return result +} + +// GetTriggerMetricSource get metrics source by given trigger. If it not configured returns not empty error +func (provider *SourceProvider) GetTriggerMetricSource(trigger *moira.Trigger) (MetricSource, error) { + return provider.GetMetricSource(trigger.ClusterKey()) } -func returnSource(source MetricSource) (MetricSource, error) { - isConfigured, err := source.IsConfigured() - if !isConfigured && err == nil { - return source, ErrMetricSourceIsNotConfigured +// GetMetricSource return metric source depending on trigger flag: is remote trigger or not. GetLocal if not. 
+func (provider *SourceProvider) GetMetricSource(clusterKey moira.ClusterKey) (MetricSource, error) { + if source, ok := provider.sources[clusterKey]; ok { + return source, nil } - return source, err + + return nil, fmt.Errorf("unknown metric source with cluster key `%s`", clusterKey.String()) } diff --git a/metric_source/remote/config.go b/metric_source/remote/config.go index 63278e13d..ded7d7949 100644 --- a/metric_source/remote/config.go +++ b/metric_source/remote/config.go @@ -10,10 +10,4 @@ type Config struct { Timeout time.Duration User string Password string - Enabled bool -} - -// isEnabled checks that remote config is enabled (url is defined and enabled flag is set) -func (c *Config) isEnabled() bool { - return c.Enabled && c.URL != "" } diff --git a/metric_source/remote/config_test.go b/metric_source/remote/config_test.go deleted file mode 100644 index 67b1bea4a..000000000 --- a/metric_source/remote/config_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package remote - -import ( - "testing" - - . 
"github.com/smartystreets/goconvey/convey" -) - -func TestConfig(t *testing.T) { - Convey("Given config without url and enabled", t, func() { - cfg := &Config{ - URL: "", - Enabled: true, - } - Convey("remote triggers should be disabled", func() { - So(cfg.isEnabled(), ShouldBeFalse) - }) - }) - - Convey("Given config with url and enabled", t, func() { - cfg := &Config{ - URL: "http://host", - Enabled: true, - } - Convey("remote triggers should be enabled", func() { - So(cfg.isEnabled(), ShouldBeTrue) - }) - }) - - Convey("Given config with url and disabled", t, func() { - cfg := &Config{ - URL: "http://host", - Enabled: false, - } - Convey("remote triggers should be disabled", func() { - So(cfg.isEnabled(), ShouldBeFalse) - }) - }) - - Convey("Given config without url and disabled", t, func() { - cfg := &Config{ - URL: "", - Enabled: false, - } - Convey("remote triggers should be disabled", func() { - So(cfg.isEnabled(), ShouldBeFalse) - }) - }) -} diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index 4e9aedd17..d42ce47c7 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -30,11 +30,14 @@ type Remote struct { } // Create configures remote metric source -func Create(config *Config) metricSource.MetricSource { +func Create(config *Config) (metricSource.MetricSource, error) { + if config.URL == "" { + return nil, fmt.Errorf("remote graphite URL should not be empty") + } return &Remote{ config: config, client: &http.Client{Timeout: config.Timeout}, - } + }, nil } // Fetch fetches remote metrics and converts them to expected format @@ -75,10 +78,7 @@ func (remote *Remote) GetMetricsTTLSeconds() int64 { // IsConfigured returns false in cases that user does not properly configure remote settings like graphite URL func (remote *Remote) IsConfigured() (bool, error) { - if remote.config.isEnabled() { - return true, nil - } - return false, ErrRemoteStorageDisabled + return true, nil } // IsRemoteAvailable checks 
if graphite API is available and returns 200 response diff --git a/metric_source/remote/remote_test.go b/metric_source/remote/remote_test.go index 001c224e3..fd4539572 100644 --- a/metric_source/remote/remote_test.go +++ b/metric_source/remote/remote_test.go @@ -9,22 +9,6 @@ import ( . "github.com/smartystreets/goconvey/convey" ) -func TestIsConfigured(t *testing.T) { - Convey("Remote is not configured", t, func() { - remote := Create(&Config{URL: "", Enabled: true}) - isConfigured, err := remote.IsConfigured() - So(isConfigured, ShouldBeFalse) - So(err, ShouldResemble, ErrRemoteStorageDisabled) - }) - - Convey("Remote is configured", t, func() { - remote := Create(&Config{URL: "http://host", Enabled: true}) - isConfigured, err := remote.IsConfigured() - So(isConfigured, ShouldBeTrue) - So(err, ShouldBeEmpty) - }) -} - func TestIsRemoteAvailable(t *testing.T) { Convey("Is available", t, func() { server := createServer([]byte("Some string"), http.StatusOK) diff --git a/metric_source/source.go b/metric_source/source.go index 620255fe9..386f1be0a 100644 --- a/metric_source/source.go +++ b/metric_source/source.go @@ -4,7 +4,6 @@ package metricsource type MetricSource interface { Fetch(target string, from int64, until int64, allowRealTimeAlerting bool) (FetchResult, error) GetMetricsTTLSeconds() int64 - IsConfigured() (bool, error) IsAvailable() (bool, error) } diff --git a/metrics/checker.go b/metrics/checker.go index 7503d882e..53a89a88c 100644 --- a/metrics/checker.go +++ b/metrics/checker.go @@ -1,37 +1,31 @@ package metrics -import "github.com/moira-alert/moira" +import ( + "fmt" + + "github.com/moira-alert/moira" +) // CheckerMetrics is a collection of metrics used in checker type CheckerMetrics struct { - LocalMetrics *CheckMetrics - RemoteMetrics *CheckMetrics - PrometheusMetrics *CheckMetrics + MetricsBySource map[moira.ClusterKey]*CheckMetrics MetricEventsChannelLen Histogram UnusedTriggersCount Histogram MetricEventsHandleTime Timer } // GetCheckMetrics 
return check metrics dependent on given trigger type -func (metrics *CheckerMetrics) GetCheckMetrics(trigger *moira.Trigger) *CheckMetrics { - return metrics.GetCheckMetricsBySource(trigger.TriggerSource) +func (metrics *CheckerMetrics) GetCheckMetrics(trigger *moira.Trigger) (*CheckMetrics, error) { + return metrics.GetCheckMetricsBySource(trigger.ClusterKey()) } // GetCheckMetricsBySource return check metrics dependent on given trigger type -func (metrics *CheckerMetrics) GetCheckMetricsBySource(triggerSource moira.TriggerSource) *CheckMetrics { - switch triggerSource { - case moira.GraphiteLocal: - return metrics.LocalMetrics - - case moira.GraphiteRemote: - return metrics.RemoteMetrics - - case moira.PrometheusRemote: - return metrics.PrometheusMetrics - - default: - return nil +func (metrics *CheckerMetrics) GetCheckMetricsBySource(clusterKey moira.ClusterKey) (*CheckMetrics, error) { + if checkMetrics, ok := metrics.MetricsBySource[clusterKey]; ok { + return checkMetrics, nil } + + return nil, fmt.Errorf("can't get check metrics: unknown cluster with key `%s`", clusterKey.String()) } // CheckMetrics is a collection of metrics for trigger checks @@ -43,27 +37,25 @@ type CheckMetrics struct { } // ConfigureCheckerMetrics is checker metrics configurator -func ConfigureCheckerMetrics(registry Registry, remoteEnabled, prometheusEnabled bool) *CheckerMetrics { - m := &CheckerMetrics{ - LocalMetrics: configureCheckMetrics(registry, "local"), +func ConfigureCheckerMetrics(registry Registry, sources []moira.ClusterKey) *CheckerMetrics { + metrics := &CheckerMetrics{ + MetricsBySource: make(map[moira.ClusterKey]*CheckMetrics), MetricEventsChannelLen: registry.NewHistogram("metricEvents"), MetricEventsHandleTime: registry.NewTimer("metricEventsHandle"), UnusedTriggersCount: registry.NewHistogram("triggers", "unused"), } - if remoteEnabled { - m.RemoteMetrics = configureCheckMetrics(registry, "remote") - } - if prometheusEnabled { - m.PrometheusMetrics = 
configureCheckMetrics(registry, "prometheus") + for _, clusterKey := range sources { + metrics.MetricsBySource[clusterKey] = configureCheckMetrics(registry, clusterKey) } - return m + return metrics } -func configureCheckMetrics(registry Registry, prefix string) *CheckMetrics { +func configureCheckMetrics(registry Registry, clusterKey moira.ClusterKey) *CheckMetrics { + source, id := clusterKey.TriggerSource.String(), clusterKey.ClusterId.String() return &CheckMetrics{ - CheckError: registry.NewMeter(prefix, "errors", "check"), - HandleError: registry.NewMeter(prefix, "errors", "handle"), - TriggersCheckTime: registry.NewTimer(prefix, "triggers"), - TriggersToCheckCount: registry.NewHistogram(prefix, "triggersToCheck"), + CheckError: registry.NewMeter(source, id, "errors", "check"), + HandleError: registry.NewMeter(source, id, "errors", "handle"), + TriggersCheckTime: registry.NewTimer(source, id, "triggers"), + TriggersToCheckCount: registry.NewHistogram(source, id, "triggersToCheck"), } } diff --git a/metrics/triggers.go b/metrics/triggers.go index 45cd38c21..26e4d6e28 100644 --- a/metrics/triggers.go +++ b/metrics/triggers.go @@ -1,24 +1,27 @@ package metrics -import "github.com/moira-alert/moira" +import ( + "github.com/moira-alert/moira" +) // Collection of metrics for trigger count metrics type TriggersMetrics struct { - countByTriggerSource map[moira.TriggerSource]Meter + triggerCounts map[moira.ClusterKey]Meter } // Creates and configurates the instance of TriggersMetrics -func NewTriggersMetrics(registry Registry) *TriggersMetrics { +func NewTriggersMetrics(registry Registry, clusterKeys []moira.ClusterKey) *TriggersMetrics { + meters := make(map[moira.ClusterKey]Meter, len(clusterKeys)) + for _, key := range clusterKeys { + meters[key] = registry.NewMeter("triggers", key.TriggerSource.String(), key.ClusterId.String()) + } + return &TriggersMetrics{ - countByTriggerSource: map[moira.TriggerSource]Meter{ - moira.GraphiteLocal: registry.NewMeter("triggers", 
string(moira.GraphiteLocal), "count"), - moira.GraphiteRemote: registry.NewMeter("triggers", string(moira.GraphiteRemote), "count"), - moira.PrometheusRemote: registry.NewMeter("triggers", string(moira.PrometheusRemote), "count"), - }, + triggerCounts: meters, } } // Marks the number of trigger for given trigger source -func (metrics *TriggersMetrics) Mark(source moira.TriggerSource, count int64) { - metrics.countByTriggerSource[source].Mark(count) +func (metrics *TriggersMetrics) Mark(source moira.ClusterKey, count int64) { + metrics.triggerCounts[source].Mark(count) } diff --git a/mock/metric_source/source.go b/mock/metric_source/source.go index 7366920fd..3aa4c3ac9 100644 --- a/mock/metric_source/source.go +++ b/mock/metric_source/source.go @@ -77,18 +77,3 @@ func (mr *MockMetricSourceMockRecorder) IsAvailable() *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsAvailable", reflect.TypeOf((*MockMetricSource)(nil).IsAvailable)) } - -// IsConfigured mocks base method. -func (m *MockMetricSource) IsConfigured() (bool, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "IsConfigured") - ret0, _ := ret[0].(bool) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// IsConfigured indicates an expected call of IsConfigured. -func (mr *MockMetricSourceMockRecorder) IsConfigured() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsConfigured", reflect.TypeOf((*MockMetricSource)(nil).IsConfigured)) -} diff --git a/mock/moira-alert/database.go b/mock/moira-alert/database.go index 2d14bb7a8..836be4624 100644 --- a/mock/moira-alert/database.go +++ b/mock/moira-alert/database.go @@ -50,20 +50,6 @@ func (mr *MockDatabaseMockRecorder) AcquireTriggerCheckLock(arg0, arg1 interface return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AcquireTriggerCheckLock", reflect.TypeOf((*MockDatabase)(nil).AcquireTriggerCheckLock), arg0, arg1) } -// AddLocalTriggersToCheck mocks base method. 
-func (m *MockDatabase) AddLocalTriggersToCheck(arg0 []string) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddLocalTriggersToCheck", arg0) - ret0, _ := ret[0].(error) - return ret0 -} - -// AddLocalTriggersToCheck indicates an expected call of AddLocalTriggersToCheck. -func (mr *MockDatabaseMockRecorder) AddLocalTriggersToCheck(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddLocalTriggersToCheck", reflect.TypeOf((*MockDatabase)(nil).AddLocalTriggersToCheck), arg0) -} - // AddNotification mocks base method. func (m *MockDatabase) AddNotification(arg0 *moira.ScheduledNotification) error { m.ctrl.T.Helper() @@ -106,32 +92,18 @@ func (mr *MockDatabaseMockRecorder) AddPatternMetric(arg0, arg1 interface{}) *go return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddPatternMetric", reflect.TypeOf((*MockDatabase)(nil).AddPatternMetric), arg0, arg1) } -// AddPrometheusTriggersToCheck mocks base method. -func (m *MockDatabase) AddPrometheusTriggersToCheck(arg0 []string) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddPrometheusTriggersToCheck", arg0) - ret0, _ := ret[0].(error) - return ret0 -} - -// AddPrometheusTriggersToCheck indicates an expected call of AddPrometheusTriggersToCheck. -func (mr *MockDatabaseMockRecorder) AddPrometheusTriggersToCheck(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddPrometheusTriggersToCheck", reflect.TypeOf((*MockDatabase)(nil).AddPrometheusTriggersToCheck), arg0) -} - -// AddRemoteTriggersToCheck mocks base method. -func (m *MockDatabase) AddRemoteTriggersToCheck(arg0 []string) error { +// AddTriggersToCheck mocks base method. 
+func (m *MockDatabase) AddTriggersToCheck(arg0 moira.ClusterKey, arg1 []string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddRemoteTriggersToCheck", arg0) + ret := m.ctrl.Call(m, "AddTriggersToCheck", arg0, arg1) ret0, _ := ret[0].(error) return ret0 } -// AddRemoteTriggersToCheck indicates an expected call of AddRemoteTriggersToCheck. -func (mr *MockDatabaseMockRecorder) AddRemoteTriggersToCheck(arg0 interface{}) *gomock.Call { +// AddTriggersToCheck indicates an expected call of AddTriggersToCheck. +func (mr *MockDatabaseMockRecorder) AddTriggersToCheck(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddRemoteTriggersToCheck", reflect.TypeOf((*MockDatabase)(nil).AddRemoteTriggersToCheck), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddTriggersToCheck", reflect.TypeOf((*MockDatabase)(nil).AddTriggersToCheck), arg0, arg1) } // CleanUpAbandonedRetentions mocks base method. @@ -382,51 +354,6 @@ func (mr *MockDatabaseMockRecorder) GetIDByUsername(arg0, arg1 interface{}) *gom return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetIDByUsername", reflect.TypeOf((*MockDatabase)(nil).GetIDByUsername), arg0, arg1) } -// GetLocalTriggerIDs mocks base method. -func (m *MockDatabase) GetLocalTriggerIDs() ([]string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetLocalTriggerIDs") - ret0, _ := ret[0].([]string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetLocalTriggerIDs indicates an expected call of GetLocalTriggerIDs. -func (mr *MockDatabaseMockRecorder) GetLocalTriggerIDs() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetLocalTriggerIDs", reflect.TypeOf((*MockDatabase)(nil).GetLocalTriggerIDs)) -} - -// GetLocalTriggersToCheck mocks base method. 
-func (m *MockDatabase) GetLocalTriggersToCheck(arg0 int) ([]string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetLocalTriggersToCheck", arg0) - ret0, _ := ret[0].([]string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetLocalTriggersToCheck indicates an expected call of GetLocalTriggersToCheck. -func (mr *MockDatabaseMockRecorder) GetLocalTriggersToCheck(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetLocalTriggersToCheck", reflect.TypeOf((*MockDatabase)(nil).GetLocalTriggersToCheck), arg0) -} - -// GetLocalTriggersToCheckCount mocks base method. -func (m *MockDatabase) GetLocalTriggersToCheckCount() (int64, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetLocalTriggersToCheckCount") - ret0, _ := ret[0].(int64) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetLocalTriggersToCheckCount indicates an expected call of GetLocalTriggersToCheckCount. -func (mr *MockDatabaseMockRecorder) GetLocalTriggersToCheckCount() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetLocalTriggersToCheckCount", reflect.TypeOf((*MockDatabase)(nil).GetLocalTriggersToCheckCount)) -} - // GetMetricRetention mocks base method. func (m *MockDatabase) GetMetricRetention(arg0 string) (int64, error) { m.ctrl.T.Helper() @@ -621,51 +548,6 @@ func (mr *MockDatabaseMockRecorder) GetPrometheusChecksUpdatesCount() *gomock.Ca return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPrometheusChecksUpdatesCount", reflect.TypeOf((*MockDatabase)(nil).GetPrometheusChecksUpdatesCount)) } -// GetPrometheusTriggerIDs mocks base method. -func (m *MockDatabase) GetPrometheusTriggerIDs() ([]string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetPrometheusTriggerIDs") - ret0, _ := ret[0].([]string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetPrometheusTriggerIDs indicates an expected call of GetPrometheusTriggerIDs. 
-func (mr *MockDatabaseMockRecorder) GetPrometheusTriggerIDs() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPrometheusTriggerIDs", reflect.TypeOf((*MockDatabase)(nil).GetPrometheusTriggerIDs)) -} - -// GetPrometheusTriggersToCheck mocks base method. -func (m *MockDatabase) GetPrometheusTriggersToCheck(arg0 int) ([]string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetPrometheusTriggersToCheck", arg0) - ret0, _ := ret[0].([]string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetPrometheusTriggersToCheck indicates an expected call of GetPrometheusTriggersToCheck. -func (mr *MockDatabaseMockRecorder) GetPrometheusTriggersToCheck(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPrometheusTriggersToCheck", reflect.TypeOf((*MockDatabase)(nil).GetPrometheusTriggersToCheck), arg0) -} - -// GetPrometheusTriggersToCheckCount mocks base method. -func (m *MockDatabase) GetPrometheusTriggersToCheckCount() (int64, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetPrometheusTriggersToCheckCount") - ret0, _ := ret[0].(int64) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetPrometheusTriggersToCheckCount indicates an expected call of GetPrometheusTriggersToCheckCount. -func (mr *MockDatabaseMockRecorder) GetPrometheusTriggersToCheckCount() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPrometheusTriggersToCheckCount", reflect.TypeOf((*MockDatabase)(nil).GetPrometheusTriggersToCheckCount)) -} - // GetRemoteChecksUpdatesCount mocks base method. 
func (m *MockDatabase) GetRemoteChecksUpdatesCount() (int64, error) { m.ctrl.T.Helper() @@ -681,51 +563,6 @@ func (mr *MockDatabaseMockRecorder) GetRemoteChecksUpdatesCount() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRemoteChecksUpdatesCount", reflect.TypeOf((*MockDatabase)(nil).GetRemoteChecksUpdatesCount)) } -// GetRemoteTriggerIDs mocks base method. -func (m *MockDatabase) GetRemoteTriggerIDs() ([]string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetRemoteTriggerIDs") - ret0, _ := ret[0].([]string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetRemoteTriggerIDs indicates an expected call of GetRemoteTriggerIDs. -func (mr *MockDatabaseMockRecorder) GetRemoteTriggerIDs() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRemoteTriggerIDs", reflect.TypeOf((*MockDatabase)(nil).GetRemoteTriggerIDs)) -} - -// GetRemoteTriggersToCheck mocks base method. -func (m *MockDatabase) GetRemoteTriggersToCheck(arg0 int) ([]string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetRemoteTriggersToCheck", arg0) - ret0, _ := ret[0].([]string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetRemoteTriggersToCheck indicates an expected call of GetRemoteTriggersToCheck. -func (mr *MockDatabaseMockRecorder) GetRemoteTriggersToCheck(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRemoteTriggersToCheck", reflect.TypeOf((*MockDatabase)(nil).GetRemoteTriggersToCheck), arg0) -} - -// GetRemoteTriggersToCheckCount mocks base method. -func (m *MockDatabase) GetRemoteTriggersToCheckCount() (int64, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetRemoteTriggersToCheckCount") - ret0, _ := ret[0].(int64) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetRemoteTriggersToCheckCount indicates an expected call of GetRemoteTriggersToCheckCount. 
-func (mr *MockDatabaseMockRecorder) GetRemoteTriggersToCheckCount() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRemoteTriggersToCheckCount", reflect.TypeOf((*MockDatabase)(nil).GetRemoteTriggersToCheckCount)) -} - // GetSubscription mocks base method. func (m *MockDatabase) GetSubscription(arg0 string) (moira.SubscriptionData, error) { m.ctrl.T.Helper() @@ -892,18 +729,33 @@ func (mr *MockDatabaseMockRecorder) GetTriggerChecks(arg0 interface{}) *gomock.C } // GetTriggerCount mocks base method. -func (m *MockDatabase) GetTriggerCount() (map[moira.TriggerSource]int64, error) { +func (m *MockDatabase) GetTriggerCount(arg0 []moira.ClusterKey) (map[moira.ClusterKey]int64, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetTriggerCount") - ret0, _ := ret[0].(map[moira.TriggerSource]int64) + ret := m.ctrl.Call(m, "GetTriggerCount", arg0) + ret0, _ := ret[0].(map[moira.ClusterKey]int64) ret1, _ := ret[1].(error) return ret0, ret1 } // GetTriggerCount indicates an expected call of GetTriggerCount. -func (mr *MockDatabaseMockRecorder) GetTriggerCount() *gomock.Call { +func (mr *MockDatabaseMockRecorder) GetTriggerCount(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTriggerCount", reflect.TypeOf((*MockDatabase)(nil).GetTriggerCount), arg0) +} + +// GetTriggerIDs mocks base method. +func (m *MockDatabase) GetTriggerIDs(arg0 moira.ClusterKey) ([]string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetTriggerIDs", arg0) + ret0, _ := ret[0].([]string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetTriggerIDs indicates an expected call of GetTriggerIDs. 
+func (mr *MockDatabaseMockRecorder) GetTriggerIDs(arg0 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTriggerCount", reflect.TypeOf((*MockDatabase)(nil).GetTriggerCount)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTriggerIDs", reflect.TypeOf((*MockDatabase)(nil).GetTriggerIDs), arg0) } // GetTriggerIDsStartWith mocks base method. @@ -982,6 +834,36 @@ func (mr *MockDatabaseMockRecorder) GetTriggersSearchResults(arg0, arg1, arg2 in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTriggersSearchResults", reflect.TypeOf((*MockDatabase)(nil).GetTriggersSearchResults), arg0, arg1, arg2) } +// GetTriggersToCheck mocks base method. +func (m *MockDatabase) GetTriggersToCheck(arg0 moira.ClusterKey, arg1 int) ([]string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetTriggersToCheck", arg0, arg1) + ret0, _ := ret[0].([]string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetTriggersToCheck indicates an expected call of GetTriggersToCheck. +func (mr *MockDatabaseMockRecorder) GetTriggersToCheck(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTriggersToCheck", reflect.TypeOf((*MockDatabase)(nil).GetTriggersToCheck), arg0, arg1) +} + +// GetTriggersToCheckCount mocks base method. +func (m *MockDatabase) GetTriggersToCheckCount(arg0 moira.ClusterKey) (int64, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetTriggersToCheckCount", arg0) + ret0, _ := ret[0].(int64) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetTriggersToCheckCount indicates an expected call of GetTriggersToCheckCount. 
+func (mr *MockDatabaseMockRecorder) GetTriggersToCheckCount(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTriggersToCheckCount", reflect.TypeOf((*MockDatabase)(nil).GetTriggersToCheckCount), arg0) +} + // GetUnusedTriggerIDs mocks base method. func (m *MockDatabase) GetUnusedTriggerIDs() ([]string, error) { m.ctrl.T.Helper() @@ -1586,7 +1468,7 @@ func (mr *MockDatabaseMockRecorder) SetTriggerCheckMaintenance(arg0, arg1, arg2, } // SetTriggerLastCheck mocks base method. -func (m *MockDatabase) SetTriggerLastCheck(arg0 string, arg1 *moira.CheckData, arg2 moira.TriggerSource) error { +func (m *MockDatabase) SetTriggerLastCheck(arg0 string, arg1 *moira.CheckData, arg2 moira.ClusterKey) error { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "SetTriggerLastCheck", arg0, arg1, arg2) ret0, _ := ret[0].(error) diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index e3085e3f9..2ef939aab 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -211,7 +211,7 @@ func configureNotifier(t *testing.T) { logger, _ = logging.GetLogger("Scheduler") scheduler = mock_scheduler.NewMockScheduler(mockCtrl) sender = mock_moira_alert.NewMockSender(mockCtrl) - metricsSourceProvider := metricSource.CreateMetricSourceProvider(local.Create(dataBase), nil, nil) + metricsSourceProvider := metricSource.CreateTestMetricSourceProvider(local.Create(dataBase), nil, nil) notif = NewNotifier(dataBase, logger, config, notifierMetrics, metricsSourceProvider, map[string]moira.ImageStore{}) notif.scheduler = scheduler diff --git a/notifier/selfstate/heartbeat/filter.go b/notifier/selfstate/heartbeat/filter.go index ca3b862d9..011725a75 100644 --- a/notifier/selfstate/heartbeat/filter.go +++ b/notifier/selfstate/heartbeat/filter.go @@ -27,7 +27,8 @@ func GetFilter(delay int64, logger moira.Logger, database moira.Database) Heartb } func (check *filter) Check(nowTS int64) (int64, bool, error) { - triggersCount, 
err := check.database.GetLocalTriggersToCheckCount() + defaultLocalCluster := moira.DefaultLocalCluster + triggersCount, err := check.database.GetTriggersToCheckCount(defaultLocalCluster) if err != nil { return 0, false, err } diff --git a/notifier/selfstate/heartbeat/filter_test.go b/notifier/selfstate/heartbeat/filter_test.go index e8323b827..36b691a37 100644 --- a/notifier/selfstate/heartbeat/filter_test.go +++ b/notifier/selfstate/heartbeat/filter_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/moira-alert/moira" mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" "github.com/golang/mock/gomock" @@ -19,6 +20,7 @@ func TestFilter(t *testing.T) { check, mockCtrl := createFilterTest(t) defer mockCtrl.Finish() database := check.database.(*mock_moira_alert.MockDatabase) + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) Convey("Checking the created filter", func() { expected := &filter{ @@ -35,7 +37,7 @@ func TestFilter(t *testing.T) { }) Convey("Filter error handling test", func() { - database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), err) + database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), err) value, needSend, errActual := check.Check(now) So(errActual, ShouldEqual, err) @@ -46,7 +48,7 @@ func TestFilter(t *testing.T) { Convey("Test update lastSuccessfulCheck", func() { now += 1000 database.EXPECT().GetMetricsUpdatesCount().Return(int64(1), nil) - database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), nil) + database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) @@ -59,7 +61,7 @@ func TestFilter(t *testing.T) { check.lastSuccessfulCheck = now - check.delay - 1 database.EXPECT().GetMetricsUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), nil) + 
database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) @@ -69,7 +71,7 @@ func TestFilter(t *testing.T) { Convey("Exit without action", func() { database.EXPECT().GetMetricsUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), nil) + database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) diff --git a/notifier/selfstate/heartbeat/local_checker.go b/notifier/selfstate/heartbeat/local_checker.go index 3c5d07db1..7c4c88cfd 100644 --- a/notifier/selfstate/heartbeat/local_checker.go +++ b/notifier/selfstate/heartbeat/local_checker.go @@ -24,7 +24,8 @@ func GetLocalChecker(delay int64, logger moira.Logger, database moira.Database) } func (check *localChecker) Check(nowTS int64) (int64, bool, error) { - triggersCount, err := check.database.GetLocalTriggersToCheckCount() + defaultLocalCluster := moira.DefaultLocalCluster + triggersCount, err := check.database.GetTriggersToCheckCount(defaultLocalCluster) if err != nil { return 0, false, err } diff --git a/notifier/selfstate/heartbeat/local_checker_test.go b/notifier/selfstate/heartbeat/local_checker_test.go index 54e224ea3..2d0595e5f 100644 --- a/notifier/selfstate/heartbeat/local_checker_test.go +++ b/notifier/selfstate/heartbeat/local_checker_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/moira-alert/moira" mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" "github.com/golang/mock/gomock" @@ -13,6 +14,7 @@ import ( ) func TestCheckDelay_Check(t *testing.T) { + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) Convey("Test local checker heartbeat", t, func() { err := errors.New("test error localChecker") now := time.Now().Unix() @@ -27,7 +29,7 @@ func TestCheckDelay_Check(t *testing.T) { }) 
Convey("GraphiteLocalChecker error handling test", func() { - database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), err) + database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), err) value, needSend, errActual := check.Check(now) So(errActual, ShouldEqual, err) @@ -38,7 +40,7 @@ func TestCheckDelay_Check(t *testing.T) { Convey("Test update lastSuccessfulCheck", func() { now += 1000 database.EXPECT().GetChecksUpdatesCount().Return(int64(1), nil) - database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), nil) + database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) @@ -50,7 +52,7 @@ func TestCheckDelay_Check(t *testing.T) { Convey("Test get notification", func() { check.lastSuccessfulCheck = now - check.delay - 1 database.EXPECT().GetChecksUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), nil) + database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) @@ -60,7 +62,7 @@ func TestCheckDelay_Check(t *testing.T) { Convey("Exit without action", func() { database.EXPECT().GetChecksUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), nil) + database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) diff --git a/notifier/selfstate/heartbeat/remote_checker.go b/notifier/selfstate/heartbeat/remote_checker.go index 25781c232..4b0e76113 100644 --- a/notifier/selfstate/heartbeat/remote_checker.go +++ b/notifier/selfstate/heartbeat/remote_checker.go @@ -24,7 +24,8 @@ func GetRemoteChecker(delay int64, logger moira.Logger, database moira.Database) } func (check *remoteChecker) Check(nowTS int64) (int64, bool, error) { - 
triggerCount, err := check.database.GetRemoteTriggersToCheckCount() + defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster + triggerCount, err := check.database.GetTriggersToCheckCount(defaultRemoteCluster) if err != nil { return 0, false, err } diff --git a/notifier/selfstate/heartbeat/remote_checker_test.go b/notifier/selfstate/heartbeat/remote_checker_test.go index 93dfc4ef9..5a3180753 100644 --- a/notifier/selfstate/heartbeat/remote_checker_test.go +++ b/notifier/selfstate/heartbeat/remote_checker_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/moira-alert/moira" mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" "github.com/golang/mock/gomock" @@ -13,6 +14,8 @@ import ( ) func TestGraphiteRemoteChecker(t *testing.T) { + defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster + Convey("Test remote checker heartbeat", t, func() { err := errors.New("test error remoteChecker") now := time.Now().Unix() @@ -27,7 +30,7 @@ func TestGraphiteRemoteChecker(t *testing.T) { }) Convey("GraphiteRemoteChecker error handling test", func() { - database.EXPECT().GetRemoteTriggersToCheckCount().Return(int64(0), err) + database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(0), err) value, needSend, errActual := check.Check(now) So(errActual, ShouldEqual, err) @@ -38,7 +41,7 @@ func TestGraphiteRemoteChecker(t *testing.T) { Convey("Test update lastSuccessfulCheck", func() { now += 1000 database.EXPECT().GetRemoteChecksUpdatesCount().Return(int64(1), nil) - database.EXPECT().GetRemoteTriggersToCheckCount().Return(int64(1), nil) + database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) @@ -51,7 +54,7 @@ func TestGraphiteRemoteChecker(t *testing.T) { check.lastSuccessfulCheck = now - check.delay - 1 database.EXPECT().GetRemoteChecksUpdatesCount().Return(int64(0), nil) - 
database.EXPECT().GetRemoteTriggersToCheckCount().Return(int64(1), nil) + database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) @@ -61,7 +64,7 @@ func TestGraphiteRemoteChecker(t *testing.T) { Convey("Exit without action", func() { database.EXPECT().GetRemoteChecksUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetRemoteTriggersToCheckCount().Return(int64(1), nil) + database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(1), nil) value, needSend, errActual := check.Check(now) So(errActual, ShouldBeNil) diff --git a/notifier/selfstate/selfstate_test.go b/notifier/selfstate/selfstate_test.go index fcee9a219..de318bdb1 100644 --- a/notifier/selfstate/selfstate_test.go +++ b/notifier/selfstate/selfstate_test.go @@ -28,14 +28,17 @@ type selfCheckWorkerMock struct { } func TestSelfCheckWorker_selfStateChecker(t *testing.T) { + defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) + defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster + mock := configureWorker(t, true) Convey("SelfCheckWorker should call all heartbeats checks", t, func() { mock.database.EXPECT().GetChecksUpdatesCount().Return(int64(1), nil).Times(2) mock.database.EXPECT().GetMetricsUpdatesCount().Return(int64(1), nil) mock.database.EXPECT().GetRemoteChecksUpdatesCount().Return(int64(1), nil) mock.database.EXPECT().GetNotifierState().Return(moira.SelfStateOK, nil) - mock.database.EXPECT().GetRemoteTriggersToCheckCount().Return(int64(1), nil) - mock.database.EXPECT().GetLocalTriggersToCheckCount().Return(int64(1), nil).Times(2) + mock.database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil).Times(2) + mock.database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(1), nil) // Start worker after configuring Mock to avoid race conditions err := mock.selfCheckWorker.Start() diff --git 
a/support/trigger.go b/support/trigger.go index 782df7344..0dba834f3 100644 --- a/support/trigger.go +++ b/support/trigger.go @@ -134,10 +134,10 @@ func HandlePushTriggerLastCheck( database moira.Database, triggerID string, lastCheck *moira.CheckData, - triggerSource moira.TriggerSource, + clusterKey moira.ClusterKey, ) error { logger.Info().Msg("Save trigger last check") - if err := database.SetTriggerLastCheck(triggerID, lastCheck, triggerSource); err != nil { + if err := database.SetTriggerLastCheck(triggerID, lastCheck, clusterKey); err != nil { return fmt.Errorf("cannot set trigger last check: %w", err) } logger.Info().Msg("Trigger last check was saved")