Skip to content

Commit

Permalink
fix: checker handle source unavailable (#1134)
Browse files Browse the repository at this point in the history
  • Loading branch information
AleksandrMatsko authored Feb 5, 2025
1 parent 7ae84c8 commit 4448cde
Show file tree
Hide file tree
Showing 4 changed files with 260 additions and 21 deletions.
27 changes: 10 additions & 17 deletions checker/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,17 @@ func (triggerChecker *TriggerChecker) handleFetchError(checkData moira.CheckData
triggerChecker.trigger.ClusterKey(),
)
}
case remote.ErrRemoteTriggerResponse:
timeSinceLastSuccessfulCheck := checkData.Timestamp - checkData.LastSuccessfulCheckTimestamp
if timeSinceLastSuccessfulCheck >= triggerChecker.ttl {
checkData.State = moira.StateEXCEPTION
checkData.Message = fmt.Sprintf("Remote server unavailable. Trigger is not checked for %d seconds", timeSinceLastSuccessfulCheck)

var comparingErr error
checkData, comparingErr = triggerChecker.compareTriggerStates(checkData)
if comparingErr != nil {
triggerChecker.logger.Error().
Error(comparingErr).
String(moira.LogFieldNameTriggerID, triggerChecker.triggerID).
Msg("cannot compare trigger states")
}
}
case remote.ErrRemoteUnavailable:
checkData.State = moira.StateEXCEPTION
checkData.Message = fmt.Sprintf("Remote server unavailable. Trigger is not checked since: %v", checkData.LastSuccessfulCheckTimestamp)

logTriggerCheckException(triggerChecker.logger, triggerChecker.triggerID, err)
case local.ErrUnknownFunction, local.ErrEvalExpr:
triggerChecker.logger.Error().
String(moira.LogFieldNameTriggerID, triggerChecker.triggerID).
String("trigger.cluster_id", triggerChecker.trigger.ClusterId.String()).
String("trigger.source", triggerChecker.trigger.TriggerSource.String()).
Error(err).
Msg("Trigger check failed")
case local.ErrUnknownFunction, local.ErrEvalExpr, remote.ErrRemoteTriggerResponse:
checkData.State = moira.StateEXCEPTION
checkData.Message = err.Error()
logTriggerCheckException(triggerChecker.logger, triggerChecker.triggerID, err)
Expand Down
241 changes: 241 additions & 0 deletions checker/check_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package checker

import (
"errors"
"fmt"
"math"
"testing"
Expand All @@ -12,6 +13,7 @@ import (
logging "github.com/moira-alert/moira/logging/zerolog_adapter"
metricSource "github.com/moira-alert/moira/metric_source"
"github.com/moira-alert/moira/metric_source/local"
"github.com/moira-alert/moira/metric_source/remote"
"go.uber.org/mock/gomock"

"github.com/moira-alert/moira/metrics"
Expand Down Expand Up @@ -1924,3 +1926,242 @@ func TestTriggerChecker_handlePrepareError(t *testing.T) {
})
})
}

func TestTriggerChecker_handleFetchError(t *testing.T) {
Convey("Test handleFetchError", t, func() {
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
dataBase := mock_moira_alert.NewMockDatabase(mockCtrl)
logger, _ := logging.GetLogger("Test")

metric := "some.metric"
testTime := time.Date(2022, time.June, 6, 10, 0, 0, 0, time.UTC).Unix()

trigger := &moira.Trigger{
ID: "test trigger",
TriggerSource: moira.GraphiteLocal,
ClusterId: moira.DefaultCluster,
}
triggerChecker := TriggerChecker{
triggerID: trigger.ID,
from: testTime - 50,
until: testTime,
trigger: trigger,
database: dataBase,
logger: logger,
ttlState: moira.TTLStateNODATA,
lastCheck: &moira.CheckData{
State: moira.StateOK,
Timestamp: testTime - 10,
Metrics: map[string]moira.MetricState{
metric: {
State: moira.StateOK,
Timestamp: testTime - 41,
},
},
},
}

Convey("with ErrTriggerHasEmptyTargets, ErrTriggerHasOnlyWildcards", func() {
errorList := []error{
ErrTriggerHasEmptyTargets{},
ErrTriggerHasOnlyWildcards{},
}

Convey("when triggerChecker.ttl == 0", func() {
triggerChecker.ttl = 0

for i, givenErr := range errorList {
Convey(fmt.Sprintf("Case %v: %T", i+1, givenErr), func() {
expectedCheckData := moira.CheckData{
Score: int64(1_000),
Metrics: triggerChecker.lastCheck.Metrics,
State: triggerChecker.ttlState.ToTriggerState(),
Timestamp: triggerChecker.until,
Message: givenErr.Error(),
MetricsToTargetRelation: map[string]string{},
}

dataBase.EXPECT().SetTriggerLastCheck(
triggerChecker.triggerID,
&expectedCheckData,
triggerChecker.trigger.ClusterKey(),
).Return(nil).Times(1)

err := triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr)
So(err, ShouldBeNil)
})
}
})

Convey("when triggerChecker.ttl != 0", func() {
triggerChecker.ttl = 600

for i, givenErr := range errorList {
Convey(fmt.Sprintf("Case %v: %T", i+1, givenErr), func() {
expectedCheckData := moira.CheckData{
Score: int64(1_000),
Metrics: triggerChecker.lastCheck.Metrics,
State: triggerChecker.ttlState.ToTriggerState(),
Timestamp: triggerChecker.until,
EventTimestamp: triggerChecker.until,
Message: givenErr.Error(),
MetricsToTargetRelation: map[string]string{},
}

dataBase.EXPECT().PushNotificationEvent(
&moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.triggerID,
State: triggerChecker.ttlState.ToTriggerState(),
OldState: triggerChecker.lastCheck.State,
Timestamp: triggerChecker.until,
Metric: triggerChecker.trigger.Name,
},
true,
).Return(nil).Times(1)
dataBase.EXPECT().SetTriggerLastCheck(
triggerChecker.triggerID,
&expectedCheckData,
triggerChecker.trigger.ClusterKey(),
).Return(nil).Times(1)

err := triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr)
So(err, ShouldBeNil)
})
}
})
})

Convey("with graphite remote unavailable", func() {
givenErr := remote.ErrRemoteUnavailable{
InternalError: errors.New("some err"),
}

triggerChecker.ttl = 10
triggerChecker.lastCheck.LastSuccessfulCheckTimestamp = triggerChecker.until - 20

expectedCheckData := moira.CheckData{
Score: int64(100_000),
Metrics: triggerChecker.lastCheck.Metrics,
State: moira.StateEXCEPTION,
Timestamp: triggerChecker.until,
EventTimestamp: triggerChecker.until,
LastSuccessfulCheckTimestamp: triggerChecker.lastCheck.LastSuccessfulCheckTimestamp,
Message: fmt.Sprintf(
"Remote server unavailable. Trigger is not checked since: %v",
triggerChecker.lastCheck.LastSuccessfulCheckTimestamp),
MetricsToTargetRelation: map[string]string{},
}

dataBase.EXPECT().PushNotificationEvent(
&moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.triggerID,
State: moira.StateEXCEPTION,
OldState: triggerChecker.lastCheck.State,
Timestamp: triggerChecker.until,
Metric: triggerChecker.trigger.Name,
},
true,
).Return(nil).Times(1)
dataBase.EXPECT().SetTriggerLastCheck(
triggerChecker.triggerID,
&expectedCheckData,
triggerChecker.trigger.ClusterKey(),
).Return(nil).Times(1)

err := triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr)
So(err, ShouldBeNil)
})

Convey("with bad functions, problems in expressions, etc", func() {
errorsList := []error{
local.ErrorUnknownFunction(errors.New("unknown func \"dance\"")),
local.ErrorEvalExpression(errors.New("eval expr"), "badExpr(dancing.mops"),
remote.ErrRemoteTriggerResponse{
InternalError: errors.New("user write bad target"),
Target: "bad target",
},
}

for i, givenErr := range errorsList {
Convey(fmt.Sprintf("Case %v: %T", i+1, givenErr), func() {
expectedCheckData := moira.CheckData{
Score: int64(100_000),
Metrics: triggerChecker.lastCheck.Metrics,
State: moira.StateEXCEPTION,
Timestamp: triggerChecker.until,
EventTimestamp: triggerChecker.until,
LastSuccessfulCheckTimestamp: triggerChecker.lastCheck.LastSuccessfulCheckTimestamp,
Message: givenErr.Error(),
MetricsToTargetRelation: map[string]string{},
}

dataBase.EXPECT().PushNotificationEvent(
&moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.triggerID,
State: moira.StateEXCEPTION,
OldState: triggerChecker.lastCheck.State,
Timestamp: triggerChecker.until,
Metric: triggerChecker.trigger.Name,
},
true,
).Return(nil).Times(1)
dataBase.EXPECT().SetTriggerLastCheck(
triggerChecker.triggerID,
&expectedCheckData,
triggerChecker.trigger.ClusterKey(),
).Return(nil).Times(1)

err := triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr)
So(err, ShouldBeNil)
})
}
})

Convey("with undefined err", func() {
givenErr := errors.New("some undefined error")

checkMetrics, err := metrics.ConfigureCheckerMetrics(
metrics.NewDummyRegistry(),
[]moira.ClusterKey{moira.DefaultLocalCluster},
).GetCheckMetricsBySource(moira.DefaultLocalCluster)
So(err, ShouldBeNil)

triggerChecker.metrics = checkMetrics

expectedCheckData := moira.CheckData{
Score: int64(100_000),
Metrics: triggerChecker.lastCheck.Metrics,
State: moira.StateEXCEPTION,
Timestamp: triggerChecker.until,
EventTimestamp: triggerChecker.until,
LastSuccessfulCheckTimestamp: triggerChecker.lastCheck.LastSuccessfulCheckTimestamp,
Message: givenErr.Error(),
MetricsToTargetRelation: map[string]string{},
}

dataBase.EXPECT().PushNotificationEvent(
&moira.NotificationEvent{
IsTriggerEvent: true,
TriggerID: triggerChecker.triggerID,
State: moira.StateEXCEPTION,
OldState: triggerChecker.lastCheck.State,
Timestamp: triggerChecker.until,
Metric: triggerChecker.trigger.Name,
},
true,
).Return(nil).Times(1)
dataBase.EXPECT().SetTriggerLastCheck(
triggerChecker.triggerID,
&expectedCheckData,
triggerChecker.trigger.ClusterKey(),
).Return(nil).Times(1)

err = triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr)
So(err, ShouldBeNil)
})
})
}
8 changes: 8 additions & 0 deletions metric_source/local/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ type ErrEvalExpr struct {
target string
}

// ErrorEvalExpression creates ErrEvalExpr with given err and target.
func ErrorEvalExpression(err error, target string) ErrEvalExpr {
return ErrEvalExpr{
internalError: err,
target: target,
}
}

// Error is implementation of golang error interface for ErrEvalExpr struct.
func (err ErrEvalExpr) Error() string {
return fmt.Sprintf("failed to evaluate target '%s': %s", err.target, err.internalError.Error())
Expand Down
5 changes: 1 addition & 4 deletions metric_source/local/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,7 @@ func (eval *evaluator) Eval(
} else if isErrUnknownFunction(err) {
err = ErrorUnknownFunction(err)
} else {
err = ErrEvalExpr{
target: exp.ToString(),
internalError: err,
}
err = ErrorEvalExpression(err, exp.ToString())
}
}

Expand Down

0 comments on commit 4448cde

Please sign in to comment.