Skip to content

Commit

Permalink
fix: missing related logs or traces links in alert notification (#5946)
Browse files Browse the repository at this point in the history
  • Loading branch information
srikanthccv authored Sep 13, 2024
1 parent f854cdd commit 39f9fc6
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 9 deletions.
1 change: 1 addition & 0 deletions pkg/query-service/rules/base_rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ func NewBaseRule(id string, p *PostableRule, reader interfaces.Reader, opts ...R
id: id,
name: p.AlertName,
source: p.Source,
typ: p.AlertType,
ruleCondition: p.RuleCondition,
evalWindow: time.Duration(p.EvalWindow),
labels: qslabels.FromMap(p.Labels),
Expand Down
2 changes: 1 addition & 1 deletion pkg/query-service/rules/promrule_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
func TestPromRuleShouldAlert(t *testing.T) {
postableRule := PostableRule{
AlertName: "Test Rule",
AlertType: "METRIC_BASED_ALERT",
AlertType: AlertTypeMetric,
RuleType: RuleTypeProm,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
Expand Down
214 changes: 206 additions & 8 deletions pkg/query-service/rules/threshold_rule_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
func TestThresholdRuleShouldAlert(t *testing.T) {
postableRule := PostableRule{
AlertName: "Tricky Condition Tests",
AlertType: "METRIC_BASED_ALERT",
AlertType: AlertTypeMetric,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
Expand Down Expand Up @@ -788,7 +788,7 @@ func TestPrepareLinksToLogs(t *testing.T) {
func TestPrepareLinksToTraces(t *testing.T) {
postableRule := PostableRule{
AlertName: "Links to traces test",
AlertType: "TRACES_BASED_ALERT",
AlertType: AlertTypeTraces,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
Expand Down Expand Up @@ -830,7 +830,7 @@ func TestPrepareLinksToTraces(t *testing.T) {
func TestThresholdRuleLabelNormalization(t *testing.T) {
postableRule := PostableRule{
AlertName: "Tricky Condition Tests",
AlertType: "METRIC_BASED_ALERT",
AlertType: AlertTypeMetric,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
Expand Down Expand Up @@ -914,7 +914,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
func TestThresholdRuleEvalDelay(t *testing.T) {
postableRule := PostableRule{
AlertName: "Test Eval Delay",
AlertType: "METRIC_BASED_ALERT",
AlertType: AlertTypeMetric,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
Expand Down Expand Up @@ -963,7 +963,7 @@ func TestThresholdRuleEvalDelay(t *testing.T) {
func TestThresholdRuleClickHouseTmpl(t *testing.T) {
postableRule := PostableRule{
AlertName: "Tricky Condition Tests",
AlertType: "METRIC_BASED_ALERT",
AlertType: AlertTypeMetric,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
Expand Down Expand Up @@ -1019,7 +1019,7 @@ func (m *queryMatcherAny) Match(string, string) error {
func TestThresholdRuleUnitCombinations(t *testing.T) {
postableRule := PostableRule{
AlertName: "Units test",
AlertType: "METRIC_BASED_ALERT",
AlertType: AlertTypeMetric,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
Expand Down Expand Up @@ -1170,8 +1170,8 @@ func TestThresholdRuleUnitCombinations(t *testing.T) {

func TestThresholdRuleNoData(t *testing.T) {
postableRule := PostableRule{
AlertName: "Units test",
AlertType: "METRIC_BASED_ALERT",
AlertName: "No data test",
AlertType: AlertTypeMetric,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
Expand Down Expand Up @@ -1261,3 +1261,201 @@ func TestThresholdRuleNoData(t *testing.T) {
}
}
}

func TestThresholdRuleTracesLink(t *testing.T) {
postableRule := PostableRule{
AlertName: "Traces link test",
AlertType: AlertTypeTraces,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
RuleCondition: &RuleCondition{
CompositeQuery: &v3.CompositeQuery{
QueryType: v3.QueryTypeBuilder,
BuilderQueries: map[string]*v3.BuilderQuery{
"A": {
QueryName: "A",
StepInterval: 60,
AggregateAttribute: v3.AttributeKey{
Key: "durationNano",
},
AggregateOperator: v3.AggregateOperatorP95,
DataSource: v3.DataSourceTraces,
Expression: "A",
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{Key: "httpMethod", IsColumn: true, Type: v3.AttributeKeyTypeTag, DataType: v3.AttributeKeyDataTypeString},
Value: "GET",
Operator: v3.FilterOperatorEqual,
},
},
},
},
},
},
},
}
fm := featureManager.StartManager()
mock, err := cmock.NewClickHouseWithQueryMatcher(nil, &queryMatcherAny{})
if err != nil {
t.Errorf("an error '%s' was not expected when opening a stub database connection", err)
}

cols := make([]cmock.ColumnType, 0)
cols = append(cols, cmock.ColumnType{Name: "value", Type: "Float64"})
cols = append(cols, cmock.ColumnType{Name: "attr", Type: "String"})
cols = append(cols, cmock.ColumnType{Name: "timestamp", Type: "String"})

for idx, c := range testCases {
rows := cmock.NewRows(cols, c.values)

// We are testing the eval logic after the query is run
// so we don't care about the query string here
queryString := "SELECT any"
mock.
ExpectQuery(queryString).
WillReturnRows(rows)
postableRule.RuleCondition.CompareOp = CompareOp(c.compareOp)
postableRule.RuleCondition.MatchType = MatchType(c.matchType)
postableRule.RuleCondition.Target = &c.target
postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit
postableRule.RuleCondition.TargetUnit = c.targetUnit
postableRule.Annotations = map[string]string{
"description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})",
"summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}",
}

options := clickhouseReader.NewOptions("", 0, 0, 0, "", "archiveNamespace")
reader := clickhouseReader.NewReaderFromClickhouseConnection(mock, options, nil, "", fm, "", true)

rule, err := NewThresholdRule("69", &postableRule, fm, reader, true)
rule.temporalityMap = map[string]map[v3.Temporality]bool{
"signoz_calls_total": {
v3.Delta: true,
},
}
if err != nil {
assert.NoError(t, err)
}

retVal, err := rule.Eval(context.Background(), time.Now())
if err != nil {
assert.NoError(t, err)
}

if c.expectAlerts == 0 {
assert.Equal(t, 0, retVal.(int), "case %d", idx)
} else {
assert.Equal(t, c.expectAlerts, retVal.(int), "case %d", idx)
for _, item := range rule.active {
for name, value := range item.Annotations.Map() {
if name == "related_traces" {
assert.NotEmpty(t, value, "case %d", idx)
assert.Contains(t, value, "GET")
}
}
}
}
}
}

func TestThresholdRuleLogsLink(t *testing.T) {
postableRule := PostableRule{
AlertName: "Logs link test",
AlertType: AlertTypeLogs,
RuleType: RuleTypeThreshold,
EvalWindow: Duration(5 * time.Minute),
Frequency: Duration(1 * time.Minute),
RuleCondition: &RuleCondition{
CompositeQuery: &v3.CompositeQuery{
QueryType: v3.QueryTypeBuilder,
BuilderQueries: map[string]*v3.BuilderQuery{
"A": {
QueryName: "A",
StepInterval: 60,
AggregateAttribute: v3.AttributeKey{
Key: "component",
},
AggregateOperator: v3.AggregateOperatorCountDistinct,
DataSource: v3.DataSourceLogs,
Expression: "A",
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{Key: "k8s.container.name", IsColumn: false, Type: v3.AttributeKeyTypeTag, DataType: v3.AttributeKeyDataTypeString},
Value: "testcontainer",
Operator: v3.FilterOperatorEqual,
},
},
},
},
},
},
},
}
fm := featureManager.StartManager()
mock, err := cmock.NewClickHouseWithQueryMatcher(nil, &queryMatcherAny{})
if err != nil {
t.Errorf("an error '%s' was not expected when opening a stub database connection", err)
}

cols := make([]cmock.ColumnType, 0)
cols = append(cols, cmock.ColumnType{Name: "value", Type: "Float64"})
cols = append(cols, cmock.ColumnType{Name: "attr", Type: "String"})
cols = append(cols, cmock.ColumnType{Name: "timestamp", Type: "String"})

for idx, c := range testCases {
rows := cmock.NewRows(cols, c.values)

// We are testing the eval logic after the query is run
// so we don't care about the query string here
queryString := "SELECT any"
mock.
ExpectQuery(queryString).
WillReturnRows(rows)
postableRule.RuleCondition.CompareOp = CompareOp(c.compareOp)
postableRule.RuleCondition.MatchType = MatchType(c.matchType)
postableRule.RuleCondition.Target = &c.target
postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit
postableRule.RuleCondition.TargetUnit = c.targetUnit
postableRule.Annotations = map[string]string{
"description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})",
"summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}",
}

options := clickhouseReader.NewOptions("", 0, 0, 0, "", "archiveNamespace")
reader := clickhouseReader.NewReaderFromClickhouseConnection(mock, options, nil, "", fm, "", true)

rule, err := NewThresholdRule("69", &postableRule, fm, reader, true)
rule.temporalityMap = map[string]map[v3.Temporality]bool{
"signoz_calls_total": {
v3.Delta: true,
},
}
if err != nil {
assert.NoError(t, err)
}

retVal, err := rule.Eval(context.Background(), time.Now())
if err != nil {
assert.NoError(t, err)
}

if c.expectAlerts == 0 {
assert.Equal(t, 0, retVal.(int), "case %d", idx)
} else {
assert.Equal(t, c.expectAlerts, retVal.(int), "case %d", idx)
for _, item := range rule.active {
for name, value := range item.Annotations.Map() {
if name == "related_logs" {
assert.NotEmpty(t, value, "case %d", idx)
assert.Contains(t, value, "testcontainer")
}
}
}
}
}
}
68 changes: 68 additions & 0 deletions pkg/query-service/rules/threshold_rule_test_data.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package rules

import "time"

var (
testCases = []struct {
targetUnit string
yAxisUnit string
values [][]interface{}
expectAlerts int
compareOp string
matchType string
target float64
summaryAny []string
}{
{
targetUnit: "s",
yAxisUnit: "ns",
values: [][]interface{}{
{float64(572588400), "attr", time.Now()}, // 0.57 seconds
{float64(572386400), "attr", time.Now().Add(1 * time.Second)}, // 0.57 seconds
{float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 0.3 seconds
{float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 0.3 seconds
{float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 0.06 seconds
},
expectAlerts: 0,
compareOp: "1", // Above
matchType: "1", // Once
target: 1, // 1 second
},
{
targetUnit: "ms",
yAxisUnit: "ns",
values: [][]interface{}{
{float64(572588400), "attr", time.Now()}, // 572.58 ms
{float64(572386400), "attr", time.Now().Add(1 * time.Second)}, // 572.38 ms
{float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 300.94 ms
{float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 299.31 ms
{float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 66.64 ms
},
expectAlerts: 4,
compareOp: "1", // Above
matchType: "1", // Once
target: 200, // 200 ms
summaryAny: []string{
"observed metric value is 299 ms",
"the observed metric value is 573 ms",
"the observed metric value is 572 ms",
"the observed metric value is 301 ms",
},
},
{
targetUnit: "decgbytes",
yAxisUnit: "bytes",
values: [][]interface{}{
{float64(2863284053), "attr", time.Now()}, // 2.86 GB
{float64(2863388842), "attr", time.Now().Add(1 * time.Second)}, // 2.86 GB
{float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 0.3 GB
{float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 0.3 GB
{float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 66.64 MB
},
expectAlerts: 0,
compareOp: "1", // Above
matchType: "1", // Once
target: 200, // 200 GB
},
}
)

0 comments on commit 39f9fc6

Please sign in to comment.