From 7533414b7cc4e75ddbb46b07ceff659cdcfbcfad Mon Sep 17 00:00:00 2001 From: nityanandagohain Date: Thu, 5 Sep 2024 23:21:11 +0530 Subject: [PATCH 1/4] feat: logsV4 resource table query builder --- .../app/logs/v4/query_builder.go | 31 ++ .../app/logs/v4/resource_query_builder.go | 204 ++++++++ .../logs/v4/resource_query_builder_test.go | 473 ++++++++++++++++++ 3 files changed, 708 insertions(+) create mode 100644 pkg/query-service/app/logs/v4/query_builder.go create mode 100644 pkg/query-service/app/logs/v4/resource_query_builder.go create mode 100644 pkg/query-service/app/logs/v4/resource_query_builder_test.go diff --git a/pkg/query-service/app/logs/v4/query_builder.go b/pkg/query-service/app/logs/v4/query_builder.go new file mode 100644 index 0000000000..08024756bd --- /dev/null +++ b/pkg/query-service/app/logs/v4/query_builder.go @@ -0,0 +1,31 @@ +package v4 + +import ( + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" +) + +var logOperators = map[v3.FilterOperator]string{ + v3.FilterOperatorEqual: "=", + v3.FilterOperatorNotEqual: "!=", + v3.FilterOperatorLessThan: "<", + v3.FilterOperatorLessThanOrEq: "<=", + v3.FilterOperatorGreaterThan: ">", + v3.FilterOperatorGreaterThanOrEq: ">=", + v3.FilterOperatorLike: "LIKE", + v3.FilterOperatorNotLike: "NOT LIKE", + v3.FilterOperatorContains: "LIKE", + v3.FilterOperatorNotContains: "NOT LIKE", + v3.FilterOperatorRegex: "match(%s, %s)", + v3.FilterOperatorNotRegex: "NOT match(%s, %s)", + v3.FilterOperatorIn: "IN", + v3.FilterOperatorNotIn: "NOT IN", + v3.FilterOperatorExists: "mapContains(%s_%s, '%s')", + v3.FilterOperatorNotExists: "not mapContains(%s_%s, '%s')", +} + +const ( + BODY = "body" + DISTRIBUTED_LOGS_V2 = "distributed_logs_v2" + DISTRIBUTED_LOGS_V2_RESOURCE = "distributed_logs_v2_resource" + NANOSECOND = 1000000000 +) diff --git a/pkg/query-service/app/logs/v4/resource_query_builder.go b/pkg/query-service/app/logs/v4/resource_query_builder.go new file mode 100644 index 0000000000..e551725567 --- /dev/null +++ b/pkg/query-service/app/logs/v4/resource_query_builder.go @@ -0,0 +1,204 @@ +package v4 + +import ( + "fmt" + "strings" + + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" + "go.signoz.io/signoz/pkg/query-service/utils" +) + +// buildResourceFilter builds a clickhouse filter string for resource labels +func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string { + // we are using lower(labels) as we want case insensitive filtering + searchKey := fmt.Sprintf("simpleJSONExtractString(lower(labels), '%s')", key) + + chFmtVal := utils.ClickHouseFormattedValue(value) + + switch op { + case v3.FilterOperatorExists: + return fmt.Sprintf("simpleJSONHas(lower(labels), '%s')", key) + case v3.FilterOperatorNotExists: + return fmt.Sprintf("not simpleJSONHas(lower(labels), '%s')", key) + case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex: + return fmt.Sprintf(logsOp, searchKey, chFmtVal) + case v3.FilterOperatorContains, v3.FilterOperatorNotContains: + // this is required as clickhouseFormattedValue add's quotes to the string + lowerEscapedStringValue := utils.QuoteEscapedString(strings.ToLower(fmt.Sprintf("%s", value))) + return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, lowerEscapedStringValue) + default: + chFmtValLower := strings.ToLower(chFmtVal) + return fmt.Sprintf("%s %s %s", searchKey, logsOp, chFmtValLower) + } +} + +// buildIndexFilterForInOperator builds a clickhouse filter string for in operator +// example:= x in a,b,c = (labels like '%x%a%' or labels like '%"x":"b"%' or labels like '%"x"="c"%') +// example:= x nin a,b,c = (labels nlike '%x%a%' AND labels nlike '%"x"="b"' AND labels nlike '%"x"="c"%') +func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value interface{}) string { + conditions := []string{} + separator := " OR " + sqlOp := "like" + if op == v3.FilterOperatorNotIn { + separator = " AND " + sqlOp = "not like" + } + + // values is a slice of strings, we need to convert value to this type + // value can be string or []interface{} + values := []string{} + switch value.(type) { + case string: + values = append(values, value.(string)) + case []interface{}: + for _, v := range (value).([]interface{}) { + // also resources attributes are always string values + strV, ok := v.(string) + if !ok { + continue + } + values = append(values, strV) + } + } + + // if there are no values to filter on, return an empty string + if len(values) > 0 { + for _, v := range values { + conditions = append(conditions, fmt.Sprintf("lower(labels) %s '%%\"%s\":\"%s\"%%'", sqlOp, key, strings.ToLower(v))) + } + return "(" + strings.Join(conditions, separator) + ")" + } + return "" +} + +// buildResourceIndexFilter builds a clickhouse filter string for resource labels +// example:= x like '%john%' = lower(labels) like '%x%john%' +func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string { + // not using clickhouseFormattedValue as we don't wan't the quotes + formattedValueEscapedLower := utils.QuoteEscapedString(strings.ToLower(fmt.Sprintf("%s", value))) + + // add index filters + switch op { + case v3.FilterOperatorContains, v3.FilterOperatorEqual, v3.FilterOperatorLike: + return fmt.Sprintf("lower(labels) like '%%%s%%%s%%'", key, formattedValueEscapedLower) + case v3.FilterOperatorNotContains, v3.FilterOperatorNotEqual, v3.FilterOperatorNotLike: + return fmt.Sprintf("lower(labels) not like '%%%s%%%s%%'", key, formattedValueEscapedLower) + case v3.FilterOperatorNotRegex: + return fmt.Sprintf("lower(labels) not like '%%%s%%'", key) + case v3.FilterOperatorIn, v3.FilterOperatorNotIn: + return buildIndexFilterForInOperator(key, op, value) + default: + return fmt.Sprintf("lower(labels) like '%%%s%%'", key) + } +} + +// buildResourceFiltersFromFilterItems builds a list of clickhouse filter strings for resource labels from a FilterSet. +// It skips any filter items that are not resource attributes and checks that the operator is supported and the data type is correct. +func buildResourceFiltersFromFilterItems(fs *v3.FilterSet) ([]string, error) { + var conditions []string + if fs == nil || len(fs.Items) == 0 { + return nil, nil + } + for _, item := range fs.Items { + // skip anything other than resource attribute + if item.Key.Type != v3.AttributeKeyTypeResource { + continue + } + + // since out map is in lower case we are converting it to lowercase + operatorLower := strings.ToLower(string(item.Operator)) + op := v3.FilterOperator(operatorLower) + keyName := strings.ToLower(item.Key.Key) + + // resource filter value data type will always be string + // will be an interface if the operator is IN or NOT IN + if item.Key.DataType != v3.AttributeKeyDataTypeString && + (op != v3.FilterOperatorIn && op != v3.FilterOperatorNotIn) { + return nil, fmt.Errorf("invalid data type for resource attribute: %s", item.Key.Key) + } + + var value interface{} + var err error + if op != v3.FilterOperatorExists && op != v3.FilterOperatorNotExists { + // make sure to cast the value regardless of the actual type + value, err = utils.ValidateAndCastValue(item.Value, item.Key.DataType) + if err != nil { + return nil, fmt.Errorf("failed to validate and cast value for %s: %v", item.Key.Key, err) + } + } + + if logsOp, ok := logOperators[op]; ok { + // the filter + if resourceFilter := buildResourceFilter(logsOp, keyName, op, value); resourceFilter != "" { + conditions = append(conditions, resourceFilter) + } + // the additional filter for better usage of the index + if resourceIndexFilter := buildResourceIndexFilter(keyName, op, value); resourceIndexFilter != "" { + conditions = append(conditions, resourceIndexFilter) + } + } else { + return nil, fmt.Errorf("unsupported operator: %s", op) + } + + } + + return conditions, nil +} + +func buildResourceFiltersFromGroupBy(groupBy []v3.AttributeKey) []string { + var conditions []string + + for _, attr := range groupBy { + if attr.Type != v3.AttributeKeyTypeResource { + continue + } + key := strings.ToLower(attr.Key) + conditions = append(conditions, fmt.Sprintf("(simpleJSONHas(lower(labels), '%s') AND lower(labels) like '%%%s%%')", key, key)) + } + return conditions +} + +func buildResourceFiltersFromAggregateAttribute(aggregateAttribute v3.AttributeKey) string { + if aggregateAttribute.Key != "" && aggregateAttribute.Type == v3.AttributeKeyTypeResource { + key := strings.ToLower(aggregateAttribute.Key) + return fmt.Sprintf("(simpleJSONHas(lower(labels), '%s') AND lower(labels) like '%%%s%%')", key, key) + } + + return "" +} + +func buildResourceSubQuery(bucketStart, bucketEnd int64, fs *v3.FilterSet, groupBy []v3.AttributeKey, aggregateAttribute v3.AttributeKey) (string, error) { + + // BUILD THE WHERE CLAUSE + var conditions []string + // only add the resource attributes to the filters here + rs, err := buildResourceFiltersFromFilterItems(fs) + if err != nil { + return "", err + } + conditions = append(conditions, rs...) + + // for aggregate attribute add exists check in resources + aggregateAttributeResourceFilter := buildResourceFiltersFromAggregateAttribute(aggregateAttribute) + if aggregateAttributeResourceFilter != "" { + conditions = append(conditions, aggregateAttributeResourceFilter) + } + + groupByResourceFilters := buildResourceFiltersFromGroupBy(groupBy) + if len(groupByResourceFilters) > 0 { + // TODO: change AND to OR once we know how to solve for group by ( i.e show values if one is not present) + groupByStr := "( " + strings.Join(groupByResourceFilters, " AND ") + " )" + conditions = append(conditions, groupByStr) + } + if len(conditions) == 0 { + return "", nil + } + conditionStr := strings.Join(conditions, " AND ") + + // BUILD THE FINAL QUERY + query := fmt.Sprintf("(SELECT fingerprint FROM signoz_logs.%s WHERE (seen_at_ts_bucket_start >= %d) AND (seen_at_ts_bucket_start <= %d) AND ", DISTRIBUTED_LOGS_V2_RESOURCE, bucketStart, bucketEnd) + + query = query + conditionStr + ")" + + return query, nil +} diff --git a/pkg/query-service/app/logs/v4/resource_query_builder_test.go b/pkg/query-service/app/logs/v4/resource_query_builder_test.go new file mode 100644 index 0000000000..2b1267ba4e --- /dev/null +++ b/pkg/query-service/app/logs/v4/resource_query_builder_test.go @@ -0,0 +1,473 @@ +package v4 + +import ( + "reflect" + "testing" + + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" +) + +func Test_buildResourceFilter(t *testing.T) { + type args struct { + logsOp string + key string + op v3.FilterOperator + value interface{} + } + tests := []struct { + name string + args args + want string + }{ + { + name: "test exists", + args: args{ + key: "service.name", + op: v3.FilterOperatorExists, + }, + want: `simpleJSONHas(lower(labels), 'service.name')`, + }, + { + name: "test nexists", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotExists, + }, + want: `not simpleJSONHas(lower(labels), 'service.name')`, + }, + { + name: "test regex", + args: args{ + logsOp: "match(%s, %s)", + key: "service.name", + op: v3.FilterOperatorRegex, + value: ".*", + }, + want: `match(simpleJSONExtractString(lower(labels), 'service.name'), '.*')`, + }, + { + name: "test contains", + args: args{ + logsOp: "LIKE", + key: "service.name", + op: v3.FilterOperatorContains, + value: "application", + }, + want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE '%application%'`, + }, + { + name: "test eq", + args: args{ + logsOp: "=", + key: "service.name", + op: v3.FilterOperatorEqual, + value: "Application", + }, + want: `simpleJSONExtractString(lower(labels), 'service.name') = 'application'`, + }, + { + name: "test value with quotes", + args: args{ + logsOp: "=", + key: "service.name", + op: v3.FilterOperatorEqual, + value: "Application's", + }, + want: `simpleJSONExtractString(lower(labels), 'service.name') = 'application\'s'`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildResourceFilter(tt.args.logsOp, tt.args.key, tt.args.op, tt.args.value); got != tt.want { + t.Errorf("buildResourceFilter() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildIndexFilterForInOperator(t *testing.T) { + type args struct { + key string + op v3.FilterOperator + value interface{} + } + tests := []struct { + name string + args args + want string + }{ + { + name: "test in array", + args: args{ + key: "service.name", + op: v3.FilterOperatorIn, + value: []interface{}{"Application", "Test"}, + }, + want: `(lower(labels) like '%"service.name":"application"%' OR lower(labels) like '%"service.name":"test"%')`, + }, + { + name: "test nin array", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotIn, + value: []interface{}{"Application", "Test"}, + }, + want: `(lower(labels) not like '%"service.name":"application"%' AND lower(labels) not like '%"service.name":"test"%')`, + }, + { + name: "test in string", + args: args{ + key: "service.name", + op: v3.FilterOperatorIn, + value: "application", + }, + want: `(lower(labels) like '%"service.name":"application"%')`, + }, + { + name: "test nin string", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotIn, + value: "application'\"s", + }, + want: `(lower(labels) not like '%"service.name":"application'"s"%')`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildIndexFilterForInOperator(tt.args.key, tt.args.op, tt.args.value); got != tt.want { + t.Errorf("buildIndexFilterForInOperator() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceIndexFilter(t *testing.T) { + type args struct { + key string + op v3.FilterOperator + value interface{} + } + tests := []struct { + name string + args args + want string + }{ + { + name: "test contains", + args: args{ + key: "service.name", + op: v3.FilterOperatorContains, + value: "application", + }, + want: `lower(labels) like '%service.name%application%'`, + }, + { + name: "test not contains", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotContains, + value: "application", + }, + want: `lower(labels) not like '%service.name%application%'`, + }, + { + name: "test not regex", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotRegex, + value: ".*", + }, + want: `lower(labels) not like '%service.name%'`, + }, + { + name: "test in", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotIn, + value: []interface{}{"Application", "Test"}, + }, + want: `(lower(labels) not like '%"service.name":"application"%' AND lower(labels) not like '%"service.name":"test"%')`, + }, + { + name: "test eq", + args: args{ + key: "service.name", + op: v3.FilterOperatorEqual, + value: "Application", + }, + want: `lower(labels) like '%service.name%application%'`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildResourceIndexFilter(tt.args.key, tt.args.op, tt.args.value); got != tt.want { + t.Errorf("buildResourceIndexFilter() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceFiltersFromFilterItems(t *testing.T) { + type args struct { + fs *v3.FilterSet + } + tests := []struct { + name string + args args + want []string + wantErr bool + }{ + { + name: "ignore attribute", + args: args{ + fs: &v3.FilterSet{ + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeTag, + }, + Operator: v3.FilterOperatorEqual, + Value: "test", + }, + }, + }, + }, + want: nil, + wantErr: false, + }, + { + name: "build filter", + args: args{ + fs: &v3.FilterSet{ + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorEqual, + Value: "test", + }, + }, + }, + }, + want: []string{ + "simpleJSONExtractString(lower(labels), 'service.name') = 'test'", + "lower(labels) like '%service.name%test%'", + }, + wantErr: false, + }, + { + name: "build filter with multiple items", + args: args{ + fs: &v3.FilterSet{ + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorEqual, + Value: "test", + }, + { + Key: v3.AttributeKey{ + Key: "namespace", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorContains, + Value: "test1", + }, + }, + }, + }, + want: []string{ + "simpleJSONExtractString(lower(labels), 'service.name') = 'test'", + "lower(labels) like '%service.name%test%'", + "simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%'", + "lower(labels) like '%namespace%test1%'", + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := buildResourceFiltersFromFilterItems(tt.args.fs) + if (err != nil) != tt.wantErr { + t.Errorf("buildResourceFiltersFromFilterItems() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("buildResourceFiltersFromFilterItems() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceFiltersFromGroupBy(t *testing.T) { + type args struct { + groupBy []v3.AttributeKey + } + tests := []struct { + name string + args args + want []string + }{ + { + name: "build filter", + args: args{ + groupBy: []v3.AttributeKey{ + { + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + }, + want: []string{ + "(simpleJSONHas(lower(labels), 'service.name') AND lower(labels) like '%service.name%')", + }, + }, + { + name: "build filter multiple group by", + args: args{ + groupBy: []v3.AttributeKey{ + { + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + { + Key: "namespace", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + }, + want: []string{ + "(simpleJSONHas(lower(labels), 'service.name') AND lower(labels) like '%service.name%')", + "(simpleJSONHas(lower(labels), 'namespace') AND lower(labels) like '%namespace%')", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildResourceFiltersFromGroupBy(tt.args.groupBy); !reflect.DeepEqual(got, tt.want) { + t.Errorf("buildResourceFiltersFromGroupBy() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceFiltersFromAggregateAttribute(t *testing.T) { + type args struct { + aggregateAttribute v3.AttributeKey + } + tests := []struct { + name string + args args + want string + }{ + { + name: "build filter", + args: args{ + aggregateAttribute: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + want: "(simpleJSONHas(lower(labels), 'service.name') AND lower(labels) like '%service.name%')", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildResourceFiltersFromAggregateAttribute(tt.args.aggregateAttribute); got != tt.want { + t.Errorf("buildResourceFiltersFromAggregateAttribute() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceSubQuery(t *testing.T) { + type args struct { + bucketStart int64 + bucketEnd int64 + fs *v3.FilterSet + groupBy []v3.AttributeKey + aggregateAttribute v3.AttributeKey + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "build sub query", + args: args{ + bucketStart: 1680064560, + bucketEnd: 1680066458, + fs: &v3.FilterSet{ + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorEqual, + Value: "test", + }, + { + Key: v3.AttributeKey{ + Key: "namespace", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorContains, + Value: "test1", + }, + }, + }, + groupBy: []v3.AttributeKey{ + { + Key: "host.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + aggregateAttribute: v3.AttributeKey{ + Key: "cluster.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " + + "(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " + + "simpleJSONExtractString(lower(labels), 'service.name') = 'test' AND lower(labels) like '%service.name%test%' " + + "AND simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%' AND lower(labels) like '%namespace%test1%' " + + "AND (simpleJSONHas(lower(labels), 'cluster.name') AND lower(labels) like '%cluster.name%') AND " + + "( (simpleJSONHas(lower(labels), 'host.name') AND lower(labels) like '%host.name%') ))", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := buildResourceSubQuery(tt.args.bucketStart, tt.args.bucketEnd, tt.args.fs, tt.args.groupBy, tt.args.aggregateAttribute) + if (err != nil) != tt.wantErr { + t.Errorf("buildResourceSubQuery() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("buildResourceSubQuery() = %v, want %v", got, tt.want) + } + }) + } +} From c5fed02b27988b5fa0f6071174da20fd8d2821cc Mon Sep 17 00:00:00 2001 From: nityanandagohain Date: Fri, 6 Sep 2024 10:19:46 +0530 Subject: [PATCH 2/4] fix: address pr comments --- .../app/logs/v4/resource_query_builder.go | 7 ++++--- .../app/logs/v4/resource_query_builder_test.go | 11 ++++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/pkg/query-service/app/logs/v4/resource_query_builder.go b/pkg/query-service/app/logs/v4/resource_query_builder.go index e551725567..d1f5b9da5a 100644 --- a/pkg/query-service/app/logs/v4/resource_query_builder.go +++ b/pkg/query-service/app/logs/v4/resource_query_builder.go @@ -64,7 +64,8 @@ func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value inter // if there are no values to filter on, return an empty string if len(values) > 0 { for _, v := range values { - conditions = append(conditions, fmt.Sprintf("lower(labels) %s '%%\"%s\":\"%s\"%%'", sqlOp, key, strings.ToLower(v))) + value := utils.QuoteEscapedString(strings.ToLower(v)) + conditions = append(conditions, fmt.Sprintf("lower(labels) %s '%%\"%s\":\"%s\"%%'", sqlOp, key, value)) } return "(" + strings.Join(conditions, separator) + ")" } @@ -196,9 +197,9 @@ func buildResourceSubQuery(bucketStart, bucketEnd int64, fs *v3.FilterSet, group conditionStr := strings.Join(conditions, " AND ") // BUILD THE FINAL QUERY - query := fmt.Sprintf("(SELECT fingerprint FROM signoz_logs.%s WHERE (seen_at_ts_bucket_start >= %d) AND (seen_at_ts_bucket_start <= %d) AND ", DISTRIBUTED_LOGS_V2_RESOURCE, bucketStart, bucketEnd) + query := fmt.Sprintf("SELECT fingerprint FROM signoz_logs.%s WHERE (seen_at_ts_bucket_start >= %d) AND (seen_at_ts_bucket_start <= %d) AND ", DISTRIBUTED_LOGS_V2_RESOURCE, bucketStart, bucketEnd) - query = query + conditionStr + ")" + query = "(" + query + conditionStr + ")" return query, nil } diff --git a/pkg/query-service/app/logs/v4/resource_query_builder_test.go b/pkg/query-service/app/logs/v4/resource_query_builder_test.go index 2b1267ba4e..cf474c4637 100644 --- a/pkg/query-service/app/logs/v4/resource_query_builder_test.go +++ b/pkg/query-service/app/logs/v4/resource_query_builder_test.go @@ -130,7 +130,7 @@ func Test_buildIndexFilterForInOperator(t *testing.T) { op: v3.FilterOperatorNotIn, value: "application'\"s", }, - want: `(lower(labels) not like '%"service.name":"application'"s"%')`, + want: `(lower(labels) not like '%"service.name":"application\'"s"%')`, }, } for _, tt := range tests { @@ -171,6 +171,15 @@ func Test_buildResourceIndexFilter(t *testing.T) { }, want: `lower(labels) not like '%service.name%application%'`, }, + { + name: "test contains with % and _", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotContains, + value: "application%_test", + }, + want: `lower(labels) not like '%service.name%application%_test%'`, + }, { name: "test not regex", args: args{ From a0b42bb0e140648a2fbb22de66972ac3d02f950b Mon Sep 17 00:00:00 2001 From: nityanandagohain Date: Fri, 6 Sep 2024 10:43:00 +0530 Subject: [PATCH 3/4] fix: escape %, _ for contains queries --- .../app/logs/v4/resource_query_builder.go | 6 +++--- .../app/logs/v4/resource_query_builder_test.go | 10 +++++----- pkg/query-service/utils/format.go | 8 ++++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pkg/query-service/app/logs/v4/resource_query_builder.go b/pkg/query-service/app/logs/v4/resource_query_builder.go index d1f5b9da5a..93832d4a0b 100644 --- a/pkg/query-service/app/logs/v4/resource_query_builder.go +++ b/pkg/query-service/app/logs/v4/resource_query_builder.go @@ -24,7 +24,7 @@ func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value return fmt.Sprintf(logsOp, searchKey, chFmtVal) case v3.FilterOperatorContains, v3.FilterOperatorNotContains: // this is required as clickhouseFormattedValue add's quotes to the string - lowerEscapedStringValue := utils.QuoteEscapedString(strings.ToLower(fmt.Sprintf("%s", value))) + lowerEscapedStringValue := utils.QuoteEscapedStringForContains(strings.ToLower(fmt.Sprintf("%s", value))) return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, lowerEscapedStringValue) default: chFmtValLower := strings.ToLower(chFmtVal) @@ -64,7 +64,7 @@ func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value inter // if there are no values to filter on, return an empty string if len(values) > 0 { for _, v := range values { - value := utils.QuoteEscapedString(strings.ToLower(v)) + value := utils.QuoteEscapedStringForContains(strings.ToLower(v)) conditions = append(conditions, fmt.Sprintf("lower(labels) %s '%%\"%s\":\"%s\"%%'", sqlOp, key, value)) } return "(" + strings.Join(conditions, separator) + ")" @@ -76,7 +76,7 @@ func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value inter // example:= x like '%john%' = lower(labels) like '%x%john%' func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string { // not using clickhouseFormattedValue as we don't wan't the quotes - formattedValueEscapedLower := utils.QuoteEscapedString(strings.ToLower(fmt.Sprintf("%s", value))) + formattedValueEscapedLower := utils.QuoteEscapedStringForContains(strings.ToLower(fmt.Sprintf("%s", value))) // add index filters switch op { diff --git a/pkg/query-service/app/logs/v4/resource_query_builder_test.go b/pkg/query-service/app/logs/v4/resource_query_builder_test.go index cf474c4637..803a2d378b 100644 --- a/pkg/query-service/app/logs/v4/resource_query_builder_test.go +++ b/pkg/query-service/app/logs/v4/resource_query_builder_test.go @@ -51,9 +51,9 @@ func Test_buildResourceFilter(t *testing.T) { logsOp: "LIKE", key: "service.name", op: v3.FilterOperatorContains, - value: "application", + value: "application%_", }, - want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE '%application%'`, + want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE '%application\%\_%'`, }, { name: "test eq", @@ -128,9 +128,9 @@ func Test_buildIndexFilterForInOperator(t *testing.T) { args: args{ key: "service.name", op: v3.FilterOperatorNotIn, - value: "application'\"s", + value: "application'\"_s", }, - want: `(lower(labels) not like '%"service.name":"application\'"s"%')`, + want: `(lower(labels) not like '%"service.name":"application\'"\_s"%')`, }, } for _, tt := range tests { @@ -178,7 +178,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorNotContains, value: "application%_test", }, - want: `lower(labels) not like '%service.name%application%_test%'`, + want: `lower(labels) not like '%service.name%application\%\_test%'`, }, { name: "test not regex", diff --git a/pkg/query-service/utils/format.go b/pkg/query-service/utils/format.go index 4de081940d..c623d3e8e0 100644 --- a/pkg/query-service/utils/format.go +++ b/pkg/query-service/utils/format.go @@ -154,6 +154,14 @@ func QuoteEscapedString(str string) string { return str } +func QuoteEscapedStringForContains(str string) string { + // https: //clickhouse.com/docs/en/sql-reference/functions/string-search-functions#like + str = QuoteEscapedString(str) + str = strings.ReplaceAll(str, `%`, `\%`) + str = strings.ReplaceAll(str, `_`, `\_`) + return str +} + // ClickHouseFormattedValue formats the value to be used in clickhouse query func ClickHouseFormattedValue(v interface{}) string { // if it's pointer convert it to a value From 4f2bfaf8e43bc0eb3b9956c7f07c9d1dda38ec82 Mon Sep 17 00:00:00 2001 From: nityanandagohain Date: Fri, 6 Sep 2024 13:48:18 +0530 Subject: [PATCH 4/4] fix: resource attribute filtering case sensitive --- .../app/logs/v4/resource_query_builder.go | 38 +++++------- .../logs/v4/resource_query_builder_test.go | 62 +++++++++---------- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/pkg/query-service/app/logs/v4/resource_query_builder.go b/pkg/query-service/app/logs/v4/resource_query_builder.go index 93832d4a0b..004c9269fb 100644 --- a/pkg/query-service/app/logs/v4/resource_query_builder.go +++ b/pkg/query-service/app/logs/v4/resource_query_builder.go @@ -10,25 +10,23 @@ import ( // buildResourceFilter builds a clickhouse filter string for resource labels func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string { - // we are using lower(labels) as we want case insensitive filtering - searchKey := fmt.Sprintf("simpleJSONExtractString(lower(labels), '%s')", key) + searchKey := fmt.Sprintf("simpleJSONExtractString(labels, '%s')", key) chFmtVal := utils.ClickHouseFormattedValue(value) switch op { case v3.FilterOperatorExists: - return fmt.Sprintf("simpleJSONHas(lower(labels), '%s')", key) + return fmt.Sprintf("simpleJSONHas(labels, '%s')", key) case v3.FilterOperatorNotExists: - return fmt.Sprintf("not simpleJSONHas(lower(labels), '%s')", key) + return fmt.Sprintf("not simpleJSONHas(labels, '%s')", key) case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex: return fmt.Sprintf(logsOp, searchKey, chFmtVal) case v3.FilterOperatorContains, v3.FilterOperatorNotContains: // this is required as clickhouseFormattedValue add's quotes to the string - lowerEscapedStringValue := utils.QuoteEscapedStringForContains(strings.ToLower(fmt.Sprintf("%s", value))) - return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, lowerEscapedStringValue) + escapedStringValue := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value)) + return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, escapedStringValue) default: - chFmtValLower := strings.ToLower(chFmtVal) - return fmt.Sprintf("%s %s %s", searchKey, logsOp, chFmtValLower) + return fmt.Sprintf("%s %s %s", searchKey, logsOp, chFmtVal) } } @@ -64,8 +62,8 @@ func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value inter // if there are no values to filter on, return an empty string if len(values) > 0 { for _, v := range values { - value := utils.QuoteEscapedStringForContains(strings.ToLower(v)) - conditions = append(conditions, fmt.Sprintf("lower(labels) %s '%%\"%s\":\"%s\"%%'", sqlOp, key, value)) + value := utils.QuoteEscapedStringForContains(v) + conditions = append(conditions, fmt.Sprintf("labels %s '%%\"%s\":\"%s\"%%'", sqlOp, key, value)) } return "(" + strings.Join(conditions, separator) + ")" } @@ -73,23 +71,23 @@ func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value inter } // buildResourceIndexFilter builds a clickhouse filter string for resource labels -// example:= x like '%john%' = lower(labels) like '%x%john%' +// example:= x like '%john%' = labels like '%x%john%' func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string { // not using clickhouseFormattedValue as we don't wan't the quotes - formattedValueEscapedLower := utils.QuoteEscapedStringForContains(strings.ToLower(fmt.Sprintf("%s", value))) + formattedValueEscaped := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value)) // add index filters switch op { case v3.FilterOperatorContains, v3.FilterOperatorEqual, v3.FilterOperatorLike: - return fmt.Sprintf("lower(labels) like '%%%s%%%s%%'", key, formattedValueEscapedLower) + return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscaped) case v3.FilterOperatorNotContains, v3.FilterOperatorNotEqual, v3.FilterOperatorNotLike: - return fmt.Sprintf("lower(labels) not like '%%%s%%%s%%'", key, formattedValueEscapedLower) + return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscaped) case v3.FilterOperatorNotRegex: - return fmt.Sprintf("lower(labels) not like '%%%s%%'", key) + return fmt.Sprintf("labels not like '%%%s%%'", key) case v3.FilterOperatorIn, v3.FilterOperatorNotIn: return buildIndexFilterForInOperator(key, op, value) default: - return fmt.Sprintf("lower(labels) like '%%%s%%'", key) + return fmt.Sprintf("labels like '%%%s%%'", key) } } @@ -109,7 +107,7 @@ func buildResourceFiltersFromFilterItems(fs *v3.FilterSet) ([]string, error) { // since out map is in lower case we are converting it to lowercase operatorLower := strings.ToLower(string(item.Operator)) op := v3.FilterOperator(operatorLower) - keyName := strings.ToLower(item.Key.Key) + keyName := item.Key.Key // resource filter value data type will always be string // will be an interface if the operator is IN or NOT IN @@ -153,16 +151,14 @@ func buildResourceFiltersFromGroupBy(groupBy []v3.AttributeKey) []string { if attr.Type != v3.AttributeKeyTypeResource { continue } - key := strings.ToLower(attr.Key) - conditions = append(conditions, fmt.Sprintf("(simpleJSONHas(lower(labels), '%s') AND lower(labels) like '%%%s%%')", key, key)) + conditions = append(conditions, fmt.Sprintf("(simpleJSONHas(labels, '%s') AND labels like '%%%s%%')", attr.Key, attr.Key)) } return conditions } func buildResourceFiltersFromAggregateAttribute(aggregateAttribute v3.AttributeKey) string { if aggregateAttribute.Key != "" && aggregateAttribute.Type == v3.AttributeKeyTypeResource { - key := strings.ToLower(aggregateAttribute.Key) - return fmt.Sprintf("(simpleJSONHas(lower(labels), '%s') AND lower(labels) like '%%%s%%')", key, key) + return fmt.Sprintf("(simpleJSONHas(labels, '%s') AND labels like '%%%s%%')", aggregateAttribute.Key, aggregateAttribute.Key) } return "" diff --git a/pkg/query-service/app/logs/v4/resource_query_builder_test.go b/pkg/query-service/app/logs/v4/resource_query_builder_test.go index 803a2d378b..1616c29e08 100644 --- a/pkg/query-service/app/logs/v4/resource_query_builder_test.go +++ b/pkg/query-service/app/logs/v4/resource_query_builder_test.go @@ -25,7 +25,7 @@ func Test_buildResourceFilter(t *testing.T) { key: "service.name", op: v3.FilterOperatorExists, }, - want: `simpleJSONHas(lower(labels), 'service.name')`, + want: `simpleJSONHas(labels, 'service.name')`, }, { name: "test nexists", @@ -33,7 +33,7 @@ func Test_buildResourceFilter(t *testing.T) { key: "service.name", op: v3.FilterOperatorNotExists, }, - want: `not simpleJSONHas(lower(labels), 'service.name')`, + want: `not simpleJSONHas(labels, 'service.name')`, }, { name: "test regex", @@ -43,7 +43,7 @@ func Test_buildResourceFilter(t *testing.T) { op: v3.FilterOperatorRegex, value: ".*", }, - want: `match(simpleJSONExtractString(lower(labels), 'service.name'), '.*')`, + want: `match(simpleJSONExtractString(labels, 'service.name'), '.*')`, }, { name: "test contains", @@ -51,9 +51,9 @@ func Test_buildResourceFilter(t *testing.T) { logsOp: "LIKE", key: "service.name", op: v3.FilterOperatorContains, - value: "application%_", + value: "Application%_", }, - want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE '%application\%\_%'`, + want: `simpleJSONExtractString(labels, 'service.name') LIKE '%Application\%\_%'`, }, { name: "test eq", @@ -63,7 +63,7 @@ func Test_buildResourceFilter(t *testing.T) { op: v3.FilterOperatorEqual, value: "Application", }, - want: `simpleJSONExtractString(lower(labels), 'service.name') = 'application'`, + want: `simpleJSONExtractString(labels, 'service.name') = 'Application'`, }, { name: "test value with quotes", @@ -73,7 +73,7 @@ func Test_buildResourceFilter(t *testing.T) { op: v3.FilterOperatorEqual, value: "Application's", }, - want: `simpleJSONExtractString(lower(labels), 'service.name') = 'application\'s'`, + want: `simpleJSONExtractString(labels, 'service.name') = 'Application\'s'`, }, } for _, tt := range tests { @@ -103,7 +103,7 @@ func Test_buildIndexFilterForInOperator(t *testing.T) { op: v3.FilterOperatorIn, value: []interface{}{"Application", "Test"}, }, - want: `(lower(labels) like '%"service.name":"application"%' OR lower(labels) like '%"service.name":"test"%')`, + want: `(labels like '%"service.name":"Application"%' OR labels like '%"service.name":"Test"%')`, }, { name: "test nin array", @@ -112,7 +112,7 @@ func Test_buildIndexFilterForInOperator(t *testing.T) { op: v3.FilterOperatorNotIn, value: []interface{}{"Application", "Test"}, }, - want: `(lower(labels) not like '%"service.name":"application"%' AND lower(labels) not like '%"service.name":"test"%')`, + want: `(labels not like '%"service.name":"Application"%' AND labels not like '%"service.name":"Test"%')`, }, { name: "test in string", @@ -121,7 +121,7 @@ func Test_buildIndexFilterForInOperator(t *testing.T) { op: v3.FilterOperatorIn, value: "application", }, - want: `(lower(labels) like '%"service.name":"application"%')`, + want: `(labels like '%"service.name":"application"%')`, }, { name: "test nin string", @@ -130,7 +130,7 @@ func Test_buildIndexFilterForInOperator(t *testing.T) { op: v3.FilterOperatorNotIn, value: "application'\"_s", }, - want: `(lower(labels) not like '%"service.name":"application\'"\_s"%')`, + want: `(labels not like '%"service.name":"application\'"\_s"%')`, }, } for _, tt := range tests { @@ -160,7 +160,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorContains, value: "application", }, - want: `lower(labels) like '%service.name%application%'`, + want: `labels like '%service.name%application%'`, }, { name: "test not contains", @@ -169,7 +169,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorNotContains, value: "application", }, - want: `lower(labels) not like '%service.name%application%'`, + want: `labels not like '%service.name%application%'`, }, { name: "test contains with % and _", @@ -178,7 +178,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorNotContains, value: "application%_test", }, - want: `lower(labels) not like '%service.name%application\%\_test%'`, + want: `labels not like '%service.name%application\%\_test%'`, }, { name: "test not regex", @@ -187,7 +187,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorNotRegex, value: ".*", }, - want: `lower(labels) not like '%service.name%'`, + want: `labels not like '%service.name%'`, }, { name: "test in", @@ -196,7 +196,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorNotIn, value: []interface{}{"Application", "Test"}, }, - want: `(lower(labels) not like '%"service.name":"application"%' AND lower(labels) not like '%"service.name":"test"%')`, + want: `(labels not like '%"service.name":"Application"%' AND labels not like '%"service.name":"Test"%')`, }, { name: "test eq", @@ -205,7 +205,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorEqual, value: "Application", }, - want: `lower(labels) like '%service.name%application%'`, + want: `labels like '%service.name%Application%'`, }, } for _, tt := range tests { @@ -265,8 +265,8 @@ func Test_buildResourceFiltersFromFilterItems(t *testing.T) { }, }, want: []string{ - "simpleJSONExtractString(lower(labels), 'service.name') = 'test'", - "lower(labels) like '%service.name%test%'", + "simpleJSONExtractString(labels, 'service.name') = 'test'", + "labels like '%service.name%test%'", }, wantErr: false, }, @@ -297,10 +297,10 @@ func Test_buildResourceFiltersFromFilterItems(t *testing.T) { }, }, want: []string{ - "simpleJSONExtractString(lower(labels), 'service.name') = 'test'", - "lower(labels) like '%service.name%test%'", - "simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%'", - "lower(labels) like '%namespace%test1%'", + "simpleJSONExtractString(labels, 'service.name') = 'test'", + "labels like '%service.name%test%'", + "simpleJSONExtractString(labels, 'namespace') LIKE '%test1%'", + "labels like '%namespace%test1%'", }, wantErr: false, }, @@ -340,7 +340,7 @@ func Test_buildResourceFiltersFromGroupBy(t *testing.T) { }, }, want: []string{ - "(simpleJSONHas(lower(labels), 'service.name') AND lower(labels) like '%service.name%')", + "(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')", }, }, { @@ -360,8 +360,8 @@ func Test_buildResourceFiltersFromGroupBy(t *testing.T) { }, }, want: []string{ - "(simpleJSONHas(lower(labels), 'service.name') AND lower(labels) like '%service.name%')", - "(simpleJSONHas(lower(labels), 'namespace') AND lower(labels) like '%namespace%')", + "(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')", + "(simpleJSONHas(labels, 'namespace') AND labels like '%namespace%')", }, }, } @@ -392,7 +392,7 @@ func Test_buildResourceFiltersFromAggregateAttribute(t *testing.T) { Type: v3.AttributeKeyTypeResource, }, }, - want: "(simpleJSONHas(lower(labels), 'service.name') AND lower(labels) like '%service.name%')", + want: "(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')", }, } for _, tt := range tests { @@ -460,10 +460,10 @@ func Test_buildResourceSubQuery(t *testing.T) { }, want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " + "(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " + - "simpleJSONExtractString(lower(labels), 'service.name') = 'test' AND lower(labels) like '%service.name%test%' " + - "AND simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%' AND lower(labels) like '%namespace%test1%' " + - "AND (simpleJSONHas(lower(labels), 'cluster.name') AND lower(labels) like '%cluster.name%') AND " + - "( (simpleJSONHas(lower(labels), 'host.name') AND lower(labels) like '%host.name%') ))", + "simpleJSONExtractString(labels, 'service.name') = 'test' AND labels like '%service.name%test%' " + + "AND simpleJSONExtractString(labels, 'namespace') LIKE '%test1%' AND labels like '%namespace%test1%' " + + "AND (simpleJSONHas(labels, 'cluster.name') AND labels like '%cluster.name%') AND " + + "( (simpleJSONHas(labels, 'host.name') AND labels like '%host.name%') ))", wantErr: false, }, }