diff --git a/pkg/query-service/app/logs/v4/query_builder.go b/pkg/query-service/app/logs/v4/query_builder.go
new file mode 100644
index 0000000000..08024756bd
--- /dev/null
+++ b/pkg/query-service/app/logs/v4/query_builder.go
@@ -0,0 +1,38 @@
+package v4
+
+import (
+	v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
+)
+
+// logOperators maps a filter operator to its ClickHouse counterpart. Most
+// entries are plain SQL operators; the regex and exists entries are format
+// templates.
+var logOperators = map[v3.FilterOperator]string{
+	v3.FilterOperatorEqual:           "=",
+	v3.FilterOperatorNotEqual:        "!=",
+	v3.FilterOperatorLessThan:        "<",
+	v3.FilterOperatorLessThanOrEq:    "<=",
+	v3.FilterOperatorGreaterThan:     ">",
+	v3.FilterOperatorGreaterThanOrEq: ">=",
+	v3.FilterOperatorLike:            "LIKE",
+	v3.FilterOperatorNotLike:         "NOT LIKE",
+	v3.FilterOperatorContains:        "LIKE",
+	v3.FilterOperatorNotContains:     "NOT LIKE",
+	v3.FilterOperatorRegex:           "match(%s, %s)",
+	v3.FilterOperatorNotRegex:        "NOT match(%s, %s)",
+	v3.FilterOperatorIn:              "IN",
+	v3.FilterOperatorNotIn:           "NOT IN",
+	v3.FilterOperatorExists:          "mapContains(%s_%s, '%s')",
+	v3.FilterOperatorNotExists:       "not mapContains(%s_%s, '%s')",
+}
+
+// DISTRIBUTED_LOGS_V2_RESOURCE is the table buildResourceSubQuery selects
+// fingerprints from.
+// NOTE(review): NANOSECOND looks like the number of nanoseconds in a second,
+// confirm against its users.
+const (
+	BODY                         = "body"
+	DISTRIBUTED_LOGS_V2          = "distributed_logs_v2"
+	DISTRIBUTED_LOGS_V2_RESOURCE = "distributed_logs_v2_resource"
+	NANOSECOND                   = 1000000000
+)
diff --git a/pkg/query-service/app/logs/v4/resource_query_builder.go b/pkg/query-service/app/logs/v4/resource_query_builder.go
new file mode 100644
index 0000000000..004c9269fb
--- /dev/null
+++ b/pkg/query-service/app/logs/v4/resource_query_builder.go
@@ -0,0 +1,253 @@
+package v4
+
+import (
+	"fmt"
+	"strings"
+
+	v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
+	"go.signoz.io/signoz/pkg/query-service/utils"
+)
+
+// buildResourceFilter builds the ClickHouse condition that evaluates a single
+// resource attribute filter against the JSON `labels` column.
+//
+// logsOp is the logOperators entry for op: a plain SQL operator for most
+// operators and a two-verb format template for the regex operators. value is
+// not used by the exists operators. key is quote-escaped because it is placed
+// inside a single-quoted SQL string literal.
+func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string {
+	escapedKey := utils.QuoteEscapedString(key)
+
+	// the exists operators carry no value, so answer them before any value
+	// formatting happens
+	switch op {
+	case v3.FilterOperatorExists:
+		return fmt.Sprintf("simpleJSONHas(labels, '%s')", escapedKey)
+	case v3.FilterOperatorNotExists:
+		return fmt.Sprintf("not simpleJSONHas(labels, '%s')", escapedKey)
+	}
+
+	searchKey := fmt.Sprintf("simpleJSONExtractString(labels, '%s')", escapedKey)
+
+	switch op {
+	case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex:
+		// logsOp is a template here, e.g. "match(%s, %s)"
+		return fmt.Sprintf(logsOp, searchKey, utils.ClickHouseFormattedValue(value))
+	case v3.FilterOperatorContains, v3.FilterOperatorNotContains:
+		// ClickHouseFormattedValue adds quotes around strings, so the value is
+		// escaped separately and wrapped in the % wildcards here
+		escapedValue := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value))
+		return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, escapedValue)
+	default:
+		return fmt.Sprintf("%s %s %s", searchKey, logsOp, utils.ClickHouseFormattedValue(value))
+	}
+}
+
+// buildIndexFilterForInOperator builds the `labels` index filter for the IN
+// and NOT IN operators by matching the serialized `"key":"value"` pair.
+// example: x in (a, b)  => (labels like '%"x":"a"%' OR labels like '%"x":"b"%')
+// example: x nin (a, b) => (labels not like '%"x":"a"%' AND labels not like '%"x":"b"%')
+//
+// value may be a string, a []string or a []interface{}; non-string elements are
+// skipped. An empty string is returned when there is nothing to filter on.
+func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value interface{}) string {
+	separator := " OR "
+	sqlOp := "like"
+	if op == v3.FilterOperatorNotIn {
+		separator = " AND "
+		sqlOp = "not like"
+	}
+
+	var values []string
+	switch v := value.(type) {
+	case string:
+		values = append(values, v)
+	case []string:
+		values = append(values, v...)
+	case []interface{}:
+		for _, item := range v {
+			// resource attributes are always string values
+			if s, ok := item.(string); ok {
+				values = append(values, s)
+			}
+		}
+	}
+
+	// if there are no values to filter on, return an empty string
+	if len(values) == 0 {
+		return ""
+	}
+
+	escapedKey := utils.QuoteEscapedString(key)
+	conditions := make([]string, 0, len(values))
+	for _, v := range values {
+		escapedValue := utils.QuoteEscapedStringForContains(v)
+		conditions = append(conditions, fmt.Sprintf("labels %s '%%\"%s\":\"%s\"%%'", sqlOp, escapedKey, escapedValue))
+	}
+	return "(" + strings.Join(conditions, separator) + ")"
+}
+
+// buildResourceIndexFilter builds an additional condition on the raw `labels`
+// string that accompanies the filter from buildResourceFilter, for better
+// usage of the index.
+// example: x contains 'john' => labels like '%x%john%'
+//
+// An empty string means no index filter should be added. NOT REGEX and
+// NOT EXISTS get none: a pattern on the key alone either drops rows that have
+// the key with a non-matching value (NOT REGEX) or contradicts the real filter
+// (NOT EXISTS).
+//
+// NOTE(review): the "not like '%key%value%'" form used for the other negative
+// operators also drops rows where value merely appears somewhere after key
+// (e.g. x != 'test' drops x = 'test2'); kept as is, confirm whether intended.
+func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string {
+	escapedKey := utils.QuoteEscapedString(key)
+	strValue := fmt.Sprintf("%s", value)
+
+	// not using ClickHouseFormattedValue as we don't want the quotes
+	literalValue := utils.QuoteEscapedStringForContains(strValue)
+	// a LIKE / NOT LIKE value is already a pattern: its % and _ wildcards must
+	// stay unescaped or the index filter rejects rows the real filter accepts
+	patternValue := utils.QuoteEscapedString(strValue)
+
+	switch op {
+	case v3.FilterOperatorContains, v3.FilterOperatorEqual:
+		return fmt.Sprintf("labels like '%%%s%%%s%%'", escapedKey, literalValue)
+	case v3.FilterOperatorLike:
+		return fmt.Sprintf("labels like '%%%s%%%s%%'", escapedKey, patternValue)
+	case v3.FilterOperatorNotContains, v3.FilterOperatorNotEqual:
+		return fmt.Sprintf("labels not like '%%%s%%%s%%'", escapedKey, literalValue)
+	case v3.FilterOperatorNotLike:
+		return fmt.Sprintf("labels not like '%%%s%%%s%%'", escapedKey, patternValue)
+	case v3.FilterOperatorNotRegex, v3.FilterOperatorNotExists:
+		return ""
+	case v3.FilterOperatorIn, v3.FilterOperatorNotIn:
+		return buildIndexFilterForInOperator(key, op, value)
+	default:
+		return fmt.Sprintf("labels like '%%%s%%'", escapedKey)
+	}
+}
+
+// buildResourceFiltersFromFilterItems builds the list of ClickHouse conditions
+// for the resource attribute items of a FilterSet. Items of any other attribute
+// type are skipped. Every resource item contributes its filter and, when one
+// exists, its index filter.
+//
+// An error is returned when a resource key is not of string data type (IN and
+// NOT IN are exempt), when the value cannot be validated and cast, or when the
+// operator is not present in logOperators.
+func buildResourceFiltersFromFilterItems(fs *v3.FilterSet) ([]string, error) {
+	if fs == nil || len(fs.Items) == 0 {
+		return nil, nil
+	}
+
+	var conditions []string
+	for _, item := range fs.Items {
+		// skip anything other than resource attribute
+		if item.Key.Type != v3.AttributeKeyTypeResource {
+			continue
+		}
+
+		// logOperators is expected to be keyed by the lower-case operator
+		op := v3.FilterOperator(strings.ToLower(string(item.Operator)))
+		keyName := item.Key.Key
+
+		// resource filter value data type will always be string
+		// will be an interface if the operator is IN or NOT IN
+		isInOp := op == v3.FilterOperatorIn || op == v3.FilterOperatorNotIn
+		if item.Key.DataType != v3.AttributeKeyDataTypeString && !isInOp {
+			return nil, fmt.Errorf("invalid data type for resource attribute: %s", keyName)
+		}
+
+		// the exists operators take no value, everything else is cast
+		// regardless of the actual type
+		var value interface{}
+		if op != v3.FilterOperatorExists && op != v3.FilterOperatorNotExists {
+			var err error
+			value, err = utils.ValidateAndCastValue(item.Value, item.Key.DataType)
+			if err != nil {
+				return nil, fmt.Errorf("failed to validate and cast value for %s: %w", keyName, err)
+			}
+		}
+
+		logsOp, ok := logOperators[op]
+		if !ok {
+			return nil, fmt.Errorf("unsupported operator: %s", op)
+		}
+
+		// the filter
+		if resourceFilter := buildResourceFilter(logsOp, keyName, op, value); resourceFilter != "" {
+			conditions = append(conditions, resourceFilter)
+		}
+		// the additional filter for better usage of the index
+		if resourceIndexFilter := buildResourceIndexFilter(keyName, op, value); resourceIndexFilter != "" {
+			conditions = append(conditions, resourceIndexFilter)
+		}
+	}
+
+	return conditions, nil
+}
+
+// resourceKeyExistsFilter returns the condition requiring that key is present
+// in labels: the JSON exists check plus the matching index filter.
+func resourceKeyExistsFilter(key string) string {
+	escapedKey := utils.QuoteEscapedString(key)
+	return fmt.Sprintf("(simpleJSONHas(labels, '%s') AND labels like '%%%s%%')", escapedKey, escapedKey)
+}
+
+// buildResourceFiltersFromGroupBy returns an exists condition for every
+// resource attribute in groupBy; other attribute types are skipped.
+func buildResourceFiltersFromGroupBy(groupBy []v3.AttributeKey) []string {
+	var conditions []string
+
+	for _, attr := range groupBy {
+		if attr.Type != v3.AttributeKeyTypeResource {
+			continue
+		}
+		conditions = append(conditions, resourceKeyExistsFilter(attr.Key))
+	}
+	return conditions
+}
+
+// buildResourceFiltersFromAggregateAttribute returns an exists condition for
+// aggregateAttribute when it is a resource attribute with a non-empty key, and
+// an empty string otherwise.
+func buildResourceFiltersFromAggregateAttribute(aggregateAttribute v3.AttributeKey) string {
+	if aggregateAttribute.Key == "" || aggregateAttribute.Type != v3.AttributeKeyTypeResource {
+		return ""
+	}
+	return resourceKeyExistsFilter(aggregateAttribute.Key)
+}
+
+// buildResourceSubQuery builds the sub query that selects the fingerprints of
+// the resources matching the resource attribute filters, the aggregate
+// attribute and the group by keys, for seen_at_ts_bucket_start values between
+// bucketStart and bucketEnd (both inclusive). An empty string is returned when
+// there are no resource conditions at all.
+func buildResourceSubQuery(bucketStart, bucketEnd int64, fs *v3.FilterSet, groupBy []v3.AttributeKey, aggregateAttribute v3.AttributeKey) (string, error) {
+	// only the resource attributes of the filter set end up in the conditions
+	conditions, err := buildResourceFiltersFromFilterItems(fs)
+	if err != nil {
+		return "", err
+	}
+
+	// for aggregate attribute add exists check in resources
+	if aggregateFilter := buildResourceFiltersFromAggregateAttribute(aggregateAttribute); aggregateFilter != "" {
+		conditions = append(conditions, aggregateFilter)
+	}
+
+	if groupByFilters := buildResourceFiltersFromGroupBy(groupBy); len(groupByFilters) > 0 {
+		// TODO: change AND to OR once we know how to solve for group by ( i.e show values if one is not present)
+		conditions = append(conditions, "( "+strings.Join(groupByFilters, " AND ")+" )")
+	}
+
+	if len(conditions) == 0 {
+		return "", nil
+	}
+
+	query := fmt.Sprintf(
+		"(SELECT fingerprint FROM signoz_logs.%s WHERE (seen_at_ts_bucket_start >= %d) AND (seen_at_ts_bucket_start <= %d) AND %s)",
+		DISTRIBUTED_LOGS_V2_RESOURCE, bucketStart, bucketEnd, strings.Join(conditions, " AND "),
+	)
+	return query, nil
+}
diff --git a/pkg/query-service/app/logs/v4/resource_query_builder_test.go b/pkg/query-service/app/logs/v4/resource_query_builder_test.go
new file mode 100644
index 0000000000..1616c29e08
--- /dev/null
+++ b/pkg/query-service/app/logs/v4/resource_query_builder_test.go
@@ -0,0 +1,516 @@
+package v4
+
+import (
+	"reflect"
+	"testing"
+
+	v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
+)
+
+func Test_buildResourceFilter(t *testing.T) {
+	type args struct {
+		logsOp string
+		key    string
+		op     v3.FilterOperator
+		value  interface{}
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{
+			name: "test exists",
+			args: args{
+				key: "service.name",
+				op:  v3.FilterOperatorExists,
+			},
+			want: `simpleJSONHas(labels, 'service.name')`,
+		},
+		{
+			name: "test nexists",
+			args: args{
+				key: "service.name",
+				op:  v3.FilterOperatorNotExists,
+			},
+			want: `not simpleJSONHas(labels, 'service.name')`,
+		},
+		{
+			name: "test regex",
+			args: args{
+				logsOp: "match(%s, %s)",
+				key:    "service.name",
+				op:     v3.FilterOperatorRegex,
+				value:  ".*",
+			},
+			want: `match(simpleJSONExtractString(labels, 'service.name'), '.*')`,
+		},
+		{
+			name: "test contains",
+			args: args{
+				logsOp: "LIKE",
+				key:    "service.name",
+				op:     v3.FilterOperatorContains,
+				value:  "Application%_",
+			},
+			want: `simpleJSONExtractString(labels, 'service.name') LIKE '%Application\%\_%'`,
+		},
+		{
+			name: "test eq",
+			args: args{
+				logsOp: "=",
+				key:    "service.name",
+				op:     v3.FilterOperatorEqual,
+				value:  "Application",
+			},
+			want: `simpleJSONExtractString(labels, 'service.name') = 'Application'`,
+		},
+		{
+			name: "test value with quotes",
+			args: args{
+				logsOp: "=",
+				key:    "service.name",
+				op:     v3.FilterOperatorEqual,
+				value:  "Application's",
+			},
+			want: `simpleJSONExtractString(labels, 'service.name') = 'Application\'s'`,
+		},
+		{
+			name: "test key with quotes",
+			args: args{
+				key: "service'name",
+				op:  v3.FilterOperatorExists,
+			},
+			want: `simpleJSONHas(labels, 'service\'name')`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := buildResourceFilter(tt.args.logsOp, tt.args.key, tt.args.op, tt.args.value); got != tt.want {
+				t.Errorf("buildResourceFilter() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_buildIndexFilterForInOperator(t *testing.T) {
+	type args struct {
+		key   string
+		op    v3.FilterOperator
+		value interface{}
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{
+			name: "test in array",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorIn,
+				value: []interface{}{"Application", "Test"},
+			},
+			want: `(labels like '%"service.name":"Application"%' OR labels like '%"service.name":"Test"%')`,
+		},
+		{
+			name: "test nin array",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorNotIn,
+				value: []interface{}{"Application", "Test"},
+			},
+			want: `(labels not like '%"service.name":"Application"%' AND labels not like '%"service.name":"Test"%')`,
+		},
+		{
+			name: "test in string",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorIn,
+				value: "application",
+			},
+			want: `(labels like '%"service.name":"application"%')`,
+		},
+		{
+			name: "test nin string",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorNotIn,
+				value: "application'\"_s",
+			},
+			want: `(labels not like '%"service.name":"application\'"\_s"%')`,
+		},
+		{
+			name: "test in string slice",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorIn,
+				value: []string{"Application", "Test"},
+			},
+			want: `(labels like '%"service.name":"Application"%' OR labels like '%"service.name":"Test"%')`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := buildIndexFilterForInOperator(tt.args.key, tt.args.op, tt.args.value); got != tt.want {
+				t.Errorf("buildIndexFilterForInOperator() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_buildResourceIndexFilter(t *testing.T) {
+	type args struct {
+		key   string
+		op    v3.FilterOperator
+		value interface{}
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{
+			name: "test contains",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorContains,
+				value: "application",
+			},
+			want: `labels like '%service.name%application%'`,
+		},
+		{
+			name: "test not contains",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorNotContains,
+				value: "application",
+			},
+			want: `labels not like '%service.name%application%'`,
+		},
+		{
+			name: "test contains with % and _",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorNotContains,
+				value: "application%_test",
+			},
+			want: `labels not like '%service.name%application\%\_test%'`,
+		},
+		{
+			name: "test not regex",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorNotRegex,
+				value: ".*",
+			},
+			want: "",
+		},
+		{
+			name: "test not exists",
+			args: args{
+				key: "service.name",
+				op:  v3.FilterOperatorNotExists,
+			},
+			want: "",
+		},
+		{
+			name: "test in",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorNotIn,
+				value: []interface{}{"Application", "Test"},
+			},
+			want: `(labels not like '%"service.name":"Application"%' AND labels not like '%"service.name":"Test"%')`,
+		},
+		{
+			name: "test eq",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorEqual,
+				value: "Application",
+			},
+			want: `labels like '%service.name%Application%'`,
+		},
+		{
+			name: "test like keeps wildcards",
+			args: args{
+				key:   "service.name",
+				op:    v3.FilterOperatorLike,
+				value: "app%",
+			},
+			want: `labels like '%service.name%app%%'`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := buildResourceIndexFilter(tt.args.key, tt.args.op, tt.args.value); got != tt.want {
+				t.Errorf("buildResourceIndexFilter() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_buildResourceFiltersFromFilterItems(t *testing.T) {
+	type args struct {
+		fs *v3.FilterSet
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    []string
+		wantErr bool
+	}{
+		{
+			name: "ignore attribute",
+			args: args{
+				fs: &v3.FilterSet{
+					Items: []v3.FilterItem{
+						{
+							Key: v3.AttributeKey{
+								Key:      "service.name",
+								DataType: v3.AttributeKeyDataTypeString,
+								Type:     v3.AttributeKeyTypeTag,
+							},
+							Operator: v3.FilterOperatorEqual,
+							Value:    "test",
+						},
+					},
+				},
+			},
+			want:    nil,
+			wantErr: false,
+		},
+		{
+			name: "build filter",
+			args: args{
+				fs: &v3.FilterSet{
+					Items: []v3.FilterItem{
+						{
+							Key: v3.AttributeKey{
+								Key:      "service.name",
+								DataType: v3.AttributeKeyDataTypeString,
+								Type:     v3.AttributeKeyTypeResource,
+							},
+							Operator: v3.FilterOperatorEqual,
+							Value:    "test",
+						},
+					},
+				},
+			},
+			want: []string{
+				"simpleJSONExtractString(labels, 'service.name') = 'test'",
+				"labels like '%service.name%test%'",
+			},
+			wantErr: false,
+		},
+		{
+			name: "build filter with multiple items",
+			args: args{
+				fs: &v3.FilterSet{
+					Items: []v3.FilterItem{
+						{
+							Key: v3.AttributeKey{
+								Key:      "service.name",
+								DataType: v3.AttributeKeyDataTypeString,
+								Type:     v3.AttributeKeyTypeResource,
+							},
+							Operator: v3.FilterOperatorEqual,
+							Value:    "test",
+						},
+						{
+							Key: v3.AttributeKey{
+								Key:      "namespace",
+								DataType: v3.AttributeKeyDataTypeString,
+								Type:     v3.AttributeKeyTypeResource,
+							},
+							Operator: v3.FilterOperatorContains,
+							Value:    "test1",
+						},
+					},
+				},
+			},
+			want: []string{
+				"simpleJSONExtractString(labels, 'service.name') = 'test'",
+				"labels like '%service.name%test%'",
+				"simpleJSONExtractString(labels, 'namespace') LIKE '%test1%'",
+				"labels like '%namespace%test1%'",
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := buildResourceFiltersFromFilterItems(tt.args.fs)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("buildResourceFiltersFromFilterItems() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("buildResourceFiltersFromFilterItems() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_buildResourceFiltersFromGroupBy(t *testing.T) {
+	type args struct {
+		groupBy []v3.AttributeKey
+	}
+	tests := []struct {
+		name string
+		args args
+		want []string
+	}{
+		{
+			name: "build filter",
+			args: args{
+				groupBy: []v3.AttributeKey{
+					{
+						Key:      "service.name",
+						DataType: v3.AttributeKeyDataTypeString,
+						Type:     v3.AttributeKeyTypeResource,
+					},
+				},
+			},
+			want: []string{
+				"(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')",
+			},
+		},
+		{
+			name: "build filter multiple group by",
+			args: args{
+				groupBy: []v3.AttributeKey{
+					{
+						Key:      "service.name",
+						DataType: v3.AttributeKeyDataTypeString,
+						Type:     v3.AttributeKeyTypeResource,
+					},
+					{
+						Key:      "namespace",
+						DataType: v3.AttributeKeyDataTypeString,
+						Type:     v3.AttributeKeyTypeResource,
+					},
+				},
+			},
+			want: []string{
+				"(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')",
+				"(simpleJSONHas(labels, 'namespace') AND labels like '%namespace%')",
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := buildResourceFiltersFromGroupBy(tt.args.groupBy); !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("buildResourceFiltersFromGroupBy() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_buildResourceFiltersFromAggregateAttribute(t *testing.T) {
+	type args struct {
+		aggregateAttribute v3.AttributeKey
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{
+			name: "build filter",
+			args: args{
+				aggregateAttribute: v3.AttributeKey{
+					Key:      "service.name",
+					DataType: v3.AttributeKeyDataTypeString,
+					Type:     v3.AttributeKeyTypeResource,
+				},
+			},
+			want: "(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := buildResourceFiltersFromAggregateAttribute(tt.args.aggregateAttribute); got != tt.want {
+				t.Errorf("buildResourceFiltersFromAggregateAttribute() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_buildResourceSubQuery(t *testing.T) {
+	type args struct {
+		bucketStart        int64
+		bucketEnd          int64
+		fs                 *v3.FilterSet
+		groupBy            []v3.AttributeKey
+		aggregateAttribute v3.AttributeKey
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    string
+		wantErr bool
+	}{
+		{
+			name: "build sub query",
+			args: args{
+				bucketStart: 1680064560,
+				bucketEnd:   1680066458,
+				fs: &v3.FilterSet{
+					Items: []v3.FilterItem{
+						{
+							Key: v3.AttributeKey{
+								Key:      "service.name",
+								DataType: v3.AttributeKeyDataTypeString,
+								Type:     v3.AttributeKeyTypeResource,
+							},
+							Operator: v3.FilterOperatorEqual,
+							Value:    "test",
+						},
+						{
+							Key: v3.AttributeKey{
+								Key:      "namespace",
+								DataType: v3.AttributeKeyDataTypeString,
+								Type:     v3.AttributeKeyTypeResource,
+							},
+							Operator: v3.FilterOperatorContains,
+							Value:    "test1",
+						},
+					},
+				},
+				groupBy: []v3.AttributeKey{
+					{
+						Key:      "host.name",
+						DataType: v3.AttributeKeyDataTypeString,
+						Type:     v3.AttributeKeyTypeResource,
+					},
+				},
+				aggregateAttribute: v3.AttributeKey{
+					Key:      "cluster.name",
+					DataType: v3.AttributeKeyDataTypeString,
+					Type:     v3.AttributeKeyTypeResource,
+				},
+			},
+			want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " +
+				"(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " +
+				"simpleJSONExtractString(labels, 'service.name') = 'test' AND labels like '%service.name%test%' " +
+				"AND simpleJSONExtractString(labels, 'namespace') LIKE '%test1%' AND labels like '%namespace%test1%' " +
+				"AND (simpleJSONHas(labels, 'cluster.name') AND labels like '%cluster.name%') AND " +
+				"( (simpleJSONHas(labels, 'host.name') AND labels like '%host.name%') ))",
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := buildResourceSubQuery(tt.args.bucketStart, tt.args.bucketEnd, tt.args.fs, tt.args.groupBy, tt.args.aggregateAttribute)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("buildResourceSubQuery() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("buildResourceSubQuery() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/pkg/query-service/utils/format.go b/pkg/query-service/utils/format.go
index 4de081940d..c623d3e8e0 100644
--- a/pkg/query-service/utils/format.go
+++ b/pkg/query-service/utils/format.go
@@ -154,6 +154,17 @@ func QuoteEscapedString(str string) string {
 	return str
 }
 
+// QuoteEscapedStringForContains escapes str for use inside a quoted LIKE
+// pattern: on top of QuoteEscapedString it escapes the LIKE wildcards % and _
+// with a backslash.
+// https://clickhouse.com/docs/en/sql-reference/functions/string-search-functions#like
+func QuoteEscapedStringForContains(str string) string {
+	str = QuoteEscapedString(str)
+	str = strings.ReplaceAll(str, `%`, `\%`)
+	str = strings.ReplaceAll(str, `_`, `\_`)
+	return str
+}
+
 // ClickHouseFormattedValue formats the value to be used in clickhouse query
 func ClickHouseFormattedValue(v interface{}) string {
 	// if it's pointer convert it to a value