From 236b1a5dbb5d13ae94b07088c37e07488ff8ef07 Mon Sep 17 00:00:00 2001 From: nityanandagohain Date: Fri, 20 Sep 2024 17:39:29 +0530 Subject: [PATCH] feat: support for case insensitive for contains and like queries --- .../app/logs/v4/query_builder.go | 10 +-- .../app/logs/v4/query_builder_test.go | 6 +- .../app/logs/v4/resource_query_builder.go | 63 ++++++++++++++----- .../logs/v4/resource_query_builder_test.go | 20 +++--- 4 files changed, 68 insertions(+), 31 deletions(-) diff --git a/pkg/query-service/app/logs/v4/query_builder.go b/pkg/query-service/app/logs/v4/query_builder.go index e906c605a1..49e585e64b 100644 --- a/pkg/query-service/app/logs/v4/query_builder.go +++ b/pkg/query-service/app/logs/v4/query_builder.go @@ -17,10 +17,10 @@ var logOperators = map[v3.FilterOperator]string{ v3.FilterOperatorLessThanOrEq: "<=", v3.FilterOperatorGreaterThan: ">", v3.FilterOperatorGreaterThanOrEq: ">=", - v3.FilterOperatorLike: "LIKE", - v3.FilterOperatorNotLike: "NOT LIKE", - v3.FilterOperatorContains: "LIKE", - v3.FilterOperatorNotContains: "NOT LIKE", + v3.FilterOperatorLike: "ILIKE", + v3.FilterOperatorNotLike: "NOT ILIKE", + v3.FilterOperatorContains: "ILIKE", + v3.FilterOperatorNotContains: "NOT ILIKE", v3.FilterOperatorRegex: "match(%s, %s)", v3.FilterOperatorNotRegex: "NOT match(%s, %s)", v3.FilterOperatorIn: "IN", @@ -150,6 +150,7 @@ func buildAttributeFilter(item v3.FilterItem) (string, error) { val := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", item.Value)) // for body the contains is case insensitive if keyName == BODY { + logsOp = strings.Replace(logsOp, "ILIKE", "LIKE", 1) // removing i from ilike and not ilike return fmt.Sprintf("lower(%s) %s lower('%%%s%%')", keyName, logsOp, val), nil } else { return fmt.Sprintf("%s %s '%%%s%%'", keyName, logsOp, val), nil @@ -158,6 +159,7 @@ func buildAttributeFilter(item v3.FilterItem) (string, error) { // for body use lower for like and ilike val := utils.QuoteEscapedString(fmt.Sprintf("%s", item.Value)) if 
keyName == BODY { + logsOp = strings.Replace(logsOp, "ILIKE", "LIKE", 1) // removing i from ilike and not ilike return fmt.Sprintf("lower(%s) %s lower('%s')", keyName, logsOp, val), nil } else { return fmt.Sprintf("%s %s '%s'", keyName, logsOp, val), nil diff --git a/pkg/query-service/app/logs/v4/query_builder_test.go b/pkg/query-service/app/logs/v4/query_builder_test.go index 34ea7e1f6f..9c2b1fd2e5 100644 --- a/pkg/query-service/app/logs/v4/query_builder_test.go +++ b/pkg/query-service/app/logs/v4/query_builder_test.go @@ -250,7 +250,7 @@ func Test_buildAttributeFilter(t *testing.T) { Value: "test", }, }, - want: "resources_string['service.name'] LIKE '%test%'", + want: "resources_string['service.name'] ILIKE '%test%'", }, { name: "build attribute filter contains- body", @@ -280,7 +280,7 @@ func Test_buildAttributeFilter(t *testing.T) { Value: "test%", }, }, - want: "resources_string['service.name'] LIKE 'test%'", + want: "resources_string['service.name'] ILIKE 'test%'", }, { name: "build attribute filter like-body", @@ -956,7 +956,7 @@ func TestPrepareLogsQuery(t *testing.T) { }, want: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body, attributes_string, attributes_number, attributes_bool, resources_string from " + "signoz_logs.distributed_logs_v2 where attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND " + - "(resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE simpleJSONExtractString(labels, 'service.name') LIKE '%app%' AND labels like '%service.name%app%' AND ", + "(resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE simpleJSONExtractString(lower(labels), 'service.name') LIKE '%app%' AND lower(labels) like '%service.name%app%' AND ", }, { name: "Live Tail Query W/O filter", diff --git a/pkg/query-service/app/logs/v4/resource_query_builder.go 
b/pkg/query-service/app/logs/v4/resource_query_builder.go index 2a56549b43..5004bdbc3b 100644 --- a/pkg/query-service/app/logs/v4/resource_query_builder.go +++ b/pkg/query-service/app/logs/v4/resource_query_builder.go @@ -8,12 +8,37 @@ import ( "go.signoz.io/signoz/pkg/query-service/utils" ) +var resourceLogOperators = map[v3.FilterOperator]string{ + v3.FilterOperatorEqual: "=", + v3.FilterOperatorNotEqual: "!=", + v3.FilterOperatorLessThan: "<", + v3.FilterOperatorLessThanOrEq: "<=", + v3.FilterOperatorGreaterThan: ">", + v3.FilterOperatorGreaterThanOrEq: ">=", + v3.FilterOperatorLike: "LIKE", + v3.FilterOperatorNotLike: "NOT LIKE", + v3.FilterOperatorContains: "LIKE", + v3.FilterOperatorNotContains: "NOT LIKE", + v3.FilterOperatorRegex: "match(%s, %s)", + v3.FilterOperatorNotRegex: "NOT match(%s, %s)", + v3.FilterOperatorIn: "IN", + v3.FilterOperatorNotIn: "NOT IN", + v3.FilterOperatorExists: "mapContains(%s_%s, '%s')", + v3.FilterOperatorNotExists: "not mapContains(%s_%s, '%s')", +} + // buildResourceFilter builds a clickhouse filter string for resource labels func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string { + // for all operators except contains and like searchKey := fmt.Sprintf("simpleJSONExtractString(labels, '%s')", key) + // for contains and like it will be case insensitive + lowerSearchKey := fmt.Sprintf("simpleJSONExtractString(lower(labels), '%s')", key) + chFmtVal := utils.ClickHouseFormattedValue(value) + lowerValue := strings.ToLower(fmt.Sprintf("%s", value)) + switch op { case v3.FilterOperatorExists: return fmt.Sprintf("simpleJSONHas(labels, '%s')", key) @@ -24,20 +49,20 @@ func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value case v3.FilterOperatorContains, v3.FilterOperatorNotContains: // this is required as clickhouseFormattedValue add's quotes to the string // we also want to treat %, _ as literals for contains - escapedStringValue := 
utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value)) - return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, escapedStringValue) + escapedStringValue := utils.QuoteEscapedStringForContains(lowerValue) + return fmt.Sprintf("%s %s '%%%s%%'", lowerSearchKey, logsOp, escapedStringValue) case v3.FilterOperatorLike, v3.FilterOperatorNotLike: // this is required as clickhouseFormattedValue add's quotes to the string - escapedStringValue := utils.QuoteEscapedString(fmt.Sprintf("%s", value)) - return fmt.Sprintf("%s %s '%s'", searchKey, logsOp, escapedStringValue) + escapedStringValue := utils.QuoteEscapedString(lowerValue) + return fmt.Sprintf("%s %s '%s'", lowerSearchKey, logsOp, escapedStringValue) default: return fmt.Sprintf("%s %s %s", searchKey, logsOp, chFmtVal) } } // buildIndexFilterForInOperator builds a clickhouse filter string for in operator -// example:= x in a,b,c = (labels like '%x%a%' or labels like '%"x":"b"%' or labels like '%"x"="c"%') -// example:= x nin a,b,c = (labels nlike '%x%a%' AND labels nlike '%"x"="b"' AND labels nlike '%"x"="c"%') +// example:= x in a,b,c = (labels like '%"x"%"a"%' or labels like '%"x":"b"%' or labels like '%"x"="c"%') +// example:= x nin a,b,c = (labels nlike '%"x"%"a"%' AND labels nlike '%"x"="b"' AND labels nlike '%"x"="c"%') func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value interface{}) string { conditions := []string{} separator := " OR " @@ -77,24 +102,34 @@ func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value inter // buildResourceIndexFilter builds a clickhouse filter string for resource labels // example:= x like '%john%' = labels like '%x%john%' +// we have two indexes for resource attributes one is lower and one is normal. 
+// for all operators other than like/contains we will use normal index +// for like/contains we will use lower index +// we can use lower index for =, in etc but it's difficult to do it for !=, NIN etc +// e.g. for x != "ABC" we cannot predict something like "not lower(labels) like '%%x%%abc%%'". It has to be "not lower(labels) like '%%x%%ABC%%'" func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string { // not using clickhouseFormattedValue as we don't wan't the quotes strVal := fmt.Sprintf("%s", value) - formattedValueEscapedForContains := utils.QuoteEscapedStringForContains(strVal) + formattedValueEscapedForContains := strings.ToLower(utils.QuoteEscapedStringForContains(strVal)) formattedValueEscaped := utils.QuoteEscapedString(strVal) // add index filters switch op { case v3.FilterOperatorContains: - return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscapedForContains) + return fmt.Sprintf("lower(labels) like '%%%s%%%s%%'", key, formattedValueEscapedForContains) case v3.FilterOperatorNotContains: - return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscapedForContains) - case v3.FilterOperatorLike, v3.FilterOperatorEqual: + return fmt.Sprintf("lower(labels) not like '%%%s%%%s%%'", key, formattedValueEscapedForContains) + case v3.FilterOperatorLike: + return fmt.Sprintf("lower(labels) like '%%%s%%%s%%'", key, formattedValueEscaped) + case v3.FilterOperatorNotLike: + return fmt.Sprintf("lower(labels) not like '%%%s%%%s%%'", key, formattedValueEscaped) + case v3.FilterOperatorEqual: return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscaped) - case v3.FilterOperatorNotLike, v3.FilterOperatorNotEqual: + case v3.FilterOperatorNotEqual: return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscaped) - case v3.FilterOperatorNotRegex: - return fmt.Sprintf("labels not like '%%%s%%'", key) + case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex: + // don't try to do anything 
for regex. + return "" case v3.FilterOperatorIn, v3.FilterOperatorNotIn: return buildIndexFilterForInOperator(key, op, value) default: @@ -137,7 +172,7 @@ func buildResourceFiltersFromFilterItems(fs *v3.FilterSet) ([]string, error) { } } - if logsOp, ok := logOperators[op]; ok { + if logsOp, ok := resourceLogOperators[op]; ok { // the filter if resourceFilter := buildResourceFilter(logsOp, keyName, op, value); resourceFilter != "" { conditions = append(conditions, resourceFilter) diff --git a/pkg/query-service/app/logs/v4/resource_query_builder_test.go b/pkg/query-service/app/logs/v4/resource_query_builder_test.go index e315f739a3..0657bcc9b3 100644 --- a/pkg/query-service/app/logs/v4/resource_query_builder_test.go +++ b/pkg/query-service/app/logs/v4/resource_query_builder_test.go @@ -53,7 +53,7 @@ func Test_buildResourceFilter(t *testing.T) { op: v3.FilterOperatorContains, value: "Application%_", }, - want: `simpleJSONExtractString(labels, 'service.name') LIKE '%Application\%\_%'`, + want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE '%application\%\_%'`, }, { name: "test eq", @@ -83,7 +83,7 @@ func Test_buildResourceFilter(t *testing.T) { op: v3.FilterOperatorLike, value: "Application%_", }, - want: `simpleJSONExtractString(labels, 'service.name') LIKE 'Application%_'`, + want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE 'application%_'`, }, } for _, tt := range tests { @@ -170,7 +170,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorContains, value: "application", }, - want: `labels like '%service.name%application%'`, + want: `lower(labels) like '%service.name%application%'`, }, { name: "test not contains", @@ -179,7 +179,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorNotContains, value: "application", }, - want: `labels not like '%service.name%application%'`, + want: `lower(labels) not like '%service.name%application%'`, }, { name: "test contains with % and _", @@ -188,7 
+188,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorNotContains, value: "application%_test", }, - want: `labels not like '%service.name%application\%\_test%'`, + want: `lower(labels) not like '%service.name%application\%\_test%'`, }, { name: "test like with % and _", @@ -197,7 +197,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorLike, value: "application%_test", }, - want: `labels like '%service.name%application%_test%'`, + want: `lower(labels) like '%service.name%application%_test%'`, }, { name: "test not regex", @@ -206,7 +206,7 @@ func Test_buildResourceIndexFilter(t *testing.T) { op: v3.FilterOperatorNotRegex, value: ".*", }, - want: `labels not like '%service.name%'`, + want: ``, }, { name: "test in", @@ -318,8 +318,8 @@ func Test_buildResourceFiltersFromFilterItems(t *testing.T) { want: []string{ "simpleJSONExtractString(labels, 'service.name') = 'test'", "labels like '%service.name%test%'", - "simpleJSONExtractString(labels, 'namespace') LIKE '%test1%'", - "labels like '%namespace%test1%'", + "simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%'", + "lower(labels) like '%namespace%test1%'", }, wantErr: false, }, @@ -480,7 +480,7 @@ func Test_buildResourceSubQuery(t *testing.T) { want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " + "(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " + "simpleJSONExtractString(labels, 'service.name') = 'test' AND labels like '%service.name%test%' " + - "AND simpleJSONExtractString(labels, 'namespace') LIKE '%test1%' AND labels like '%namespace%test1%' " + + "AND simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%' AND lower(labels) like '%namespace%test1%' " + "AND (simpleJSONHas(labels, 'cluster.name') AND labels like '%cluster.name%') AND " + "( (simpleJSONHas(labels, 'host.name') AND labels like '%host.name%') ))", wantErr: false,