Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: cheaper query for fetching log attribute values for filter suggestions #5989

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 55 additions & 34 deletions pkg/query-service/app/clickhouseReader/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -4098,44 +4098,65 @@ func (r *ClickHouseReader) GetQBFilterSuggestionsForLogs(
func (r *ClickHouseReader) getValuesForLogAttributes(
ctx context.Context, attributes []v3.AttributeKey, limit uint64,
) ([][]any, *model.ApiError) {
// query top `limit` distinct values seen for `tagKey`s of interest
// ordered by timestamp when the value was seen

// we added the settings max_rows_to_group_by=100, group_by_overflow_mode = 'break'
// to avoid query from taking up all the resources when value is high cardinality.
query := fmt.Sprintf(
`
select tagKey, stringTagValue, int64TagValue, float64TagValue
from (
select
tagKey,
stringTagValue,
int64TagValue,
float64TagValue,
row_number() over (partition by tagKey order by ts desc) as rank
from (
select
tagKey,
stringTagValue,
int64TagValue,
float64TagValue,
max(timestamp) as ts
from %s.%s
where tagKey in $1
group by (tagKey, stringTagValue, int64TagValue, float64TagValue) SETTINGS max_rows_to_group_by = 100, group_by_overflow_mode = 'break', max_threads=4
)
/*
The query used here needs to be as cheap as possible, and while uncommon, it is possible for
a tag to have 100s of millions of values (eg: message, request_id)

Construct a query to UNION the result of querying first `limit` values for each attribute. For example:
```
select * from (
(
select tagKey, stringTagValue, int64TagValue, float64TagValue
from signoz_logs.distributed_tag_attributes
where tagKey = $1 and (
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
)
limit 2
) UNION DISTINCT (
select tagKey, stringTagValue, int64TagValue, float64TagValue
from signoz_logs.distributed_tag_attributes
where tagKey = $2 and (
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
)
limit 2
)
) settings max_threads=2
```
Since tag_attributes table uses ReplacingMergeTree, the values would be distinct and no order by
is being used to ensure the `limit` clause minimizes the amount of data scanned.

This query scanned ~30k rows per attribute on fiscalnote-v2 for attributes like `message` and `time`
that had >~110M values each
*/

if len(attributes) > 10 {
zap.L().Error(
"log attribute values requested for too many attributes. This can lead to slow and costly queries",
zap.Int("count", len(attributes)),
)
where rank <= %d
`,
r.logsDB, r.logsTagAttributeTable, limit,
)
attributes = attributes[:10]
}

attribNames := []string{}
for _, attrib := range attributes {
attribNames = append(attribNames, attrib.Key)
tagQueries := []string{}
tagKeyQueryArgs := []any{}
for idx, attrib := range attributes {
tagQueries = append(tagQueries, fmt.Sprintf(`(
select tagKey, stringTagValue, int64TagValue, float64TagValue
from %s.%s
where tagKey = $%d and (
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
)
limit %d
)`, r.logsDB, r.logsTagAttributeTable, idx+1, limit))

tagKeyQueryArgs = append(tagKeyQueryArgs, attrib.Key)
}

rows, err := r.db.Query(ctx, query, attribNames)
query := fmt.Sprintf(`select * from (
%s
) settings max_threads=2`, strings.Join(tagQueries, " UNION DISTINCT "))

rows, err := r.db.Query(ctx, query, tagKeyQueryArgs...)
if err != nil {
zap.L().Error("couldn't query attrib values for suggestions", zap.Error(err))
return nil, model.InternalError(fmt.Errorf(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse(
{Type: "Nullable(Float64)", Name: "float64TagValue"},
}

expectedAttribKeysInQuery := []string{}
expectedAttribKeysInQuery := []any{}
mockResultRows := [][]any{}
for idx, attrib := range expectedAttribs {
expectedAttribKeysInQuery = append(expectedAttribKeysInQuery, attrib.Key)
Expand All @@ -198,8 +198,8 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse(
}

tb.mockClickhouse.ExpectQuery(
"select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey.*in.*",
).WithArgs(expectedAttribKeysInQuery).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows))
"select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey",
).WithArgs(expectedAttribKeysInQuery...).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows))
}

type FilterSuggestionsTestBed struct {
Expand Down
Loading