Skip to content

Commit

Permalink
Fix: cheaper query for fetching log attribute values for filter sugge…
Browse files Browse the repository at this point in the history
…stions (#5989)

* chore: change query for fetching multiple log attribs to make sure it is always cheap

* chore: get filter suggestions tests passing
  • Loading branch information
raj-k-singh authored Sep 17, 2024
1 parent 49dd5f2 commit 8c891f0
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 37 deletions.
89 changes: 55 additions & 34 deletions pkg/query-service/app/clickhouseReader/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -4098,44 +4098,65 @@ func (r *ClickHouseReader) GetQBFilterSuggestionsForLogs(
func (r *ClickHouseReader) getValuesForLogAttributes(
ctx context.Context, attributes []v3.AttributeKey, limit uint64,
) ([][]any, *model.ApiError) {
// query top `limit` distinct values seen for `tagKey`s of interest
// ordered by timestamp when the value was seen

// we added the settings max_rows_to_group_by=100, group_by_overflow_mode = 'break'
// to avoid query from taking up all the resources when value is high cardinality.
query := fmt.Sprintf(
`
select tagKey, stringTagValue, int64TagValue, float64TagValue
from (
select
tagKey,
stringTagValue,
int64TagValue,
float64TagValue,
row_number() over (partition by tagKey order by ts desc) as rank
from (
select
tagKey,
stringTagValue,
int64TagValue,
float64TagValue,
max(timestamp) as ts
from %s.%s
where tagKey in $1
group by (tagKey, stringTagValue, int64TagValue, float64TagValue) SETTINGS max_rows_to_group_by = 100, group_by_overflow_mode = 'break', max_threads=4
)
/*
The query used here needs to be as cheap as possible, and while uncommon, it is possible for
a tag to have 100s of millions of values (eg: message, request_id)
Construct a query to UNION the result of querying first `limit` values for each attribute. For example:
```
select * from (
(
select tagKey, stringTagValue, int64TagValue, float64TagValue
from signoz_logs.distributed_tag_attributes
where tagKey = $1 and (
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
)
limit 2
) UNION DISTINCT (
select tagKey, stringTagValue, int64TagValue, float64TagValue
from signoz_logs.distributed_tag_attributes
where tagKey = $2 and (
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
)
limit 2
)
) settings max_threads=2
```
Since tag_attributes table uses ReplacingMergeTree, the values would be distinct and no order by
is being used to ensure the `limit` clause minimizes the amount of data scanned.
This query scanned ~30k rows per attribute on fiscalnote-v2 for attributes like `message` and `time`
that had >~110M values each
*/

if len(attributes) > 10 {
zap.L().Error(
"log attribute values requested for too many attributes. This can lead to slow and costly queries",
zap.Int("count", len(attributes)),
)
where rank <= %d
`,
r.logsDB, r.logsTagAttributeTable, limit,
)
attributes = attributes[:10]
}

attribNames := []string{}
for _, attrib := range attributes {
attribNames = append(attribNames, attrib.Key)
tagQueries := []string{}
tagKeyQueryArgs := []any{}
for idx, attrib := range attributes {
tagQueries = append(tagQueries, fmt.Sprintf(`(
select tagKey, stringTagValue, int64TagValue, float64TagValue
from %s.%s
where tagKey = $%d and (
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
)
limit %d
)`, r.logsDB, r.logsTagAttributeTable, idx+1, limit))

tagKeyQueryArgs = append(tagKeyQueryArgs, attrib.Key)
}

rows, err := r.db.Query(ctx, query, attribNames)
query := fmt.Sprintf(`select * from (
%s
) settings max_threads=2`, strings.Join(tagQueries, " UNION DISTINCT "))

rows, err := r.db.Query(ctx, query, tagKeyQueryArgs...)
if err != nil {
zap.L().Error("couldn't query attrib values for suggestions", zap.Error(err))
return nil, model.InternalError(fmt.Errorf(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse(
{Type: "Nullable(Float64)", Name: "float64TagValue"},
}

expectedAttribKeysInQuery := []string{}
expectedAttribKeysInQuery := []any{}
mockResultRows := [][]any{}
for idx, attrib := range expectedAttribs {
expectedAttribKeysInQuery = append(expectedAttribKeysInQuery, attrib.Key)
Expand All @@ -198,8 +198,8 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse(
}

tb.mockClickhouse.ExpectQuery(
"select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey.*in.*",
).WithArgs(expectedAttribKeysInQuery).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows))
"select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey",
).WithArgs(expectedAttribKeysInQuery...).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows))
}

type FilterSuggestionsTestBed struct {
Expand Down

0 comments on commit 8c891f0

Please sign in to comment.