Skip to content

Commit

Permalink
feat(bigquery): store total rows when profiling
Browse files (browse the repository at this point in the history)
  • Loading branch information
ravisuhag committed Jun 5, 2024
1 parent c079904 commit 7feb3c7
Show file tree
Hide file tree
Showing 5 changed files with 417 additions and 443 deletions.
2 changes: 1 addition & 1 deletion plugins/extractors/bigquery/bigquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ func (e *Extractor) buildAsset(ctx context.Context, t *bigquery.Table, md *bigqu
tableFQN := t.FullyQualifiedName()
tableURN := plugins.BigQueryURN(t.ProjectID, t.DatasetID, t.TableID)

tableProfile := e.buildTableProfile(tableURN, tableStats)
tableProfile := e.buildTableProfile(tableURN, tableStats, md)
var partitionField string
partitionData := make(map[string]interface{})
if md.TimePartitioning != nil {
Expand Down
8 changes: 4 additions & 4 deletions plugins/extractors/bigquery/profile.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package bigquery

import (
"cloud.google.com/go/bigquery"
v1beta2 "github.com/raystack/meteor/models/raystack/assets/v1beta2"
"github.com/raystack/meteor/plugins/extractors/bigquery/auditlog"
)

func (e *Extractor) buildTableProfile(tableURN string, tableStats *auditlog.TableStats) (tp *v1beta2.TableProfile) {
func (e *Extractor) buildTableProfile(tableURN string, tableStats *auditlog.TableStats, md *bigquery.TableMetadata) *v1beta2.TableProfile {
var tableUsage int64
var commonJoins []*v1beta2.TableCommonJoin
var filterConditions []string
Expand Down Expand Up @@ -37,11 +38,10 @@ func (e *Extractor) buildTableProfile(tableURN string, tableStats *auditlog.Tabl
}
}

tp = &v1beta2.TableProfile{
return &v1beta2.TableProfile{
UsageCount: tableUsage,
CommonJoins: commonJoins,
Filters: filterConditions,
TotalRows: int64(md.NumRows),
}

return
}
10 changes: 7 additions & 3 deletions plugins/extractors/bigquery/profile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package bigquery
import (
"testing"

"cloud.google.com/go/bigquery"
v1beta2 "github.com/raystack/meteor/models/raystack/assets/v1beta2"
"github.com/raystack/meteor/plugins"
"github.com/raystack/meteor/plugins/extractors/bigquery/auditlog"
Expand All @@ -22,7 +23,7 @@ func TestBuildTableProfile(t *testing.T) {
},
}

tp := extr.buildTableProfile(tableURN, tableStats)
tp := extr.buildTableProfile(tableURN, tableStats, &bigquery.TableMetadata{})

assert.Empty(t, tp.UsageCount)
assert.Empty(t, tp.CommonJoins)
Expand All @@ -35,7 +36,7 @@ func TestBuildTableProfile(t *testing.T) {
},
}

tp := extr.buildTableProfile(tableURN, nil)
tp := extr.buildTableProfile(tableURN, nil, &bigquery.TableMetadata{})

assert.Empty(t, tp.UsageCount)
assert.Empty(t, tp.CommonJoins)
Expand Down Expand Up @@ -77,7 +78,9 @@ func TestBuildTableProfile(t *testing.T) {
},
}

tp := extr.buildTableProfile(tableURN, tableStats)
tp := extr.buildTableProfile(tableURN, tableStats, &bigquery.TableMetadata{
NumRows: 42,
})

assert.EqualValues(t, 5, tp.UsageCount)
assert.Contains(t, tp.CommonJoins, &v1beta2.TableCommonJoin{
Expand All @@ -94,5 +97,6 @@ func TestBuildTableProfile(t *testing.T) {
Conditions: []string{"ON t1.somefield = t2.anotherfield"},
})
assert.Contains(t, tp.Filters, "WHERE t1.somefield = 'somevalue'")
assert.Equal(t, tp.TotalRows, int64(42))
})
}
Loading

0 comments on commit 7feb3c7

Please sign in to comment.