Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ChunkMatch.BestLineMatch to return the best-scoring line #884

Merged
merged 2 commits into from
Jan 8, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions api.go
Original file line number Diff line number Diff line change
@@ -161,7 +161,13 @@ type ChunkMatch struct {
// beginning of a line (Column will always be 1).
ContentStart Location

// Score is the overall relevance score of this chunk.
Score float64

// BestLineMatch is the line number of the highest-scoring line match in this chunk.
// The line number is relative to the full file and is 1-based. If FileName is true,
// this number will be 0.
BestLineMatch uint32
}

func (cm *ChunkMatch) sizeBytes() (sz uint64) {
30 changes: 16 additions & 14 deletions api_proto.go
Original file line number Diff line number Diff line change
@@ -96,13 +96,14 @@ func ChunkMatchFromProto(p *proto.ChunkMatch) ChunkMatch {
}

return ChunkMatch{
Content: p.GetContent(),
ContentStart: LocationFromProto(p.GetContentStart()),
FileName: p.GetFileName(),
Ranges: ranges,
SymbolInfo: symbols,
Score: p.GetScore(),
DebugScore: p.GetDebugScore(),
Content: p.GetContent(),
ContentStart: LocationFromProto(p.GetContentStart()),
FileName: p.GetFileName(),
Ranges: ranges,
SymbolInfo: symbols,
Score: p.GetScore(),
BestLineMatch: p.GetBestLineMatch(),
DebugScore: p.GetDebugScore(),
}
}

@@ -118,13 +119,14 @@ func (cm *ChunkMatch) ToProto() *proto.ChunkMatch {
}

return &proto.ChunkMatch{
Content: cm.Content,
ContentStart: cm.ContentStart.ToProto(),
FileName: cm.FileName,
Ranges: ranges,
SymbolInfo: symbolInfo,
Score: cm.Score,
DebugScore: cm.DebugScore,
Content: cm.Content,
ContentStart: cm.ContentStart.ToProto(),
FileName: cm.FileName,
Ranges: ranges,
SymbolInfo: symbolInfo,
Score: cm.Score,
BestLineMatch: cm.BestLineMatch,
DebugScore: cm.DebugScore,
}
}

2 changes: 1 addition & 1 deletion api_test.go
Original file line number Diff line number Diff line change
@@ -149,7 +149,7 @@ func TestMatchSize(t *testing.T) {
size: 256,
}, {
v: ChunkMatch{},
size: 112,
size: 120,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran the fieldalignment tool as this test suggests and did not see a regression. Here is the output for api.go, where ChunkMatch lives; there is no mention of ChunkMatch or FileMatch:

/Users/jtibshirani/code/zoekt/api.go:232:16: struct with 136 pointer bytes could be 96
/Users/jtibshirani/code/zoekt/api.go:301:24: struct with 32 pointer bytes could be 8
/Users/jtibshirani/code/zoekt/api.go:503:19: struct with 216 pointer bytes could be 24
/Users/jtibshirani/code/zoekt/api.go:561:17: struct of size 224 could be 208
/Users/jtibshirani/code/zoekt/api.go:753:20: struct with 88 pointer bytes could be 56
/Users/jtibshirani/code/zoekt/api.go:833:27: struct with 16 pointer bytes could be 8
/Users/jtibshirani/code/zoekt/api.go:873:15: struct with 32 pointer bytes could be 16
/Users/jtibshirani/code/zoekt/api.go:929:20: struct of size 88 could be 80

}, {
v: candidateMatch{},
size: 80,
53 changes: 48 additions & 5 deletions build/scoring_test.go
Original file line number Diff line number Diff line change
@@ -27,11 +27,12 @@ import (
)

type scoreCase struct {
fileName string
content []byte
query query.Q
language string
wantScore float64
fileName string
content []byte
query query.Q
language string
wantScore float64
wantBestLineMatch uint32
}

func TestFileNameMatch(t *testing.T) {
@@ -79,6 +80,8 @@ func TestBM25(t *testing.T) {
language: "Java",
// bm25-score: 0.58 <- sum-termFrequencyScore: 14.00, length-ratio: 1.00
wantScore: 0.58,
// line 5: private final int exampleField;
wantBestLineMatch: 5,
}, {
// Matches only on content
fileName: "example.java",
@@ -91,6 +94,8 @@ func TestBM25(t *testing.T) {
language: "Java",
// bm25-score: 1.81 <- sum-termFrequencyScore: 116.00, length-ratio: 1.00
wantScore: 1.81,
// line 3: public class InnerClasses {
wantBestLineMatch: 3,
},
{
// Matches only on filename
@@ -130,6 +135,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word)
wantScore: 6550,
// line 37: public class InnerClass implements InnerInterface<Integer, Integer> {
wantBestLineMatch: 37,
},
{
fileName: "example.java",
@@ -138,6 +145,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word)
wantScore: 7000,
// line 32: public static class InnerStaticClass {
wantBestLineMatch: 32,
},
{
fileName: "example.java",
@@ -146,6 +155,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 900 (Java enum) + 500 (word)
wantScore: 8400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
@@ -154,6 +165,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 800 (Java interface) + 500 (word)
wantScore: 8300,
// line 22: public interface InnerInterface<A, B> {
wantBestLineMatch: 22,
},
{
fileName: "example.java",
@@ -162,6 +175,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 700 (Java method) + 500 (word)
wantScore: 8200,
// line 44: public void innerMethod() {
wantBestLineMatch: 44,
},
{
fileName: "example.java",
@@ -170,6 +185,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 600 (Java field) + 500 (word)
wantScore: 8100,
// line 38: private final int field;
wantBestLineMatch: 38,
},
{
fileName: "example.java",
@@ -178,6 +195,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 500 (Java enum constant) + 500 (word)
wantScore: 8000,
// line 18: B,
wantBestLineMatch: 18,
},
// 2 Atoms (1x content and 1x filename)
{
@@ -187,6 +206,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom)
wantScore: 6800,
// line 5: private final int exampleField;
wantBestLineMatch: 5,
},
// 3 Atoms (2x content, 1x filename)
{
@@ -199,6 +220,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom)
wantScore: 8466,
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
wantBestLineMatch: 54,
},
// 4 Atoms (4x content)
{
@@ -213,6 +236,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom)
wantScore: 8700,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
@@ -221,6 +246,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word)
wantScore: 4750,
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
wantBestLineMatch: 54,
},
{
fileName: "example.java",
@@ -229,6 +256,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (Symbol) + 900 (Java enum) + 500 (word)
wantScore: 8400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
@@ -237,6 +266,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (edge Symbol) + 900 (Java enum) + 500 (word)
wantScore: 6900,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
@@ -245,6 +276,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 4000 (overlap Symbol) + 900 (Java enum) + 500 (word)
wantScore: 5400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
}

@@ -640,6 +673,16 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag
t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].ChunkMatches[0].DebugScore)
}

if c.wantBestLineMatch != 0 {
if len(srs.Files[0].ChunkMatches) == 0 {
t.Fatalf("want BestLineMatch %d, but no chunk matches were returned", c.wantBestLineMatch)
}
chunkMatch := srs.Files[0].ChunkMatches[0]
if chunkMatch.BestLineMatch != c.wantBestLineMatch {
t.Fatalf("want BestLineMatch %d, got %d", c.wantBestLineMatch, chunkMatch.BestLineMatch)
}
}

if got := srs.Files[0].Language; got != c.language {
t.Fatalf("want %s, got %s", c.language, got)
}
Loading