Skip to content

Commit

Permalink
Ability to differentiate between nested fields and those with .
Browse files Browse the repository at this point in the history
+ Reference: https://issues.couchbase.com/browse/MB-55699

+ bleve uses "." as the path separator for nested field names.
  This can conflict with fields whose names themselves contain
  "." - which is allowed.

+ The proposal here is to decorate field names under the hood
  by wrapping them in backticks to preserve their true meaning.
  So for example ..
  - ``` `a.b` ``` is a single unnested field name
  - ``` `a`.`b` ``` is a nested field name with ``` `b` ``` being a child field of ``` `a` ```

+ Here're the ramifications with this approach:

    - While indexing, users can still specify field names as
      they appear in their JSON documents. Underneath the hood
      however, these field names will now be registered with
      their decorated versions to avoid ambiguity.

    - While querying, users can still specify fields as they
      expect to see them within their JSON documents. Note that
      it will be the user's responsibility to differentiate
      between nested field names and others.
        For example, consider an index mapping over this kind of
        a document:
        ```
            {
                "x": {
                    "y": "1"
                },
                "x.y": "2"
            }
        ```
        The searches that'd work here are ..
            1. ```{"field": "`x.y`", "match": "2"}```
            2. ```{"field": "x.y", "match": "1"}```
            3. ```{"field": "`x`.`y`", "match": "1"}```

    - Users will also be responsible for specifying sort keys,
      facet fields, highlight fields accordingly in their search
      requests. For example ..
      ```
        x        : interpreted as `x`
        `x`      : interpreted as `x`
        x.y      : interpreted as `x`.`y`
        `x.y`    : interpreted as `x.y`
        `x`.`y`. : interpreted as `x`.`y`
      ```

    - In the search response, users will now see decorated
      names for fragments, locations and facets to avoid any
      ambiguous interpretation of the field names.
  • Loading branch information
abhinavdangeti committed Feb 24, 2023
1 parent a8beab1 commit 2b2d531
Show file tree
Hide file tree
Showing 36 changed files with 300 additions and 142 deletions.
6 changes: 3 additions & 3 deletions examples_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ func ExampleNewHighlight() {
panic(err)
}

fmt.Println(searchResults.Hits[0].Fragments["Name"][0])
fmt.Println(searchResults.Hits[0].Fragments["`Name`"][0])
// Output:
// great <mark>nameless</mark> one
}
Expand All @@ -335,7 +335,7 @@ func ExampleNewHighlightWithStyle() {
panic(err)
}

fmt.Println(searchResults.Hits[0].Fragments["Name"][0])
fmt.Println(searchResults.Hits[0].Fragments["`Name`"][0])
// Output:
// great nameless one
}
Expand Down Expand Up @@ -446,7 +446,7 @@ func ExampleSearchRequest_SortByCustom() {
searchRequest := NewSearchRequest(query)
searchRequest.SortByCustom(search.SortOrder{
&search.SortField{
Field: "Age",
Field: "`Age`",
Missing: search.SortFieldMissingFirst,
},
&search.SortDocID{},
Expand Down
14 changes: 7 additions & 7 deletions http/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,9 @@ func TestHandlers(t *testing.T) {
},
Status: http.StatusOK,
ResponseMatch: map[string]bool{
`"id":"a"`: true,
`"body":"test"`: true,
`"name":"a"`: true,
"\"id\":\"a\"": true,
"\"`body`\":\"test\"": true,
"\"`name`\":\"a\"": true,
},
},
{
Expand Down Expand Up @@ -483,10 +483,10 @@ func TestHandlers(t *testing.T) {
},
Status: http.StatusOK,
ResponseMatch: map[string]bool{
`"fields":`: true,
`"name"`: true,
`"body"`: true,
`"_all"`: true,
"\"fields\"": true,
"\"`name`\"": true,
"\"`body`\"": true,
"\"_all\"": true,
},
},
{
Expand Down
8 changes: 6 additions & 2 deletions index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/blevesearch/bleve/v2/search/collector"
"github.com/blevesearch/bleve/v2/search/facet"
"github.com/blevesearch/bleve/v2/search/highlight"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)

Expand Down Expand Up @@ -631,7 +632,7 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
fieldsToLoad := deDuplicate(req.Fields)
for _, f := range fieldsToLoad {
doc.VisitFields(func(docF index.Field) {
if f == "*" || docF.Name() == f {
if f == "*" || docF.Name() == util.CleansePath(f) {
var value interface{}
switch docF := docF.(type) {
case index.TextField:
Expand Down Expand Up @@ -683,7 +684,7 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
}
}
for _, hf := range highlightFields {
highlighter.BestFragmentsInField(hit, doc, hf, 1)
highlighter.BestFragmentsInField(hit, doc, util.CleansePath(hf), 1)
}
}
} else if doc == nil {
Expand Down Expand Up @@ -737,6 +738,7 @@ func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) {
return nil, err
}

field = util.CleansePath(field)
fieldDict, err := indexReader.FieldDict(field)
if err != nil {
i.mutex.RUnlock()
Expand Down Expand Up @@ -764,6 +766,7 @@ func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byt
return nil, err
}

field = util.CleansePath(field)
fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm)
if err != nil {
i.mutex.RUnlock()
Expand Down Expand Up @@ -791,6 +794,7 @@ func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.Fiel
return nil, err
}

field = util.CleansePath(field)
fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix)
if err != nil {
i.mutex.RUnlock()
Expand Down
117 changes: 62 additions & 55 deletions index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"os"
"path/filepath"
"reflect"
"sort"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -199,7 +198,7 @@ func TestCrud(t *testing.T) {
}
foundNameField := false
doc.VisitFields(func(field index.Field) {
if field.Name() == "name" && string(field.Value()) == "marty" {
if field.Name() == "`name`" && string(field.Value()) == "marty" {
foundNameField = true
}
})
Expand All @@ -212,9 +211,9 @@ func TestCrud(t *testing.T) {
t.Fatal(err)
}
expectedFields := map[string]bool{
"_all": false,
"name": false,
"desc": false,
"_all": false,
"`name`": false,
"`desc`": false,
}
if len(fields) < len(expectedFields) {
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
Expand Down Expand Up @@ -399,10 +398,11 @@ func TestBytesRead(t *testing.T) {
if err != nil {
t.Error(err)
}

stats, _ := idx.StatsMap()["index"].(map[string]interface{})
prevBytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64)
if prevBytesRead != 32349 && res.BytesRead == prevBytesRead {
t.Fatalf("expected bytes read for query string 32349, got %v",
if prevBytesRead != 32475 && res.BytesRead == prevBytesRead {
t.Fatalf("expected bytes read for query string 32475, got %v",
prevBytesRead)
}

Expand Down Expand Up @@ -580,8 +580,8 @@ func TestBytesReadStored(t *testing.T) {

stats, _ := idx.StatsMap()["index"].(map[string]interface{})
bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64)
if bytesRead != 25928 && bytesRead == res.BytesRead {
t.Fatalf("expected the bytes read stat to be around 25928, got %v", bytesRead)
if bytesRead != 26054 && bytesRead == res.BytesRead {
t.Fatalf("expected the bytes read stat to be around 26054, got %v", bytesRead)
}
prevBytesRead := bytesRead

Expand Down Expand Up @@ -651,8 +651,8 @@ func TestBytesReadStored(t *testing.T) {

stats, _ = idx1.StatsMap()["index"].(map[string]interface{})
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
if bytesRead != 18114 && bytesRead == res.BytesRead {
t.Fatalf("expected the bytes read stat to be around 18114, got %v", bytesRead)
if bytesRead != 18240 && bytesRead == res.BytesRead {
t.Fatalf("expected the bytes read stat to be around 18240, got %v", bytesRead)
}
prevBytesRead = bytesRead

Expand Down Expand Up @@ -920,17 +920,17 @@ func TestStoredFieldPreserved(t *testing.T) {
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].Fields["name"] != "Marty" {
t.Errorf("expected 'Marty' got '%s'", res.Hits[0].Fields["name"])
if res.Hits[0].Fields["`name`"] != "Marty" {
t.Errorf("expected 'Marty' got '%s'", res.Hits[0].Fields["`name`"])
}
if res.Hits[0].Fields["desc"] != "GopherCON India" {
t.Errorf("expected 'GopherCON India' got '%s'", res.Hits[0].Fields["desc"])
if res.Hits[0].Fields["`desc`"] != "GopherCON India" {
t.Errorf("expected 'GopherCON India' got '%s'", res.Hits[0].Fields["`desc`"])
}
if res.Hits[0].Fields["num"] != float64(1) {
t.Errorf("expected '1' got '%v'", res.Hits[0].Fields["num"])
if res.Hits[0].Fields["`num`"] != float64(1) {
t.Errorf("expected '1' got '%v'", res.Hits[0].Fields["`num`"])
}
if res.Hits[0].Fields["bool"] != true {
t.Errorf("expected 'true' got '%v'", res.Hits[0].Fields["bool"])
if res.Hits[0].Fields["`bool`"] != true {
t.Errorf("expected 'true' got '%v'", res.Hits[0].Fields["`bool`"])
}
}

Expand Down Expand Up @@ -1185,7 +1185,7 @@ func TestSortMatchSearch(t *testing.T) {
}
prev := ""
for _, hit := range sr.Hits {
val := hit.Fields["Day"].(string)
val := hit.Fields["`Day`"].(string)
if prev > val {
t.Errorf("Hits must be sorted by 'Day'. Found '%s' before '%s'", prev, val)
}
Expand Down Expand Up @@ -1533,14 +1533,14 @@ func TestTermVectorArrayPositions(t *testing.T) {
if results.Total != 1 {
t.Fatalf("expected 1 result, got %d", results.Total)
}
if len(results.Hits[0].Locations["Messages"]["second"]) < 1 {
if len(results.Hits[0].Locations["`Messages`"]["second"]) < 1 {
t.Fatalf("expected at least one location")
}
if len(results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions) < 1 {
if len(results.Hits[0].Locations["`Messages`"]["second"][0].ArrayPositions) < 1 {
t.Fatalf("expected at least one location array position")
}
if results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0] != 1 {
t.Fatalf("expected array position 1, got %d", results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0])
if results.Hits[0].Locations["`Messages`"]["second"][0].ArrayPositions[0] != 1 {
t.Fatalf("expected array position 1, got %d", results.Hits[0].Locations["`Messages`"]["second"][0].ArrayPositions[0])
}

// repeat search for this document in Messages field
Expand All @@ -1555,14 +1555,14 @@ func TestTermVectorArrayPositions(t *testing.T) {
if results.Total != 1 {
t.Fatalf("expected 1 result, got %d", results.Total)
}
if len(results.Hits[0].Locations["Messages"]["third"]) < 1 {
if len(results.Hits[0].Locations["`Messages`"]["third"]) < 1 {
t.Fatalf("expected at least one location")
}
if len(results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions) < 1 {
if len(results.Hits[0].Locations["`Messages`"]["third"][0].ArrayPositions) < 1 {
t.Fatalf("expected at least one location array position")
}
if results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0] != 2 {
t.Fatalf("expected array position 2, got %d", results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0])
if results.Hits[0].Locations["`Messages`"]["third"][0].ArrayPositions[0] != 2 {
t.Fatalf("expected array position 2, got %d", results.Hits[0].Locations["`Messages`"]["third"][0].ArrayPositions[0])
}

err = index.Close()
Expand Down Expand Up @@ -1611,14 +1611,21 @@ func TestDocumentStaticMapping(t *testing.T) {
if err != nil {
t.Fatal(err)
}
sort.Strings(fields)
expectedFields := []string{"Date", "Numeric", "Text", "_all"}
expectedFields := map[string]bool{
"`Date`": false,
"`Numeric`": false,
"`Text`": false,
"_all": false,
}
if len(fields) < len(expectedFields) {
t.Fatalf("invalid field count: %d", len(fields))
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
}
for _, f := range fields {
expectedFields[f] = true
}
for i, expected := range expectedFields {
if expected != fields[i] {
t.Fatalf("unexpected field[%d]: %s", i, fields[i])
for ef, efp := range expectedFields {
if !efp {
t.Errorf("field %s is missing", ef)
}
}

Expand Down Expand Up @@ -1791,13 +1798,13 @@ func TestDocumentFieldArrayPositionsBug295(t *testing.T) {
if results.Total != 1 {
t.Fatalf("expected 1 result, got %d", results.Total)
}
if len(results.Hits[0].Locations["Messages"]["bleve"]) != 2 {
t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["Messages"]["bleve"]))
if len(results.Hits[0].Locations["`Messages`"]["bleve"]) != 2 {
t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["`Messages`"]["bleve"]))
}
if results.Hits[0].Locations["Messages"]["bleve"][0].ArrayPositions[0] != 0 {
if results.Hits[0].Locations["`Messages`"]["bleve"][0].ArrayPositions[0] != 0 {
t.Errorf("expected array position to be 0")
}
if results.Hits[0].Locations["Messages"]["bleve"][1].ArrayPositions[0] != 1 {
if results.Hits[0].Locations["`Messages`"]["bleve"][1].ArrayPositions[0] != 1 {
t.Errorf("expected array position to be 1")
}

Expand All @@ -1812,13 +1819,13 @@ func TestDocumentFieldArrayPositionsBug295(t *testing.T) {
if results.Total != 1 {
t.Fatalf("expected 1 result, got %d", results.Total)
}
if len(results.Hits[0].Locations["Messages"]["bleve"]) != 2 {
t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["Messages"]["bleve"]))
if len(results.Hits[0].Locations["`Messages`"]["bleve"]) != 2 {
t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["`Messages`"]["bleve"]))
}
if results.Hits[0].Locations["Messages"]["bleve"][0].ArrayPositions[0] != 0 {
if results.Hits[0].Locations["`Messages`"]["bleve"][0].ArrayPositions[0] != 0 {
t.Errorf("expected array position to be 0")
}
if results.Hits[0].Locations["Messages"]["bleve"][1].ArrayPositions[0] != 1 {
if results.Hits[0].Locations["`Messages`"]["bleve"][1].ArrayPositions[0] != 1 {
t.Errorf("expected array position to be 1")
}

Expand Down Expand Up @@ -2389,7 +2396,7 @@ func TestBatchMerge(t *testing.T) {

foundNameField := false
doc.VisitFields(func(field index.Field) {
if field.Name() == "name" && string(field.Value()) == "blahblah" {
if field.Name() == "`name`" && string(field.Value()) == "blahblah" {
foundNameField = true
}
})
Expand All @@ -2403,10 +2410,10 @@ func TestBatchMerge(t *testing.T) {
}

expectedFields := map[string]bool{
"_all": false,
"name": false,
"desc": false,
"country": false,
"_all": false,
"`name`": false,
"`desc`": false,
"`country`": false,
}
if len(fields) < len(expectedFields) {
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
Expand Down Expand Up @@ -2837,7 +2844,7 @@ func TestCopyIndex(t *testing.T) {
}
foundNameField := false
doc.VisitFields(func(field index.Field) {
if field.Name() == "name" && string(field.Value()) == "tester" {
if field.Name() == "`name`" && string(field.Value()) == "tester" {
foundNameField = true
}
})
Expand All @@ -2850,9 +2857,9 @@ func TestCopyIndex(t *testing.T) {
t.Fatal(err)
}
expectedFields := map[string]bool{
"_all": false,
"name": false,
"desc": false,
"_all": false,
"`name`": false,
"`desc`": false,
}
if len(fields) < len(expectedFields) {
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
Expand Down Expand Up @@ -2906,7 +2913,7 @@ func TestCopyIndex(t *testing.T) {
}
copyFoundNameField := false
copyDoc.VisitFields(func(field index.Field) {
if field.Name() == "name" && string(field.Value()) == "tester" {
if field.Name() == "`name`" && string(field.Value()) == "tester" {
copyFoundNameField = true
}
})
Expand All @@ -2919,9 +2926,9 @@ func TestCopyIndex(t *testing.T) {
t.Fatal(err)
}
copyExpectedFields := map[string]bool{
"_all": false,
"name": false,
"desc": false,
"_all": false,
"`name`": false,
"`desc`": false,
}
if len(copyFields) < len(copyExpectedFields) {
t.Fatalf("expected %d fields got %d", len(copyExpectedFields), len(copyFields))
Expand Down
Loading

0 comments on commit 2b2d531

Please sign in to comment.