Skip to content

Commit

Permalink
schema fingerprint exposed as unsigned 64-bit integer
Browse files Browse the repository at this point in the history
All operations on Rabin values use unsigned 64-bit integers, but the
previous code exposed them as int64 values. That led to casting values
back and forth and, in the long term, possibly to errors, because some
values cannot be cast without loss of information.
  • Loading branch information
Karrick S. McDermott committed Jun 12, 2019
1 parent 33d54fe commit 6b190fa
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 68 deletions.
14 changes: 3 additions & 11 deletions codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ type Codec struct {
nativeFromBinary func([]byte) (interface{}, []byte, error)
textualFromNative func([]byte, interface{}) ([]byte, error)

rabin uint64
Rabin uint64
}

// NewCodec returns a Codec used to translate between a byte slice of either
Expand Down Expand Up @@ -105,9 +105,9 @@ func NewCodec(schemaSpecification string) (*Codec, error) {
return nil, err // should not get here because schema was validated above
}

c.rabin = rabin([]byte(c.schemaCanonical))
c.Rabin = rabin([]byte(c.schemaCanonical))
c.soeHeader = []byte{0xC3, 0x01, 0, 0, 0, 0, 0, 0, 0, 0}
binary.LittleEndian.PutUint64(c.soeHeader[2:], c.rabin)
binary.LittleEndian.PutUint64(c.soeHeader[2:], c.Rabin)

c.schemaOriginal = schemaSpecification
return c, nil
Expand Down Expand Up @@ -458,14 +458,6 @@ func (c *Codec) CanonicalSchema() string {
return c.schemaCanonical
}

// SchemaCRC64Avro returns the Rabin fingerprint of the codec's canonical
// schema as a signed 64-bit integer.
//
// The fingerprint is held internally as a uint64. The conversion to int64
// keeps the same 64 bits, so any fingerprint whose most significant bit is set
// is reported as a negative number.
func (c *Codec) SchemaCRC64Avro() int64 {
// Must perform the bitwise calculations using unsigned 64-bit integer math,
// but the Avro code and test files return a signed 64-bit integer, so the
// stored value is converted only here, at the point of return.
return int64(c.rabin)
}

// convert a schema data structure to a codec, prefixing with specified
// namespace
func buildCodec(st map[string]*Codec, enclosingNamespace string, schema interface{}) (*Codec, error) {
Expand Down
108 changes: 54 additions & 54 deletions codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,110 +26,110 @@ func ExampleCodecCanonicalSchema() {
// Output: {"type":"map","values":{"name":"foo","type":"enum","symbols":["alpha","bravo"]}}
}

func TestCodecSchemaCRC64Avro(t *testing.T) {
func TestCodecRabin(t *testing.T) {
cases := []struct {
Schema string
Fingerprint int64
Schema string
Rabin uint64
}{
{
Schema: `"null"`,
Fingerprint: 7195948357588979594,
Schema: `"null"`,
Rabin: 0x63dd24e7cc258f8a,
},
{
Schema: `"boolean"`,
Fingerprint: -6970731678124411036,
Schema: `"boolean"`,
Rabin: 0x9f42fc78a4d4f764,
},
{
Schema: `"int"`,
Fingerprint: 8247732601305521295,
Schema: `"int"`,
Rabin: 0x7275d51a3f395c8f,
},
{
Schema: `"long"`,
Fingerprint: -3434872931120570953,
Schema: `"long"`,
Rabin: 0xd054e14493f41db7,
},
{
Schema: `"float"`,
Fingerprint: 5583340709985441680,
Schema: `"float"`,
Rabin: 0x4d7c02cb3ea8d790,
},
{
Schema: `"double"`,
Fingerprint: -8181574048448539266,
Schema: `"double"`,
Rabin: 0x8e7535c032ab957e,
},
{
Schema: `"bytes"`,
Fingerprint: 5746618253357095269,
Schema: `"bytes"`,
Rabin: 0x4fc016dac3201965,
},
{
Schema: `"string"`,
Fingerprint: -8142146995180207161,
Schema: `"string"`,
Rabin: 0x8f014872634503c7,
},
{
Schema: `[ "int" ]`,
Fingerprint: -5232228896498058493,
Schema: `[ "int" ]`,
Rabin: 0xb763638a48b2fb03,
},
{
Schema: `[ "int" , {"type":"boolean"} ]`,
Fingerprint: 5392556393470105090,
Schema: `[ "int" , {"type":"boolean"} ]`,
Rabin: 0x4ad63578080c1602,
},
{
Schema: `{"fields":[], "type":"record", "name":"foo"}`,
Fingerprint: -4824392279771201922,
Schema: `{"fields":[], "type":"record", "name":"foo"}`,
Rabin: 0xbd0c50c84319be7e,
},
{
Schema: `{"fields":[], "type":"record", "name":"foo", "namespace":"x.y"}`,
Fingerprint: 5916914534497305771,
Schema: `{"fields":[], "type":"record", "name":"foo", "namespace":"x.y"}`,
Rabin: 0x521d1a6b830ec4ab,
},
{
Schema: `{"fields":[], "type":"record", "name":"a.b.foo", "namespace":"x.y"}`,
Fingerprint: -4616218487480524110,
Schema: `{"fields":[], "type":"record", "name":"a.b.foo", "namespace":"x.y"}`,
Rabin: 0xbfefe5be5021e2b2,
},
{
Schema: `{"fields":[], "type":"record", "name":"foo", "doc":"Useful info"}`,
Fingerprint: -4824392279771201922,
Schema: `{"fields":[], "type":"record", "name":"foo", "doc":"Useful info"}`,
Rabin: 0xbd0c50c84319be7e,
},
{
Schema: `{"fields":[], "type":"record", "name":"foo", "aliases":["foo","bar"]}`,
Fingerprint: -4824392279771201922,
Schema: `{"fields":[], "type":"record", "name":"foo", "aliases":["foo","bar"]}`,
Rabin: 0xbd0c50c84319be7e,
},
{
Schema: `{"fields":[], "type":"record", "name":"foo", "doc":"foo", "aliases":["foo","bar"]}`,
Fingerprint: -4824392279771201922,
Schema: `{"fields":[], "type":"record", "name":"foo", "doc":"foo", "aliases":["foo","bar"]}`,
Rabin: 0xbd0c50c84319be7e,
},
{
Schema: `{"fields":[{"type":{"type":"boolean"}, "name":"f1"}], "type":"record", "name":"foo"}`,
Fingerprint: 7843277075252814651,
Schema: `{"fields":[{"type":{"type":"boolean"}, "name":"f1"}], "type":"record", "name":"foo"}`,
Rabin: 0x6cd8eaf1c968a33b,
},
{
Schema: `{ "fields":[{"type":"boolean", "aliases":[], "name":"f1", "default":true}, {"order":"descending","name":"f2","doc":"Hello","type":"int"}], "type":"record", "name":"foo"}`,
Fingerprint: -4860222112080293046,
Schema: `{ "fields":[{"type":"boolean", "aliases":[], "name":"f1", "default":true}, {"order":"descending","name":"f2","doc":"Hello","type":"int"}], "type":"record", "name":"foo"}`,
Rabin: 0xbc8d05bd57f4934a,
},
{
Schema: `{"type":"enum", "name":"foo", "symbols":["A1"]}`,
Fingerprint: -6342190197741309591,
Schema: `{"type":"enum", "name":"foo", "symbols":["A1"]}`,
Rabin: 0xa7fc039e15aa3169,
},
{
Schema: `{"namespace":"x.y.z", "type":"enum", "name":"foo", "doc":"foo bar", "symbols":["A1", "A2"]}`,
Fingerprint: -4448647247586288245,
Schema: `{"namespace":"x.y.z", "type":"enum", "name":"foo", "doc":"foo bar", "symbols":["A1", "A2"]}`,
Rabin: 0xc2433ae5f4999d8b,
},
{
Schema: `{"name":"foo","type":"fixed","size":15}`,
Fingerprint: 1756455273707447556,
Schema: `{"name":"foo","type":"fixed","size":15}`,
Rabin: 0x18602ec3ed31a504,
},
{
Schema: `{"namespace":"x.y.z", "type":"fixed", "name":"foo", "doc":"foo bar", "size":32}`,
Fingerprint: -3064184465700546786,
Schema: `{"namespace":"x.y.z", "type":"fixed", "name":"foo", "doc":"foo bar", "size":32}`,
Rabin: 0xd579d47693a6171e,
},
{
Schema: `{ "items":{"type":"null"}, "type":"array"}`,
Fingerprint: -589620603366471059,
Schema: `{ "items":{"type":"null"}, "type":"array"}`,
Rabin: 0xf7d13f2f68170a6d,
},
{
Schema: `{ "values":"string", "type":"map"}`,
Fingerprint: -8732877298790414990,
Schema: `{ "values":"string", "type":"map"}`,
Rabin: 0x86ce965d92864572,
},
{
Schema: `{"name":"PigValue","type":"record", "fields":[{"name":"value", "type":["null", "int", "long", "PigValue"]}]}`,
Fingerprint: -1759257747318642341,
Schema: `{"name":"PigValue","type":"record", "fields":[{"name":"value", "type":["null", "int", "long", "PigValue"]}]}`,
Rabin: 0xe795dc6656b7e95b,
},
}

Expand All @@ -138,7 +138,7 @@ func TestCodecSchemaCRC64Avro(t *testing.T) {
if err != nil {
t.Fatalf("CASE: %s; cannot create code: %s", c.Schema, err)
}
if got, want := codec.SchemaCRC64Avro(), c.Fingerprint; got != want {
if got, want := codec.Rabin, c.Rabin; got != want {
t.Errorf("CASE: %s; GOT: %#x; WANT: %#x", c.Schema, got, want)
}
}
Expand Down
6 changes: 3 additions & 3 deletions rabin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ import (
"testing"
)

func TestCrc64Avro(t *testing.T) {
func TestRabin(t *testing.T) {
t.Run("int", func(t *testing.T) {
if got, want := rabin([]byte(`"int"`)), uint64(0x7275d51a3f395c8f); got != want {
t.Errorf("GOT: %x; WANT: %x", got, want)
t.Errorf("GOT: %#x; WANT: %#x", got, want)
}
})

t.Run("string", func(t *testing.T) {
if got, want := rabin([]byte(`"string"`)), uint64(0x8f014872634503c7); got != want {
t.Errorf("GOT: %x; WANT: %x", got, want)
t.Errorf("GOT: %#x; WANT: %#x", got, want)
}
})
}

0 comments on commit 6b190fa

Please sign in to comment.