From c0282ab81a0634f6aec5fc143d6f3b9161dcf809 Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Tue, 29 Mar 2016 10:06:07 +0500 Subject: [PATCH 1/6] Experiments with multiple key paths --- Makefile | 2 +- parser.go | 98 ++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 80 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 352a457..f8bd5bc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ SOURCE = parser.go CONTAINER = jsonparser SOURCE_PATH = /go/src/github.com/buger/jsonparser -BENCHMARK = JsonParser +BENCHMARK = JsonParserSmall BENCHTIME = 5s TEST = . diff --git a/parser.go b/parser.go index 99a3415..4ef3f01 100644 --- a/parser.go +++ b/parser.go @@ -93,12 +93,23 @@ func blockEnd(data []byte, openSym byte, closeSym byte) int { return -1 } -func searchKeys(data []byte, keys ...string) int { +func searchPaths(data []byte, paths ...[]string) int { keyLevel := 0 level := 0 i := 0 ln := len(data) - lk := len(keys) + var pathOffsets []int + + pathsMatched := 0 + if len(paths) > 1 { + pathOffsets = make([]int, len(paths)) + } + maxLen := len(paths[0]) + for _, p := range paths { + if len(p) > maxLen { + maxLen = len(p) + } + } for i < ln { switch data[i] { @@ -112,31 +123,80 @@ func searchKeys(data []byte, keys ...string) int { } i += strEnd keyEnd := i - 1 + key := data[keyBegin:keyEnd] - valueOffset := nextToken(data[i:], true) - if valueOffset == -1 { + colonOffset := nextToken(data[i:], true) + if colonOffset == -1 { return -1 } - i += valueOffset - - if i < ln && - data[i] == ':' && // if string is a Key, and key level match - keyLevel == level-1 && // If key nesting level match current object nested level - keys[level-1] == unsafeBytesToString(data[keyBegin:keyEnd]) { - keyLevel++ - // If we found all keys in path - if keyLevel == lk { - return i + 1 + i += colonOffset + + // If string is a Key + if i < ln && data[i] == ':' && level <= maxLen { + match := false + + if len(paths) > 1 { + // searchMade := false + for pi, p := range paths { + if pathOffsets[pi] != 0 { + continue + } + // searchMade = true + + if p[level-1] == unsafeBytesToString(key) { + match = true + + if len(p) == level { + pathOffsets[pi] = i + 1 + pathsMatched++ + + if pathsMatched == len(paths) { + return i + 1 + } + } + } + } + + if !match { + tokenOffset := nextToken(data[i+1:], false) + i += tokenOffset + 1 + + if data[i] == '{' { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + 1 + } + } + } else { + keys := paths[0] + + if keyLevel == level-1 && + keys[level-1] == unsafeBytesToString(key) { + keyLevel++ + // If we found all keys in path + if keyLevel == len(keys) { + return i + 1 + } } + } } + + i-- case '{': - level++ + if level-1 > maxLen { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + } else { + level++ + } case '}': level-- case '[': // Do not search for keys inside arrays - arraySkip := blockEnd(data[i:], '[', ']') - i += arraySkip + blockSkip := blockEnd(data[i:], '[', ']') + if blockSkip == -1 { + return -1 + } + i += blockSkip } i++ @@ -171,7 +231,7 @@ If no keys provided it will try to extract closest JSON value (simple ones or ob */ func Get(data []byte, keys ...string) (value []byte, dataType int, offset int, err error) { if len(keys) > 0 { - if offset = searchKeys(data, keys...); offset == -1 { + if offset = searchPaths(data, keys); offset == -1 { return []byte{}, NotExist, -1, errors.New("Key path not found") } } @@ -269,7 +329,7 @@ func ArrayEach(data []byte, cb func(value []byte, dataType int, offset int, err offset := 1 if len(keys) > 0 { - if offset = searchKeys(data, keys...); offset == -1 { + if offset = searchPaths(data, keys); offset == -1 { return errors.New("Key path not found") } From 27fe4f0f88fc361fe80c3a7dc866179a8dbda68b Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Tue, 29 Mar 2016 14:04:45 +0500 Subject: [PATCH 2/6] Use separate function and update benchmarks --- Makefile | 2 +- benchmark/benchmark_large_payload_test.go | 22 ++++ benchmark/benchmark_medium_payload_test.go | 23 +++- benchmark/benchmark_small_payload_test.go | 20 ++++ parser.go | 122 +++++++++++++-------- 5 files changed, 143 insertions(+), 46 deletions(-) diff --git a/Makefile b/Makefile index f8bd5bc..352a457 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ SOURCE = parser.go CONTAINER = jsonparser SOURCE_PATH = /go/src/github.com/buger/jsonparser -BENCHMARK = JsonParserSmall +BENCHMARK = JsonParser BENCHTIME = 5s TEST = . diff --git a/benchmark/benchmark_large_payload_test.go b/benchmark/benchmark_large_payload_test.go index 643377f..a0185c2 100644 --- a/benchmark/benchmark_large_payload_test.go +++ b/benchmark/benchmark_large_payload_test.go @@ -35,6 +35,28 @@ func BenchmarkJsonParserLarge(b *testing.B) { } } +func BenchmarkJsonParserLargeOptimized(b *testing.B) { + for i := 0; i < b.N; i++ { + r := largeFixture + offsets := jsonparser.KeyOffsets(r, + []string{"users"}, + []string{"topics", "topics"}, + ) + + jsonparser.ArrayEach(r[offsets[0]:], func(value []byte, dataType int, offset int, err error) { + jsonparser.Get(value, "username") + nothing() + }) + + jsonparser.ArrayEach(r[offsets[1]:], func(value []byte, dataType int, offset int, err error) { + aOff := jsonparser.KeyOffsets(value, []string{"id"}, []string{"slug"}) + jsonparser.GetInt(value[aOff[0]:]) + jsonparser.Get(value[aOff[1]:]) + nothing() + }) + } +} + /* encoding/json */ diff --git a/benchmark/benchmark_medium_payload_test.go b/benchmark/benchmark_medium_payload_test.go index 751761e..3c512c2 100644 --- a/benchmark/benchmark_medium_payload_test.go +++ b/benchmark/benchmark_medium_payload_test.go @@ -15,7 +15,7 @@ import ( "github.com/pquerna/ffjson/ffjson" "github.com/ugorji/go/codec" "testing" - // "fmt" + _ "fmt" ) /* @@ -34,6 +34,27 @@ func BenchmarkJsonParserMedium(b *testing.B) { } } +func BenchmarkJsonParserMediumOptimized(b *testing.B) { + for i := 0; i < b.N; i++ { + r := mediumFixture + offsets := jsonparser.KeyOffsets(r, + []string{"person", "name", "fullName"}, + []string{"person", "github", "followers"}, + []string{"company"}, + []string{"person", "gravatar", "avatars"}, + ) + + jsonparser.Get(r[offsets[0]:]) + jsonparser.GetInt(r[offsets[1]:]) + jsonparser.Get(r[offsets[2]:]) + + jsonparser.ArrayEach(r[offsets[3]:], func(value []byte, dataType int, offset int, err error) { + jsonparser.GetUnsafeString(value, "url") + nothing() + }) + } +} + /* encoding/json */ diff --git a/benchmark/benchmark_small_payload_test.go b/benchmark/benchmark_small_payload_test.go index 8fa0463..86887b9 100644 --- a/benchmark/benchmark_small_payload_test.go +++ b/benchmark/benchmark_small_payload_test.go @@ -35,6 +35,26 @@ func BenchmarkJsonParserSmall(b *testing.B) { } } +func BenchmarkJsonParserSmallOptimized(b *testing.B) { + for i := 0; i < b.N; i++ { + r := smallFixture + offsets := jsonparser.KeyOffsets(r, + []string{"uuid"}, + []string{"tz"}, + []string{"ua"}, + []string{"st"}, + ) + + jsonparser.Get(r[offsets[0]:]) + jsonparser.GetInt(r[offsets[1]:]) + jsonparser.Get(r[offsets[2]:]) + jsonparser.GetInt(r[offsets[3]:]) + + nothing() + } +} + + /* encoding/json */ diff --git a/parser.go b/parser.go index 4ef3f01..5b67d28 100644 --- a/parser.go +++ b/parser.go @@ -93,17 +93,64 @@ func blockEnd(data []byte, openSym byte, closeSym byte) int { return -1 } -func searchPaths(data []byte, paths ...[]string) int { +func searchKeys(data []byte, keys ...string) int { keyLevel := 0 level := 0 i := 0 ln := len(data) - var pathOffsets []int + lk := len(keys) - pathsMatched := 0 - if len(paths) > 1 { - pathOffsets = make([]int, len(paths)) + for i < ln { + switch data[i] { + case '"': + i++ + keyBegin := i + + strEnd := stringEnd(data[i:]) + if strEnd == -1 { + return -1 + } + i += strEnd + keyEnd := i - 1 + + valueOffset := nextToken(data[i:], true) + if valueOffset == -1 { + return -1 + } + i += valueOffset + + if i < ln && + data[i] == ':' && // if string is a Key, and key level match + keyLevel == level-1 && // If key nesting level match current object nested level + keys[level-1] == unsafeBytesToString(data[keyBegin:keyEnd]) { + keyLevel++ + // If we found all keys in path + if keyLevel == lk { + return i + 1 + } + } + case '{': + level++ + case '}': + level-- + case '[': + // Do not search for keys inside arrays + arraySkip := blockEnd(data[i:], '[', ']') + i += arraySkip + } + + i++ } + + return -1 +} + +func KeyOffsets(data []byte, paths ...[]string) (keyOffsets []int) { + level := 0 + i := 0 + ln := len(data) + pathsMatched := 0 + keyOffsets = make([]int, len(paths)) maxLen := len(paths[0]) for _, p := range paths { if len(p) > maxLen { @@ -119,7 +166,7 @@ func searchPaths(data []byte, paths ...[]string) int { strEnd := stringEnd(data[i:]) if strEnd == -1 { - return -1 + return } i += strEnd keyEnd := i - 1 @@ -127,7 +174,7 @@ func searchPaths(data []byte, paths ...[]string) int { colonOffset := nextToken(data[i:], true) if colonOffset == -1 { - return -1 + return } i += colonOffset @@ -135,47 +182,34 @@ func searchPaths(data []byte, paths ...[]string) int { if i < ln && data[i] == ':' && level <= maxLen { match := false - if len(paths) > 1 { - // searchMade := false - for pi, p := range paths { - if pathOffsets[pi] != 0 { - continue - } - // searchMade = true + // searchMade := false + for pi, p := range paths { + if keyOffsets[pi] != 0 || len(p) < level { + continue + } + // searchMade = true - if p[level-1] == unsafeBytesToString(key) { - match = true + if p[level-1] == unsafeBytesToString(key) { + match = true - if len(p) == level { - pathOffsets[pi] = i + 1 - pathsMatched++ + if len(p) == level { + keyOffsets[pi] = i + 1 + pathsMatched++ - if pathsMatched == len(paths) { - return i + 1 - } + if pathsMatched == len(paths) { + return } } } + } - if !match { - tokenOffset := nextToken(data[i+1:], false) - i += tokenOffset + 1 + if !match { + tokenOffset := nextToken(data[i+1:], false) + i += tokenOffset + 1 - if data[i] == '{' { - blockSkip := blockEnd(data[i:], '{', '}') - i += blockSkip + 1 - } - } - } else { - keys := paths[0] - - if keyLevel == level-1 && - keys[level-1] == unsafeBytesToString(key) { - keyLevel++ - // If we found all keys in path - if keyLevel == len(keys) { - return i + 1 - } + if data[i] == '{' { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + 1 } } } @@ -194,7 +228,7 @@ func searchPaths(data []byte, paths ...[]string) int { // Do not search for keys inside arrays blockSkip := blockEnd(data[i:], '[', ']') if blockSkip == -1 { - return -1 + return } i += blockSkip } @@ -202,7 +236,7 @@ func searchPaths(data []byte, paths ...[]string) int { i++ } - return -1 + return } // Data types available in valid JSON data. @@ -231,7 +265,7 @@ If no keys provided it will try to extract closest JSON value (simple ones or ob */ func Get(data []byte, keys ...string) (value []byte, dataType int, offset int, err error) { if len(keys) > 0 { - if offset = searchPaths(data, keys); offset == -1 { + if offset = searchKeys(data, keys...); offset == -1 { return []byte{}, NotExist, -1, errors.New("Key path not found") } } @@ -329,7 +363,7 @@ func ArrayEach(data []byte, cb func(value []byte, dataType int, offset int, err offset := 1 if len(keys) > 0 { - if offset = searchPaths(data, keys); offset == -1 { + if offset = searchKeys(data, keys...); offset == -1 { return errors.New("Key path not found") } From cb726dff3150b479d5be64525b3d0b8495814e11 Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Tue, 29 Mar 2016 19:58:25 +0500 Subject: [PATCH 3/6] Add unmarshmaling support --- benchmark/benchmark_large_payload_test.go | 2 +- benchmark/benchmark_medium_payload_test.go | 16 +++- benchmark/benchmark_small_payload_test.go | 10 ++- encode.go | 92 ++++++++++++++++++++++ parser_test.go | 35 ++++++++ 5 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 encode.go diff --git a/benchmark/benchmark_large_payload_test.go b/benchmark/benchmark_large_payload_test.go index a0185c2..85ab755 100644 --- a/benchmark/benchmark_large_payload_test.go +++ b/benchmark/benchmark_large_payload_test.go @@ -35,7 +35,7 @@ func BenchmarkJsonParserLarge(b *testing.B) { } } -func BenchmarkJsonParserLargeOptimized(b *testing.B) { +func BenchmarkJsonParserLargeOffsets(b *testing.B) { for i := 0; i < b.N; i++ { r := largeFixture offsets := jsonparser.KeyOffsets(r, diff --git a/benchmark/benchmark_medium_payload_test.go b/benchmark/benchmark_medium_payload_test.go index 3c512c2..afcb924 100644 --- a/benchmark/benchmark_medium_payload_test.go +++ b/benchmark/benchmark_medium_payload_test.go @@ -34,7 +34,7 @@ func BenchmarkJsonParserMedium(b *testing.B) { } } -func BenchmarkJsonParserMediumOptimized(b *testing.B) { +func BenchmarkJsonParserMediumOffsets(b *testing.B) { for i := 0; i < b.N; i++ { r := mediumFixture offsets := jsonparser.KeyOffsets(r, @@ -55,6 +55,20 @@ func BenchmarkJsonParserMediumOptimized(b *testing.B) { } } +// func BenchmarkJsonParserMediumStruct(b *testing.B) { +// for i := 0; i < b.N; i++ { +// var data MediumPayload +// jsonparser.Unmarshal(mediumFixture, &data) + +// nothing(data.Person.Name.FullName, data.Person.Github.Followers, data.Company) + +// for _, el := range data.Person.Gravatar.Avatars { +// nothing(el.Url) +// } +// } +// } + + /* encoding/json */ diff --git a/benchmark/benchmark_small_payload_test.go b/benchmark/benchmark_small_payload_test.go index 86887b9..bd49513 100644 --- a/benchmark/benchmark_small_payload_test.go +++ b/benchmark/benchmark_small_payload_test.go @@ -35,7 +35,7 @@ func BenchmarkJsonParserSmall(b *testing.B) { } } -func BenchmarkJsonParserSmallOptimized(b *testing.B) { +func BenchmarkJsonParserSmallOffsets(b *testing.B) { for i := 0; i < b.N; i++ { r := smallFixture offsets := jsonparser.KeyOffsets(r, @@ -54,6 +54,14 @@ func BenchmarkJsonParserSmallOptimized(b *testing.B) { } } +func BenchmarkJsonParserSmallStruct(b *testing.B) { + for i := 0; i < b.N; i++ { + var data SmallPayload + jsonparser.Unmarshal(smallFixture, &data) + nothing(data.Uuid, data.Tz, data.Ua, data.St) + } +} + /* encoding/json diff --git a/encode.go b/encode.go new file mode 100644 index 0000000..fa50aa3 --- /dev/null +++ b/encode.go @@ -0,0 +1,92 @@ +package jsonparser + +import ( + "reflect" + "strings" + _ "fmt" +) + +type structCache struct { + fields [][]string + fieldTypes []reflect.Kind +} + +var cache map[string]*structCache + +func init() { + cache = make(map[string]*structCache) +} + +func Unmarshal(data []byte, v interface{}) error { + val := reflect.ValueOf(v).Elem() + + sName := val.Type().Name() + var sCache *structCache + var ok bool + + // Cache struct info + if sCache, ok = cache[sName]; !ok { + count := val.NumField() + fields := make([][]string, count) + fieldTypes := make([]reflect.Kind, count) + + for i := 0; i < val.NumField(); i++ { + valueField := val.Field(i) + typeField := val.Type().Field(i) + tag := typeField.Tag + jsonKey := tag.Get("json") + + if jsonKey != "" { + fields[i] = []string{jsonKey} + } else { + fields[i] = []string{strings.ToLower(typeField.Name)} + } + fieldTypes[i] = valueField.Kind() + } + + sCache = &structCache{fields, fieldTypes} + cache[sName] = sCache + } + + fields := sCache.fields + fieldTypes := sCache.fieldTypes + + offsets := KeyOffsets(data, fields...) + + for i, of := range offsets { + f := val.Field(i) + + if !f.IsValid() || !f.CanSet() { + continue + } + + switch fieldTypes[i] { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + v, err := GetInt(data[of:]) + + if err == nil { + f.SetInt(v) + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + v, err := GetInt(data[of:]) + + if err == nil { + f.SetUint(uint64(v)) + } + case reflect.String: + v, err := GetString(data[of:]) + + if err == nil { + f.SetString(v) + } + case reflect.Bool: + v, err := GetBoolean(data[of:]) + + if err == nil { + f.SetBool(v) + } + } + } + + return nil +} \ No newline at end of file diff --git a/parser_test.go b/parser_test.go index 511e99e..d7818d8 100644 --- a/parser_test.go +++ b/parser_test.go @@ -513,3 +513,38 @@ func TestGetSlice(t *testing.T) { }, ) } + +type testStruct struct { + Name string + Order string + Sum int + Len int8 + VERYLONGFIELD bool `json:"isPaid"` +} + +var testJson = []byte(`{"name": "Name", "order":"Order", "sum": 100, "len": 12, "isPaid": true}`) + +func TestUnmarshal(t *testing.T) { + var s testStruct + Unmarshal(testJson, &s) + + if s.Name != "Name" { + t.Errorf("Should fill Name field") + } + + if s.Order != "Order" { + t.Errorf("Should fill Order field") + } + + if s.Sum != 100 { + t.Errorf("Should fill Sum field") + } + + if s.Len != 12 { + t.Errorf("Should process int8") + } + + if !s.VERYLONGFIELD { + t.Errorf("Should process boolean and custom name") + } +} From fc67273be9e19148c8cfb685843f2639cfd8ddee Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Wed, 30 Mar 2016 18:33:54 +0500 Subject: [PATCH 4/6] Use streaming support --- encode.go | 37 ++++++--------- parser.go | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 141 insertions(+), 32 deletions(-) diff --git a/encode.go b/encode.go index fa50aa3..b789299 100644 --- a/encode.go +++ b/encode.go @@ -51,42 +51,35 @@ func Unmarshal(data []byte, v interface{}) error { fields := sCache.fields fieldTypes := sCache.fieldTypes - offsets := KeyOffsets(data, fields...) - - for i, of := range offsets { + KeyEach(data, func(i int, d []byte) int { f := val.Field(i) - if !f.IsValid() || !f.CanSet() { - continue + return 0 } + v, dt, of, err := Get(d) + switch fieldTypes[i] { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - v, err := GetInt(data[of:]) - - if err == nil { - f.SetInt(v) + if dt == Number && err == nil { + f.SetInt(ParseInt(v)) } case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - v, err := GetInt(data[of:]) - - if err == nil { - f.SetUint(uint64(v)) + if dt == Number && err == nil { + f.SetUint(uint64(ParseInt(v))) } case reflect.String: - v, err := GetString(data[of:]) - - if err == nil { - f.SetString(v) + if dt == String && err == nil { + f.SetString(unsafeBytesToString(v)) } case reflect.Bool: - v, err := GetBoolean(data[of:]) - - if err == nil { - f.SetBool(v) + if dt == Boolean && err == nil { + f.SetBool(ParseBoolean(v)) } } - } + + return of + }, fields...) return nil } \ No newline at end of file diff --git a/parser.go b/parser.go index 5b67d28..0c5ee6f 100644 --- a/parser.go +++ b/parser.go @@ -145,6 +145,105 @@ func searchKeys(data []byte, keys ...string) int { return -1 } + +func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) { + level := 0 + i := 0 + ln := len(data) + pathsMatched := 0 + keyOffsets := make([]int, len(paths)) + maxLen := len(paths[0]) + for _, p := range paths { + if len(p) > maxLen { + maxLen = len(p) + } + } + + for i < ln { + switch data[i] { + case '"': + i++ + keyBegin := i + + strEnd := stringEnd(data[i:]) + if strEnd == -1 { + return + } + i += strEnd + keyEnd := i - 1 + key := data[keyBegin:keyEnd] + + colonOffset := nextToken(data[i:], true) + if colonOffset == -1 { + return + } + i += colonOffset + + // If string is a Key + if i < ln && data[i] == ':' && level <= maxLen { + match := false + i++ + + // searchMade := false + for pi, p := range paths { + if keyOffsets[pi] != 0 || len(p) < level { + continue + } + // searchMade = true + + if p[level-1] == unsafeBytesToString(key) { + match = true + + if len(p) == level { + keyOffsets[pi] = i + 1 + pathsMatched++ + + offset := cb(pi, data[i:]) + i += offset + + if pathsMatched == len(paths) { + return + } + } + } + } + + if !match { + tokenOffset := nextToken(data[i+1:], false) + i += tokenOffset + 1 + + if data[i] == '{' { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + 1 + } + } + } + + i-- + case '{': + if level-1 > maxLen { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + } else { + level++ + } + case '}': + level-- + case '[': + // Do not search for keys inside arrays + blockSkip := blockEnd(data[i:], '[', ']') + if blockSkip == -1 { + return + } + i += blockSkip + } + + i++ + } + + return +} + func KeyOffsets(data []byte, paths ...[]string) (keyOffsets []int) { level := 0 i := 0 @@ -428,14 +527,18 @@ func GetString(data []byte, keys ...string) (val string, err error) { return "", fmt.Errorf("Value is not a number: %s", string(v)) } + val = ParseString(v) + return +} + +func ParseString(v []byte) string { // If no escapes return raw conten if bytes.IndexByte(v, '\\') == -1 { - return string(v), nil + return string(v) } - s, err := strconv.Unquote(`"` + unsafeBytesToString(v) + `"`) - - return s, err + s, _ := strconv.Unquote(`"` + unsafeBytesToString(v) + `"`) + return s } // GetFloat returns the value retrieved by `Get`, cast to a float64 if possible. @@ -452,7 +555,12 @@ func GetFloat(data []byte, keys ...string) (val float64, err error) { return 0, fmt.Errorf("Value is not a number: %s", string(v)) } - val, err = strconv.ParseFloat(unsafeBytesToString(v), 64) + val = ParseFloat(v) + return +} + +func ParseFloat(v []byte) (num float64) { + num, _ = strconv.ParseFloat(unsafeBytesToString(v), 64) return } @@ -469,7 +577,12 @@ func GetInt(data []byte, keys ...string) (val int64, err error) { return 0, fmt.Errorf("Value is not a number: %s", string(v)) } - val, err = strconv.ParseInt(unsafeBytesToString(v), 10, 64) + val = ParseInt(v) + return +} + +func ParseInt(v []byte) (num int64) { + num, _ = strconv.ParseInt(unsafeBytesToString(v), 10, 64) return } @@ -487,13 +600,16 @@ func GetBoolean(data []byte, keys ...string) (val bool, err error) { return false, fmt.Errorf("Value is not a boolean: %s", string(v)) } + val = ParseBoolean(v) + return +} + +func ParseBoolean(v []byte) bool { if v[0] == 't' { - val = true + return true } else { - val = false + return false } - - return } // A hack until issue golang/go#2632 is fixed. From 62f79057c7f64b01dbb468f509865f5434a1001e Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Fri, 1 Apr 2016 10:54:35 +0500 Subject: [PATCH 5/6] Improve unmarshmaling --- benchmark/benchmark_large_payload_test.go | 56 +++++--- benchmark/benchmark_medium_payload_test.go | 62 +++++---- encode.go | 104 +++++++++----- parser.go | 39 +++--- parser_test.go | 154 ++++++++++++++++++++- 5 files changed, 319 insertions(+), 96 deletions(-) diff --git a/benchmark/benchmark_large_payload_test.go b/benchmark/benchmark_large_payload_test.go index 85ab755..526b950 100644 --- a/benchmark/benchmark_large_payload_test.go +++ b/benchmark/benchmark_large_payload_test.go @@ -35,25 +35,47 @@ func BenchmarkJsonParserLarge(b *testing.B) { } } -func BenchmarkJsonParserLargeOffsets(b *testing.B) { - for i := 0; i < b.N; i++ { - r := largeFixture - offsets := jsonparser.KeyOffsets(r, - []string{"users"}, - []string{"topics", "topics"}, - ) - jsonparser.ArrayEach(r[offsets[0]:], func(value []byte, dataType int, offset int, err error) { - jsonparser.Get(value, "username") - nothing() - }) +func BenchmarkJsonParserOffsetsLarge(b *testing.B) { + keys := [][]string{ + []string{"users"}, + []string{"topics", "topics"}, + } - jsonparser.ArrayEach(r[offsets[1]:], func(value []byte, dataType int, offset int, err error) { - aOff := jsonparser.KeyOffsets(value, []string{"id"}, []string{"slug"}) - jsonparser.GetInt(value[aOff[0]:]) - jsonparser.Get(value[aOff[1]:]) - nothing() - }) + nestedKeys := [][]string{ + []string{"id"}, + []string{"slug"}, + } + + for i := 0; i < b.N; i++ { + r := largeFixture + jsonparser.KeyEach(r, func (idx int, value []byte) (offset int) { + switch idx { + case 0: // users + aOff, _ := jsonparser.ArrayEach(value, func(value []byte, dataType int, offset int, err error) { + jsonparser.Get(value, "username") + nothing() + }) + return aOff + case 1: // topics + aOff, _ := jsonparser.ArrayEach(value, func(value []byte, dataType int, offset int, err error) { + jsonparser.KeyEach(value, func(nidx int, value []byte)(offset int) { + v, _, of, _ := jsonparser.Get(value) + panic(string(v)) + switch nidx { + case 0: + jsonparser.ParseInt(v) + case 1: + nothing(v) + } + + return of + }, nestedKeys...) + }) + return aOff + } + return + }, keys...) } } diff --git a/benchmark/benchmark_medium_payload_test.go b/benchmark/benchmark_medium_payload_test.go index afcb924..509489d 100644 --- a/benchmark/benchmark_medium_payload_test.go +++ b/benchmark/benchmark_medium_payload_test.go @@ -34,39 +34,49 @@ func BenchmarkJsonParserMedium(b *testing.B) { } } -func BenchmarkJsonParserMediumOffsets(b *testing.B) { +func BenchmarkJsonParserOffsetsMedium(b *testing.B) { + keys := [][]string{ + []string{"person", "name", "fullName"}, + []string{"person", "github", "followers"}, + []string{"company"}, + []string{"person", "gravatar", "avatars"}, + } + for i := 0; i < b.N; i++ { r := mediumFixture - offsets := jsonparser.KeyOffsets(r, - []string{"person", "name", "fullName"}, - []string{"person", "github", "followers"}, - []string{"company"}, - []string{"person", "gravatar", "avatars"}, - ) - - jsonparser.Get(r[offsets[0]:]) - jsonparser.GetInt(r[offsets[1]:]) - jsonparser.Get(r[offsets[2]:]) - - jsonparser.ArrayEach(r[offsets[3]:], func(value []byte, dataType int, offset int, err error) { - jsonparser.GetUnsafeString(value, "url") - nothing() - }) + jsonparser.KeyEach(r, func (idx int, value []byte) (offset int) { + v, _, offset, _ := jsonparser.Get(value) + + switch idx { + case 0: // fullName + nothing(v) + case 1: // followers + jsonparser.ParseInt(value) + case 2: // company + jsonparser.Get(value) + case 3: // Processing array + aOff, _ := jsonparser.ArrayEach(value, func(value []byte, dataType int, offset int, err error) { + jsonparser.Get(value, "url") + }) + offset += aOff + } + return + }, keys...) } } -// func BenchmarkJsonParserMediumStruct(b *testing.B) { -// for i := 0; i < b.N; i++ { -// var data MediumPayload -// jsonparser.Unmarshal(mediumFixture, &data) +func BenchmarkJsonParserStructMedium(b *testing.B) { + for i := 0; i < b.N; i++ { + var data MediumPayload + jsonparser.Unmarshal(mediumFixture, &data) -// nothing(data.Person.Name.FullName, data.Person.Github.Followers, data.Company) + nothing(data.Person.Name.FullName, data.Person.Github.Followers, data.Company) -// for _, el := range data.Person.Gravatar.Avatars { -// nothing(el.Url) -// } -// } -// } + for _, el := range data.Person.Gravatar.Avatars { + nothing(el.Url) + } + } +} /* diff --git a/encode.go b/encode.go index b789299..cead272 100644 --- a/encode.go +++ b/encode.go @@ -17,9 +17,62 @@ func init() { cache = make(map[string]*structCache) } -func Unmarshal(data []byte, v interface{}) error { - val := reflect.ValueOf(v).Elem() +func unmarshalValue(data []byte, val reflect.Value) int { + if !val.IsValid() || !val.CanSet() { + return 0 + } + + v, dt, of, err := Get(data) + + switch val.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if dt == Number && err == nil { + val.SetInt(ParseInt(v)) + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + if dt == Number && err == nil { + val.SetUint(uint64(ParseInt(v))) + } + case reflect.String: + if dt == String && err == nil { + val.SetString(unsafeBytesToString(v)) + } + case reflect.Bool: + if dt == Boolean && err == nil { + val.SetBool(ParseBoolean(v)) + } + case reflect.Ptr: + obj := reflect.New(val.Type().Elem()) + unmarshalValue(v, obj.Elem()) + val.Set(obj) + case reflect.Struct: + obj := reflect.New(val.Type()) + unmarshalStruct(v, obj.Elem()) + val.Set(obj.Elem()) + case reflect.Slice: + sT := val.Type().Elem() + s := reflect.MakeSlice(val.Type(), 0, 0) + ArrayEach(v, func(value []byte, dataType int, offset int, err error){ + el := reflect.New(sT) + + switch sT.Kind() { + case reflect.Struct: + unmarshalStruct(value, el.Elem()) + case reflect.Ptr: + unmarshalValue(value, el.Elem()) + default: + unmarshalValue(value, el.Elem()) + } + + s = reflect.Append(s, el.Elem()) + }) + val.Set(s) + } + + return of +} +func unmarshalStruct(data []byte, val reflect.Value) int { sName := val.Type().Name() var sCache *structCache var ok bool @@ -28,10 +81,10 @@ func Unmarshal(data []byte, v interface{}) error { if sCache, ok = cache[sName]; !ok { count := val.NumField() fields := make([][]string, count) - fieldTypes := make([]reflect.Kind, count) + // fieldTypes := make([]reflect.Kind, count) for i := 0; i < val.NumField(); i++ { - valueField := val.Field(i) + // valueField := val.Field(i) typeField := val.Type().Field(i) tag := typeField.Tag jsonKey := tag.Get("json") @@ -39,47 +92,32 @@ func Unmarshal(data []byte, v interface{}) error { if jsonKey != "" { fields[i] = []string{jsonKey} } else { - fields[i] = []string{strings.ToLower(typeField.Name)} + fields[i] = []string{strings.ToLower(string(typeField.Name[:1])) + string(typeField.Name[1:])} } - fieldTypes[i] = valueField.Kind() + // fieldTypes[i] = valueField.Kind() } - sCache = &structCache{fields, fieldTypes} + sCache = &structCache{fields: fields} cache[sName] = sCache } fields := sCache.fields - fieldTypes := sCache.fieldTypes + // fieldTypes := sCache.fieldTypes - KeyEach(data, func(i int, d []byte) int { + offset := KeyEach(data, func(i int, d []byte) int { f := val.Field(i) - if !f.IsValid() || !f.CanSet() { - return 0 - } - v, dt, of, err := Get(d) + return unmarshalValue(d, f) + }, fields...) + // panic(string(data)) - switch fieldTypes[i] { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if dt == Number && err == nil { - f.SetInt(ParseInt(v)) - } - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - if dt == Number && err == nil { - f.SetUint(uint64(ParseInt(v))) - } - case reflect.String: - if dt == String && err == nil { - f.SetString(unsafeBytesToString(v)) - } - case reflect.Bool: - if dt == Boolean && err == nil { - f.SetBool(ParseBoolean(v)) - } - } + return offset +} - return of - }, fields...) +func Unmarshal(data []byte, v interface{}) error { + val := reflect.ValueOf(v).Elem() + + unmarshalStruct(data, val) return nil } \ No newline at end of file diff --git a/parser.go b/parser.go index 0c5ee6f..8d381b1 100644 --- a/parser.go +++ b/parser.go @@ -17,7 +17,8 @@ func tokenEnd(data []byte) int { } } - return -1 + // If end of data + return len(data) } // Find position of next character which is not ' ', ',', '}' or ']' @@ -146,7 +147,7 @@ func searchKeys(data []byte, keys ...string) int { } -func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) { +func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) int { level := 0 i := 0 ln := len(data) @@ -167,7 +168,7 @@ func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) { strEnd := stringEnd(data[i:]) if strEnd == -1 { - return + return i } i += strEnd keyEnd := i - 1 @@ -175,7 +176,7 @@ func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) { colonOffset := nextToken(data[i:], true) if colonOffset == -1 { - return + return i } i += colonOffset @@ -202,7 +203,7 @@ func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) { i += offset if pathsMatched == len(paths) { - return + return i } } } @@ -229,11 +230,15 @@ func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) { } case '}': level-- + + if level == 0 { + return i + } case '[': // Do not search for keys inside arrays blockSkip := blockEnd(data[i:], '[', ']') if blockSkip == -1 { - return + return i } i += blockSkip } @@ -241,7 +246,7 @@ func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) { i++ } - return + return i } func KeyOffsets(data []byte, paths ...[]string) (keyOffsets []int) { @@ -411,10 +416,6 @@ func Get(data []byte, keys ...string) (value []byte, dataType int, offset int, e // Number, Boolean or None end := tokenEnd(data[endOffset:]) - if end == -1 { - return nil, dataType, offset, errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol") - } - value := unsafeBytesToString(data[offset : endOffset+end]) switch data[offset] { @@ -454,35 +455,35 @@ func Get(data []byte, keys ...string) (value []byte, dataType int, offset int, e } // ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`. -func ArrayEach(data []byte, cb func(value []byte, dataType int, offset int, err error), keys ...string) (err error) { +func ArrayEach(data []byte, cb func(value []byte, dataType int, offset int, err error), keys ...string) (offset int, err error) { if len(data) == 0 { - return errors.New("Object is empty") + return 0, errors.New("Object is empty") } - offset := 1 + offset = 1 if len(keys) > 0 { if offset = searchKeys(data, keys...); offset == -1 { - return errors.New("Key path not found") + return offset, errors.New("Key path not found") } // Go to closest value nO := nextToken(data[offset:], false) if nO == -1 { - return errors.New("Malformed JSON") + return offset, errors.New("Malformed JSON") } offset += nO if data[offset] != '[' { - return errors.New("Value is not array") + return offset, errors.New("Value is not array") } offset++ } - for true { + for { v, t, o, e := Get(data[offset:]) if o == 0 { @@ -500,7 +501,7 @@ func ArrayEach(data []byte, cb func(value []byte, dataType int, offset int, err offset += o } - return nil + return offset, nil } // GetUnsafeString returns the value retrieved by `Get`, use creates string without memory allocation by mapping string to slice memory. It does not handle escape symbols. diff --git a/parser_test.go b/parser_test.go index d7818d8..513381f 100644 --- a/parser_test.go +++ b/parser_test.go @@ -514,15 +514,27 @@ func TestGetSlice(t *testing.T) { ) } +type nestedStruct struct { + A string + B int + + N *nestedStruct `json:"nested3"` +} + type testStruct struct { Name string Order string Sum int Len int8 VERYLONGFIELD bool `json:"isPaid"` + NestedPtr *nestedStruct `json:"nested"` + Nested nestedStruct `json:"nested2"` + Arr []nestedStruct + ArrInt []int + IntPtr *int } -var testJson = []byte(`{"name": "Name", "order":"Order", "sum": 100, "len": 12, "isPaid": true}`) +var testJson = []byte(`{"name": "Name", "order":"Order", "sum": 100, "len": 12, "isPaid": true, "nested": {"a":"test", "b":2, "nested3":{"a":"test3","b":4}, "c": "unknown"}, "nested2": {"a":"test2", "b":3}, "arr": [{"a":"zxc", "b": 1}, {"a":"123", "b":2}], "arrInt": [1,2,3,4], "intPtr": 10}`) func TestUnmarshal(t *testing.T) { var s testStruct @@ -547,4 +559,144 @@ func TestUnmarshal(t *testing.T) { if !s.VERYLONGFIELD { t.Errorf("Should process boolean and custom name") } + + if s.NestedPtr == nil { + t.Errorf("Should initialize nested pointer to struct") + } else { + if s.NestedPtr.A != "test" || s.NestedPtr.B != 2 { + t.Errorf("Should fill nested pointer to struct %v", s.NestedPtr) + } + + if s.NestedPtr.N == nil { + t.Errorf("Should initialize deeply nested pointer to struct") + } else { + if s.NestedPtr.N.A != "test3" || s.NestedPtr.N.B != 4 { + t.Errorf("Should fill nested pointer to struct %v", s.NestedPtr.N) + } + } + } + + if s.Nested.A != "test2" || s.Nested.B != 3 { + t.Errorf("Should fill nested struct %v", s.Nested) + } + + if len(s.Arr) != 2 { + t.Errorf("Should fill array") + } else { + if s.Arr[0].A != "zxc" && s.Arr[0].B != 1 { + t.Errorf("Should fill first array item") + } + if s.Arr[1].A != "123" && s.Arr[1].B != 2 { + t.Errorf("Should fill first array item") + } + } + + if len(s.ArrInt) != 4 { + t.Errorf("Should fill int array") + } else { + if !reflect.DeepEqual(s.ArrInt, []int{1,2,3,4}) { + t.Errorf("Should fill int array with proper values %v", s.ArrInt) + } + } + + if *s.IntPtr != 10 { + t.Errorf("Should update simple type pointer") + } } + +// var fixture []byte = []byte(`{ +// "person": { +// "id": "d50887ca-a6ce-4e59-b89f-14f0b5d03b03", +// "name": { +// "fullName": "Leonid Bugaev", +// "givenName": "Leonid", +// "familyName": "Bugaev" +// }, +// "email": "leonsbox@gmail.com", +// "gender": "male", +// "location": "Saint Petersburg, Saint Petersburg, RU", +// "geo": { +// "city": "Saint Petersburg", +// "state": "Saint Petersburg", +// "country": "Russia", +// "lat": 59.9342802, +// "lng": 30.3350986 +// }, +// "bio": "Senior engineer at Granify.com", +// "site": "http://flickfaver.com", +// "avatar": "https://d1ts43dypk8bqh.cloudfront.net/v1/avatars/d50887ca-a6ce-4e59-b89f-14f0b5d03b03", +// "employment": { +// "name": "www.latera.ru", +// "title": "Software Engineer", +// "domain": "gmail.com" +// }, +// "facebook": { +// "handle": "leonid.bugaev" +// }, +// "github": { +// "handle": "buger", +// "id": 14009, +// "avatar": "https://avatars.githubusercontent.com/u/14009?v=3", +// "company": "Granify", +// "blog": "http://leonsbox.com", +// "followers": 95, +// "following": 10 +// }, +// "twitter": { +// "handle": "flickfaver", +// "id": 77004410, +// "bio": null, +// "followers": 2, +// "following": 1, +// "statuses": 5, +// "favorites": 0, +// "location": "", +// "site": "http://flickfaver.com", +// "avatar": null +// }, +// "linkedin": { +// "handle": "in/leonidbugaev" +// }, +// "googleplus": { +// "handle": null +// }, +// "angellist": { +// "handle": "leonid-bugaev", +// "id": 61541, +// "bio": "Senior engineer at Granify.com", +// "blog": "http://buger.github.com", +// "site": "http://buger.github.com", +// "followers": 41, +// "avatar": "https://d1qb2nb5cznatu.cloudfront.net/users/61541-medium_jpg?1405474390" +// }, +// "klout": { +// "handle": null, +// "score": null +// }, +// "foursquare": { +// "handle": null +// }, +// "aboutme": { +// "handle": "leonid.bugaev", +// "bio": null, +// "avatar": null +// }, +// "gravatar": { +// "handle": "buger", +// "urls": [ + +// ], +// "avatar": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510", +// "avatars": [ +// { +// "url": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510", +// "type": "thumbnail" +// } +// ] +// }, +// "fuzzy": false +// }, +// "company": null +// }`) + + From fe3ac274dcbbb741ca29d72717e85bd3973e2228 Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Fri, 1 Apr 2016 10:56:18 +0500 Subject: [PATCH 6/6] Refactor Makefile --- Makefile | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 352a457..d3767c4 100644 --- a/Makefile +++ b/Makefile @@ -4,30 +4,31 @@ SOURCE_PATH = /go/src/github.com/buger/jsonparser BENCHMARK = JsonParser BENCHTIME = 5s TEST = . +DRUN = docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) build: docker build -t $(CONTAINER) . race: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) --env GORACE="halt_on_error=1" go test ./. $(ARGS) -v -race -timeout 15s + $(DRUN) --env GORACE="halt_on_error=1" go test ./. $(ARGS) -v -race -timeout 15s bench: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -benchtime $(BENCHTIME) -v + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -benchtime $(BENCHTIME) -v profile: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -memprofile mem.mprof -v - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -cpuprofile cpu.out -v - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -c + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -memprofile mem.mprof -v + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -cpuprofile cpu.out -v + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -c test: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) ./ -run $(TEST) -timeout 10s $(ARGS) -v + $(DRUN) go test $(LDFLAGS) ./ -run $(TEST) -timeout 10s $(ARGS) -v fmt: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go fmt ./... + $(DRUN) go fmt ./... vet: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go vet ./. + $(DRUN) go vet ./. bash: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) /bin/bash \ No newline at end of file + $(DRUN) /bin/bash \ No newline at end of file