diff --git a/Makefile b/Makefile index 352a457..d3767c4 100644 --- a/Makefile +++ b/Makefile @@ -4,30 +4,31 @@ SOURCE_PATH = /go/src/github.com/buger/jsonparser BENCHMARK = JsonParser BENCHTIME = 5s TEST = . +DRUN = docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) build: docker build -t $(CONTAINER) . race: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) --env GORACE="halt_on_error=1" go test ./. $(ARGS) -v -race -timeout 15s + $(DRUN) --env GORACE="halt_on_error=1" go test ./. $(ARGS) -v -race -timeout 15s bench: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -benchtime $(BENCHTIME) -v + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -benchtime $(BENCHTIME) -v profile: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -memprofile mem.mprof -v - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -cpuprofile cpu.out -v - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -c + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -memprofile mem.mprof -v + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -cpuprofile cpu.out -v + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -c test: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) ./ -run $(TEST) -timeout 10s $(ARGS) -v + $(DRUN) go test $(LDFLAGS) ./ -run $(TEST) -timeout 10s $(ARGS) -v fmt: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go fmt ./... + $(DRUN) go fmt ./... vet: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go vet ./. + $(DRUN) go vet ./. bash: - docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) /bin/bash \ No newline at end of file + $(DRUN) /bin/bash \ No newline at end of file diff --git a/benchmark/benchmark_large_payload_test.go b/benchmark/benchmark_large_payload_test.go index 643377f..526b950 100644 --- a/benchmark/benchmark_large_payload_test.go +++ b/benchmark/benchmark_large_payload_test.go @@ -35,6 +35,50 @@ func BenchmarkJsonParserLarge(b *testing.B) { } } + +func BenchmarkJsonParserOffsetsLarge(b *testing.B) { + keys := [][]string{ + []string{"users"}, + []string{"topics", "topics"}, + } + + nestedKeys := [][]string{ + []string{"id"}, + []string{"slug"}, + } + + for i := 0; i < b.N; i++ { + r := largeFixture + jsonparser.KeyEach(r, func (idx int, value []byte) (offset int) { + switch idx { + case 0: // users + aOff, _ := jsonparser.ArrayEach(value, func(value []byte, dataType int, offset int, err error) { + jsonparser.Get(value, "username") + nothing() + }) + return aOff + case 1: // topics + aOff, _ := jsonparser.ArrayEach(value, func(value []byte, dataType int, offset int, err error) { + jsonparser.KeyEach(value, func(nidx int, value []byte)(offset int) { + v, _, of, _ := jsonparser.Get(value) + panic(string(v)) + switch nidx { + case 0: + jsonparser.ParseInt(v) + case 1: + nothing(v) + } + + return of + }, nestedKeys...) + }) + return aOff + } + return + }, keys...) + } +} + /* encoding/json */ diff --git a/benchmark/benchmark_medium_payload_test.go b/benchmark/benchmark_medium_payload_test.go index 751761e..509489d 100644 --- a/benchmark/benchmark_medium_payload_test.go +++ b/benchmark/benchmark_medium_payload_test.go @@ -15,7 +15,7 @@ import ( "github.com/pquerna/ffjson/ffjson" "github.com/ugorji/go/codec" "testing" - // "fmt" + _ "fmt" ) /* @@ -34,6 +34,51 @@ func BenchmarkJsonParserMedium(b *testing.B) { } } +func BenchmarkJsonParserOffsetsMedium(b *testing.B) { + keys := [][]string{ + []string{"person", "name", "fullName"}, + []string{"person", "github", "followers"}, + []string{"company"}, + []string{"person", "gravatar", "avatars"}, + } + + for i := 0; i < b.N; i++ { + r := mediumFixture + jsonparser.KeyEach(r, func (idx int, value []byte) (offset int) { + v, _, offset, _ := jsonparser.Get(value) + + switch idx { + case 0: // fullName + nothing(v) + case 1: // followers + jsonparser.ParseInt(value) + case 2: // company + jsonparser.Get(value) + case 3: // Processing array + aOff, _ := jsonparser.ArrayEach(value, func(value []byte, dataType int, offset int, err error) { + jsonparser.Get(value, "url") + }) + offset += aOff + } + return + }, keys...) + } +} + +func BenchmarkJsonParserStructMedium(b *testing.B) { + for i := 0; i < b.N; i++ { + var data MediumPayload + jsonparser.Unmarshal(mediumFixture, &data) + + nothing(data.Person.Name.FullName, data.Person.Github.Followers, data.Company) + + for _, el := range data.Person.Gravatar.Avatars { + nothing(el.Url) + } + } +} + + /* encoding/json */ diff --git a/benchmark/benchmark_small_payload_test.go b/benchmark/benchmark_small_payload_test.go index 8fa0463..bd49513 100644 --- a/benchmark/benchmark_small_payload_test.go +++ b/benchmark/benchmark_small_payload_test.go @@ -35,6 +35,34 @@ func BenchmarkJsonParserSmall(b *testing.B) { } } +func BenchmarkJsonParserSmallOffsets(b *testing.B) { + for i := 0; i < b.N; i++ { + r := smallFixture + offsets := jsonparser.KeyOffsets(r, + []string{"uuid"}, + []string{"tz"}, + []string{"ua"}, + []string{"st"}, + ) + + jsonparser.Get(r[offsets[0]:]) + jsonparser.GetInt(r[offsets[1]:]) + jsonparser.Get(r[offsets[2]:]) + jsonparser.GetInt(r[offsets[3]:]) + + nothing() + } +} + +func BenchmarkJsonParserSmallStruct(b *testing.B) { + for i := 0; i < b.N; i++ { + var data SmallPayload + jsonparser.Unmarshal(smallFixture, &data) + nothing(data.Uuid, data.Tz, data.Ua, data.St) + } +} + + /* encoding/json */ diff --git a/encode.go b/encode.go new file mode 100644 index 0000000..cead272 --- /dev/null +++ b/encode.go @@ -0,0 +1,123 @@ +package jsonparser + +import ( + "reflect" + "strings" + _ "fmt" +) + +type structCache struct { + fields [][]string + fieldTypes []reflect.Kind +} + +var cache map[string]*structCache + +func init() { + cache = make(map[string]*structCache) +} + +func unmarshalValue(data []byte, val reflect.Value) int { + if !val.IsValid() || !val.CanSet() { + return 0 + } + + v, dt, of, err := Get(data) + + switch val.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if dt == Number && err == nil { + val.SetInt(ParseInt(v)) + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + if dt == Number && err == nil { + val.SetUint(uint64(ParseInt(v))) + } + case reflect.String: + if dt == String && err == nil { + val.SetString(unsafeBytesToString(v)) + } + case reflect.Bool: + if dt == Boolean && err == nil { + val.SetBool(ParseBoolean(v)) + } + case reflect.Ptr: + obj := reflect.New(val.Type().Elem()) + unmarshalValue(v, obj.Elem()) + val.Set(obj) + case reflect.Struct: + obj := reflect.New(val.Type()) + unmarshalStruct(v, obj.Elem()) + val.Set(obj.Elem()) + case reflect.Slice: + sT := val.Type().Elem() + s := reflect.MakeSlice(val.Type(), 0, 0) + ArrayEach(v, func(value []byte, dataType int, offset int, err error){ + el := reflect.New(sT) + + switch sT.Kind() { + case reflect.Struct: + unmarshalStruct(value, el.Elem()) + case reflect.Ptr: + unmarshalValue(value, el.Elem()) + default: + unmarshalValue(value, el.Elem()) + } + + s = reflect.Append(s, el.Elem()) + }) + val.Set(s) + } + + return of +} + +func unmarshalStruct(data []byte, val reflect.Value) int { + sName := val.Type().Name() + var sCache *structCache + var ok bool + + // Cache struct info + if sCache, ok = cache[sName]; !ok { + count := val.NumField() + fields := make([][]string, count) + // fieldTypes := make([]reflect.Kind, count) + + for i := 0; i < val.NumField(); i++ { + // valueField := val.Field(i) + typeField := val.Type().Field(i) + tag := typeField.Tag + jsonKey := tag.Get("json") + + if jsonKey != "" { + fields[i] = []string{jsonKey} + } else { + fields[i] = []string{strings.ToLower(string(typeField.Name[:1])) + string(typeField.Name[1:])} + } + // fieldTypes[i] = valueField.Kind() + } + + sCache = &structCache{fields: fields} + cache[sName] = sCache + } + + fields := sCache.fields + // fieldTypes := sCache.fieldTypes + + offset := KeyEach(data, func(i int, d []byte) int { + f := val.Field(i) + + return unmarshalValue(d, f) + }, fields...) + // panic(string(data)) + + return offset +} + +func Unmarshal(data []byte, v interface{}) error { + val := reflect.ValueOf(v).Elem() + + unmarshalStruct(data, val) + + return nil +} \ No newline at end of file diff --git a/parser.go b/parser.go index 99a3415..8d381b1 100644 --- a/parser.go +++ b/parser.go @@ -17,7 +17,8 @@ func tokenEnd(data []byte) int { } } - return -1 + // If end of data + return len(data) } // Find position of next character which is not ' ', ',', '}' or ']' @@ -145,6 +146,203 @@ func searchKeys(data []byte, keys ...string) int { return -1 } + +func KeyEach(data []byte, cb func(int, []byte) int, paths ...[]string) int { + level := 0 + i := 0 + ln := len(data) + pathsMatched := 0 + keyOffsets := make([]int, len(paths)) + maxLen := len(paths[0]) + for _, p := range paths { + if len(p) > maxLen { + maxLen = len(p) + } + } + + for i < ln { + switch data[i] { + case '"': + i++ + keyBegin := i + + strEnd := stringEnd(data[i:]) + if strEnd == -1 { + return i + } + i += strEnd + keyEnd := i - 1 + key := data[keyBegin:keyEnd] + + colonOffset := nextToken(data[i:], true) + if colonOffset == -1 { + return i + } + i += colonOffset + + // If string is a Key + if i < ln && data[i] == ':' && level <= maxLen { + match := false + i++ + + // searchMade := false + for pi, p := range paths { + if keyOffsets[pi] != 0 || len(p) < level { + continue + } + // searchMade = true + + if p[level-1] == unsafeBytesToString(key) { + match = true + + if len(p) == level { + keyOffsets[pi] = i + 1 + pathsMatched++ + + offset := cb(pi, data[i:]) + i += offset + + if pathsMatched == len(paths) { + return i + } + } + } + } + + if !match { + tokenOffset := nextToken(data[i+1:], false) + i += tokenOffset + 1 + + if data[i] == '{' { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + 1 + } + } + } + + i-- + case '{': + if level-1 > maxLen { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + } else { + level++ + } + case '}': + level-- + + if level == 0 { + return i + } + case '[': + // Do not search for keys inside arrays + blockSkip := blockEnd(data[i:], '[', ']') + if blockSkip == -1 { + return i + } + i += blockSkip + } + + i++ + } + + return i +} + +func KeyOffsets(data []byte, paths ...[]string) (keyOffsets []int) { + level := 0 + i := 0 + ln := len(data) + pathsMatched := 0 + keyOffsets = make([]int, len(paths)) + maxLen := len(paths[0]) + for _, p := range paths { + if len(p) > maxLen { + maxLen = len(p) + } + } + + for i < ln { + switch data[i] { + case '"': + i++ + keyBegin := i + + strEnd := stringEnd(data[i:]) + if strEnd == -1 { + return + } + i += strEnd + keyEnd := i - 1 + key := data[keyBegin:keyEnd] + + colonOffset := nextToken(data[i:], true) + if colonOffset == -1 { + return + } + i += colonOffset + + // If string is a Key + if i < ln && data[i] == ':' && level <= maxLen { + match := false + + // searchMade := false + for pi, p := range paths { + if keyOffsets[pi] != 0 || len(p) < level { + continue + } + // searchMade = true + + if p[level-1] == unsafeBytesToString(key) { + match = true + + if len(p) == level { + keyOffsets[pi] = i + 1 + pathsMatched++ + + if pathsMatched == len(paths) { + return + } + } + } + } + + if !match { + tokenOffset := nextToken(data[i+1:], false) + i += tokenOffset + 1 + + if data[i] == '{' { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + 1 + } + } + } + + i-- + case '{': + if level-1 > maxLen { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + } else { + level++ + } + case '}': + level-- + case '[': + // Do not search for keys inside arrays + blockSkip := blockEnd(data[i:], '[', ']') + if blockSkip == -1 { + return + } + i += blockSkip + } + + i++ + } + + return +} + // Data types available in valid JSON data. const ( NotExist = iota @@ -218,10 +416,6 @@ func Get(data []byte, keys ...string) (value []byte, dataType int, offset int, e // Number, Boolean or None end := tokenEnd(data[endOffset:]) - if end == -1 { - return nil, dataType, offset, errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol") - } - value := unsafeBytesToString(data[offset : endOffset+end]) switch data[offset] { @@ -261,35 +455,35 @@ func Get(data []byte, keys ...string) (value []byte, dataType int, offset int, e } // ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`. -func ArrayEach(data []byte, cb func(value []byte, dataType int, offset int, err error), keys ...string) (err error) { +func ArrayEach(data []byte, cb func(value []byte, dataType int, offset int, err error), keys ...string) (offset int, err error) { if len(data) == 0 { - return errors.New("Object is empty") + return 0, errors.New("Object is empty") } - offset := 1 + offset = 1 if len(keys) > 0 { if offset = searchKeys(data, keys...); offset == -1 { - return errors.New("Key path not found") + return offset, errors.New("Key path not found") } // Go to closest value nO := nextToken(data[offset:], false) if nO == -1 { - return errors.New("Malformed JSON") + return offset, errors.New("Malformed JSON") } offset += nO if data[offset] != '[' { - return errors.New("Value is not array") + return offset, errors.New("Value is not array") } offset++ } - for true { + for { v, t, o, e := Get(data[offset:]) if o == 0 { @@ -307,7 +501,7 @@ func ArrayEach(data []byte, cb func(value []byte, dataType int, offset int, err offset += o } - return nil + return offset, nil } // GetUnsafeString returns the value retrieved by `Get`, use creates string without memory allocation by mapping string to slice memory. It does not handle escape symbols. @@ -334,14 +528,18 @@ func GetString(data []byte, keys ...string) (val string, err error) { return "", fmt.Errorf("Value is not a number: %s", string(v)) } + val = ParseString(v) + return +} + +func ParseString(v []byte) string { // If no escapes return raw conten if bytes.IndexByte(v, '\\') == -1 { - return string(v), nil + return string(v) } - s, err := strconv.Unquote(`"` + unsafeBytesToString(v) + `"`) - - return s, err + s, _ := strconv.Unquote(`"` + unsafeBytesToString(v) + `"`) + return s } // GetFloat returns the value retrieved by `Get`, cast to a float64 if possible. @@ -358,7 +556,12 @@ func GetFloat(data []byte, keys ...string) (val float64, err error) { return 0, fmt.Errorf("Value is not a number: %s", string(v)) } - val, err = strconv.ParseFloat(unsafeBytesToString(v), 64) + val = ParseFloat(v) + return +} + +func ParseFloat(v []byte) (num float64) { + num, _ = strconv.ParseFloat(unsafeBytesToString(v), 64) return } @@ -375,7 +578,12 @@ func GetInt(data []byte, keys ...string) (val int64, err error) { return 0, fmt.Errorf("Value is not a number: %s", string(v)) } - val, err = strconv.ParseInt(unsafeBytesToString(v), 10, 64) + val = ParseInt(v) + return +} + +func ParseInt(v []byte) (num int64) { + num, _ = strconv.ParseInt(unsafeBytesToString(v), 10, 64) return } @@ -393,13 +601,16 @@ func GetBoolean(data []byte, keys ...string) (val bool, err error) { return false, fmt.Errorf("Value is not a boolean: %s", string(v)) } + val = ParseBoolean(v) + return +} + +func ParseBoolean(v []byte) bool { if v[0] == 't' { - val = true + return true } else { - val = false + return false } - - return } // A hack until issue golang/go#2632 is fixed. diff --git a/parser_test.go b/parser_test.go index 511e99e..513381f 100644 --- a/parser_test.go +++ b/parser_test.go @@ -513,3 +513,190 @@ func TestGetSlice(t *testing.T) { }, ) } + +type nestedStruct struct { + A string + B int + + N *nestedStruct `json:"nested3"` +} + +type testStruct struct { + Name string + Order string + Sum int + Len int8 + VERYLONGFIELD bool `json:"isPaid"` + NestedPtr *nestedStruct `json:"nested"` + Nested nestedStruct `json:"nested2"` + Arr []nestedStruct + ArrInt []int + IntPtr *int +} + +var testJson = []byte(`{"name": "Name", "order":"Order", "sum": 100, "len": 12, "isPaid": true, "nested": {"a":"test", "b":2, "nested3":{"a":"test3","b":4}, "c": "unknown"}, "nested2": {"a":"test2", "b":3}, "arr": [{"a":"zxc", "b": 1}, {"a":"123", "b":2}], "arrInt": [1,2,3,4], "intPtr": 10}`) + +func TestUnmarshal(t *testing.T) { + var s testStruct + Unmarshal(testJson, &s) + + if s.Name != "Name" { + t.Errorf("Should fill Name field") + } + + if s.Order != "Order" { + t.Errorf("Should fill Order field") + } + + if s.Sum != 100 { + t.Errorf("Should fill Sum field") + } + + if s.Len != 12 { + t.Errorf("Should process int8") + } + + if !s.VERYLONGFIELD { + t.Errorf("Should process boolean and custom name") + } + + if s.NestedPtr == nil { + t.Errorf("Should initialize nested pointer to struct") + } else { + if s.NestedPtr.A != "test" || s.NestedPtr.B != 2 { + t.Errorf("Should fill nested pointer to struct %v", s.NestedPtr) + } + + if s.NestedPtr.N == nil { + t.Errorf("Should initialize deeply nested pointer to struct") + } else { + if s.NestedPtr.N.A != "test3" || s.NestedPtr.N.B != 4 { + t.Errorf("Should fill nested pointer to struct %v", s.NestedPtr.N) + } + } + } + + if s.Nested.A != "test2" || s.Nested.B != 3 { + t.Errorf("Should fill nested struct %v", s.Nested) + } + + if len(s.Arr) != 2 { + t.Errorf("Should fill array") + } else { + if s.Arr[0].A != "zxc" && s.Arr[0].B != 1 { + t.Errorf("Should fill first array item") + } + if s.Arr[1].A != "123" && s.Arr[1].B != 2 { + t.Errorf("Should fill first array item") + } + } + + if len(s.ArrInt) != 4 { + t.Errorf("Should fill int array") + } else { + if !reflect.DeepEqual(s.ArrInt, []int{1,2,3,4}) { + t.Errorf("Should fill int array with proper values %v", s.ArrInt) + } + } + + if *s.IntPtr != 10 { + t.Errorf("Should update simple type pointer") + } +} + +// var fixture []byte = []byte(`{ +// "person": { +// "id": "d50887ca-a6ce-4e59-b89f-14f0b5d03b03", +// "name": { +// "fullName": "Leonid Bugaev", +// "givenName": "Leonid", +// "familyName": "Bugaev" +// }, +// "email": "leonsbox@gmail.com", +// "gender": "male", +// "location": "Saint Petersburg, Saint Petersburg, RU", +// "geo": { +// "city": "Saint Petersburg", +// "state": "Saint Petersburg", +// "country": "Russia", +// "lat": 59.9342802, +// "lng": 30.3350986 +// }, +// "bio": "Senior engineer at Granify.com", +// "site": "http://flickfaver.com", +// "avatar": "https://d1ts43dypk8bqh.cloudfront.net/v1/avatars/d50887ca-a6ce-4e59-b89f-14f0b5d03b03", +// "employment": { +// "name": "www.latera.ru", +// "title": "Software Engineer", +// "domain": "gmail.com" +// }, +// "facebook": { +// "handle": "leonid.bugaev" +// }, +// "github": { +// "handle": "buger", +// "id": 14009, +// "avatar": "https://avatars.githubusercontent.com/u/14009?v=3", +// "company": "Granify", +// "blog": "http://leonsbox.com", +// "followers": 95, +// "following": 10 +// }, +// "twitter": { +// "handle": "flickfaver", +// "id": 77004410, +// "bio": null, +// "followers": 2, +// "following": 1, +// "statuses": 5, +// "favorites": 0, +// "location": "", +// "site": "http://flickfaver.com", +// "avatar": null +// }, +// "linkedin": { +// "handle": "in/leonidbugaev" +// }, +// "googleplus": { +// "handle": null +// }, +// "angellist": { +// "handle": "leonid-bugaev", +// "id": 61541, +// "bio": "Senior engineer at Granify.com", +// "blog": "http://buger.github.com", +// "site": "http://buger.github.com", +// "followers": 41, +// "avatar": "https://d1qb2nb5cznatu.cloudfront.net/users/61541-medium_jpg?1405474390" +// }, +// "klout": { +// "handle": null, +// "score": null +// }, +// "foursquare": { +// "handle": null +// }, +// "aboutme": { +// "handle": "leonid.bugaev", +// "bio": null, +// "avatar": null +// }, +// "gravatar": { +// "handle": "buger", +// "urls": [ + +// ], +// "avatar": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510", +// "avatars": [ +// { +// "url": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510", +// "type": "thumbnail" +// } +// ] +// }, +// "fuzzy": false +// }, +// "company": null +// }`) + +