From 6e9fa2781b308b66c00849c0a483c8a6ffd00bcf Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 14 May 2024 12:44:00 +0300 Subject: [PATCH] Add getting node byte length without allocations --- .gitignore | 4 +- insane.go | 193 +++++++++++++++++++++++++++++++++++++++++++++++++ insane_test.go | 30 +++++++- 3 files changed, 224 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 720907b..48a0e27 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ cpu.out -insane-json.test \ No newline at end of file +insane-json.test + +.idea/ diff --git a/insane.go b/insane.go index 325cb1e..7704f9f 100644 --- a/insane.go +++ b/insane.go @@ -704,6 +704,199 @@ popSkip: } } +type bytesCounter int + +func newBytesCounter() bytesCounter { + return 0 +} + +func (c bytesCounter) addByte(_ byte) bytesCounter { + return c + 1 +} + +func (c bytesCounter) addBytes(a []byte) bytesCounter { + return c + bytesCounter(len(a)) +} + +func (c bytesCounter) addString(s string) bytesCounter { + return c + bytesCounter(len(s)) +} + +func (c bytesCounter) addEscapedString(str string) bytesCounter { + if !shouldEscape(str) { + return c.addByte('"').addString(str).addByte('"') + } + + result := c.addByte('"') + s := toByte(str) + start := 0 + for i := 0; i < len(s); { + if b := s[i]; b < utf8.RuneSelf { + if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' { + i++ + continue + } + if start < i { + result = result.addBytes(s[start:i]) + } + switch b { + case '\\', '"': + result = result.addByte('\\').addByte(b) + case '\n': + result = result.addString("\\n") + case '\r': + result = result.addString("\\r") + case '\t': + result = result.addString("\\t") + default: + result = result.addString("\\u00") + result = result.addByte(hex[b>>4]) + result = result.addByte(hex[b&0xf]) + } + i++ + start = i + continue + } + + c, size := utf8.DecodeRune(s[i:]) + if c == utf8.RuneError && size == 1 { + if start < i { + result = result.addBytes(s[start:i]) + } + 
result = result.addString("\\ufffd") + i += size + start = i + continue + } + + if c == '\u2028' || c == '\u2029' { + if start < i { + result = result.addBytes(s[start:i]) + } + result = result.addString("\\u202") + result = result.addByte(hex[c&0xF]) + i += size + start = i + continue + } + i += size + } + if start < len(s) { + result = result.addBytes(s[start:]) + } + result = result.addByte('"') + + return result +} + +func (c bytesCounter) total() int { + return int(c) +} + +// ByteLength returns the number of bytes Encode would write for this node, without allocating an output buffer. +func (n *Node) ByteLength() int { + result := newBytesCounter() + + s := 0 + curNode := n + topNode := n + + if len(curNode.nodes) == 0 { + if curNode.bits&hellBitObject == hellBitObject { + return result.addString("{}").total() + } + if curNode.bits&hellBitArray == hellBitArray { + return result.addString("[]").total() + } + } + + goto encodeSkip +encode: + result = result.addString(",") +encodeSkip: + switch curNode.bits & hellBitTypeFilter { + case hellBitObject: + if len(curNode.nodes) == 0 { + result = result.addString("{}") + curNode = curNode.next + goto popSkip + } + topNode = curNode + result = result.addByte('{') + curNode = curNode.nodes[0] + if curNode.bits&hellBitField == hellBitField { + result = result.addEscapedString(curNode.data) + result = result.addByte(':') + } else { + result = result.addString(curNode.data) + } + curNode = curNode.next + s++ + goto encodeSkip + case hellBitArray: + if len(curNode.nodes) == 0 { + result = result.addString("[]") + curNode = curNode.next + goto popSkip + } + topNode = curNode + result = result.addByte('[') + curNode = curNode.nodes[0] + s++ + goto encodeSkip + case hellBitNumber: + result = result.addString(curNode.data) + case hellBitString: + result = result.addEscapedString(curNode.data) + case hellBitEscapedString: + result = result.addString(curNode.data) + case hellBitFalse: + result = result.addString("false") + case hellBitTrue: + result =
result.addString("true") + case hellBitNull: + result = result.addString("null") + } +pop: + curNode = curNode.next +popSkip: + if topNode.bits&hellBitArray == hellBitArray { + if curNode.bits&hellBitArrayEnd == hellBitArrayEnd { + result = result.addString("]") + curNode = topNode + topNode = topNode.parent + s-- + if s == 0 { + return result.total() + } + goto pop + } + goto encode + } else if topNode.bits&hellBitObject == hellBitObject { + if curNode.bits&hellBitEnd == hellBitEnd { + result = result.addString("}") + curNode = topNode + topNode = topNode.parent + s-- + if s == 0 { + return result.total() + } + goto pop + } + result = result.addString(",") + if curNode.bits&hellBitField == hellBitField { + result = result.addEscapedString(curNode.data) + result = result.addByte(':') + } else { + result = result.addString(curNode.data) + } + curNode = curNode.next + goto encodeSkip + } else { + return result.total() + } +} + // Dig legendary insane dig function func (n *Node) Dig(path ...string) *Node { if n == nil { diff --git a/insane_test.go b/insane_test.go index 6a19a32..69707a5 100644 --- a/insane_test.go +++ b/insane_test.go @@ -67,6 +67,8 @@ func TestDecodeAdditional(t *testing.T) { assert.NoError(t, err, "error while decoding") assert.Equal(t, jsonA, root.EncodeToString(), "wrong first node") + assert.Equal(t, len(jsonA), root.ByteLength(), "wrong byte length") + assert.Equal(t, 1, node.Dig("1").AsInt(), "wrong node value") } @@ -204,7 +206,6 @@ func TestDecodeErr(t *testing.T) { {json: `falsenull`, err: ErrUnexpectedJSONEnding}, {json: `null:`, err: ErrUnexpectedJSONEnding}, - // ok {json: `0`, err: nil}, {json: `1.0`, err: nil}, @@ -241,6 +242,7 @@ func TestEncode(t *testing.T) { assert.NotNil(t, root, "node shouldn't be nil") assert.Equal(t, json, root.EncodeToString(), "wrong encoding") + assert.Equal(t, len(json), root.ByteLength(), "wrong byte length") } func TestString(t *testing.T) { @@ -256,6 +258,7 @@ func TestString(t *testing.T) { 
assert.Equal(t, "shit", root.Dig("1").AsString(), "wrong node value") assert.Equal(t, json, root.EncodeToString(), "wrong encoding") + assert.Equal(t, len(json), root.ByteLength(), "wrong byte length") } func TestField(t *testing.T) { @@ -269,6 +272,7 @@ func TestField(t *testing.T) { assert.Equal(t, "shit", root.Dig(`hello \ " op \ " op op`).AsString(), "wrong node value") assert.Equal(t, json, root.EncodeToString(), "wrong encoding") + assert.Equal(t, len(json), root.ByteLength(), "wrong byte length") } func TestInsane(t *testing.T) { @@ -349,6 +353,7 @@ func TestAddField(t *testing.T) { assert.True(t, root.Dig(field).IsNull(), "wrong node type") } assert.Equal(t, test.result, root.EncodeToString(), "wrong encoding") + assert.Equal(t, len(test.result), root.ByteLength(), "wrong byte length") Release(root) } } @@ -374,9 +379,10 @@ func TestAddElement(t *testing.T) { for index := 0; index < test.count; index++ { root.AddElement() l := len(root.AsArray()) - assert.True(t, root.Dig(strconv.Itoa(l - 1)).IsNull(), "wrong node type") + assert.True(t, root.Dig(strconv.Itoa(l-1)).IsNull(), "wrong node type") } assert.Equal(t, test.result, root.EncodeToString(), "wrong encoding") + assert.Equal(t, len(test.result), root.ByteLength(), "wrong byte length") Release(root) } } @@ -408,6 +414,7 @@ func TestInsertElement(t *testing.T) { assert.True(t, root.Dig(strconv.Itoa(test.pos2)).IsNull(), "wrong node type") assert.Equal(t, test.result, root.EncodeToString(), "wrong encoding") + assert.Equal(t, len(test.result), root.ByteLength(), "wrong byte length") Release(root) } } @@ -477,6 +484,7 @@ func TestArraySuicide(t *testing.T) { } assert.Equal(t, 0, len(root.AsArray()), "array should be empty") assert.Equal(t, `[]`, root.EncodeToString(), "array should be empty") + assert.Equal(t, len(`[]`), root.ByteLength(), "wrong byte length") Release(root) root, err = DecodeString(json) @@ -488,6 +496,7 @@ func TestArraySuicide(t *testing.T) { assert.Equal(t, 0, len(root.AsArray()), "array 
should be empty") assert.Equal(t, `[]`, root.EncodeToString(), "array should be empty") + assert.Equal(t, len(`[]`), root.ByteLength(), "wrong byte length") Release(root) } } @@ -514,6 +523,7 @@ func TestObjectSuicide(t *testing.T) { } assert.Equal(t, 0, len(root.AsArray()), "array should be empty") assert.Equal(t, `{}`, root.EncodeToString(), "array should be empty") + assert.Equal(t, len(`{}`), root.ByteLength(), "wrong byte length") Release(root) root, err = DecodeString(json) @@ -528,6 +538,7 @@ func TestObjectSuicide(t *testing.T) { } assert.Equal(t, 0, len(root.AsArray()), "array should be empty") assert.Equal(t, `{}`, root.EncodeToString(), "array should be empty") + assert.Equal(t, len(`{}`), root.ByteLength(), "wrong byte length") Release(root) } } @@ -548,6 +559,7 @@ func TestMergeWith(t *testing.T) { root.MergeWith(node) assert.Equal(t, `{"1":"1","2":"2","3":"3","4":"4"}`, root.EncodeToString(), "wrong first node") + assert.Equal(t, len(`{"1":"1","2":"2","3":"3","4":"4"}`), root.ByteLength(), "wrong byte length") } func TestMergeWithComplex(t *testing.T) { @@ -566,6 +578,7 @@ func TestMergeWithComplex(t *testing.T) { root.MergeWith(node) assert.Equal(t, `{"1":1,"2":{"2":"2"}}`, root.EncodeToString(), "wrong first node") + assert.Equal(t, len(`{"1":1,"2":{"2":"2"}}`), root.ByteLength(), "wrong byte length") } func TestMutateToJSON(t *testing.T) { @@ -650,6 +663,7 @@ func TestMutateToJSON(t *testing.T) { } assert.Equal(t, test.result, root.EncodeToString(), "wrong result json") + assert.Equal(t, len(test.result), root.ByteLength(), "wrong byte length") Release(root) } @@ -723,6 +737,7 @@ func TestMutateToObject(t *testing.T) { o.Dig("test").Suicide() assert.Equal(t, test.result, root.EncodeToString(), "wrong result json") + assert.Equal(t, len(test.result), root.ByteLength(), "wrong byte length") Release(root) } @@ -796,6 +811,7 @@ func TestMutateToArray(t *testing.T) { o.Dig("0").Suicide() assert.Equal(t, test.result, root.EncodeToString(), "wrong result 
json") + assert.Equal(t, len(test.result), root.ByteLength(), "wrong byte length") Release(root) } @@ -840,6 +856,7 @@ func TestMutateCollapse(t *testing.T) { } assert.Equal(t, test.result, root.EncodeToString(), "wrong result json") + assert.Equal(t, len(test.result), root.ByteLength(), "wrong byte length") Release(root) } } @@ -853,6 +870,7 @@ func TestMutateToInt(t *testing.T) { assert.Equal(t, 5, root.Dig("a").AsInt(), "wrong node value") assert.Equal(t, `{"a":5}`, root.EncodeToString(), "wrong result json") + assert.Equal(t, len(`{"a":5}`), root.ByteLength(), "wrong byte length") } func TestMutateToFloat(t *testing.T) { @@ -865,6 +883,7 @@ func TestMutateToFloat(t *testing.T) { assert.Equal(t, 6, root.Dig("a").AsInt(), "wrong node value") assert.Equal(t, `{"a":5.6}`, root.EncodeToString(), "wrong result json") + assert.Equal(t, len(`{"a":5.6}`), root.ByteLength(), "wrong byte length") } func TestMutateToString(t *testing.T) { @@ -876,6 +895,7 @@ func TestMutateToString(t *testing.T) { assert.Equal(t, "insane", root.Dig("a").AsString(), "wrong node value") assert.Equal(t, `{"a":"insane"}`, root.EncodeToString(), "wrong result json") + assert.Equal(t, len(`{"a":"insane"}`), root.ByteLength(), "wrong byte length") } func TestMutateToField(t *testing.T) { @@ -893,6 +913,7 @@ func TestMutateToField(t *testing.T) { assert.Equal(t, "", root.Dig("unique").AsString(), "wrong node value for %s", json) assert.Equal(t, "some_val", root.Dig("mutated").AsString(), "wrong node value for %s", json) assert.Equal(t, strings.ReplaceAll(json, "unique", "mutated"), root.EncodeToString(), "wrong result json for %s", json) + assert.Equal(t, len(strings.ReplaceAll(json, "unique", "mutated")), root.ByteLength(), "wrong byte length for %s", json) Release(root) } @@ -907,6 +928,7 @@ func TestDigField(t *testing.T) { assert.Equal(t, "b", root.Dig("insane").AsString(), "wrong node value") assert.Equal(t, `{"insane":"b"}`, root.EncodeToString(), "wrong result json") + assert.Equal(t, 
len(`{"insane":"b"}`), root.ByteLength(), "wrong byte length") } func TestWhitespace(t *testing.T) { @@ -946,6 +968,7 @@ func TestObjectManyFieldsSuicide(t *testing.T) { } assert.Equal(t, `{}`, root.EncodeToString(), "wrong result json") + assert.Equal(t, len(`{}`), root.ByteLength(), "wrong byte length") } func TestObjectManyFieldsAddSuicide(t *testing.T) { @@ -974,6 +997,7 @@ func TestObjectManyFieldsAddSuicide(t *testing.T) { } assert.Equal(t, `{}`, root.EncodeToString(), "wrong result json") + assert.Equal(t, len(`{}`), root.ByteLength(), "wrong byte length") } func TestObjectFields(t *testing.T) { @@ -1043,6 +1067,8 @@ func TestEscapeString(t *testing.T) { for _, test := range tests { out = escapeString(out[:0], test.s) assert.Equal(t, string(strconv.AppendQuote(nil, test.s)), string(out), "wrong escaping") + size := newBytesCounter().addEscapedString(test.s).total() + assert.Equal(t, len(string(strconv.AppendQuote(nil, test.s))), size, "wrong string escaping by bytes counter") } }