diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.net8.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.net8.cs
index 08f72de280193c..656164278345c3 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.net8.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.net8.cs
@@ -21,5 +21,20 @@ internal static partial class JsonReaderHelper
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static int IndexOfQuoteOrAnyControlOrBackSlash(this ReadOnlySpan<byte> span) =>
             span.IndexOfAny(s_controlQuoteBackslash);
+
+        /// <summary>JSON insignificant whitespace: space (0x20), tab (0x09), CR (0x0D), and LF (0x0A).</summary>
+        /// <remarks>https://tools.ietf.org/html/rfc8259#section-2</remarks>
+        private static readonly SearchValues<byte> s_whiteSpace = SearchValues.Create(" \t\r\n"u8);
+
+        /// <summary>
+        /// Returns the index of the first byte that is not JSON insignificant whitespace,
+        /// or the length of the span if every byte is whitespace.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int IndexOfFirstNonWhiteSpace(this ReadOnlySpan<byte> span)
+        {
+            int index = span.IndexOfAnyExcept(s_whiteSpace);
+            return index < 0 ? span.Length : index;
+        }
     }
 }
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Reader/Utf8JsonReader.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Reader/Utf8JsonReader.cs
index dd1cb741873c09..7a5e1cee2e7b65 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Reader/Utf8JsonReader.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Reader/Utf8JsonReader.cs
@@ -1008,6 +1008,30 @@ private void SkipWhiteSpace()
         {
             // Create local copy to avoid bounds checks.
             ReadOnlySpan<byte> localBuffer = _buffer;
+#if NET
+            // Vectorized scan to the first non-whitespace byte. The SearchValues-based
+            // IndexOfAnyExcept already handles short and long runs efficiently, so there is no
+            // need to special-case small inputs with a scalar pre-scan.
+            ReadOnlySpan<byte> remaining = localBuffer.Slice(_consumed);
+            int idx = remaining.IndexOfFirstNonWhiteSpace();
+            if (idx > 0)
+            {
+                // Reproduce the scalar loop's line/byte-position bookkeeping for the skipped run.
+                (int newLines, int lastLineFeedIndex) = JsonReaderHelper.CountNewLines(remaining.Slice(0, idx));
+                _lineNumber += newLines;
+                if (lastLineFeedIndex >= 0)
+                {
+                    // Byte positions on the current line start after the last line feed character.
+                    _bytePositionInLine = idx - lastLineFeedIndex - 1;
+                }
+                else
+                {
+                    _bytePositionInLine += idx;
+                }
+
+                _consumed += idx;
+            }
+#else
             for (; _consumed < localBuffer.Length; _consumed++)
             {
                 byte val = localBuffer[_consumed];
@@ -1031,6 +1055,7 @@ not JsonConstants.LineFeed and
                     _bytePositionInLine++;
                 }
             }
+#endif
         }
 
         /// <summary>
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonReaderTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonReaderTests.cs
index 7d48fa855c1903..1cfb1ea78f933a 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonReaderTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonReaderTests.cs
@@ -2152,6 +2152,85 @@ public static void PositionInCodeUnits(string jsonString, int expectedlineNumber
             }
         }
 
+        [Fact]
+        public static void ReadLongWhitespaceAndDigitRuns()
+        {
+            // Long whitespace runs exercise the vectorized whitespace-skipping path in
+            // Utf8JsonReader, alongside long integer/fraction digit runs, to lock in correct
+            // tokenization and number parsing for large runs.
+            string jsonString =
+                "[" + new string(' ', 60) + "1234567890123456789," +
+                "\n\n" + new string(' ', 40) + "-1.25e3," +
+                new string('\t', 33) + "42," +
+                new string(' ', 48) + "12345678901234567890123456789012345678" +
+                "\n]";
+            byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonString);
+
+            var json = new Utf8JsonReader(dataUtf8, isFinalBlock: true, state: default);
+
+            Assert.True(json.Read());
+            Assert.Equal(JsonTokenType.StartArray, json.TokenType);
+
+            Assert.True(json.Read());
+            Assert.Equal(JsonTokenType.Number, json.TokenType);
+            Assert.Equal(1234567890123456789L, json.GetInt64());
+
+            Assert.True(json.Read());
+            Assert.Equal(JsonTokenType.Number, json.TokenType);
+            Assert.Equal(-1250.0, json.GetDouble());
+
+            Assert.True(json.Read());
+            Assert.Equal(JsonTokenType.Number, json.TokenType);
+            Assert.Equal(42, json.GetInt32());
+
+            Assert.True(json.Read());
+            Assert.Equal(JsonTokenType.Number, json.TokenType);
+            Assert.Equal(38, json.ValueSpan.Length);
+
+            Assert.True(json.Read());
+            Assert.Equal(JsonTokenType.EndArray, json.TokenType);
+
+            Assert.False(json.Read());
+            Assert.Equal(dataUtf8.Length, json.BytesConsumed);
+        }
+
+        // Each row: the whitespace run inserted between '[' and '@', then the expected LineNumber and BytePositionInLine.
+        public static IEnumerable<object[]> WhitespaceBeforeInvalidTokenData()
+        {
+            yield return new object[] { "\n\n\n" + new string(' ', 3), 3, 3 };
+            yield return new object[] { "\r\n\t\t", 1, 2 };
+            yield return new object[] { new string(' ', 100), 0, 101 };
+            yield return new object[] { new string('\n', 50) + new string(' ', 30), 50, 30 };
+            yield return new object[] { " \t \t \r\n", 1, 0 };
+        }
+
+        [Theory]
+        [MemberData(nameof(WhitespaceBeforeInvalidTokenData))]
+        public static void WhitespaceRunBeforeInvalidToken_ReportsLineAndBytePosition(string whitespace, int expectedLineNumber, int expectedBytePosition)
+        {
+            byte[] dataUtf8 = Encoding.UTF8.GetBytes("[" + whitespace + "@]");
+
+            foreach (JsonCommentHandling commentHandling in Enum.GetValues(typeof(JsonCommentHandling)))
+            {
+                var state = new JsonReaderState(new JsonReaderOptions { CommentHandling = commentHandling });
+                var json = new Utf8JsonReader(dataUtf8, isFinalBlock: true, state);
+
+                Assert.True(json.Read());
+                Assert.Equal(JsonTokenType.StartArray, json.TokenType);
+
+                try
+                {
+                    json.Read();
+                    Assert.Fail("Expected JsonException was not thrown.");
+                }
+                catch (JsonException ex)
+                {
+                    Assert.Equal(expectedLineNumber, ex.LineNumber);
+                    Assert.Equal(expectedBytePosition, ex.BytePositionInLine);
+                }
+            }
+        }
+
         [Theory]
         [MemberData(nameof(InvalidJsonStrings))]
         public static void InvalidJson(string jsonString, int expectedlineNumber, int expectedBytePosition, int maxDepth = 64)