Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,20 @@ internal static partial class JsonReaderHelper
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int IndexOfQuoteOrAnyControlOrBackSlash(this ReadOnlySpan<byte> span)
{
    // Single search over the precomputed s_controlQuoteBackslash set. The result is the
    // index of the first byte in the span that belongs to that set, or -1 when the span
    // contains none of them.
    return span.IndexOfAny(s_controlQuoteBackslash);
}

/// <summary>
/// The four bytes JSON treats as insignificant whitespace:
/// space (0x20), horizontal tab (0x09), carriage return (0x0D), and line feed (0x0A).
/// </summary>
/// <remarks>https://tools.ietf.org/html/rfc8259#section-2</remarks>
private static readonly SearchValues<byte> s_whiteSpace = SearchValues.Create(" \t\r\n"u8);

/// <summary>
/// Finds where the leading run of JSON insignificant whitespace in <paramref name="span"/> ends.
/// </summary>
/// <param name="span">The UTF-8 bytes to scan from the start.</param>
/// <returns>
/// The index of the first byte that is not a space, tab, carriage return, or line feed;
/// <c>span.Length</c> when the span holds nothing but such bytes (an empty span yields 0).
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int IndexOfFirstNonWhiteSpace(this ReadOnlySpan<byte> span)
{
    int firstNonWhiteSpace = span.IndexOfAnyExcept(s_whiteSpace);
    if (firstNonWhiteSpace >= 0)
    {
        return firstNonWhiteSpace;
    }

    // A negative result means every byte was whitespace; report the span length so the
    // caller can treat the value as "number of bytes to skip" without a separate check.
    return span.Length;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,30 @@ private void SkipWhiteSpace()
{
// Create local copy to avoid bounds checks.
ReadOnlySpan<byte> localBuffer = _buffer;
#if NET
// Vectorized scan to the first non-whitespace byte. The SearchValues-based
// IndexOfAnyExcept already handles short and long runs efficiently, so there is no
// need to special-case small inputs with a scalar pre-scan.
ReadOnlySpan<byte> remaining = localBuffer.Slice(_consumed);
int idx = remaining.IndexOfFirstNonWhiteSpace();
if (idx > 0)
{
// Reproduce the scalar loop's line/byte-position bookkeeping for the skipped run.
(int newLines, int lastLineFeedIndex) = JsonReaderHelper.CountNewLines(remaining.Slice(0, idx));
_lineNumber += newLines;
if (lastLineFeedIndex >= 0)
{
// Byte positions on the current line start after the last line feed character.
_bytePositionInLine = idx - lastLineFeedIndex - 1;
}
else
{
_bytePositionInLine += idx;
}

_consumed += idx;
}
#else
for (; _consumed < localBuffer.Length; _consumed++)
{
byte val = localBuffer[_consumed];
Expand All @@ -1031,6 +1055,7 @@ not JsonConstants.LineFeed and
_bytePositionInLine++;
}
}
#endif
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2152,6 +2152,84 @@ public static void PositionInCodeUnits(string jsonString, int expectedlineNumber
}
}

[Fact]
public static void ReadLongWhitespaceAndDigitRuns()
{
    // The payload interleaves long whitespace runs (spaces, tabs, line feeds) with long
    // integer and fraction digit runs so that both the whitespace-skipping path and the
    // number parsing of Utf8JsonReader are exercised on inputs well beyond a few bytes.
    string[] fragments =
    {
        "[", new string(' ', 60), "1234567890123456789,",
        "\n\n", new string(' ', 40), "-1.25e3,",
        new string('\t', 33), "42,",
        new string(' ', 48), "12345678901234567890123456789012345678",
        "\n]",
    };
    byte[] payload = Encoding.UTF8.GetBytes(string.Concat(fragments));

    var reader = new Utf8JsonReader(payload, isFinalBlock: true, state: default);

    ReadExpecting(ref reader, JsonTokenType.StartArray);

    ReadExpecting(ref reader, JsonTokenType.Number);
    Assert.Equal(1234567890123456789L, reader.GetInt64());

    ReadExpecting(ref reader, JsonTokenType.Number);
    Assert.Equal(-1250.0, reader.GetDouble());

    ReadExpecting(ref reader, JsonTokenType.Number);
    Assert.Equal(42, reader.GetInt32());

    // The 38-digit literal is only checked by its raw length; it is not converted to a
    // numeric type here.
    ReadExpecting(ref reader, JsonTokenType.Number);
    Assert.Equal(38, reader.ValueSpan.Length);

    ReadExpecting(ref reader, JsonTokenType.EndArray);

    // Nothing follows the closing bracket, and the whole buffer must have been consumed.
    Assert.False(reader.Read());
    Assert.Equal(payload.Length, reader.BytesConsumed);

    // Advances the reader by one token and verifies the token's type.
    static void ReadExpecting(ref Utf8JsonReader target, JsonTokenType expectedType)
    {
        Assert.True(target.Read());
        Assert.Equal(expectedType, target.TokenType);
    }
}

/// <summary>
/// Rows for <c>WhitespaceRunBeforeInvalidToken_ReportsLineAndBytePosition</c>: a whitespace run,
/// the line number expected in the resulting exception, and the expected byte position in that line.
/// </summary>
/// <remarks>
/// The consuming test wraps each run as <c>"[" + whitespace + "@]"</c>. The expected line number is
/// the count of line feeds in the run. The expected byte position is the number of bytes after the
/// last line feed, or, when the run has no line feed, one (for the leading '[') plus the run length.
/// </remarks>
public static IEnumerable<object[]> WhitespaceBeforeInvalidTokenData()
{
    // Three line feeds followed by three spaces. The trailing spaces are built with
    // new string(...) so the expected byte position (3) does not depend on how many
    // literal spaces survive inside a string literal.
    yield return new object[] { "\n\n\n" + new string(' ', 3), 3, 3 };

    // Carriage return does not start a new line; only the line feed does.
    yield return new object[] { "\r\n\t\t", 1, 2 };

    // No line feed at all: the position keeps accumulating on line 0 after the '['.
    yield return new object[] { new string(' ', 100), 0, 101 };

    // Long runs of both line feeds and spaces.
    yield return new object[] { new string('\n', 50) + new string(' ', 30), 50, 30 };

    // The run ends on a line feed, so the invalid token sits at position 0 of the new line.
    yield return new object[] { " \t \t \r\n", 1, 0 };
}

[Theory]
[MemberData(nameof(WhitespaceBeforeInvalidTokenData))]
public static void WhitespaceRunBeforeInvalidToken_ReportsLineAndBytePosition(string whitespace, int expectedLineNumber, int expectedBytePosition)
{
    // '@' cannot start a JSON value, so the second Read() is expected to throw after the
    // reader has moved past the whitespace run that follows the opening bracket.
    string document = "[" + whitespace + "@]";
    byte[] payload = Encoding.UTF8.GetBytes(document);

    // Repeat the check for every comment-handling mode defined by the enum.
    foreach (JsonCommentHandling mode in Enum.GetValues(typeof(JsonCommentHandling)))
    {
        var options = new JsonReaderOptions { CommentHandling = mode };
        var reader = new Utf8JsonReader(payload, isFinalBlock: true, new JsonReaderState(options));

        Assert.True(reader.Read());
        Assert.Equal(JsonTokenType.StartArray, reader.TokenType);

        bool threw = false;
        try
        {
            reader.Read();
        }
        catch (JsonException failure)
        {
            threw = true;

            // The position carried by the exception must match the expected bookkeeping.
            Assert.Equal(expectedLineNumber, failure.LineNumber);
            Assert.Equal(expectedBytePosition, failure.BytePositionInLine);
        }

        if (!threw)
        {
            Assert.Fail("Expected JsonException was not thrown.");
        }
    }
}

[Theory]
[MemberData(nameof(InvalidJsonStrings))]
public static void InvalidJson(string jsonString, int expectedlineNumber, int expectedBytePosition, int maxDepth = 64)
Expand Down
Loading