@@ -23,19 +23,6 @@ internal static unsafe partial class Utf8Utility
2323 /// </remarks>
2424 public static byte * GetPointerToFirstInvalidByte ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
2525 {
26- if ( AdvSimd . Arm64 . IsSupported )
27- {
28- return GetPointerToFirstInvalidByteArm64 ( pInputBuffer , inputLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
29- }
30- if ( Vector512 . IsHardwareAccelerated && Avx512Vbmi . IsSupported && Popcnt . X64 . IsSupported )
31- {
32- return GetPointerToFirstInvalidByteAvx512 ( pInputBuffer , inputLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
33- }
34- if ( Avx2 . IsSupported && Popcnt . X64 . IsSupported )
35- {
36- return GetPointerToFirstInvalidByteAvx2 ( pInputBuffer , inputLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
37- }
38-
3926 Debug . Assert ( inputLength >= 0 , "Input length must not be negative." ) ;
4027 Debug . Assert ( pInputBuffer != null || inputLength == 0 , "Input length must be zero if input buffer pointer is null." ) ;
4128
@@ -54,12 +41,39 @@ internal static unsafe partial class Utf8Utility
5441 return pInputBuffer ;
5542 }
5643
44+ if ( AdvSimd . Arm64 . IsSupported )
45+ {
46+ return GetPointerToFirstInvalidByteArm64 ( pInputBuffer , inputLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
47+ }
48+ if ( Vector512 . IsHardwareAccelerated && Avx512Vbmi . IsSupported && Popcnt . X64 . IsSupported )
49+ {
50+ return GetPointerToFirstInvalidByteAvx512 ( pInputBuffer , inputLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
51+ }
52+ if ( Avx2 . IsSupported && Popcnt . X64 . IsSupported )
53+ {
54+ return GetPointerToFirstInvalidByteAvx2 ( pInputBuffer , inputLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
55+ }
56+ return GetPointerToFirstInvalidByte_Default ( pInputBuffer , inputLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
57+ }
58+
59+ // Returns &pInputBuffer[inputLength] if the input buffer is valid.
60+ /// <summary>
61+ /// Given an input buffer <paramref name="pInputBuffer"/> of byte length <paramref name="inputLength"/>,
62+ /// returns a pointer to where the first invalid data appears in <paramref name="pInputBuffer"/>.
63+ /// </summary>
64+ /// <remarks>
65+ /// Returns a pointer to the end of <paramref name="pInputBuffer"/> if the buffer is well-formed.
66+ /// </remarks>
67+ private static byte * GetPointerToFirstInvalidByte_Default ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
68+ {
69+ Debug . Assert ( inputLength >= 0 , "Input length must not be negative." ) ;
70+ Debug . Assert ( pInputBuffer != null || inputLength == 0 , "Input length must be zero if input buffer pointer is null." ) ;
71+
5772#if DEBUG
5873 // Keep these around for final validation at the end of the method.
5974 byte * pOriginalInputBuffer = pInputBuffer ;
6075 int originalInputLength = inputLength ;
6176#endif
62-
6377 // Enregistered locals that we'll eventually out to our caller.
6478
6579 int tempUtf16CodeUnitCountAdjustment = 0 ;
@@ -792,18 +806,19 @@ private static ulong GetNonAsciiBytes(Vector128<byte> value, Vector128<byte> bit
792806 {
793807 // We skip any ASCII characters at the start of the buffer
794808 int asciirun = 0 ;
795- for ( ; asciirun + 64 <= inputLength ; asciirun += 64 )
796- {
797- Vector128 < byte > block1 = Vector128 . Load ( pInputBuffer + asciirun ) ;
798- Vector128 < byte > block2 = Vector128 . Load ( pInputBuffer + asciirun + 16 ) ;
799- Vector128 < byte > block3 = Vector128 . Load ( pInputBuffer + asciirun + 32 ) ;
800- Vector128 < byte > block4 = Vector128 . Load ( pInputBuffer + asciirun + 48 ) ;
801- Vector128 < byte > or = ( block1 | block2 ) | ( block3 | block4 ) ;
802- if ( AdvSimd . Arm64 . MaxAcross ( or ) . ToScalar ( ) > 127 )
803- {
804- break ;
805- }
806- }
809+ //for (; asciirun + 64 <= inputLength; asciirun += 64)
810+ //{
811+ // Vector128<byte> block1 = Vector128.Load(pInputBuffer + asciirun);
812+ // Vector128<byte> block2 = Vector128.Load(pInputBuffer + asciirun + 16);
813+ // Vector128<byte> block3 = Vector128.Load(pInputBuffer + asciirun + 32);
814+ // Vector128<byte> block4 = Vector128.Load(pInputBuffer + asciirun + 48);
815+ // Vector128<byte> or = (block1 | block2) | (block3 | block4);
816+ // if (AdvSimd.Arm64.MaxAcross(or).ToScalar() > 127)
817+ // {
818+ // break;
819+ // }
820+ //}
821+ // NOTE: the ASCII-skip loop above is commented out; the input's first byte is already non-ASCII here, so asciirun stays 0.
807822 processedLength = asciirun ;
808823
809824 if ( processedLength + 32 < inputLength )
@@ -981,9 +996,10 @@ private static ulong GetNonAsciiBytes(Vector128<byte> value, Vector128<byte> bit
981996 return pInputBuffer + inputLength ;
982997 }
983998 }
984- return GetPointerToFirstInvalidByteScalar ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
999+ return GetPointerToFirstInvalidByte_Default ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
9851000 }
9861001
1002+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
9871003 private static void RemoveCounters ( byte * start , byte * end , ref int n4 , ref int contbytes )
9881004 {
9891005 for ( byte * p = start ; p < end ; p ++ )
@@ -999,6 +1015,7 @@ private static void RemoveCounters(byte* start, byte* end, ref int n4, ref int c
9991015 }
10001016 }
10011017
1018+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
10021019 private static void AddCounters ( byte * start , byte * end , ref int n4 , ref int contbytes )
10031020 {
10041021 for ( byte * p = start ; p < end ; p ++ )
@@ -1138,6 +1155,7 @@ private static void AddCounters(byte* start, byte* end, ref int n4, ref int cont
11381155 return buf + len ; // no error
11391156 }
11401157
1158+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
11411159 private static ( int utfadjust , int scalaradjust ) CalculateN2N3FinalSimdAdjustments ( int n4 , int contbytes )
11421160 {
11431161 int n3 = - 2 * n4 + 2 * contbytes ;
@@ -1147,145 +1165,6 @@ private static (int utfadjust, int scalaradjust) CalculateN2N3FinalSimdAdjustmen
11471165 return ( utfadjust , scalaradjust ) ;
11481166 }
11491167
/// <summary>
/// Scalar UTF-8 validator: walks <paramref name="pInputBuffer"/> one sequence at a time, skipping ASCII bytes and
/// checking each 2-, 3- and 4-byte sequence for truncation, bad continuation bytes and out-of-range code points.
/// </summary>
/// <remarks>
/// Returns a pointer to the lead byte of the first rejected sequence, or <c>pInputBuffer + inputLength</c> when
/// every byte was accepted. On every exit path both out parameters receive the totals accumulated for the
/// sequences accepted so far: the UTF-16 adjustment drops by 1 per 2-byte sequence and by 2 per 3- or 4-byte
/// sequence; the scalar adjustment drops by 1 per 4-byte sequence.
/// </remarks>
1150- private static byte * GetPointerToFirstInvalidByteScalar ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
1151- {
1152- int TempUtf16CodeUnitCountAdjustment = 0 ;
1153- int TempScalarCountAdjustment = 0 ;
1154-
1155- int pos = 0 ;
1156- int nextPos ;
1157- uint codePoint = 0 ;
1158-
1159- while ( pos < inputLength )
1160- {
1161-
1162- byte firstByte = pInputBuffer [ pos ] ;
1163- while ( firstByte < 0b10000000 ) // Fast-forward over ASCII bytes.
1164- {
1165- if ( ++ pos == inputLength ) // Reached the end while still in ASCII: the whole input is accepted.
1166- {
1167-
1168- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1169- scalarCountAdjustment = TempScalarCountAdjustment ;
1170- return pInputBuffer + inputLength ;
1171- }
1172- firstByte = pInputBuffer [ pos ] ;
1173- }
1174-
1175- if ( ( firstByte & 0b11100000 ) == 0b11000000 ) // 110xxxxx: lead byte of a 2-byte sequence
1176- {
1177- nextPos = pos + 2 ;
1178- if ( nextPos > inputLength )
1179- {
1180- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1181- scalarCountAdjustment = TempScalarCountAdjustment ;
1182- return pInputBuffer + pos ;
1183- } // Too short
1184- if ( ( pInputBuffer [ pos + 1 ] & 0b11000000 ) != 0b10000000 )
1185- {
1186- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1187- scalarCountAdjustment = TempScalarCountAdjustment ;
1188- return pInputBuffer + pos ;
1189- } // Second byte is not a continuation byte (10xxxxxx)
1190- // range check
1191- codePoint = ( uint ) ( firstByte & 0b00011111 ) << 6 | ( uint ) ( pInputBuffer [ pos + 1 ] & 0b00111111 ) ;
1192- if ( ( codePoint < 0x80 ) || ( 0x7ff < codePoint ) )
1193- {
1194- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1195- scalarCountAdjustment = TempScalarCountAdjustment ;
1196- return pInputBuffer + pos ;
1197- } // Overlong
1198- TempUtf16CodeUnitCountAdjustment -= 1 ;
1199- }
1200- else if ( ( firstByte & 0b11110000 ) == 0b11100000 ) // 1110xxxx: lead byte of a 3-byte sequence
1201- {
1202- nextPos = pos + 3 ;
1203- if ( nextPos > inputLength )
1204- {
1205-
1206- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1207- scalarCountAdjustment = TempScalarCountAdjustment ;
1208- return pInputBuffer + pos ;
1209- } // Too short
1210- // range check
1211- codePoint = ( uint ) ( firstByte & 0b00001111 ) << 12 |
1212- ( uint ) ( pInputBuffer [ pos + 1 ] & 0b00111111 ) << 6 |
1213- ( uint ) ( pInputBuffer [ pos + 2 ] & 0b00111111 ) ;
1214- // Either overlong or too large:
1215- if ( ( codePoint < 0x800 ) || ( 0xffff < codePoint ) ||
1216- ( 0xd7ff < codePoint && codePoint < 0xe000 ) ) // ... or inside the surrogate range U+D800..U+DFFF
1217- {
1218- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1219- scalarCountAdjustment = TempScalarCountAdjustment ;
1220- return pInputBuffer + pos ;
1221- }
1222- if ( ( pInputBuffer [ pos + 1 ] & 0b11000000 ) != 0b10000000 )
1223- {
1224- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1225- scalarCountAdjustment = TempScalarCountAdjustment ;
1226- return pInputBuffer + pos ;
1227- } // Second byte is not a continuation byte (10xxxxxx)
1228- if ( ( pInputBuffer [ pos + 2 ] & 0b11000000 ) != 0b10000000 )
1229- {
1230- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1231- scalarCountAdjustment = TempScalarCountAdjustment ;
1232- return pInputBuffer + pos ;
1233- } // Third byte is not a continuation byte (10xxxxxx)
1234- TempUtf16CodeUnitCountAdjustment -= 2 ;
1235- }
1236- else if ( ( firstByte & 0b11111000 ) == 0b11110000 ) // 11110xxx: lead byte of a 4-byte sequence
1237- {
1238- nextPos = pos + 4 ;
1239- if ( nextPos > inputLength )
1240- {
1241- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1242- scalarCountAdjustment = TempScalarCountAdjustment ; return pInputBuffer + pos ; // Too short
1243- }
1244- if ( ( pInputBuffer [ pos + 1 ] & 0b11000000 ) != 0b10000000 )
1245- {
1246- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1247- scalarCountAdjustment = TempScalarCountAdjustment ;
1248- return pInputBuffer + pos ;
1249- } // Second byte is not a continuation byte (10xxxxxx)
1250- if ( ( pInputBuffer [ pos + 2 ] & 0b11000000 ) != 0b10000000 )
1251- {
1252- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1253- scalarCountAdjustment = TempScalarCountAdjustment ;
1254- return pInputBuffer + pos ;
1255- } // Third byte is not a continuation byte (10xxxxxx)
1256- if ( ( pInputBuffer [ pos + 3 ] & 0b11000000 ) != 0b10000000 )
1257- {
1258- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1259- scalarCountAdjustment = TempScalarCountAdjustment ;
1260- return pInputBuffer + pos ;
1261- } // Fourth byte is not a continuation byte (10xxxxxx)
1262- // range check
1263- codePoint =
1264- ( uint ) ( firstByte & 0b00000111 ) << 18 | ( uint ) ( pInputBuffer [ pos + 1 ] & 0b00111111 ) << 12 |
1265- ( uint ) ( pInputBuffer [ pos + 2 ] & 0b00111111 ) << 6 | ( uint ) ( pInputBuffer [ pos + 3 ] & 0b00111111 ) ;
1266- if ( codePoint <= 0xffff || 0x10ffff < codePoint ) // Overlong, or beyond U+10FFFF
1267- {
1268- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1269- scalarCountAdjustment = TempScalarCountAdjustment ;
1270- return pInputBuffer + pos ;
1271- }
1272- TempUtf16CodeUnitCountAdjustment -= 2 ;
1273- TempScalarCountAdjustment -= 1 ;
1274- }
1275- else
1276- {
1277- // we may have a continuation/too long error
1278- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
1279- scalarCountAdjustment = TempScalarCountAdjustment ;
1280- return pInputBuffer + pos ;
1281- }
1282- pos = nextPos ; // Sequence accepted: step past it.
1283- }
1284- utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ; // Loop ran to completion: every byte was accepted.
1285- scalarCountAdjustment = TempScalarCountAdjustment ;
1286- return pInputBuffer + inputLength ;
1287- }
1288-
12891168 [ CompExactlyDependsOn ( typeof ( Avx2 ) ) ]
12901169 [ CompExactlyDependsOn ( typeof ( Popcnt . X64 ) ) ]
12911170 private static byte * GetPointerToFirstInvalidByteAvx2 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
@@ -1301,16 +1180,17 @@ private static (int utfadjust, int scalaradjust) CalculateN2N3FinalSimdAdjustmen
13011180 {
13021181 // We skip any ASCII characters at the start of the buffer
13031182 int asciirun = 0 ;
1304- for ( ; asciirun + 64 <= inputLength ; asciirun += 64 )
1305- {
1306- Vector256 < byte > block1 = Avx . LoadVector256 ( pInputBuffer + asciirun ) ;
1307- Vector256 < byte > block2 = Avx . LoadVector256 ( pInputBuffer + asciirun + 32 ) ;
1308- Vector256 < byte > or = Avx2 . Or ( block1 , block2 ) ;
1309- if ( Avx2 . MoveMask ( or ) != 0 )
1310- {
1311- break ;
1312- }
1313- }
1183+ //for (; asciirun + 64 <= inputLength; asciirun += 64)
1184+ //{
1185+ // Vector256<byte> block1 = Avx.LoadVector256(pInputBuffer + asciirun);
1186+ // Vector256<byte> block2 = Avx.LoadVector256(pInputBuffer + asciirun + 32);
1187+ // Vector256<byte> or = Avx2.Or(block1, block2);
1188+ // if (Avx2.MoveMask(or) != 0)
1189+ // {
1190+ // break;
1191+ // }
1192+ //}
1193+ // NOTE: the ASCII-skip loop above is commented out; the input's first byte is already non-ASCII here, so asciirun stays 0.
13141194 processedLength = asciirun ;
13151195
13161196 if ( processedLength + 32 < inputLength )
@@ -1434,7 +1314,7 @@ private static (int utfadjust, int scalaradjust) CalculateN2N3FinalSimdAdjustmen
14341314 {
14351315 // We have an ASCII block, no need to process it, but
14361316 // we need to check if the previous block was incomplete.
1437- if ( ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) )
1317+ if ( ! Avx . TestZ ( prevIncomplete , prevIncomplete ) )
14381318 {
14391319 byte * invalidBytePointer = SimpleRewindAndValidateWithErrors ( 16 - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
14401320 // So the code is correct up to invalidBytePointer
@@ -1552,7 +1432,7 @@ private static (int utfadjust, int scalaradjust) CalculateN2N3FinalSimdAdjustmen
15521432 return pInputBuffer + inputLength ;
15531433 }
15541434 }
1555- return GetPointerToFirstInvalidByteScalar ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
1435+ return GetPointerToFirstInvalidByte_Default ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
15561436 }
15571437
15581438 [ CompExactlyDependsOn ( typeof ( Avx512Vbmi ) ) ]
@@ -1572,16 +1452,17 @@ private static (int utfadjust, int scalaradjust) CalculateN2N3FinalSimdAdjustmen
15721452 // We skip any ASCII characters at the start of the buffer
15731453 // We intentionally use AVX2 instead of AVX-512.
15741454 int asciirun = 0 ;
1575- for ( ; asciirun + 64 <= inputLength ; asciirun += 64 )
1576- {
1577- Vector256 < byte > block1 = Avx . LoadVector256 ( pInputBuffer + asciirun ) ;
1578- Vector256 < byte > block2 = Avx . LoadVector256 ( pInputBuffer + asciirun + 32 ) ;
1579- Vector256 < byte > or = Avx2 . Or ( block1 , block2 ) ;
1580- if ( Avx2 . MoveMask ( or ) != 0 )
1581- {
1582- break ;
1583- }
1584- }
1455+ //for (; asciirun + 64 <= inputLength; asciirun += 64)
1456+ //{
1457+ // Vector256<byte> block1 = Avx.LoadVector256(pInputBuffer + asciirun);
1458+ // Vector256<byte> block2 = Avx.LoadVector256(pInputBuffer + asciirun + 32);
1459+ // Vector256<byte> or = Avx2.Or(block1, block2);
1460+ // if (Avx2.MoveMask(or) != 0)
1461+ // {
1462+ // break;
1463+ // }
1464+ //}
1465+ // NOTE: the ASCII-skip loop above is commented out; the input's first byte is already non-ASCII here, so asciirun stays 0.
15851466 processedLength = asciirun ;
15861467
15871468 if ( processedLength + 64 < inputLength )
@@ -1880,7 +1761,7 @@ private static (int utfadjust, int scalaradjust) CalculateN2N3FinalSimdAdjustmen
18801761 return pInputBuffer + inputLength ;
18811762 }
18821763 }
1883- return GetPointerToFirstInvalidByteScalar ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
1764+ return GetPointerToFirstInvalidByte_Default ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
18841765 }
18851766 }
18861767}
0 commit comments