From 1156c503800d1a07dc2662fd61f78110982fd479 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Tue, 28 Jan 2025 12:13:57 +0000 Subject: [PATCH 1/3] Fix IndexOf on empty strings on iOS to return -1 --- .../CompareInfo/CompareInfoTests.IndexOf.cs | 13 +++++++++--- .../System/StringTests.cs | 1 + .../pal_collation.m | 20 +++++++++++-------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs index ac39b82d74fb7c..9372bab591c4a1 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -12,10 +12,17 @@ public class CompareInfoIndexOfTests : CompareInfoTestsBase { public static IEnumerable IndexOf_TestData() { - // Empty string + // Empty string, invariant yield return new object[] { s_invariantCompare, "foo", "", 0, 3, CompareOptions.None, 0, 0 }; yield return new object[] { s_invariantCompare, "foo", "", 2, 1, CompareOptions.None, 2, 0 }; yield return new object[] { s_invariantCompare, "", "", 0, 0, CompareOptions.None, 0, 0 }; + yield return new object[] { s_invariantCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 }; + + // Empty string, non-invariant (s_germanCompare) + yield return new object[] { s_germanCompare, "foo", "", 0, 3, CompareOptions.None, 0, 0 }; + yield return new object[] { s_germanCompare, "foo", "", 2, 1, CompareOptions.None, 2, 0 }; + yield return new object[] { s_germanCompare, "", "", 0, 0, CompareOptions.None, 0, 0 }; + yield return new object[] { s_germanCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 }; // OrdinalIgnoreCase yield return new object[] { s_invariantCompare, "Hello", "l", 0, 5, CompareOptions.OrdinalIgnoreCase, 2, 1 }; @@ -166,7 +173,7 @@ public static IEnumerable IndexOf_Aesc_Ligature_TestData() { bool useNls = PlatformDetection.IsNlsGlobalization; // Searches for the ligature \u00C6 - string source1 = "Is AE or ae the same as \u00C6 or \u00E6?"; // 3 failures here + string source1 = "Is AE or ae the same as \u00C6 or \u00E6?"; yield return new object[] { s_invariantCompare, source1, "AE", 8, 18, CompareOptions.None, useNls ? 24 : -1, useNls ? 1 : 0}; yield return new object[] { s_invariantCompare, source1, "ae", 8, 18, CompareOptions.None, 9 , 2}; yield return new object[] { s_invariantCompare, source1, "\u00C6", 8, 18, CompareOptions.None, 24, 1 }; @@ -184,7 +191,7 @@ public static IEnumerable IndexOf_Aesc_Ligature_TestData() public static IEnumerable IndexOf_U_WithDiaeresis_TestData() { // Searches for the combining character sequence Latin capital letter U with diaeresis or Latin small letter u with diaeresis. - string source = "Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?"; // 7 failures here + string source = "Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?"; yield return new object[] { s_invariantCompare, source, "U\u0308", 8, 18, CompareOptions.None, 24, 1 }; yield return new object[] { s_invariantCompare, source, "u\u0308", 8, 18, CompareOptions.None, 9, 2 }; yield return new object[] { s_invariantCompare, source, "\u00DC", 8, 18, CompareOptions.None, 24, 1 }; diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs index a6e16e2fce3caa..84dac266e67129 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs @@ -210,6 +210,7 @@ public static void Contains_Char(string s, char value, bool expected) [InlineData("Hello", 'e', StringComparison.CurrentCulture, true)] [InlineData("Hello", 'E', StringComparison.CurrentCulture, false)] [InlineData("", 'H', StringComparison.CurrentCulture, false)] + [InlineData("", '\u0301', StringComparison.CurrentCulture, false)] // CurrentCultureIgnoreCase [InlineData("Hello", 'H', StringComparison.CurrentCultureIgnoreCase, true)] [InlineData("Hello", 'Z', StringComparison.CurrentCultureIgnoreCase, false)] diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m index ebe0db5c2c202a..56af941fb40033 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.m +++ b/src/native/libs/System.Globalization.Native/pal_collation.m @@ -117,6 +117,11 @@ int32_t GlobalizationNative_CompareStringNative(const uint16_t* localeName, int3 } } +/** + * Removes zero-width and other weightless characters such as U+200B (Zero Width Space), + * U+200C (Zero Width Non-Joiner), U+200D (Zero Width Joiner), U+FEFF (Zero Width No-Break Space), + * and the NUL character from the specified string. + */ static NSString* RemoveWeightlessCharacters(NSString* source) { NSError *error = nil; @@ -143,10 +148,9 @@ static int32_t IsIndexFound(int32_t fromBeginning, int32_t foundLocation, int32_ /* Function: IndexOf -Find detailed explanation how this function works in https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-hybrid-mode.md +Find detailed explanation how this function works in https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-hybrid-mode.md#string-indexing */ -Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpTarget, int32_t cwTargetLength, - const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions, int32_t fromBeginning) +Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpTarget, int32_t cwTargetLength, const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions, int32_t fromBeginning) { @autoreleasepool { @@ -158,6 +162,9 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam return result; } NSStringCompareOptions options = ConvertFromCompareOptionsToNSStringCompareOptions(comparisonOptions, true); + if (!fromBeginning) // LastIndexOf + options |= NSBackwardsSearch; + NSString *searchString = [NSString stringWithCharacters: lpTarget length: (NSUInteger)cwTargetLength]; NSString *searchStrCleaned = RemoveWeightlessCharacters(searchString); NSString *sourceString = [NSString stringWithCharacters: lpSource length: (NSUInteger)cwSourceLength]; @@ -168,7 +175,7 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam searchStrCleaned = ConvertToKatakana(searchStrCleaned); } - if (sourceStrCleaned.length == 0 || searchStrCleaned.length == 0) + if (searchStrCleaned.length == 0) { result.location = fromBeginning ? 0 : (int32_t)sourceString.length; return result; @@ -178,9 +185,6 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam NSString *searchStrPrecomposed = searchStrCleaned.precomposedStringWithCanonicalMapping; NSString *sourceStrPrecomposed = sourceStrCleaned.precomposedStringWithCanonicalMapping; - // last index - if (!fromBeginning) - options |= NSBackwardsSearch; // check if there is a possible match and return -1 if not // doesn't matter which normalization form is used here @@ -233,7 +237,7 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam result.location = (int32_t)precomposedRange.location; result.length = (int32_t)precomposedRange.length; if (!(comparisonOptions & IgnoreCase)) - return result; + return result; } // check if sourceString has decomposed form of characters and searchString has precomposed form of characters From 45d706feb9326af684f2dd64804a37ce502274b5 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Tue, 28 Jan 2025 12:45:13 +0000 Subject: [PATCH 2/3] add comments to test cases --- .../CompareInfo/CompareInfoTests.IndexOf.cs | 2 +- .../tests/System.Runtime.Tests/System/StringTests.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs index 9372bab591c4a1..f54ba88eceb84a 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -18,7 +18,7 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_invariantCompare, "", "", 0, 0, CompareOptions.None, 0, 0 }; yield return new object[] { s_invariantCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 }; - // Empty string, non-invariant (s_germanCompare) + // Empty string, using non-invariant (s_germanCompare) CompareInfo to test the ICU path yield return new object[] { s_germanCompare, "foo", "", 0, 3, CompareOptions.None, 0, 0 }; yield return new object[] { s_germanCompare, "foo", "", 2, 1, CompareOptions.None, 2, 0 }; yield return new object[] { s_germanCompare, "", "", 0, 0, CompareOptions.None, 0, 0 }; diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs index 84dac266e67129..3355955051da9e 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs @@ -210,7 +210,7 @@ public static void Contains_Char(string s, char value, bool expected) [InlineData("Hello", 'e', StringComparison.CurrentCulture, true)] [InlineData("Hello", 'E', StringComparison.CurrentCulture, false)] [InlineData("", 'H', StringComparison.CurrentCulture, false)] - [InlineData("", '\u0301', StringComparison.CurrentCulture, false)] + [InlineData("", '\u0301', StringComparison.CurrentCulture, false)] // Using non-ASCII character to test ICU path // CurrentCultureIgnoreCase [InlineData("Hello", 'H', StringComparison.CurrentCultureIgnoreCase, true)] [InlineData("Hello", 'Z', StringComparison.CurrentCultureIgnoreCase, false)] From 551ba2e56523cfdaffd3938802632dd3b86a9852 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Tue, 11 Feb 2025 09:01:02 +0100 Subject: [PATCH 3/3] disable new test for hybrid glob on browser --- .../CompareInfo/CompareInfoTests.IndexOf.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs index f54ba88eceb84a..6953eaafab818a 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -22,7 +22,10 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_germanCompare, "foo", "", 0, 3, CompareOptions.None, 0, 0 }; yield return new object[] { s_germanCompare, "foo", "", 2, 1, CompareOptions.None, 2, 0 }; yield return new object[] { s_germanCompare, "", "", 0, 0, CompareOptions.None, 0, 0 }; - yield return new object[] { s_germanCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 }; + if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + { + yield return new object[] { s_germanCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 }; + } // OrdinalIgnoreCase yield return new object[] { s_invariantCompare, "Hello", "l", 0, 5, CompareOptions.OrdinalIgnoreCase, 2, 1 };