diff --git a/src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md b/src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md index f545d810ccdfa5..681f3763fc5bf0 100644 --- a/src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md +++ b/src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md @@ -51,5 +51,6 @@ This should be done automatically by dependency-flow, so in theory there shouldn $ ./dotnet.sh run --project src/native/minipal/UnicodeDataGenerator /tmp/UnicodeData.txt > src/native/minipal/unicodedata.c ``` 5. Update the Regex casing equivalence table using the UnicodeData.txt file from the new Unicode version. You can find the instructions on how to do this [here](../../../System.Text.RegularExpressions/tools/Readme.md). -6. Finally, last step is to update the license for the Unicode data into our [Third party notices](../../../../../THIRD-PARTY-NOTICES.TXT) by copying the contents located in `https://www.unicode.org/license.html` to the section that has the Unicode license in our notices. -7. That's it, now commit all of the changed files, and send a PR into dotnet/runtime with the updates. If there were any special things you had to do that are not noted on this document, PLEASE UPDATE THESE INSTRUCTIONS to facilitate future updates. +6. Update the Regex named blocks using the Blocks.txt file from the new Unicode version. Run the GenRegexNamedBlocks tool located at `src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks` following the instructions in its [README.md](../../../System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md). +7. Finally, the last step is to update the license for the Unicode data in our [Third party notices](../../../../../THIRD-PARTY-NOTICES.TXT) by copying the contents located at `https://www.unicode.org/license.html` into the section of our notices that contains the Unicode license. +8. 
That's it, now commit all of the changed files, and send a PR into dotnet/runtime with the updates. If there were any special things you had to do that are not noted on this document, PLEASE UPDATE THESE INSTRUCTIONS to facilitate future updates. diff --git a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj index 96c59b3bc1f3ec..0322546266e830 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj +++ b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj @@ -38,6 +38,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index c666e4b99edb8a..0654ef9c001736 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -34,6 +34,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.Tables.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.Tables.cs new file mode 100644 index 00000000000000..e507df08302d50 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.Tables.cs @@ -0,0 +1,196 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This is a generated file. Do not edit directly. +// Run the GenRegexNamedBlocks tool to regenerate. 
+ +namespace System.Text.RegularExpressions +{ + internal sealed partial class RegexCharClass + { + /* + * The property table contains all the block definitions defined in the + * XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 17.0 spec (www.unicode.org), + * and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may + * not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates. + * + **/ + // Has to be sorted by the first column + private static readonly string[][] s_propTable = + [ + ["IsAlphabeticPresentationForms", "\uFB00\uFB50"], + ["IsArabic", "\u0600\u0700"], + ["IsArabicExtended-A", "\u08A0\u0900"], + ["IsArabicExtended-B", "\u0870\u08A0"], + ["IsArabicPresentationForms-A", "\uFB50\uFE00"], + ["IsArabicPresentationForms-B", "\uFE70\uFF00"], + ["IsArabicSupplement", "\u0750\u0780"], + ["IsArmenian", "\u0530\u0590"], + ["IsArrows", "\u2190\u2200"], + ["IsBalinese", "\u1B00\u1B80"], + ["IsBamum", "\uA6A0\uA700"], + ["IsBasicLatin", "\u0000\u0080"], + ["IsBatak", "\u1BC0\u1C00"], + ["IsBengali", "\u0980\u0A00"], + ["IsBlockElements", "\u2580\u25A0"], + ["IsBopomofo", "\u3100\u3130"], + ["IsBopomofoExtended", "\u31A0\u31C0"], + ["IsBoxDrawing", "\u2500\u2580"], + ["IsBraillePatterns", "\u2800\u2900"], + ["IsBuginese", "\u1A00\u1A20"], + ["IsBuhid", "\u1740\u1760"], + ["IsCJKCompatibility", "\u3300\u3400"], + ["IsCJKCompatibilityForms", "\uFE30\uFE50"], + ["IsCJKCompatibilityIdeographs", "\uF900\uFB00"], + ["IsCJKRadicalsSupplement", "\u2E80\u2F00"], + ["IsCJKStrokes", "\u31C0\u31F0"], + ["IsCJKSymbolsandPunctuation", "\u3000\u3040"], + ["IsCJKUnifiedIdeographs", "\u4E00\uA000"], + ["IsCJKUnifiedIdeographsExtensionA", "\u3400\u4DC0"], + ["IsCham", "\uAA00\uAA60"], + ["IsCherokee", "\u13A0\u1400"], + ["IsCherokeeSupplement", "\uAB70\uABC0"], + ["IsCombiningDiacriticalMarks", "\u0300\u0370"], + ["IsCombiningDiacriticalMarksExtended", 
"\u1AB0\u1B00"], + ["IsCombiningDiacriticalMarksSupplement", "\u1DC0\u1E00"], + ["IsCombiningDiacriticalMarksforSymbols", "\u20D0\u2100"], + ["IsCombiningHalfMarks", "\uFE20\uFE30"], + ["IsCombiningMarksforSymbols", "\u20D0\u2100"], + ["IsCommonIndicNumberForms", "\uA830\uA840"], + ["IsControlPictures", "\u2400\u2440"], + ["IsCoptic", "\u2C80\u2D00"], + ["IsCurrencySymbols", "\u20A0\u20D0"], + ["IsCyrillic", "\u0400\u0500"], + ["IsCyrillicExtended-A", "\u2DE0\u2E00"], + ["IsCyrillicExtended-B", "\uA640\uA6A0"], + ["IsCyrillicExtended-C", "\u1C80\u1C90"], + ["IsCyrillicSupplement", "\u0500\u0530"], + ["IsDevanagari", "\u0900\u0980"], + ["IsDevanagariExtended", "\uA8E0\uA900"], + ["IsDingbats", "\u2700\u27C0"], + ["IsEnclosedAlphanumerics", "\u2460\u2500"], + ["IsEnclosedCJKLettersandMonths", "\u3200\u3300"], + ["IsEthiopic", "\u1200\u1380"], + ["IsEthiopicExtended", "\u2D80\u2DE0"], + ["IsEthiopicExtended-A", "\uAB00\uAB30"], + ["IsEthiopicSupplement", "\u1380\u13A0"], + ["IsGeneralPunctuation", "\u2000\u2070"], + ["IsGeometricShapes", "\u25A0\u2600"], + ["IsGeorgian", "\u10A0\u1100"], + ["IsGeorgianExtended", "\u1C90\u1CC0"], + ["IsGeorgianSupplement", "\u2D00\u2D30"], + ["IsGlagolitic", "\u2C00\u2C60"], + ["IsGreek", "\u0370\u0400"], + ["IsGreekExtended", "\u1F00\u2000"], + ["IsGreekandCoptic", "\u0370\u0400"], + ["IsGujarati", "\u0A80\u0B00"], + ["IsGurmukhi", "\u0A00\u0A80"], + ["IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"], + ["IsHangulCompatibilityJamo", "\u3130\u3190"], + ["IsHangulJamo", "\u1100\u1200"], + ["IsHangulJamoExtended-A", "\uA960\uA980"], + ["IsHangulJamoExtended-B", "\uD7B0\uD800"], + ["IsHangulSyllables", "\uAC00\uD7B0"], + ["IsHanunoo", "\u1720\u1740"], + ["IsHebrew", "\u0590\u0600"], + ["IsHighPrivateUseSurrogates", "\uDB80\uDC00"], + ["IsHighSurrogates", "\uD800\uDB80"], + ["IsHiragana", "\u3040\u30A0"], + ["IsIPAExtensions", "\u0250\u02B0"], + ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"], + ["IsJavanese", "\uA980\uA9E0"], + 
["IsKanbun", "\u3190\u31A0"], + ["IsKangxiRadicals", "\u2F00\u2FE0"], + ["IsKannada", "\u0C80\u0D00"], + ["IsKatakana", "\u30A0\u3100"], + ["IsKatakanaPhoneticExtensions", "\u31F0\u3200"], + ["IsKayahLi", "\uA900\uA930"], + ["IsKhmer", "\u1780\u1800"], + ["IsKhmerSymbols", "\u19E0\u1A00"], + ["IsLao", "\u0E80\u0F00"], + ["IsLatin-1Supplement", "\u0080\u0100"], + ["IsLatinExtended-A", "\u0100\u0180"], + ["IsLatinExtended-B", "\u0180\u0250"], + ["IsLatinExtended-C", "\u2C60\u2C80"], + ["IsLatinExtended-D", "\uA720\uA800"], + ["IsLatinExtended-E", "\uAB30\uAB70"], + ["IsLatinExtendedAdditional", "\u1E00\u1F00"], + ["IsLepcha", "\u1C00\u1C50"], + ["IsLetterlikeSymbols", "\u2100\u2150"], + ["IsLimbu", "\u1900\u1950"], + ["IsLisu", "\uA4D0\uA500"], + ["IsLowSurrogates", "\uDC00\uE000"], + ["IsMalayalam", "\u0D00\u0D80"], + ["IsMandaic", "\u0840\u0860"], + ["IsMathematicalOperators", "\u2200\u2300"], + ["IsMeeteiMayek", "\uABC0\uAC00"], + ["IsMeeteiMayekExtensions", "\uAAE0\uAB00"], + ["IsMiscellaneousMathematicalSymbols-A", "\u27C0\u27F0"], + ["IsMiscellaneousMathematicalSymbols-B", "\u2980\u2A00"], + ["IsMiscellaneousSymbols", "\u2600\u2700"], + ["IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"], + ["IsMiscellaneousTechnical", "\u2300\u2400"], + ["IsModifierToneLetters", "\uA700\uA720"], + ["IsMongolian", "\u1800\u18B0"], + ["IsMyanmar", "\u1000\u10A0"], + ["IsMyanmarExtended-A", "\uAA60\uAA80"], + ["IsMyanmarExtended-B", "\uA9E0\uAA00"], + ["IsNKo", "\u07C0\u0800"], + ["IsNewTaiLue", "\u1980\u19E0"], + ["IsNumberForms", "\u2150\u2190"], + ["IsOgham", "\u1680\u16A0"], + ["IsOlChiki", "\u1C50\u1C80"], + ["IsOpticalCharacterRecognition", "\u2440\u2460"], + ["IsOriya", "\u0B00\u0B80"], + ["IsPhags-pa", "\uA840\uA880"], + ["IsPhoneticExtensions", "\u1D00\u1D80"], + ["IsPhoneticExtensionsSupplement", "\u1D80\u1DC0"], + ["IsPrivateUse", "\uE000\uF900"], + ["IsPrivateUseArea", "\uE000\uF900"], + ["IsRejang", "\uA930\uA960"], + ["IsRunic", "\u16A0\u1700"], + ["IsSamaritan", 
"\u0800\u0840"], + ["IsSaurashtra", "\uA880\uA8E0"], + ["IsSinhala", "\u0D80\u0E00"], + ["IsSmallFormVariants", "\uFE50\uFE70"], + ["IsSpacingModifierLetters", "\u02B0\u0300"], + ["IsSpecials", "\uFFF0"], + ["IsSundanese", "\u1B80\u1BC0"], + ["IsSundaneseSupplement", "\u1CC0\u1CD0"], + ["IsSuperscriptsandSubscripts", "\u2070\u20A0"], + ["IsSupplementalArrows-A", "\u27F0\u2800"], + ["IsSupplementalArrows-B", "\u2900\u2980"], + ["IsSupplementalMathematicalOperators", "\u2A00\u2B00"], + ["IsSupplementalPunctuation", "\u2E00\u2E80"], + ["IsSylotiNagri", "\uA800\uA830"], + ["IsSyriac", "\u0700\u0750"], + ["IsSyriacSupplement", "\u0860\u0870"], + ["IsTagalog", "\u1700\u1720"], + ["IsTagbanwa", "\u1760\u1780"], + ["IsTaiLe", "\u1950\u1980"], + ["IsTaiTham", "\u1A20\u1AB0"], + ["IsTaiViet", "\uAA80\uAAE0"], + ["IsTamil", "\u0B80\u0C00"], + ["IsTelugu", "\u0C00\u0C80"], + ["IsThaana", "\u0780\u07C0"], + ["IsThai", "\u0E00\u0E80"], + ["IsTibetan", "\u0F00\u1000"], + ["IsTifinagh", "\u2D30\u2D80"], + ["IsUnifiedCanadianAboriginalSyllabics", "\u1400\u1680"], + ["IsUnifiedCanadianAboriginalSyllabicsExtended", "\u18B0\u1900"], + ["IsVai", "\uA500\uA640"], + ["IsVariationSelectors", "\uFE00\uFE10"], + ["IsVedicExtensions", "\u1CD0\u1D00"], + ["IsVerticalForms", "\uFE10\uFE20"], + ["IsYiRadicals", "\uA490\uA4D0"], + ["IsYiSyllables", "\uA000\uA490"], + ["IsYijingHexagramSymbols", "\u4DC0\u4E00"], + ["_xmlC", /* Name Char */ 
"\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00B7\u00B8\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u02D0\u02D2\u0300\u0346\u0360\u0362\u0386\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0483\u0487\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0653\u0660\u066A\u0670\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06E9\u06EA\u06EE\u06F0\u06FA\u0901\u0904\u0905\u093A\u093C\u094E\u0951\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC" + +"\u09DE\u09DF\u09E4\u09E6\u09F2\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B70\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF0\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62" 
+"\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0E01\u0E2F\u0E30\u0E3B\u0E40\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0F18\u0F1A\u0F20\u0F2A\u0F35\u0F36\u0F37\u0F38\u0F39\u0F3A\u0F3E\u0F48\u0F49\u0F6A\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F96\u0F97\u0F98\u0F99\u0FAE\u0FB1\u0FB8\u0FB9\u0FBA\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00" +"\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u20D0\u20DD\u20E1\u20E2\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3005\u3006\u3007\u3008\u3021\u3030\u3031\u3036\u3041\u3095\u3099\u309B\u309D\u309F\u30A1\u30FB\u30FC\u30FF\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"], + ["_xmlD", "\u0030\u003A\u0660\u066A\u06F0\u06FA\u0966\u0970\u09E6\u09F0\u0A66\u0A70\u0AE6\u0AF0\u0B66\u0B70\u0BE7\u0BF0\u0C66\u0C70\u0CE6\u0CF0\u0D66\u0D70\u0E50\u0E5A\u0ED0\u0EDA\u0F20\u0F2A\u1040\u104A\u1369\u1372\u17E0\u17EA\u1810\u181A\uFF10\uFF1A"], + ["_xmlI", /* Start Name Char */ 
"\u003A\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u0386\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0641\u064B\u0671\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06D6\u06E5\u06E7\u0905\u093A\u093D\u093E\u0958\u0962\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09DC\u09DE\u09DF\u09E2\u09F0\u09F2\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A59\u0A5D\u0A5E\u0A5F\u0A72\u0A75\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABD\u0ABE\u0AE0\u0AE1\u0B05\u0B0D\u0B0F" + +"\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3D\u0B3E\u0B5C\u0B5E\u0B5F\u0B62\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C60\u0C62\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CDE\u0CDF\u0CE0\u0CE2\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D60\u0D62\u0E01\u0E2F\u0E30\u0E31\u0E32\u0E34\u0E40\u0E46\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EB1\u0EB2\u0EB4\u0EBD\u0EBE\u0EC0\u0EC5\u0F40\u0F48\u0F49\u0F6A\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\
u11AB\u11AC" +"\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3007\u3008\u3021\u302A\u3041\u3095\u30A1\u30FB\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"], + ["_xmlW", "\u0024\u0025\u002B\u002C\u0030\u003A\u003C\u003F\u0041\u005B\u005E\u005F\u0060\u007B\u007C\u007D\u007E\u007F\u00A2\u00AB\u00AC\u00AD\u00AE\u00B7\u00B8\u00BB\u00BC\u00BF\u00C0\u0221\u0222\u0234\u0250\u02AE\u02B0\u02EF\u0300\u0350\u0360\u0370\u0374\u0376\u037A\u037B\u0384\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03F7\u0400\u0487\u0488\u04CF\u04D0\u04F6\u04F8\u04FA\u0500\u0510\u0531\u0557\u0559\u055A\u0561\u0588\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0656\u0660\u066A\u066E\u06D4\u06D5\u06DD\u06DE\u06EE\u06F0\u06FF\u0710\u072D\u0730\u074B\u0780\u07B2\u0901\u0904\u0905\u093A\u093C\u094E\u0950\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC\u09DE\u09DF\u09E4\u09E6\u09FB\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35" 
+"\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AD0\u0AD1\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B71\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF3\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49" 
+"\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0D82\u0D84\u0D85\u0D97\u0D9A\u0DB2\u0DB3\u0DBC\u0DBD\u0DBE\u0DC0\u0DC7\u0DCA\u0DCB\u0DCF\u0DD5\u0DD6\u0DD7\u0DD8\u0DE0\u0DF2\u0DF4\u0E01\u0E3B\u0E3F\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0EDC\u0EDE\u0F00\u0F04\u0F13\u0F3A\u0F3E\u0F48\u0F49\u0F6B\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F98\u0F99\u0FBD\u0FBE\u0FCD\u0FCF\u0FD0\u1000\u1022\u1023\u1028\u1029\u102B\u102C\u1033\u1036\u103A\u1040\u104A\u1050\u105A\u10A0\u10C6\u10D0\u10F9\u1100\u115A\u115F\u11A3\u11A8\u11FA\u1200\u1207\u1208\u1247\u1248\u1249\u124A\u124E\u1250\u1257\u1258\u1259\u125A\u125E\u1260\u1287\u1288\u1289\u128A\u128E\u1290\u12AF\u12B0\u12B1\u12B2\u12B6\u12B8\u12BF\u12C0\u12C1\u12C2\u12C6\u12C8\u12CF\u12D0\u12D7\u12D8\u12EF\u12F0\u130F\u1310\u1311\u1312\u1316\u1318\u131F\u1320\u1347\u1348\u135B\u1369\u137D\u13A0" 
+"\u13F5\u1401\u166D\u166F\u1677\u1681\u169B\u16A0\u16EB\u16EE\u16F1\u1700\u170D\u170E\u1715\u1720\u1735\u1740\u1754\u1760\u176D\u176E\u1771\u1772\u1774\u1780\u17D4\u17D7\u17D8\u17DB\u17DD\u17E0\u17EA\u180B\u180E\u1810\u181A\u1820\u1878\u1880\u18AA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FC5\u1FC6\u1FD4\u1FD6\u1FDC\u1FDD\u1FF0\u1FF2\u1FF5\u1FF6\u1FFF\u2044\u2045\u2052\u2053\u2070\u2072\u2074\u207D\u207F\u208D\u20A0\u20B2\u20D0\u20EB\u2100\u213B\u213D\u214C\u2153\u2184\u2190\u2329\u232B\u23B4\u23B7\u23CF\u2400\u2427\u2440\u244B\u2460\u24FF\u2500\u2614\u2616\u2618\u2619\u267E\u2680\u268A\u2701\u2705\u2706\u270A\u270C\u2728\u2729\u274C\u274D\u274E\u274F\u2753\u2756\u2757\u2758\u275F\u2761\u2768\u2776\u2795\u2798\u27B0\u27B1\u27BF\u27D0\u27E6\u27F0\u2983\u2999\u29D8\u29DC\u29FC\u29FE\u2B00\u2E80\u2E9A\u2E9B\u2EF4\u2F00\u2FD6\u2FF0\u2FFC\u3004\u3008\u3012\u3014\u3020\u3030\u3031\u303D\u303E\u3040" +"\u3041\u3097\u3099\u30A0\u30A1\u30FB\u30FC\u3100\u3105\u312D\u3131\u318F\u3190\u31B8\u31F0\u321D\u3220\u3244\u3251\u327C\u327F\u32CC\u32D0\u32FF\u3300\u3377\u337B\u33DE\u33E0\u33FF\u3400\u4DB6\u4E00\u9FA6\uA000\uA48D\uA490\uA4C7\uAC00\uD7A4\uF900\uFA2E\uFA30\uFA6B\uFB00\uFB07\uFB13\uFB18\uFB1D\uFB37\uFB38\uFB3D\uFB3E\uFB3F\uFB40\uFB42\uFB43\uFB45\uFB46\uFBB2\uFBD3\uFD3E\uFD50\uFD90\uFD92\uFDC8\uFDF0\uFDFD\uFE00\uFE10\uFE20\uFE24\uFE62\uFE63\uFE64\uFE67\uFE69\uFE6A\uFE70\uFE75\uFE76\uFEFD\uFF04\uFF05\uFF0B\uFF0C\uFF10\uFF1A\uFF1C\uFF1F\uFF21\uFF3B\uFF3E\uFF3F\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF66\uFFBF\uFFC2\uFFC8\uFFCA\uFFD0\uFFD2\uFFD8\uFFDA\uFFDD\uFFE0\uFFE7\uFFE8\uFFEF\uFFFC\uFFFE"], + ]; + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 5d859fbf6aa76f..d22036e6da2844 100644 --- 
a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -174,139 +174,6 @@ internal sealed partial class RegexCharClass { "Z", "\u0000\u000D\u000E\u000C\u0000" }, }; - /* - * The property table contains all the block definitions defined in the - * XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 4.0 spec (www.unicode.org), - * and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may - * not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates. - * - **/ - // Has to be sorted by the first column - private static readonly string[][] s_propTable = - [ - ["IsAlphabeticPresentationForms", "\uFB00\uFB50"], - ["IsArabic", "\u0600\u0700"], - ["IsArabicPresentationForms-A", "\uFB50\uFE00"], - ["IsArabicPresentationForms-B", "\uFE70\uFF00"], - ["IsArmenian", "\u0530\u0590"], - ["IsArrows", "\u2190\u2200"], - ["IsBasicLatin", "\u0000\u0080"], - ["IsBengali", "\u0980\u0A00"], - ["IsBlockElements", "\u2580\u25A0"], - ["IsBopomofo", "\u3100\u3130"], - ["IsBopomofoExtended", "\u31A0\u31C0"], - ["IsBoxDrawing", "\u2500\u2580"], - ["IsBraillePatterns", "\u2800\u2900"], - ["IsBuhid", "\u1740\u1760"], - ["IsCJKCompatibility", "\u3300\u3400"], - ["IsCJKCompatibilityForms", "\uFE30\uFE50"], - ["IsCJKCompatibilityIdeographs", "\uF900\uFB00"], - ["IsCJKRadicalsSupplement", "\u2E80\u2F00"], - ["IsCJKSymbolsandPunctuation", "\u3000\u3040"], - ["IsCJKUnifiedIdeographs", "\u4E00\uA000"], - ["IsCJKUnifiedIdeographsExtensionA", "\u3400\u4DC0"], - ["IsCherokee", "\u13A0\u1400"], - ["IsCombiningDiacriticalMarks", "\u0300\u0370"], - ["IsCombiningDiacriticalMarksforSymbols", "\u20D0\u2100"], - ["IsCombiningHalfMarks", "\uFE20\uFE30"], - ["IsCombiningMarksforSymbols", "\u20D0\u2100"], - ["IsControlPictures", "\u2400\u2440"], - 
["IsCurrencySymbols", "\u20A0\u20D0"], - ["IsCyrillic", "\u0400\u0500"], - ["IsCyrillicSupplement", "\u0500\u0530"], - ["IsDevanagari", "\u0900\u0980"], - ["IsDingbats", "\u2700\u27C0"], - ["IsEnclosedAlphanumerics", "\u2460\u2500"], - ["IsEnclosedCJKLettersandMonths", "\u3200\u3300"], - ["IsEthiopic", "\u1200\u1380"], - ["IsGeneralPunctuation", "\u2000\u2070"], - ["IsGeometricShapes", "\u25A0\u2600"], - ["IsGeorgian", "\u10A0\u1100"], - ["IsGreek", "\u0370\u0400"], - ["IsGreekExtended", "\u1F00\u2000"], - ["IsGreekandCoptic", "\u0370\u0400"], - ["IsGujarati", "\u0A80\u0B00"], - ["IsGurmukhi", "\u0A00\u0A80"], - ["IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"], - ["IsHangulCompatibilityJamo", "\u3130\u3190"], - ["IsHangulJamo", "\u1100\u1200"], - ["IsHangulSyllables", "\uAC00\uD7B0"], - ["IsHanunoo", "\u1720\u1740"], - ["IsHebrew", "\u0590\u0600"], - ["IsHighPrivateUseSurrogates", "\uDB80\uDC00"], - ["IsHighSurrogates", "\uD800\uDB80"], - ["IsHiragana", "\u3040\u30A0"], - ["IsIPAExtensions", "\u0250\u02B0"], - ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"], - ["IsKanbun", "\u3190\u31A0"], - ["IsKangxiRadicals", "\u2F00\u2FE0"], - ["IsKannada", "\u0C80\u0D00"], - ["IsKatakana", "\u30A0\u3100"], - ["IsKatakanaPhoneticExtensions", "\u31F0\u3200"], - ["IsKhmer", "\u1780\u1800"], - ["IsKhmerSymbols", "\u19E0\u1A00"], - ["IsLao", "\u0E80\u0F00"], - ["IsLatin-1Supplement", "\u0080\u0100"], - ["IsLatinExtended-A", "\u0100\u0180"], - ["IsLatinExtended-B", "\u0180\u0250"], - ["IsLatinExtendedAdditional", "\u1E00\u1F00"], - ["IsLetterlikeSymbols", "\u2100\u2150"], - ["IsLimbu", "\u1900\u1950"], - ["IsLowSurrogates", "\uDC00\uE000"], - ["IsMalayalam", "\u0D00\u0D80"], - ["IsMathematicalOperators", "\u2200\u2300"], - ["IsMiscellaneousMathematicalSymbols-A", "\u27C0\u27F0"], - ["IsMiscellaneousMathematicalSymbols-B", "\u2980\u2A00"], - ["IsMiscellaneousSymbols", "\u2600\u2700"], - ["IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"], - ["IsMiscellaneousTechnical", 
"\u2300\u2400"], - ["IsMongolian", "\u1800\u18B0"], - ["IsMyanmar", "\u1000\u10A0"], - ["IsNumberForms", "\u2150\u2190"], - ["IsOgham", "\u1680\u16A0"], - ["IsOpticalCharacterRecognition", "\u2440\u2460"], - ["IsOriya", "\u0B00\u0B80"], - ["IsPhoneticExtensions", "\u1D00\u1D80"], - ["IsPrivateUse", "\uE000\uF900"], - ["IsPrivateUseArea", "\uE000\uF900"], - ["IsRunic", "\u16A0\u1700"], - ["IsSinhala", "\u0D80\u0E00"], - ["IsSmallFormVariants", "\uFE50\uFE70"], - ["IsSpacingModifierLetters", "\u02B0\u0300"], - ["IsSpecials", "\uFFF0"], - ["IsSuperscriptsandSubscripts", "\u2070\u20A0"], - ["IsSupplementalArrows-A", "\u27F0\u2800"], - ["IsSupplementalArrows-B", "\u2900\u2980"], - ["IsSupplementalMathematicalOperators", "\u2A00\u2B00"], - ["IsSyriac", "\u0700\u0750"], - ["IsTagalog", "\u1700\u1720"], - ["IsTagbanwa", "\u1760\u1780"], - ["IsTaiLe", "\u1950\u1980"], - ["IsTamil", "\u0B80\u0C00"], - ["IsTelugu", "\u0C00\u0C80"], - ["IsThaana", "\u0780\u07C0"], - ["IsThai", "\u0E00\u0E80"], - ["IsTibetan", "\u0F00\u1000"], - ["IsUnifiedCanadianAboriginalSyllabics", "\u1400\u1680"], - ["IsVariationSelectors", "\uFE00\uFE10"], - ["IsYiRadicals", "\uA490\uA4D0"], - ["IsYiSyllables", "\uA000\uA490"], - ["IsYijingHexagramSymbols", "\u4DC0\u4E00"], - ["_xmlC", /* Name Char */ 
"\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00B7\u00B8\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u02D0\u02D2\u0300\u0346\u0360\u0362\u0386\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0483\u0487\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0653\u0660\u066A\u0670\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06E9\u06EA\u06EE\u06F0\u06FA\u0901\u0904\u0905\u093A\u093C\u094E\u0951\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC" - +"\u09DE\u09DF\u09E4\u09E6\u09F2\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B70\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF0\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2"
 - +"\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0E01\u0E2F\u0E30\u0E3B\u0E40\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0F18\u0F1A\u0F20\u0F2A\u0F35\u0F36\u0F37\u0F38\u0F39\u0F3A\u0F3E\u0F48\u0F49\u0F6A\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F96\u0F97\u0F98\u0F99\u0FAE\u0FB1\u0FB8\u0FB9\u0FBA\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00" - +"\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u20D0\u20DD\u20E1\u20E2\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3005\u3006\u3007\u3008\u3021\u3030\u3031\u3036\u3041\u3095\u3099\u309B\u309D\u309F\u30A1\u30FB\u30FC\u30FF\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"], - ["_xmlD", "\u0030\u003A\u0660\u066A\u06F0\u06FA\u0966\u0970\u09E6\u09F0\u0A66\u0A70\u0AE6\u0AF0\u0B66\u0B70\u0BE7\u0BF0\u0C66\u0C70\u0CE6\u0CF0\u0D66\u0D70\u0E50\u0E5A\u0ED0\u0EDA\u0F20\u0F2A\u1040\u104A\u1369\u1372\u17E0\u17EA\u1810\u181A\uFF10\uFF1A"], - ["_xmlI", /* Start Name Char */ 
"\u003A\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u0386\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0641\u064B\u0671\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06D6\u06E5\u06E7\u0905\u093A\u093D\u093E\u0958\u0962\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09DC\u09DE\u09DF\u09E2\u09F0\u09F2\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A59\u0A5D\u0A5E\u0A5F\u0A72\u0A75\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABD\u0ABE\u0AE0\u0AE1\u0B05\u0B0D\u0B0F" - +"\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3D\u0B3E\u0B5C\u0B5E\u0B5F\u0B62\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C60\u0C62\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CDE\u0CDF\u0CE0\u0CE2\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D60\u0D62\u0E01\u0E2F\u0E30\u0E31\u0E32\u0E34\u0E40\u0E46\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EB1\u0EB2\u0EB4\u0EBD\u0EBE\u0EC0\u0EC5\u0F40\u0F48\u0F49\u0F6A\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\
u11AB\u11AC" - +"\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3007\u3008\u3021\u302A\u3041\u3095\u30A1\u30FB\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"], - ["_xmlW", "\u0024\u0025\u002B\u002C\u0030\u003A\u003C\u003F\u0041\u005B\u005E\u005F\u0060\u007B\u007C\u007D\u007E\u007F\u00A2\u00AB\u00AC\u00AD\u00AE\u00B7\u00B8\u00BB\u00BC\u00BF\u00C0\u0221\u0222\u0234\u0250\u02AE\u02B0\u02EF\u0300\u0350\u0360\u0370\u0374\u0376\u037A\u037B\u0384\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03F7\u0400\u0487\u0488\u04CF\u04D0\u04F6\u04F8\u04FA\u0500\u0510\u0531\u0557\u0559\u055A\u0561\u0588\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0656\u0660\u066A\u066E\u06D4\u06D5\u06DD\u06DE\u06EE\u06F0\u06FF\u0710\u072D\u0730\u074B\u0780\u07B2\u0901\u0904\u0905\u093A\u093C\u094E\u0950\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC\u09DE\u09DF\u09E4\u09E6\u09FB\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35" - 
+"\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AD0\u0AD1\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B71\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF3\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49" - 
+"\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0D82\u0D84\u0D85\u0D97\u0D9A\u0DB2\u0DB3\u0DBC\u0DBD\u0DBE\u0DC0\u0DC7\u0DCA\u0DCB\u0DCF\u0DD5\u0DD6\u0DD7\u0DD8\u0DE0\u0DF2\u0DF4\u0E01\u0E3B\u0E3F\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0EDC\u0EDE\u0F00\u0F04\u0F13\u0F3A\u0F3E\u0F48\u0F49\u0F6B\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F98\u0F99\u0FBD\u0FBE\u0FCD\u0FCF\u0FD0\u1000\u1022\u1023\u1028\u1029\u102B\u102C\u1033\u1036\u103A\u1040\u104A\u1050\u105A\u10A0\u10C6\u10D0\u10F9\u1100\u115A\u115F\u11A3\u11A8\u11FA\u1200\u1207\u1208\u1247\u1248\u1249\u124A\u124E\u1250\u1257\u1258\u1259\u125A\u125E\u1260\u1287\u1288\u1289\u128A\u128E\u1290\u12AF\u12B0\u12B1\u12B2\u12B6\u12B8\u12BF\u12C0\u12C1\u12C2\u12C6\u12C8\u12CF\u12D0\u12D7\u12D8\u12EF\u12F0\u130F\u1310\u1311\u1312\u1316\u1318\u131F\u1320\u1347\u1348\u135B\u1369\u137D\u13A0" - 
+"\u13F5\u1401\u166D\u166F\u1677\u1681\u169B\u16A0\u16EB\u16EE\u16F1\u1700\u170D\u170E\u1715\u1720\u1735\u1740\u1754\u1760\u176D\u176E\u1771\u1772\u1774\u1780\u17D4\u17D7\u17D8\u17DB\u17DD\u17E0\u17EA\u180B\u180E\u1810\u181A\u1820\u1878\u1880\u18AA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FC5\u1FC6\u1FD4\u1FD6\u1FDC\u1FDD\u1FF0\u1FF2\u1FF5\u1FF6\u1FFF\u2044\u2045\u2052\u2053\u2070\u2072\u2074\u207D\u207F\u208D\u20A0\u20B2\u20D0\u20EB\u2100\u213B\u213D\u214C\u2153\u2184\u2190\u2329\u232B\u23B4\u23B7\u23CF\u2400\u2427\u2440\u244B\u2460\u24FF\u2500\u2614\u2616\u2618\u2619\u267E\u2680\u268A\u2701\u2705\u2706\u270A\u270C\u2728\u2729\u274C\u274D\u274E\u274F\u2753\u2756\u2757\u2758\u275F\u2761\u2768\u2776\u2795\u2798\u27B0\u27B1\u27BF\u27D0\u27E6\u27F0\u2983\u2999\u29D8\u29DC\u29FC\u29FE\u2B00\u2E80\u2E9A\u2E9B\u2EF4\u2F00\u2FD6\u2FF0\u2FFC\u3004\u3008\u3012\u3014\u3020\u3030\u3031\u303D\u303E\u3040" - +"\u3041\u3097\u3099\u30A0\u30A1\u30FB\u30FC\u3100\u3105\u312D\u3131\u318F\u3190\u31B8\u31F0\u321D\u3220\u3244\u3251\u327C\u327F\u32CC\u32D0\u32FF\u3300\u3377\u337B\u33DE\u33E0\u33FF\u3400\u4DB6\u4E00\u9FA6\uA000\uA48D\uA490\uA4C7\uAC00\uD7A4\uF900\uFA2E\uFA30\uFA6B\uFB00\uFB07\uFB13\uFB18\uFB1D\uFB37\uFB38\uFB3D\uFB3E\uFB3F\uFB40\uFB42\uFB43\uFB45\uFB46\uFBB2\uFBD3\uFD3E\uFD50\uFD90\uFD92\uFDC8\uFDF0\uFDFD\uFE00\uFE10\uFE20\uFE24\uFE62\uFE63\uFE64\uFE67\uFE69\uFE6A\uFE70\uFE75\uFE76\uFEFD\uFF04\uFF05\uFF0B\uFF0C\uFF10\uFF1A\uFF1C\uFF1F\uFF21\uFF3B\uFF3E\uFF3F\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF66\uFFBF\uFFC2\uFFC8\uFFCA\uFFD0\uFFD2\uFFD8\uFFDA\uFFDD\uFFE0\uFFE7\uFFE8\uFFEF\uFFFC\uFFFE"], - ]; - private static readonly char[] s_whitespaceChars = ['\u0009', '\u000A', '\u000B', '\u000C', '\u000D', '\u0020', '\u0085', '\u00A0', '\u1680', '\u2000', diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs 
b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs index 2eb52774c33842..1be9ebe6f2a0b4 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs @@ -245,7 +245,14 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsHebrew}", new[] { 0x0590, 0x05FF } }; yield return new object[] { engine, @"\p{IsArabic}", new[] { 0x0600, 0x06FF } }; yield return new object[] { engine, @"\p{IsSyriac}", new[] { 0x0700, 0x074F } }; + yield return new object[] { engine, @"\p{IsArabicSupplement}", new[] { 0x0750, 0x077F } }; yield return new object[] { engine, @"\p{IsThaana}", new[] { 0x0780, 0x07BF } }; + yield return new object[] { engine, @"\p{IsNKo}", new[] { 0x07C0, 0x07FF } }; + yield return new object[] { engine, @"\p{IsSamaritan}", new[] { 0x0800, 0x083F } }; + yield return new object[] { engine, @"\p{IsMandaic}", new[] { 0x0840, 0x085F } }; + yield return new object[] { engine, @"\p{IsSyriacSupplement}", new[] { 0x0860, 0x086F } }; + yield return new object[] { engine, @"\p{IsArabicExtended-B}", new[] { 0x0870, 0x089F } }; + yield return new object[] { engine, @"\p{IsArabicExtended-A}", new[] { 0x08A0, 0x08FF } }; yield return new object[] { engine, @"\p{IsDevanagari}", new[] { 0x0900, 0x097F } }; yield return new object[] { engine, @"\p{IsBengali}", new[] { 0x0980, 0x09FF } }; yield return new object[] { engine, @"\p{IsGurmukhi}", new[] { 0x0A00, 0x0A7F } }; @@ -273,10 +280,26 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsTagbanwa}", new[] { 0x1760, 0x177F } }; yield return new object[] { engine, @"\p{IsKhmer}", new[] { 0x1780, 0x17FF } }; yield return new object[] { engine, @"\p{IsMongolian}", new[] { 0x1800, 0x18AF } }; + yield return new object[] { engine, 
@"\p{IsUnifiedCanadianAboriginalSyllabicsExtended}", new[] { 0x18B0, 0x18FF } }; yield return new object[] { engine, @"\p{IsLimbu}", new[] { 0x1900, 0x194F } }; yield return new object[] { engine, @"\p{IsTaiLe}", new[] { 0x1950, 0x197F } }; + yield return new object[] { engine, @"\p{IsNewTaiLue}", new[] { 0x1980, 0x19DF } }; yield return new object[] { engine, @"\p{IsKhmerSymbols}", new[] { 0x19E0, 0x19FF } }; + yield return new object[] { engine, @"\p{IsBuginese}", new[] { 0x1A00, 0x1A1F } }; + yield return new object[] { engine, @"\p{IsTaiTham}", new[] { 0x1A20, 0x1AAF } }; + yield return new object[] { engine, @"\p{IsCombiningDiacriticalMarksExtended}", new[] { 0x1AB0, 0x1AFF } }; + yield return new object[] { engine, @"\p{IsBalinese}", new[] { 0x1B00, 0x1B7F } }; + yield return new object[] { engine, @"\p{IsSundanese}", new[] { 0x1B80, 0x1BBF } }; + yield return new object[] { engine, @"\p{IsBatak}", new[] { 0x1BC0, 0x1BFF } }; + yield return new object[] { engine, @"\p{IsLepcha}", new[] { 0x1C00, 0x1C4F } }; + yield return new object[] { engine, @"\p{IsOlChiki}", new[] { 0x1C50, 0x1C7F } }; + yield return new object[] { engine, @"\p{IsCyrillicExtended-C}", new[] { 0x1C80, 0x1C8F } }; + yield return new object[] { engine, @"\p{IsGeorgianExtended}", new[] { 0x1C90, 0x1CBF } }; + yield return new object[] { engine, @"\p{IsSundaneseSupplement}", new[] { 0x1CC0, 0x1CCF } }; + yield return new object[] { engine, @"\p{IsVedicExtensions}", new[] { 0x1CD0, 0x1CFF } }; yield return new object[] { engine, @"\p{IsPhoneticExtensions}", new[] { 0x1D00, 0x1D7F } }; + yield return new object[] { engine, @"\p{IsPhoneticExtensionsSupplement}", new[] { 0x1D80, 0x1DBF } }; + yield return new object[] { engine, @"\p{IsCombiningDiacriticalMarksSupplement}", new[] { 0x1DC0, 0x1DFF } }; yield return new object[] { engine, @"\p{IsLatinExtendedAdditional}", new[] { 0x1E00, 0x1EFF } }; yield return new object[] { engine, @"\p{IsGreekExtended}", new[] { 0x1F00, 0x1FFF } }; yield return 
new object[] { engine, @"\p{IsGeneralPunctuation}", new[] { 0x2000, 0x206F } }; @@ -303,6 +326,14 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsMiscellaneousMathematicalSymbols-B}", new[] { 0x2980, 0x29FF } }; yield return new object[] { engine, @"\p{IsSupplementalMathematicalOperators}", new[] { 0x2A00, 0x2AFF } }; yield return new object[] { engine, @"\p{IsMiscellaneousSymbolsandArrows}", new[] { 0x2B00, 0x2BFF } }; + yield return new object[] { engine, @"\p{IsGlagolitic}", new[] { 0x2C00, 0x2C5F } }; + yield return new object[] { engine, @"\p{IsLatinExtended-C}", new[] { 0x2C60, 0x2C7F } }; + yield return new object[] { engine, @"\p{IsCoptic}", new[] { 0x2C80, 0x2CFF } }; + yield return new object[] { engine, @"\p{IsGeorgianSupplement}", new[] { 0x2D00, 0x2D2F } }; + yield return new object[] { engine, @"\p{IsTifinagh}", new[] { 0x2D30, 0x2D7F } }; + yield return new object[] { engine, @"\p{IsEthiopicExtended}", new[] { 0x2D80, 0x2DDF } }; + yield return new object[] { engine, @"\p{IsCyrillicExtended-A}", new[] { 0x2DE0, 0x2DFF } }; + yield return new object[] { engine, @"\p{IsSupplementalPunctuation}", new[] { 0x2E00, 0x2E7F } }; yield return new object[] { engine, @"\p{IsCJKRadicalsSupplement}", new[] { 0x2E80, 0x2EFF } }; yield return new object[] { engine, @"\p{IsKangxiRadicals}", new[] { 0x2F00, 0x2FDF } }; yield return new object[] { engine, @"\p{IsIdeographicDescriptionCharacters}", new[] { 0x2FF0, 0x2FFF } }; @@ -313,6 +344,7 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsHangulCompatibilityJamo}", new[] { 0x3130, 0x318F } }; yield return new object[] { engine, @"\p{IsKanbun}", new[] { 0x3190, 0x319F } }; yield return new object[] { engine, @"\p{IsBopomofoExtended}", new[] { 0x31A0, 0x31BF } }; + yield return new object[] { engine, @"\p{IsCJKStrokes}", new[] { 0x31C0, 0x31EF } }; yield return new object[] { engine, 
@"\p{IsKatakanaPhoneticExtensions}", new[] { 0x31F0, 0x31FF } }; yield return new object[] { engine, @"\p{IsEnclosedCJKLettersandMonths}", new[] { 0x3200, 0x32FF } }; yield return new object[] { engine, @"\p{IsCJKCompatibility}", new[] { 0x3300, 0x33FF } }; @@ -321,7 +353,32 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsCJKUnifiedIdeographs}", new[] { 0x4E00, 0x9FFF } }; yield return new object[] { engine, @"\p{IsYiSyllables}", new[] { 0xA000, 0xA48F } }; yield return new object[] { engine, @"\p{IsYiRadicals}", new[] { 0xA490, 0xA4CF } }; + yield return new object[] { engine, @"\p{IsLisu}", new[] { 0xA4D0, 0xA4FF } }; + yield return new object[] { engine, @"\p{IsVai}", new[] { 0xA500, 0xA63F } }; + yield return new object[] { engine, @"\p{IsCyrillicExtended-B}", new[] { 0xA640, 0xA69F } }; + yield return new object[] { engine, @"\p{IsBamum}", new[] { 0xA6A0, 0xA6FF } }; + yield return new object[] { engine, @"\p{IsModifierToneLetters}", new[] { 0xA700, 0xA71F } }; + yield return new object[] { engine, @"\p{IsLatinExtended-D}", new[] { 0xA720, 0xA7FF } }; + yield return new object[] { engine, @"\p{IsSylotiNagri}", new[] { 0xA800, 0xA82F } }; + yield return new object[] { engine, @"\p{IsCommonIndicNumberForms}", new[] { 0xA830, 0xA83F } }; + yield return new object[] { engine, @"\p{IsPhags-pa}", new[] { 0xA840, 0xA87F } }; + yield return new object[] { engine, @"\p{IsSaurashtra}", new[] { 0xA880, 0xA8DF } }; + yield return new object[] { engine, @"\p{IsDevanagariExtended}", new[] { 0xA8E0, 0xA8FF } }; + yield return new object[] { engine, @"\p{IsKayahLi}", new[] { 0xA900, 0xA92F } }; + yield return new object[] { engine, @"\p{IsRejang}", new[] { 0xA930, 0xA95F } }; + yield return new object[] { engine, @"\p{IsHangulJamoExtended-A}", new[] { 0xA960, 0xA97F } }; + yield return new object[] { engine, @"\p{IsJavanese}", new[] { 0xA980, 0xA9DF } }; + yield return new object[] { engine, 
@"\p{IsMyanmarExtended-B}", new[] { 0xA9E0, 0xA9FF } }; + yield return new object[] { engine, @"\p{IsCham}", new[] { 0xAA00, 0xAA5F } }; + yield return new object[] { engine, @"\p{IsMyanmarExtended-A}", new[] { 0xAA60, 0xAA7F } }; + yield return new object[] { engine, @"\p{IsTaiViet}", new[] { 0xAA80, 0xAADF } }; + yield return new object[] { engine, @"\p{IsMeeteiMayekExtensions}", new[] { 0xAAE0, 0xAAFF } }; + yield return new object[] { engine, @"\p{IsEthiopicExtended-A}", new[] { 0xAB00, 0xAB2F } }; + yield return new object[] { engine, @"\p{IsLatinExtended-E}", new[] { 0xAB30, 0xAB6F } }; + yield return new object[] { engine, @"\p{IsCherokeeSupplement}", new[] { 0xAB70, 0xABBF } }; + yield return new object[] { engine, @"\p{IsMeeteiMayek}", new[] { 0xABC0, 0xABFF } }; yield return new object[] { engine, @"\p{IsHangulSyllables}", new[] { 0xAC00, 0xD7AF } }; + yield return new object[] { engine, @"\p{IsHangulJamoExtended-B}", new[] { 0xD7B0, 0xD7FF } }; yield return new object[] { engine, @"\p{IsHighSurrogates}", new[] { 0xD800, 0xDB7F } }; yield return new object[] { engine, @"\p{IsHighPrivateUseSurrogates}", new[] { 0xDB80, 0xDBFF } }; yield return new object[] { engine, @"\p{IsLowSurrogates}", new[] { 0xDC00, 0xDFFF } }; @@ -330,6 +387,7 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsAlphabeticPresentationForms}", new[] { 0xFB00, 0xFB4F } }; yield return new object[] { engine, @"\p{IsArabicPresentationForms-A}", new[] { 0xFB50, 0xFDFF } }; yield return new object[] { engine, @"\p{IsVariationSelectors}", new[] { 0xFE00, 0xFE0F } }; + yield return new object[] { engine, @"\p{IsVerticalForms}", new[] { 0xFE10, 0xFE1F } }; yield return new object[] { engine, @"\p{IsCombiningHalfMarks}", new[] { 0xFE20, 0xFE2F } }; yield return new object[] { engine, @"\p{IsCJKCompatibilityForms}", new[] { 0xFE30, 0xFE4F } }; yield return new object[] { engine, @"\p{IsSmallFormVariants}", new[] { 0xFE50, 0xFE6F } 
}; diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj index a3d8e3a172dc7a..2c70ec872f5d94 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj @@ -31,6 +31,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj new file mode 100644 index 00000000000000..f9929d13fb73d1 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj @@ -0,0 +1,9 @@ + + + + Exe + net10.0 + enable + + + diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs new file mode 100644 index 00000000000000..9863b99140b634 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs @@ -0,0 +1,171 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; + +using static System.FormattableString; + +namespace GenRegexNamedBlocks +{ + /// + /// This program generates RegexCharClass.Tables.cs with Unicode named blocks + /// + class Program + { + static void Main(string[] args) + { + if (args.Length < 2) + { + Console.WriteLine("Usage: dotnet run -- "); + Console.WriteLine("Example: dotnet run -- Blocks.txt ../../src/System/Text/RegularExpressions/RegexCharClass.Tables.cs"); + return; + } + + string blocksFile = args[0]; + string outputFile = args[1]; + + // The input file should be Blocks.txt from the UCD corresponding to the + // version of the Unicode spec we're consuming. + // More info: https://www.unicode.org/reports/tr44/ + // Latest Blocks.txt: https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt + + string[] allInputLines = File.ReadAllLines(blocksFile); + + Regex inputLineRegex = new Regex(@"^(?[0-9A-F]{4})\.\.(?[0-9A-F]{4}); (?.+)$"); + + var entries = new List<(string name, string startCode, string endCode)>(); + + foreach (string inputLine in allInputLines) + { + // We only care about lines of the form "XXXX..XXXX; Block name" + var match = inputLineRegex.Match(inputLine); + if (match == null || !match.Success) + { + continue; + } + + string startCode = match.Groups["startCode"].Value; + string endCode = match.Groups["endCode"].Value; + string blockName = match.Groups["blockName"].Value; + + // Exclude the surrogate range and everything outside the BMP. 
+ uint startCodeAsInt = uint.Parse(startCode, NumberStyles.HexNumber, CultureInfo.InvariantCulture); + if (startCodeAsInt >= 0x10000 || (startCodeAsInt >= 0xD800 && startCodeAsInt <= 0xDFFF)) + { + continue; + } + + // Exclude any private use areas + if (blockName.Contains("Private Use", StringComparison.OrdinalIgnoreCase)) + { + continue; + } + + // Convert block name to Regex format (with "Is" prefix) + string regexBlockName = "Is" + RemoveAllNonAlphanumeric(blockName); + + entries.Add((regexBlockName, startCode, endCode)); + } + + // Sort alphabetically for consistent output + entries.Sort((a, b) => string.Compare(a.name, b.name, StringComparison.Ordinal)); + + // Add special backward-compatibility aliases + entries.Add(("IsCombiningMarksforSymbols", "20D0", "20FF")); // Alias for IsCombiningDiacriticalMarksforSymbols + entries.Add(("IsGreek", "0370", "03FF")); // Alias for IsGreekandCoptic + entries.Add(("IsHighPrivateUseSurrogates", "DB80", "DBFF")); + entries.Add(("IsHighSurrogates", "D800", "DB7F")); + entries.Add(("IsLowSurrogates", "DC00", "DFFF")); + entries.Add(("IsPrivateUse", "E000", "F8FF")); // Alias for IsPrivateUseArea + entries.Add(("IsPrivateUseArea", "E000", "F8FF")); + + // Re-sort to include the new entries + entries.Sort((a, b) => string.Compare(a.name, b.name, StringComparison.Ordinal)); + + // Generate the output file + var output = new StringBuilder(); + output.AppendLine("// Licensed to the .NET Foundation under one or more agreements."); + output.AppendLine("// The .NET Foundation licenses this file to you under the MIT license."); + output.AppendLine(); + output.AppendLine("// This is a generated file. 
Do not edit directly."); + output.AppendLine("// Run the GenRegexNamedBlocks tool to regenerate."); + output.AppendLine(); + output.AppendLine("namespace System.Text.RegularExpressions"); + output.AppendLine("{"); + output.AppendLine(" internal sealed partial class RegexCharClass"); + output.AppendLine(" {"); + output.AppendLine(" /*"); + output.AppendLine(" * The property table contains all the block definitions defined in the"); + output.AppendLine(" * XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 17.0 spec (www.unicode.org),"); + output.AppendLine(" * and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may"); + output.AppendLine(" * not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates."); + output.AppendLine(" *"); + output.AppendLine(" **/"); + output.AppendLine(" // Has to be sorted by the first column"); + output.AppendLine(" private static readonly string[][] s_propTable ="); + output.AppendLine(" ["); + + foreach (var entry in entries) + { + // Special handling for IsSpecials - it goes to the end of BMP + if (entry.name == "IsSpecials") + { + output.AppendLine($" [\"{entry.name}\", \"\\u{entry.startCode}\"],"); + } + else + { + output.AppendLine($" [\"{entry.name}\", \"\\u{entry.startCode}\\u{GetNextCodePoint(entry.endCode)}\"],"); + } + } + + output.AppendLine(" [\"_xmlC\", /* Name Char */ 
\"\\u002D\\u002F\\u0030\\u003B\\u0041\\u005B\\u005F\\u0060\\u0061\\u007B\\u00B7\\u00B8\\u00C0\\u00D7\\u00D8\\u00F7\\u00F8\\u0132\\u0134\\u013F\\u0141\\u0149\\u014A\\u017F\\u0180\\u01C4\\u01CD\\u01F1\\u01F4\\u01F6\\u01FA\\u0218\\u0250\\u02A9\\u02BB\\u02C2\\u02D0\\u02D2\\u0300\\u0346\\u0360\\u0362\\u0386\\u038B\\u038C\\u038D\\u038E\\u03A2\\u03A3\\u03CF\\u03D0\\u03D7\\u03DA\\u03DB\\u03DC\\u03DD\\u03DE\\u03DF\\u03E0\\u03E1\\u03E2\\u03F4\\u0401\\u040D\\u040E\\u0450\\u0451\\u045D\\u045E\\u0482\\u0483\\u0487\\u0490\\u04C5\\u04C7\\u04C9\\u04CB\\u04CD\\u04D0\\u04EC\\u04EE\\u04F6\\u04F8\\u04FA\\u0531\\u0557\\u0559\\u055A\\u0561\\u0587\\u0591\\u05A2\\u05A3\\u05BA\\u05BB\\u05BE\\u05BF\\u05C0\\u05C1\\u05C3\\u05C4\\u05C5\\u05D0\\u05EB\\u05F0\\u05F3\\u0621\\u063B\\u0640\\u0653\\u0660\\u066A\\u0670\\u06B8\\u06BA\\u06BF\\u06C0\\u06CF\\u06D0\\u06D4\\u06D5\\u06E9\\u06EA\\u06EE\\u06F0\\u06FA\\u0901\\u0904\\u0905\\u093A\\u093C\\u094E\\u0951\\u0955\\u0958\\u0964\\u0966\\u0970\\u0981\\u0984\\u0985\\u098D\\u098F\\u0991\\u0993\\u09A9\\u09AA\\u09B1\\u09B2\\u09B3\\u09B6\\u09BA\\u09BC\\u09BD\\u09BE\\u09C5\\u09C7\\u09C9\\u09CB\\u09CE\\u09D7\\u09D8\\u09DC\""); + output.Append(" 
+\"\\u09DE\\u09DF\\u09E4\\u09E6\\u09F2\\u0A02\\u0A03\\u0A05\\u0A0B\\u0A0F\\u0A11\\u0A13\\u0A29\\u0A2A\\u0A31\\u0A32\\u0A34\\u0A35\\u0A37\\u0A38\\u0A3A\\u0A3C\\u0A3D\\u0A3E\\u0A43\\u0A47\\u0A49\\u0A4B\\u0A4E\\u0A59\\u0A5D\\u0A5E\\u0A5F\\u0A66\\u0A75\\u0A81\\u0A84\\u0A85\\u0A8C\\u0A8D\\u0A8E\\u0A8F\\u0A92\\u0A93\\u0AA9\\u0AAA\\u0AB1\\u0AB2\\u0AB4\\u0AB5\\u0ABA\\u0ABC\\u0AC6\\u0AC7\\u0ACA\\u0ACB\\u0ACE\\u0AE0\\u0AE1\\u0AE6\\u0AF0\\u0B01\\u0B04\\u0B05\\u0B0D\\u0B0F\\u0B11\\u0B13\\u0B29\\u0B2A\\u0B31\\u0B32\\u0B34\\u0B36\\u0B3A\\u0B3C\\u0B44\\u0B47\\u0B49\\u0B4B\\u0B4E\\u0B56\\u0B58\\u0B5C\\u0B5E\\u0B5F\\u0B62\\u0B66\\u0B70\\u0B82\\u0B84\\u0B85\\u0B8B\\u0B8E\\u0B91\\u0B92\\u0B96\\u0B99\\u0B9B\\u0B9C\\u0B9D\\u0B9E\\u0BA0\\u0BA3\\u0BA5\\u0BA8\\u0BAB\\u0BAE\\u0BB6\\u0BB7\\u0BBA\\u0BBE\\u0BC3\\u0BC6\\u0BC9\\u0BCA\\u0BCE\\u0BD7\\u0BD8\\u0BE7\\u0BF0\\u0C01\\u0C04\\u0C05\\u0C0D\\u0C0E\\u0C11\\u0C12\\u0C29\\u0C2A\\u0C34\\u0C35\\u0C3A\\u0C3E\\u0C45\\u0C46\\u0C49\\u0C4A\\u0C4E\\u0C55\\u0C57\\u0C60\\u0C62\""); + output.Append(" 
+\"\\u0CE6\\u0CF0\\u0D02\\u0D04\\u0D05\\u0D0D\\u0D0E\\u0D11\\u0D12\\u0D29\\u0D2A\\u0D3A\\u0D3E\\u0D44\\u0D46\\u0D49\\u0D4A\\u0D4E\\u0D57\\u0D58\\u0D60\\u0D62\\u0D66\\u0D70\\u0E01\\u0E2F\\u0E30\\u0E3B\\u0E40\\u0E4F\\u0E50\\u0E5A\\u0E81\\u0E83\\u0E84\\u0E85\\u0E87\\u0E89\\u0E8A\\u0E8B\\u0E8D\\u0E8E\\u0E94\\u0E98\\u0E99\\u0EA0\\u0EA1\\u0EA4\\u0EA5\\u0EA6\\u0EA7\\u0EA8\\u0EAA\\u0EAC\\u0EAD\\u0EAF\\u0EB0\\u0EBA\\u0EBB\\u0EBE\\u0EC0\\u0EC5\\u0EC6\\u0EC7\\u0EC8\\u0ECE\\u0ED0\\u0EDA\\u0F18\\u0F1A\\u0F20\\u0F2A\\u0F35\\u0F36\\u0F37\\u0F38\\u0F39\\u0F3A\\u0F3E\\u0F48\\u0F49\\u0F6A\\u0F71\\u0F85\\u0F86\\u0F8C\\u0F90\\u0F96\\u0F97\\u0F98\\u0F99\\u0FAE\\u0FB1\\u0FB8\\u0FB9\\u0FBA\\u10A0\\u10C6\\u10D0\\u10F7\\u1100\\u1101\\u1102\\u1104\\u1105\\u1108\\u1109\\u110A\\u110B\\u110D\\u110E\\u1113\\u113C\\u113D\\u113E\\u113F\\u1140\\u1141\\u114C\\u114D\\u114E\\u114F\\u1150\\u1151\\u1154\\u1156\\u1159\\u115A\\u115F\\u1162\\u1163\\u1164\\u1165\\u1166\\u1167\\u1168\\u1169\\u116A\\u116D\\u116F\\u1172\\u1174\\u1175\\u1176\\u119E\\u119F\\u11A8\\u11A9\\u11AB\\u11AC\\u11AE\\u11B0\\u11B7\\u11B9\\u11BA\\u11BB\\u11BC\\u11C3\\u11EB\\u11EC\\u11F0\\u11F1\\u11F9\\u11FA\\u1E00\\u1E9C\\u1EA0\\u1EFA\\u1F00\""); + output.Append(" +\"\\u1F16\\u1F18\\u1F1E\\u1F20\\u1F46\\u1F48\\u1F4E\\u1F50\\u1F58\\u1F59\\u1F5A\\u1F5B\\u1F5C\\u1F5D\\u1F5E\\u1F5F\\u1F7E\\u1F80\\u1FB5\\u1FB6\\u1FBD\\u1FBE\\u1FBF\\u1FC2\\u1FC5\\u1FC6\\u1FCD\\u1FD0\\u1FD4\\u1FD6\\u1FDC\\u1FE0\\u1FED\\u1FF2\\u1FF5\\u1FF6\\u1FFD\\u20D0\\u20DD\\u20E1\\u20E2\\u2126\\u2127\\u212A\\u212C\\u212E\\u212F\\u2180\\u2183\\u3005\\u3006\\u3007\\u3008\\u3021\\u3030\\u3031\\u3036\\u3041\\u3095\\u3099\\u309B\\u309D\\u309F\\u30A1\\u30FB\\u30FC\\u30FF\\u3105\\u312D\\u4E00\\u9FA6\\uAC00\\uD7A4\"],"); + output.AppendLine(); + output.AppendLine(" [\"_xmlD\", 
\"\\u0030\\u003A\\u0660\\u066A\\u06F0\\u06FA\\u0966\\u0970\\u09E6\\u09F0\\u0A66\\u0A70\\u0AE6\\u0AF0\\u0B66\\u0B70\\u0BE7\\u0BF0\\u0C66\\u0C70\\u0CE6\\u0CF0\\u0D66\\u0D70\\u0E50\\u0E5A\\u0ED0\\u0EDA\\u0F20\\u0F2A\\u1040\\u104A\\u1369\\u1372\\u17E0\\u17EA\\u1810\\u181A\\uFF10\\uFF1A\"],"); + output.AppendLine(" [\"_xmlI\", /* Start Name Char */ \"\\u003A\\u003B\\u0041\\u005B\\u005F\\u0060\\u0061\\u007B\\u00C0\\u00D7\\u00D8\\u00F7\\u00F8\\u0132\\u0134\\u013F\\u0141\\u0149\\u014A\\u017F\\u0180\\u01C4\\u01CD\\u01F1\\u01F4\\u01F6\\u01FA\\u0218\\u0250\\u02A9\\u02BB\\u02C2\\u0386\\u0387\\u0388\\u038B\\u038C\\u038D\\u038E\\u03A2\\u03A3\\u03CF\\u03D0\\u03D7\\u03DA\\u03DB\\u03DC\\u03DD\\u03DE\\u03DF\\u03E0\\u03E1\\u03E2\\u03F4\\u0401\\u040D\\u040E\\u0450\\u0451\\u045D\\u045E\\u0482\\u0490\\u04C5\\u04C7\\u04C9\\u04CB\\u04CD\\u04D0\\u04EC\\u04EE\\u04F6\\u04F8\\u04FA\\u0531\\u0557\\u0559\\u055A\\u0561\\u0587\\u05D0\\u05EB\\u05F0\\u05F3\\u0621\\u063B\\u0641\\u064B\\u0671\\u06B8\\u06BA\\u06BF\\u06C0\\u06CF\\u06D0\\u06D4\\u06D5\\u06D6\\u06E5\\u06E7\\u0905\\u093A\\u093D\\u093E\\u0958\\u0962\\u0985\\u098D\\u098F\\u0991\\u0993\\u09A9\\u09AA\\u09B1\\u09B2\\u09B3\\u09B6\\u09BA\\u09DC\\u09DE\\u09DF\\u09E2\\u09F0\\u09F2\\u0A05\\u0A0B\\u0A0F\\u0A11\\u0A13\\u0A29\\u0A2A\\u0A31\\u0A32\\u0A34\\u0A35\\u0A37\\u0A38\\u0A3A\\u0A59\\u0A5D\\u0A5E\\u0A5F\\u0A72\\u0A75\\u0A85\\u0A8C\\u0A8D\\u0A8E\\u0A8F\\u0A92\\u0A93\\u0AA9\\u0AAA\\u0AB1\\u0AB2\\u0AB4\\u0AB5\\u0ABA\\u0ABD\\u0ABE\\u0AE0\\u0AE1\\u0B05\\u0B0D\\u0B0F\""); + output.Append(" 
+\"\\u0B11\\u0B13\\u0B29\\u0B2A\\u0B31\\u0B32\\u0B34\\u0B36\\u0B3A\\u0B3D\\u0B3E\\u0B5C\\u0B5E\\u0B5F\\u0B62\\u0B85\\u0B8B\\u0B8E\\u0B91\\u0B92\\u0B96\\u0B99\\u0B9B\\u0B9C\\u0B9D\\u0B9E\\u0BA0\\u0BA3\\u0BA5\\u0BA8\\u0BAB\\u0BAE\\u0BB6\\u0BB7\\u0BBA\\u0C05\\u0C0D\\u0C0E\\u0C11\\u0C12\\u0C29\\u0C2A\\u0C34\\u0C35\\u0C3A\\u0C60\\u0C62\\u0C85\\u0C8D\\u0C8E\\u0C91\\u0C92\\u0CA9\\u0CAA\\u0CB4\\u0CB5\\u0CBA\\u0CDE\\u0CDF\\u0CE0\\u0CE2\\u0D05\\u0D0D\\u0D0E\\u0D11\\u0D12\\u0D29\\u0D2A\\u0D3A\\u0D60\\u0D62\\u0E01\\u0E2F\\u0E30\\u0E31\\u0E32\\u0E34\\u0E40\\u0E46\\u0E81\\u0E83\\u0E84\\u0E85\\u0E87\\u0E89\\u0E8A\\u0E8B\\u0E8D\\u0E8E\\u0E94\\u0E98\\u0E99\\u0EA0\\u0EA1\\u0EA4\\u0EA5\\u0EA6\\u0EA7\\u0EA8\\u0EAA\\u0EAC\\u0EAD\\u0EAF\\u0EB0\\u0EB1\\u0EB2\\u0EB4\\u0EBD\\u0EBE\\u0EC0\\u0EC5\\u0F40\\u0F48\\u0F49\\u0F6A\\u10A0\\u10C6\\u10D0\\u10F7\\u1100\\u1101\\u1102\\u1104\\u1105\\u1108\\u1109\\u110A\\u110B\\u110D\\u110E\\u1113\\u113C\\u113D\\u113E\\u113F\\u1140\\u1141\\u114C\\u114D\\u114E\\u114F\\u1150\\u1151\\u1154\\u1156\\u1159\\u115A\\u115F\\u1162\\u1163\\u1164\\u1165\\u1166\\u1167\\u1168\\u1169\\u116A\\u116D\\u116F\\u1172\\u1174\\u1175\\u1176\\u119E\\u119F\\u11A8\\u11A9\\u11AB\\u11AC\""); + output.Append(" +\"\\u11AE\\u11B0\\u11B7\\u11B9\\u11BA\\u11BB\\u11BC\\u11C3\\u11EB\\u11EC\\u11F0\\u11F1\\u11F9\\u11FA\\u1E00\\u1E9C\\u1EA0\\u1EFA\\u1F00\\u1F16\\u1F18\\u1F1E\\u1F20\\u1F46\\u1F48\\u1F4E\\u1F50\\u1F58\\u1F59\\u1F5A\\u1F5B\\u1F5C\\u1F5D\\u1F5E\\u1F5F\\u1F7E\\u1F80\\u1FB5\\u1FB6\\u1FBD\\u1FBE\\u1FBF\\u1FC2\\u1FC5\\u1FC6\\u1FCD\\u1FD0\\u1FD4\\u1FD6\\u1FDC\\u1FE0\\u1FED\\u1FF2\\u1FF5\\u1FF6\\u1FFD\\u2126\\u2127\\u212A\\u212C\\u212E\\u212F\\u2180\\u2183\\u3007\\u3008\\u3021\\u302A\\u3041\\u3095\\u30A1\\u30FB\\u3105\\u312D\\u4E00\\u9FA6\\uAC00\\uD7A4\"],"); + output.AppendLine(); + output.Append(" [\"_xmlW\", 
\"\\u0024\\u0025\\u002B\\u002C\\u0030\\u003A\\u003C\\u003F\\u0041\\u005B\\u005E\\u005F\\u0060\\u007B\\u007C\\u007D\\u007E\\u007F\\u00A2\\u00AB\\u00AC\\u00AD\\u00AE\\u00B7\\u00B8\\u00BB\\u00BC\\u00BF\\u00C0\\u0221\\u0222\\u0234\\u0250\\u02AE\\u02B0\\u02EF\\u0300\\u0350\\u0360\\u0370\\u0374\\u0376\\u037A\\u037B\\u0384\\u0387\\u0388\\u038B\\u038C\\u038D\\u038E\\u03A2\\u03A3\\u03CF\\u03D0\\u03F7\\u0400\\u0487\\u0488\\u04CF\\u04D0\\u04F6\\u04F8\\u04FA\\u0500\\u0510\\u0531\\u0557\\u0559\\u055A\\u0561\\u0588\\u0591\\u05A2\\u05A3\\u05BA\\u05BB\\u05BE\\u05BF\\u05C0\\u05C1\\u05C3\\u05C4\\u05C5\\u05D0\\u05EB\\u05F0\\u05F3\\u0621\\u063B\\u0640\\u0656\\u0660\\u066A\\u066E\\u06D4\\u06D5\\u06DD\\u06DE\\u06EE\\u06F0\\u06FF\\u0710\\u072D\\u0730\\u074B\\u0780\\u07B2\\u0901\\u0904\\u0905\\u093A\\u093C\\u094E\\u0950\\u0955\\u0958\\u0964\\u0966\\u0970\\u0981\\u0984\\u0985\\u098D\\u098F\\u0991\\u0993\\u09A9\\u09AA\\u09B1\\u09B2\\u09B3\\u09B6\\u09BA\\u09BC\\u09BD\\u09BE\\u09C5\\u09C7\\u09C9\\u09CB\\u09CE\\u09D7\\u09D8\\u09DC\\u09DE\\u09DF\\u09E4\\u09E6\\u09FB\\u0A02\\u0A03\\u0A05\\u0A0B\\u0A0F\\u0A11\\u0A13\\u0A29\\u0A2A\\u0A31\\u0A32\\u0A34\\u0A35\""); + output.Append(" 
+\"\\u0A37\\u0A38\\u0A3A\\u0A3C\\u0A3D\\u0A3E\\u0A43\\u0A47\\u0A49\\u0A4B\\u0A4E\\u0A59\\u0A5D\\u0A5E\\u0A5F\\u0A66\\u0A75\\u0A81\\u0A84\\u0A85\\u0A8C\\u0A8D\\u0A8E\\u0A8F\\u0A92\\u0A93\\u0AA9\\u0AAA\\u0AB1\\u0AB2\\u0AB4\\u0AB5\\u0ABA\\u0ABC\\u0AC6\\u0AC7\\u0ACA\\u0ACB\\u0ACE\\u0AD0\\u0AD1\\u0AE0\\u0AE1\\u0AE6\\u0AF0\\u0B01\\u0B04\\u0B05\\u0B0D\\u0B0F\\u0B11\\u0B13\\u0B29\\u0B2A\\u0B31\\u0B32\\u0B34\\u0B36\\u0B3A\\u0B3C\\u0B44\\u0B47\\u0B49\\u0B4B\\u0B4E\\u0B56\\u0B58\\u0B5C\\u0B5E\\u0B5F\\u0B62\\u0B66\\u0B71\\u0B82\\u0B84\\u0B85\\u0B8B\\u0B8E\\u0B91\\u0B92\\u0B96\\u0B99\\u0B9B\\u0B9C\\u0B9D\\u0B9E\\u0BA0\\u0BA3\\u0BA5\\u0BA8\\u0BAB\\u0BAE\\u0BB6\\u0BB7\\u0BBA\\u0BBE\\u0BC3\\u0BC6\\u0BC9\\u0BCA\\u0BCE\\u0BD7\\u0BD8\\u0BE7\\u0BF3\\u0C01\\u0C04\\u0C05\\u0C0D\\u0C0E\\u0C11\\u0C12\\u0C29\\u0C2A\\u0C34\\u0C35\\u0C3A\\u0C3E\\u0C45\\u0C46\\u0C49\\u0C4A\\u0C4E\\u0C55\\u0C57\\u0C60\\u0C62\\u0C66\\u0C70\\u0C82\\u0C84\\u0C85\\u0C8D\\u0C8E\\u0C91\\u0C92\\u0CA9\\u0CAA\\u0CB4\\u0CB5\\u0CBA\\u0CBE\\u0CC5\\u0CC6\\u0CC9\\u0CCA\\u0CCE\\u0CD5\\u0CD7\\u0CDE\\u0CDF\\u0CE0\\u0CE2\\u0CE6\\u0CF0\\u0D02\\u0D04\\u0D05\\u0D0D\\u0D0E\\u0D11\\u0D12\\u0D29\\u0D2A\\u0D3A\\u0D3E\\u0D44\\u0D46\\u0D49\""); + output.Append(" 
+\"\\u0D4A\\u0D4E\\u0D57\\u0D58\\u0D60\\u0D62\\u0D66\\u0D70\\u0D82\\u0D84\\u0D85\\u0D97\\u0D9A\\u0DB2\\u0DB3\\u0DBC\\u0DBD\\u0DBE\\u0DC0\\u0DC7\\u0DCA\\u0DCB\\u0DCF\\u0DD5\\u0DD6\\u0DD7\\u0DD8\\u0DE0\\u0DF2\\u0DF4\\u0E01\\u0E3B\\u0E3F\\u0E4F\\u0E50\\u0E5A\\u0E81\\u0E83\\u0E84\\u0E85\\u0E87\\u0E89\\u0E8A\\u0E8B\\u0E8D\\u0E8E\\u0E94\\u0E98\\u0E99\\u0EA0\\u0EA1\\u0EA4\\u0EA5\\u0EA6\\u0EA7\\u0EA8\\u0EAA\\u0EAC\\u0EAD\\u0EBA\\u0EBB\\u0EBE\\u0EC0\\u0EC5\\u0EC6\\u0EC7\\u0EC8\\u0ECE\\u0ED0\\u0EDA\\u0EDC\\u0EDE\\u0F00\\u0F04\\u0F13\\u0F3A\\u0F3E\\u0F48\\u0F49\\u0F6B\\u0F71\\u0F85\\u0F86\\u0F8C\\u0F90\\u0F98\\u0F99\\u0FBD\\u0FBE\\u0FCD\\u0FCF\\u0FD0\\u1000\\u1022\\u1023\\u1028\\u1029\\u102B\\u102C\\u1033\\u1036\\u103A\\u1040\\u104A\\u1050\\u105A\\u10A0\\u10C6\\u10D0\\u10F9\\u1100\\u115A\\u115F\\u11A3\\u11A8\\u11FA\\u1200\\u1207\\u1208\\u1247\\u1248\\u1249\\u124A\\u124E\\u1250\\u1257\\u1258\\u1259\\u125A\\u125E\\u1260\\u1287\\u1288\\u1289\\u128A\\u128E\\u1290\\u12AF\\u12B0\\u12B1\\u12B2\\u12B6\\u12B8\\u12BF\\u12C0\\u12C1\\u12C2\\u12C6\\u12C8\\u12CF\\u12D0\\u12D7\\u12D8\\u12EF\\u12F0\\u130F\\u1310\\u1311\\u1312\\u1316\\u1318\\u131F\\u1320\\u1347\\u1348\\u135B\\u1369\\u137D\\u13A0\""); + output.Append(" 
+\"\\u13F5\\u1401\\u166D\\u166F\\u1677\\u1681\\u169B\\u16A0\\u16EB\\u16EE\\u16F1\\u1700\\u170D\\u170E\\u1715\\u1720\\u1735\\u1740\\u1754\\u1760\\u176D\\u176E\\u1771\\u1772\\u1774\\u1780\\u17D4\\u17D7\\u17D8\\u17DB\\u17DD\\u17E0\\u17EA\\u180B\\u180E\\u1810\\u181A\\u1820\\u1878\\u1880\\u18AA\\u1E00\\u1E9C\\u1EA0\\u1EFA\\u1F00\\u1F16\\u1F18\\u1F1E\\u1F20\\u1F46\\u1F48\\u1F4E\\u1F50\\u1F58\\u1F59\\u1F5A\\u1F5B\\u1F5C\\u1F5D\\u1F5E\\u1F5F\\u1F7E\\u1F80\\u1FB5\\u1FB6\\u1FC5\\u1FC6\\u1FD4\\u1FD6\\u1FDC\\u1FDD\\u1FF0\\u1FF2\\u1FF5\\u1FF6\\u1FFF\\u2044\\u2045\\u2052\\u2053\\u2070\\u2072\\u2074\\u207D\\u207F\\u208D\\u20A0\\u20B2\\u20D0\\u20EB\\u2100\\u213B\\u213D\\u214C\\u2153\\u2184\\u2190\\u2329\\u232B\\u23B4\\u23B7\\u23CF\\u2400\\u2427\\u2440\\u244B\\u2460\\u24FF\\u2500\\u2614\\u2616\\u2618\\u2619\\u267E\\u2680\\u268A\\u2701\\u2705\\u2706\\u270A\\u270C\\u2728\\u2729\\u274C\\u274D\\u274E\\u274F\\u2753\\u2756\\u2757\\u2758\\u275F\\u2761\\u2768\\u2776\\u2795\\u2798\\u27B0\\u27B1\\u27BF\\u27D0\\u27E6\\u27F0\\u2983\\u2999\\u29D8\\u29DC\\u29FC\\u29FE\\u2B00\\u2E80\\u2E9A\\u2E9B\\u2EF4\\u2F00\\u2FD6\\u2FF0\\u2FFC\\u3004\\u3008\\u3012\\u3014\\u3020\\u3030\\u3031\\u303D\\u303E\\u3040\""); + output.Append(" 
+\"\\u3041\\u3097\\u3099\\u30A0\\u30A1\\u30FB\\u30FC\\u3100\\u3105\\u312D\\u3131\\u318F\\u3190\\u31B8\\u31F0\\u321D\\u3220\\u3244\\u3251\\u327C\\u327F\\u32CC\\u32D0\\u32FF\\u3300\\u3377\\u337B\\u33DE\\u33E0\\u33FF\\u3400\\u4DB6\\u4E00\\u9FA6\\uA000\\uA48D\\uA490\\uA4C7\\uAC00\\uD7A4\\uF900\\uFA2E\\uFA30\\uFA6B\\uFB00\\uFB07\\uFB13\\uFB18\\uFB1D\\uFB37\\uFB38\\uFB3D\\uFB3E\\uFB3F\\uFB40\\uFB42\\uFB43\\uFB45\\uFB46\\uFBB2\\uFBD3\\uFD3E\\uFD50\\uFD90\\uFD92\\uFDC8\\uFDF0\\uFDFD\\uFE00\\uFE10\\uFE20\\uFE24\\uFE62\\uFE63\\uFE64\\uFE67\\uFE69\\uFE6A\\uFE70\\uFE75\\uFE76\\uFEFD\\uFF04\\uFF05\\uFF0B\\uFF0C\\uFF10\\uFF1A\\uFF1C\\uFF1F\\uFF21\\uFF3B\\uFF3E\\uFF3F\\uFF40\\uFF5B\\uFF5C\\uFF5D\\uFF5E\\uFF5F\\uFF66\\uFFBF\\uFFC2\\uFFC8\\uFFCA\\uFFD0\\uFFD2\\uFFD8\\uFFDA\\uFFDD\\uFFE0\\uFFE7\\uFFE8\\uFFEF\\uFFFC\\uFFFE\"],"); + output.AppendLine(); + output.AppendLine(" ];"); + output.AppendLine(" }"); + output.AppendLine("}"); + + File.WriteAllText(outputFile, output.ToString()); + Console.WriteLine($"Successfully generated {outputFile}"); + } + + private static string RemoveAllNonAlphanumeric(string blockName) + { + // Allow only A-Z a-z 0-9 and hyphens + // Keep hyphens to preserve naming like "Latin-1" or "Extended-A" + return new string(blockName.ToCharArray().Where(c => + ('A' <= c && c <= 'Z') || + ('a' <= c && c <= 'z') || + ('0' <= c && c <= '9') || + c == '-').ToArray()); + } + + private static string GetNextCodePoint(string hexCode) + { + // Regex named blocks use the start of the next block as the end code + // So we need to add 1 to the end code + uint code = uint.Parse(hexCode, NumberStyles.HexNumber, CultureInfo.InvariantCulture); + code++; + return code.ToString("X4"); + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md new file mode 100644 index 00000000000000..497af06eedae80 --- /dev/null +++ 
b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md @@ -0,0 +1,48 @@ +# GenRegexNamedBlocks Tool + +## Overview + +This tool generates the named Unicode blocks table for `RegexCharClass` (written to `RegexCharClass.Tables.cs`) based on the Unicode Character Database (UCD) `Blocks.txt` file. Named blocks allow regex patterns to match characters in specific Unicode blocks using syntax like `\p{IsBasicLatin}` or `\p{IsGreek}`. + +The current implementation is based on **Unicode 17.0**. + +## Usage + +To update the named blocks when a new Unicode version is released: + +1. Download the `Blocks.txt` file from the Unicode Consortium: + ``` + https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt + ``` + + +2. Run the tool from this directory: + ```bash + dotnet run -- ../../src/System/Text/RegularExpressions/RegexCharClass.Tables.cs + ``` + +3. The tool will generate the `RegexCharClass.Tables.cs` file with all named blocks. + +4. Update the tests in `RegexCharacterSetTests.cs` to cover any new blocks, if needed. + +5. Build and run the tests to ensure they all pass. + +## Notes + +- The tool automatically excludes: + - Blocks outside the Basic Multilingual Plane (BMP), i.e. code points >= U+10000 + - Surrogate blocks (U+D800-U+DFFF) + - Private Use Area blocks + + +- Block names are converted by prefixing "Is" and removing every character other than ASCII letters, digits, and hyphens (e.g., "Greek and Coptic" becomes "IsGreekandCoptic"). + +- The tool sorts blocks alphabetically by name for consistent output. + +- For backward compatibility, some aliases like "IsGreek" (alias for "IsGreekandCoptic") should be maintained manually. + +## See Also + +- [Unicode Character Database](https://www.unicode.org/ucd/) +- [Unicode Block Names](https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt) +- Related tool: `src/libraries/System.Text.Encodings.Web/tools/GenUnicodeRanges/`