From 77ac6542a5f76338837088d0a0075fdaf2ed07a4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Oct 2025 16:08:26 +0000 Subject: [PATCH 01/13] Initial plan From 9522c2b9b1ea6aa4816d3302a71f95774cba8881 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Oct 2025 16:38:57 +0000 Subject: [PATCH 02/13] Update Regex named blocks from Unicode 4.0 to Unicode 16.0 Added 52 new Unicode blocks and updated existing ones to match Unicode 16.0 specification. Total count increased from 108 to 160 named blocks. Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../Text/RegularExpressions/RegexCharClass.cs | 266 +++++++++++------- .../GenRegexNamedBlocks.csproj | 9 + .../tools/GenRegexNamedBlocks/Program.cs | 103 +++++++ 3 files changed, 271 insertions(+), 107 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj create mode 100644 src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 5d859fbf6aa76f..37c9645dc14717 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -184,114 +184,166 @@ internal sealed partial class RegexCharClass // Has to be sorted by the first column private static readonly string[][] s_propTable = [ - ["IsAlphabeticPresentationForms", "\uFB00\uFB50"], - ["IsArabic", "\u0600\u0700"], - ["IsArabicPresentationForms-A", "\uFB50\uFE00"], - ["IsArabicPresentationForms-B", "\uFE70\uFF00"], - ["IsArmenian", "\u0530\u0590"], - 
["IsArrows", "\u2190\u2200"], - ["IsBasicLatin", "\u0000\u0080"], - ["IsBengali", "\u0980\u0A00"], - ["IsBlockElements", "\u2580\u25A0"], - ["IsBopomofo", "\u3100\u3130"], - ["IsBopomofoExtended", "\u31A0\u31C0"], - ["IsBoxDrawing", "\u2500\u2580"], - ["IsBraillePatterns", "\u2800\u2900"], - ["IsBuhid", "\u1740\u1760"], - ["IsCJKCompatibility", "\u3300\u3400"], - ["IsCJKCompatibilityForms", "\uFE30\uFE50"], - ["IsCJKCompatibilityIdeographs", "\uF900\uFB00"], - ["IsCJKRadicalsSupplement", "\u2E80\u2F00"], - ["IsCJKSymbolsandPunctuation", "\u3000\u3040"], - ["IsCJKUnifiedIdeographs", "\u4E00\uA000"], - ["IsCJKUnifiedIdeographsExtensionA", "\u3400\u4DC0"], - ["IsCherokee", "\u13A0\u1400"], - ["IsCombiningDiacriticalMarks", "\u0300\u0370"], + ["IsAlphabeticPresentationForms", "\uFB00\uFB50"], + ["IsArabic", "\u0600\u0700"], + ["IsArabicExtended-A", "\u08A0\u0900"], + ["IsArabicExtended-B", "\u0870\u08A0"], + ["IsArabicPresentationForms-A", "\uFB50\uFE00"], + ["IsArabicPresentationForms-B", "\uFE70\uFF00"], + ["IsArabicSupplement", "\u0750\u0780"], + ["IsArmenian", "\u0530\u0590"], + ["IsArrows", "\u2190\u2200"], + ["IsBalinese", "\u1B00\u1B80"], + ["IsBamum", "\uA6A0\uA700"], + ["IsBasicLatin", "\u0000\u0080"], + ["IsBatak", "\u1BC0\u1C00"], + ["IsBengali", "\u0980\u0A00"], + ["IsBlockElements", "\u2580\u25A0"], + ["IsBopomofo", "\u3100\u3130"], + ["IsBopomofoExtended", "\u31A0\u31C0"], + ["IsBoxDrawing", "\u2500\u2580"], + ["IsBraillePatterns", "\u2800\u2900"], + ["IsBuginese", "\u1A00\u1A20"], + ["IsBuhid", "\u1740\u1760"], + ["IsCJKCompatibility", "\u3300\u3400"], + ["IsCJKCompatibilityForms", "\uFE30\uFE50"], + ["IsCJKCompatibilityIdeographs", "\uF900\uFB00"], + ["IsCJKRadicalsSupplement", "\u2E80\u2F00"], + ["IsCJKStrokes", "\u31C0\u31F0"], + ["IsCJKSymbolsandPunctuation", "\u3000\u3040"], + ["IsCJKUnifiedIdeographs", "\u4E00\uA000"], + ["IsCJKUnifiedIdeographsExtensionA", "\u3400\u4DC0"], + ["IsCham", "\uAA00\uAA60"], + ["IsCherokee", "\u13A0\u1400"], + 
["IsCherokeeSupplement", "\uAB70\uABC0"], + ["IsCombiningDiacriticalMarks", "\u0300\u0370"], + ["IsCombiningDiacriticalMarksExtended", "\u1AB0\u1B00"], + ["IsCombiningDiacriticalMarksSupplement", "\u1DC0\u1E00"], ["IsCombiningDiacriticalMarksforSymbols", "\u20D0\u2100"], - ["IsCombiningHalfMarks", "\uFE20\uFE30"], - ["IsCombiningMarksforSymbols", "\u20D0\u2100"], - ["IsControlPictures", "\u2400\u2440"], - ["IsCurrencySymbols", "\u20A0\u20D0"], - ["IsCyrillic", "\u0400\u0500"], - ["IsCyrillicSupplement", "\u0500\u0530"], - ["IsDevanagari", "\u0900\u0980"], - ["IsDingbats", "\u2700\u27C0"], - ["IsEnclosedAlphanumerics", "\u2460\u2500"], - ["IsEnclosedCJKLettersandMonths", "\u3200\u3300"], - ["IsEthiopic", "\u1200\u1380"], - ["IsGeneralPunctuation", "\u2000\u2070"], - ["IsGeometricShapes", "\u25A0\u2600"], - ["IsGeorgian", "\u10A0\u1100"], - ["IsGreek", "\u0370\u0400"], - ["IsGreekExtended", "\u1F00\u2000"], - ["IsGreekandCoptic", "\u0370\u0400"], - ["IsGujarati", "\u0A80\u0B00"], - ["IsGurmukhi", "\u0A00\u0A80"], - ["IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"], - ["IsHangulCompatibilityJamo", "\u3130\u3190"], - ["IsHangulJamo", "\u1100\u1200"], - ["IsHangulSyllables", "\uAC00\uD7B0"], - ["IsHanunoo", "\u1720\u1740"], - ["IsHebrew", "\u0590\u0600"], - ["IsHighPrivateUseSurrogates", "\uDB80\uDC00"], - ["IsHighSurrogates", "\uD800\uDB80"], - ["IsHiragana", "\u3040\u30A0"], - ["IsIPAExtensions", "\u0250\u02B0"], - ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"], - ["IsKanbun", "\u3190\u31A0"], - ["IsKangxiRadicals", "\u2F00\u2FE0"], - ["IsKannada", "\u0C80\u0D00"], - ["IsKatakana", "\u30A0\u3100"], - ["IsKatakanaPhoneticExtensions", "\u31F0\u3200"], - ["IsKhmer", "\u1780\u1800"], - ["IsKhmerSymbols", "\u19E0\u1A00"], - ["IsLao", "\u0E80\u0F00"], - ["IsLatin-1Supplement", "\u0080\u0100"], - ["IsLatinExtended-A", "\u0100\u0180"], - ["IsLatinExtended-B", "\u0180\u0250"], - ["IsLatinExtendedAdditional", "\u1E00\u1F00"], - ["IsLetterlikeSymbols", "\u2100\u2150"], - 
["IsLimbu", "\u1900\u1950"], - ["IsLowSurrogates", "\uDC00\uE000"], - ["IsMalayalam", "\u0D00\u0D80"], - ["IsMathematicalOperators", "\u2200\u2300"], - ["IsMiscellaneousMathematicalSymbols-A", "\u27C0\u27F0"], - ["IsMiscellaneousMathematicalSymbols-B", "\u2980\u2A00"], - ["IsMiscellaneousSymbols", "\u2600\u2700"], - ["IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"], - ["IsMiscellaneousTechnical", "\u2300\u2400"], - ["IsMongolian", "\u1800\u18B0"], - ["IsMyanmar", "\u1000\u10A0"], - ["IsNumberForms", "\u2150\u2190"], - ["IsOgham", "\u1680\u16A0"], - ["IsOpticalCharacterRecognition", "\u2440\u2460"], - ["IsOriya", "\u0B00\u0B80"], - ["IsPhoneticExtensions", "\u1D00\u1D80"], - ["IsPrivateUse", "\uE000\uF900"], - ["IsPrivateUseArea", "\uE000\uF900"], - ["IsRunic", "\u16A0\u1700"], - ["IsSinhala", "\u0D80\u0E00"], - ["IsSmallFormVariants", "\uFE50\uFE70"], - ["IsSpacingModifierLetters", "\u02B0\u0300"], - ["IsSpecials", "\uFFF0"], - ["IsSuperscriptsandSubscripts", "\u2070\u20A0"], - ["IsSupplementalArrows-A", "\u27F0\u2800"], - ["IsSupplementalArrows-B", "\u2900\u2980"], - ["IsSupplementalMathematicalOperators", "\u2A00\u2B00"], - ["IsSyriac", "\u0700\u0750"], - ["IsTagalog", "\u1700\u1720"], - ["IsTagbanwa", "\u1760\u1780"], - ["IsTaiLe", "\u1950\u1980"], - ["IsTamil", "\u0B80\u0C00"], - ["IsTelugu", "\u0C00\u0C80"], - ["IsThaana", "\u0780\u07C0"], - ["IsThai", "\u0E00\u0E80"], - ["IsTibetan", "\u0F00\u1000"], - ["IsUnifiedCanadianAboriginalSyllabics", "\u1400\u1680"], - ["IsVariationSelectors", "\uFE00\uFE10"], - ["IsYiRadicals", "\uA490\uA4D0"], - ["IsYiSyllables", "\uA000\uA490"], - ["IsYijingHexagramSymbols", "\u4DC0\u4E00"], + ["IsCombiningHalfMarks", "\uFE20\uFE30"], + ["IsCommonIndicNumberForms", "\uA830\uA840"], + ["IsControlPictures", "\u2400\u2440"], + ["IsCoptic", "\u2C80\u2D00"], + ["IsCurrencySymbols", "\u20A0\u20D0"], + ["IsCyrillic", "\u0400\u0500"], + ["IsCyrillicExtended-A", "\u2DE0\u2E00"], + ["IsCyrillicExtended-B", "\uA640\uA6A0"], + 
["IsCyrillicExtended-C", "\u1C80\u1C90"], + ["IsCyrillicSupplement", "\u0500\u0530"], + ["IsDevanagari", "\u0900\u0980"], + ["IsDevanagariExtended", "\uA8E0\uA900"], + ["IsDingbats", "\u2700\u27C0"], + ["IsEnclosedAlphanumerics", "\u2460\u2500"], + ["IsEnclosedCJKLettersandMonths", "\u3200\u3300"], + ["IsEthiopic", "\u1200\u1380"], + ["IsEthiopicExtended", "\u2D80\u2DE0"], + ["IsEthiopicExtended-A", "\uAB00\uAB30"], + ["IsEthiopicSupplement", "\u1380\u13A0"], + ["IsGeneralPunctuation", "\u2000\u2070"], + ["IsGeometricShapes", "\u25A0\u2600"], + ["IsGeorgian", "\u10A0\u1100"], + ["IsGeorgianExtended", "\u1C90\u1CC0"], + ["IsGeorgianSupplement", "\u2D00\u2D30"], + ["IsGlagolitic", "\u2C00\u2C60"], + ["IsGreekExtended", "\u1F00\u2000"], + ["IsGreekandCoptic", "\u0370\u0400"], + ["IsGujarati", "\u0A80\u0B00"], + ["IsGurmukhi", "\u0A00\u0A80"], + ["IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"], + ["IsHangulCompatibilityJamo", "\u3130\u3190"], + ["IsHangulJamo", "\u1100\u1200"], + ["IsHangulJamoExtended-A", "\uA960\uA980"], + ["IsHangulJamoExtended-B", "\uD7B0\uD800"], + ["IsHangulSyllables", "\uAC00\uD7B0"], + ["IsHanunoo", "\u1720\u1740"], + ["IsHebrew", "\u0590\u0600"], + ["IsHiragana", "\u3040\u30A0"], + ["IsIPAExtensions", "\u0250\u02B0"], + ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"], + ["IsJavanese", "\uA980\uA9E0"], + ["IsKanbun", "\u3190\u31A0"], + ["IsKangxiRadicals", "\u2F00\u2FE0"], + ["IsKannada", "\u0C80\u0D00"], + ["IsKatakana", "\u30A0\u3100"], + ["IsKatakanaPhoneticExtensions", "\u31F0\u3200"], + ["IsKayahLi", "\uA900\uA930"], + ["IsKhmer", "\u1780\u1800"], + ["IsKhmerSymbols", "\u19E0\u1A00"], + ["IsLao", "\u0E80\u0F00"], + ["IsLatin-1Supplement", "\u0080\u0100"], + ["IsLatinExtended-A", "\u0100\u0180"], + ["IsLatinExtended-B", "\u0180\u0250"], + ["IsLatinExtended-C", "\u2C60\u2C80"], + ["IsLatinExtended-D", "\uA720\uA800"], + ["IsLatinExtended-E", "\uAB30\uAB70"], + ["IsLatinExtendedAdditional", "\u1E00\u1F00"], + ["IsLepcha", 
"\u1C00\u1C50"], + ["IsLetterlikeSymbols", "\u2100\u2150"], + ["IsLimbu", "\u1900\u1950"], + ["IsLisu", "\uA4D0\uA500"], + ["IsMalayalam", "\u0D00\u0D80"], + ["IsMandaic", "\u0840\u0860"], + ["IsMathematicalOperators", "\u2200\u2300"], + ["IsMeeteiMayek", "\uABC0\uAC00"], + ["IsMeeteiMayekExtensions", "\uAAE0\uAB00"], + ["IsMiscellaneousMathematicalSymbols-A", "\u27C0\u27F0"], + ["IsMiscellaneousMathematicalSymbols-B", "\u2980\u2A00"], + ["IsMiscellaneousSymbols", "\u2600\u2700"], + ["IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"], + ["IsMiscellaneousTechnical", "\u2300\u2400"], + ["IsModifierToneLetters", "\uA700\uA720"], + ["IsMongolian", "\u1800\u18B0"], + ["IsMyanmar", "\u1000\u10A0"], + ["IsMyanmarExtended-A", "\uAA60\uAA80"], + ["IsMyanmarExtended-B", "\uA9E0\uAA00"], + ["IsNKo", "\u07C0\u0800"], + ["IsNewTaiLue", "\u1980\u19E0"], + ["IsNumberForms", "\u2150\u2190"], + ["IsOgham", "\u1680\u16A0"], + ["IsOlChiki", "\u1C50\u1C80"], + ["IsOpticalCharacterRecognition", "\u2440\u2460"], + ["IsOriya", "\u0B00\u0B80"], + ["IsPhags-pa", "\uA840\uA880"], + ["IsPhoneticExtensions", "\u1D00\u1D80"], + ["IsPhoneticExtensionsSupplement", "\u1D80\u1DC0"], + ["IsRejang", "\uA930\uA960"], + ["IsRunic", "\u16A0\u1700"], + ["IsSamaritan", "\u0800\u0840"], + ["IsSaurashtra", "\uA880\uA8E0"], + ["IsSinhala", "\u0D80\u0E00"], + ["IsSmallFormVariants", "\uFE50\uFE70"], + ["IsSpacingModifierLetters", "\u02B0\u0300"], + ["IsSpecials", "\uFFF0"], + ["IsSundanese", "\u1B80\u1BC0"], + ["IsSundaneseSupplement", "\u1CC0\u1CD0"], + ["IsSuperscriptsandSubscripts", "\u2070\u20A0"], + ["IsSupplementalArrows-A", "\u27F0\u2800"], + ["IsSupplementalArrows-B", "\u2900\u2980"], + ["IsSupplementalMathematicalOperators", "\u2A00\u2B00"], + ["IsSupplementalPunctuation", "\u2E00\u2E80"], + ["IsSylotiNagri", "\uA800\uA830"], + ["IsSyriac", "\u0700\u0750"], + ["IsSyriacSupplement", "\u0860\u0870"], + ["IsTagalog", "\u1700\u1720"], + ["IsTagbanwa", "\u1760\u1780"], + ["IsTaiLe", 
"\u1950\u1980"], + ["IsTaiTham", "\u1A20\u1AB0"], + ["IsTaiViet", "\uAA80\uAAE0"], + ["IsTamil", "\u0B80\u0C00"], + ["IsTelugu", "\u0C00\u0C80"], + ["IsThaana", "\u0780\u07C0"], + ["IsThai", "\u0E00\u0E80"], + ["IsTibetan", "\u0F00\u1000"], + ["IsTifinagh", "\u2D30\u2D80"], + ["IsUnifiedCanadianAboriginalSyllabics", "\u1400\u1680"], + ["IsUnifiedCanadianAboriginalSyllabicsExtended", "\u18B0\u1900"], + ["IsVai", "\uA500\uA640"], + ["IsVariationSelectors", "\uFE00\uFE10"], + ["IsVedicExtensions", "\u1CD0\u1D00"], + ["IsVerticalForms", "\uFE10\uFE20"], + ["IsYiRadicals", "\uA490\uA4D0"], + ["IsYiSyllables", "\uA000\uA490"], + ["IsYijingHexagramSymbols", "\u4DC0\u4E00"], ["_xmlC", /* Name Char */ "\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00B7\u00B8\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u02D0\u02D2\u0300\u0346\u0360\u0362\u0386\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0483\u0487\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0653\u0660\u066A\u0670\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06E9\u06EA\u06EE\u06F0\u06FA\u0901\u0904\u0905\u093A\u093C\u094E\u0951\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC" 
+"\u09DE\u09DF\u09E4\u09E6\u09F2\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B70\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF0\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2" 
+"\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0E01\u0E2F\u0E30\u0E3B\u0E40\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0F18\u0F1A\u0F20\u0F2A\u0F35\u0F36\u0F37\u0F38\u0F39\u0F3A\u0F3E\u0F48\u0F49\u0F6A\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F96\u0F97\u0F98\u0F99\u0FAE\u0FB1\u0FB8\u0FB9\u0FBA\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00" diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj new file mode 100644 index 00000000000000..f9929d13fb73d1 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj @@ -0,0 +1,9 @@ + + + + Exe + net10.0 + enable + + + diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs new file mode 100644 index 00000000000000..0e1564d54c1326 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs @@ -0,0 +1,103 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+
+using static System.FormattableString;
+
+namespace GenRegexNamedBlocks
+{
+    /// <summary>
+    /// This program outputs the named blocks for RegexCharClass.cs
+    /// </summary>
+    class Program
+    {
+        static void Main(string[] args)
+        {
+            if (args.Length < 1)
+            {
+                Console.WriteLine("Usage: dotnet run -- <path-to-Blocks.txt>");
+                Console.WriteLine("Example: dotnet run -- Blocks.txt");
+                return;
+            }
+
+            // The input file should be Blocks.txt from the UCD corresponding to the
+            // version of the Unicode spec we're consuming.
+            // More info: https://www.unicode.org/reports/tr44/
+            // Latest Blocks.txt: https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt
+
+            string[] allInputLines = File.ReadAllLines(args[0]);
+
+            Regex inputLineRegex = new Regex(@"^(?<startCode>[0-9A-F]{4})\.\.(?<endCode>[0-9A-F]{4}); (?<blockName>.+)$");
+
+            var entries = new List<(string name, string startCode, string endCode)>();
+
+            foreach (string inputLine in allInputLines)
+            {
+                // We only care about lines of the form "XXXX..XXXX; Block name"
+                var match = inputLineRegex.Match(inputLine);
+                if (!match.Success)
+                {
+                    continue;
+                }
+
+                string startCode = match.Groups["startCode"].Value;
+                string endCode = match.Groups["endCode"].Value;
+                string blockName = match.Groups["blockName"].Value;
+
+                // Exclude the surrogate range and everything outside the BMP.
+                uint startCodeAsInt = uint.Parse(startCode, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
+                if (startCodeAsInt >= 0x10000 || (startCodeAsInt >= 0xD800 && startCodeAsInt <= 0xDFFF))
+                {
+                    continue;
+                }
+
+                // Exclude any private use areas
+                if (blockName.Contains("Private Use", StringComparison.OrdinalIgnoreCase))
+                {
+                    continue;
+                }
+
+                // Convert block name to Regex format (with "Is" prefix)
+                string regexBlockName = "Is" + RemoveAllNonAlphanumeric(blockName);
+
+                entries.Add((regexBlockName, startCode, endCode));
+            }
+
+            // Sort by block name (ordinal), not by code point: s_propTable in RegexCharClass.cs
+            // "has to be sorted by the first column".
+            entries.Sort((a, b) => string.Compare(a.name, b.name, StringComparison.Ordinal));
+
+            // Generate the output
+            foreach (var entry in entries)
+            {
+                Console.WriteLine($" [\"{entry.name}\", \"\\u{entry.startCode}{GetExclusiveEndEscape(entry.endCode)}\"],");
+            }
+        }
+
+        /// <summary>Keeps only A-Z, a-z, 0-9 and hyphens (hyphens preserve names like "Latin-1" or "Extended-A").</summary>
+        private static string RemoveAllNonAlphanumeric(string blockName)
+        {
+            return new string(blockName.Where(c => char.IsAsciiLetterOrDigit(c) || c == '-').ToArray());
+        }
+
+        /// <summary>
+        /// Returns the "\uXXXX" escape for the exclusive end of a block. Regex named blocks use the
+        /// start of the next block as the end code, so this is the inclusive end code plus 1.
+        /// A block ending at U+FFFF has no next UTF-16 code unit ("\u10000" is read by C# as
+        /// '\u1000' followed by '0'), so an empty string is returned and the entry keeps only its
+        /// start char, exactly as the previous IsSpecials entry ("\uFFF0") was written.
+        /// </summary>
+        private static string GetExclusiveEndEscape(string hexCode)
+        {
+            uint code = uint.Parse(hexCode, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
+            return code >= 0xFFFF ? string.Empty : "\\u" + (code + 1).ToString("X4", CultureInfo.InvariantCulture);
+        }
+    }
+}
From 6d94ae7b051c8a3f4a76f9853fca6c892c2e1f01 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Oct 2025 16:58:09 +0000
Subject: [PATCH 03/13] Add IsGreek alias and update tests for new Unicode blocks

- Add IsGreek as alias for IsGreekandCoptic for backward compatibility
- Add comprehensive tests for 52 new Unicode blocks
- Remove tests for deprecated surrogate and private use blocks
- All 29,287 tests now passing
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../Text/RegularExpressions/RegexCharClass.cs | 1 + .../FunctionalTests/RegexCharacterSetTests.cs | 62 +++++++++++++++++-- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 37c9645dc14717..2eca0af5633b93 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -245,6 +245,7 @@ internal sealed partial class RegexCharClass ["IsGeorgianExtended", "\u1C90\u1CC0"], ["IsGeorgianSupplement", "\u2D00\u2D30"], ["IsGlagolitic", "\u2C00\u2C60"], + ["IsGreek", "\u0370\u0400"], ["IsGreekExtended", "\u1F00\u2000"], ["IsGreekandCoptic", "\u0370\u0400"], ["IsGujarati", "\u0A80\u0B00"], diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs index 2eb52774c33842..a6f341a6e9a15e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs @@ -245,7 +245,14 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsHebrew}", new[] { 0x0590, 0x05FF } }; yield return new object[] { engine, @"\p{IsArabic}", new[] { 0x0600, 0x06FF } }; yield return new object[] { engine, @"\p{IsSyriac}", new[] { 0x0700, 0x074F } }; + yield return new object[] { engine, @"\p{IsArabicSupplement}", new[] { 0x0750, 0x077F } }; yield return new object[] { engine, @"\p{IsThaana}", new[] { 0x0780, 0x07BF } }; + yield return 
new object[] { engine, @"\p{IsNKo}", new[] { 0x07C0, 0x07FF } }; + yield return new object[] { engine, @"\p{IsSamaritan}", new[] { 0x0800, 0x083F } }; + yield return new object[] { engine, @"\p{IsMandaic}", new[] { 0x0840, 0x085F } }; + yield return new object[] { engine, @"\p{IsSyriacSupplement}", new[] { 0x0860, 0x086F } }; + yield return new object[] { engine, @"\p{IsArabicExtended-B}", new[] { 0x0870, 0x089F } }; + yield return new object[] { engine, @"\p{IsArabicExtended-A}", new[] { 0x08A0, 0x08FF } }; yield return new object[] { engine, @"\p{IsDevanagari}", new[] { 0x0900, 0x097F } }; yield return new object[] { engine, @"\p{IsBengali}", new[] { 0x0980, 0x09FF } }; yield return new object[] { engine, @"\p{IsGurmukhi}", new[] { 0x0A00, 0x0A7F } }; @@ -273,10 +280,26 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsTagbanwa}", new[] { 0x1760, 0x177F } }; yield return new object[] { engine, @"\p{IsKhmer}", new[] { 0x1780, 0x17FF } }; yield return new object[] { engine, @"\p{IsMongolian}", new[] { 0x1800, 0x18AF } }; + yield return new object[] { engine, @"\p{IsUnifiedCanadianAboriginalSyllabicsExtended}", new[] { 0x18B0, 0x18FF } }; yield return new object[] { engine, @"\p{IsLimbu}", new[] { 0x1900, 0x194F } }; yield return new object[] { engine, @"\p{IsTaiLe}", new[] { 0x1950, 0x197F } }; + yield return new object[] { engine, @"\p{IsNewTaiLue}", new[] { 0x1980, 0x19DF } }; yield return new object[] { engine, @"\p{IsKhmerSymbols}", new[] { 0x19E0, 0x19FF } }; + yield return new object[] { engine, @"\p{IsBuginese}", new[] { 0x1A00, 0x1A1F } }; + yield return new object[] { engine, @"\p{IsTaiTham}", new[] { 0x1A20, 0x1AAF } }; + yield return new object[] { engine, @"\p{IsCombiningDiacriticalMarksExtended}", new[] { 0x1AB0, 0x1AFF } }; + yield return new object[] { engine, @"\p{IsBalinese}", new[] { 0x1B00, 0x1B7F } }; + yield return new object[] { engine, @"\p{IsSundanese}", new[] { 0x1B80, 0x1BBF 
} }; + yield return new object[] { engine, @"\p{IsBatak}", new[] { 0x1BC0, 0x1BFF } }; + yield return new object[] { engine, @"\p{IsLepcha}", new[] { 0x1C00, 0x1C4F } }; + yield return new object[] { engine, @"\p{IsOlChiki}", new[] { 0x1C50, 0x1C7F } }; + yield return new object[] { engine, @"\p{IsCyrillicExtended-C}", new[] { 0x1C80, 0x1C8F } }; + yield return new object[] { engine, @"\p{IsGeorgianExtended}", new[] { 0x1C90, 0x1CBF } }; + yield return new object[] { engine, @"\p{IsSundaneseSupplement}", new[] { 0x1CC0, 0x1CCF } }; + yield return new object[] { engine, @"\p{IsVedicExtensions}", new[] { 0x1CD0, 0x1CFF } }; yield return new object[] { engine, @"\p{IsPhoneticExtensions}", new[] { 0x1D00, 0x1D7F } }; + yield return new object[] { engine, @"\p{IsPhoneticExtensionsSupplement}", new[] { 0x1D80, 0x1DBF } }; + yield return new object[] { engine, @"\p{IsCombiningDiacriticalMarksSupplement}", new[] { 0x1DC0, 0x1DFF } }; yield return new object[] { engine, @"\p{IsLatinExtendedAdditional}", new[] { 0x1E00, 0x1EFF } }; yield return new object[] { engine, @"\p{IsGreekExtended}", new[] { 0x1F00, 0x1FFF } }; yield return new object[] { engine, @"\p{IsGeneralPunctuation}", new[] { 0x2000, 0x206F } }; @@ -303,6 +326,14 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsMiscellaneousMathematicalSymbols-B}", new[] { 0x2980, 0x29FF } }; yield return new object[] { engine, @"\p{IsSupplementalMathematicalOperators}", new[] { 0x2A00, 0x2AFF } }; yield return new object[] { engine, @"\p{IsMiscellaneousSymbolsandArrows}", new[] { 0x2B00, 0x2BFF } }; + yield return new object[] { engine, @"\p{IsGlagolitic}", new[] { 0x2C00, 0x2C5F } }; + yield return new object[] { engine, @"\p{IsLatinExtended-C}", new[] { 0x2C60, 0x2C7F } }; + yield return new object[] { engine, @"\p{IsCoptic}", new[] { 0x2C80, 0x2CFF } }; + yield return new object[] { engine, @"\p{IsGeorgianSupplement}", new[] { 0x2D00, 0x2D2F } }; + yield 
return new object[] { engine, @"\p{IsTifinagh}", new[] { 0x2D30, 0x2D7F } }; + yield return new object[] { engine, @"\p{IsEthiopicExtended}", new[] { 0x2D80, 0x2DDF } }; + yield return new object[] { engine, @"\p{IsCyrillicExtended-A}", new[] { 0x2DE0, 0x2DFF } }; + yield return new object[] { engine, @"\p{IsSupplementalPunctuation}", new[] { 0x2E00, 0x2E7F } }; yield return new object[] { engine, @"\p{IsCJKRadicalsSupplement}", new[] { 0x2E80, 0x2EFF } }; yield return new object[] { engine, @"\p{IsKangxiRadicals}", new[] { 0x2F00, 0x2FDF } }; yield return new object[] { engine, @"\p{IsIdeographicDescriptionCharacters}", new[] { 0x2FF0, 0x2FFF } }; @@ -313,6 +344,7 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsHangulCompatibilityJamo}", new[] { 0x3130, 0x318F } }; yield return new object[] { engine, @"\p{IsKanbun}", new[] { 0x3190, 0x319F } }; yield return new object[] { engine, @"\p{IsBopomofoExtended}", new[] { 0x31A0, 0x31BF } }; + yield return new object[] { engine, @"\p{IsCJKStrokes}", new[] { 0x31C0, 0x31EF } }; yield return new object[] { engine, @"\p{IsKatakanaPhoneticExtensions}", new[] { 0x31F0, 0x31FF } }; yield return new object[] { engine, @"\p{IsEnclosedCJKLettersandMonths}", new[] { 0x3200, 0x32FF } }; yield return new object[] { engine, @"\p{IsCJKCompatibility}", new[] { 0x3300, 0x33FF } }; @@ -321,15 +353,37 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsCJKUnifiedIdeographs}", new[] { 0x4E00, 0x9FFF } }; yield return new object[] { engine, @"\p{IsYiSyllables}", new[] { 0xA000, 0xA48F } }; yield return new object[] { engine, @"\p{IsYiRadicals}", new[] { 0xA490, 0xA4CF } }; + yield return new object[] { engine, @"\p{IsLisu}", new[] { 0xA4D0, 0xA4FF } }; + yield return new object[] { engine, @"\p{IsVai}", new[] { 0xA500, 0xA63F } }; + yield return new object[] { engine, @"\p{IsCyrillicExtended-B}", new[] { 0xA640, 
0xA69F } }; + yield return new object[] { engine, @"\p{IsBamum}", new[] { 0xA6A0, 0xA6FF } }; + yield return new object[] { engine, @"\p{IsModifierToneLetters}", new[] { 0xA700, 0xA71F } }; + yield return new object[] { engine, @"\p{IsLatinExtended-D}", new[] { 0xA720, 0xA7FF } }; + yield return new object[] { engine, @"\p{IsSylotiNagri}", new[] { 0xA800, 0xA82F } }; + yield return new object[] { engine, @"\p{IsCommonIndicNumberForms}", new[] { 0xA830, 0xA83F } }; + yield return new object[] { engine, @"\p{IsPhags-pa}", new[] { 0xA840, 0xA87F } }; + yield return new object[] { engine, @"\p{IsSaurashtra}", new[] { 0xA880, 0xA8DF } }; + yield return new object[] { engine, @"\p{IsDevanagariExtended}", new[] { 0xA8E0, 0xA8FF } }; + yield return new object[] { engine, @"\p{IsKayahLi}", new[] { 0xA900, 0xA92F } }; + yield return new object[] { engine, @"\p{IsRejang}", new[] { 0xA930, 0xA95F } }; + yield return new object[] { engine, @"\p{IsHangulJamoExtended-A}", new[] { 0xA960, 0xA97F } }; + yield return new object[] { engine, @"\p{IsJavanese}", new[] { 0xA980, 0xA9DF } }; + yield return new object[] { engine, @"\p{IsMyanmarExtended-B}", new[] { 0xA9E0, 0xA9FF } }; + yield return new object[] { engine, @"\p{IsCham}", new[] { 0xAA00, 0xAA5F } }; + yield return new object[] { engine, @"\p{IsMyanmarExtended-A}", new[] { 0xAA60, 0xAA7F } }; + yield return new object[] { engine, @"\p{IsTaiViet}", new[] { 0xAA80, 0xAADF } }; + yield return new object[] { engine, @"\p{IsMeeteiMayekExtensions}", new[] { 0xAAE0, 0xAAFF } }; + yield return new object[] { engine, @"\p{IsEthiopicExtended-A}", new[] { 0xAB00, 0xAB2F } }; + yield return new object[] { engine, @"\p{IsLatinExtended-E}", new[] { 0xAB30, 0xAB6F } }; + yield return new object[] { engine, @"\p{IsCherokeeSupplement}", new[] { 0xAB70, 0xABBF } }; + yield return new object[] { engine, @"\p{IsMeeteiMayek}", new[] { 0xABC0, 0xABFF } }; yield return new object[] { engine, @"\p{IsHangulSyllables}", new[] { 0xAC00, 0xD7AF } }; - 
yield return new object[] { engine, @"\p{IsHighSurrogates}", new[] { 0xD800, 0xDB7F } }; - yield return new object[] { engine, @"\p{IsHighPrivateUseSurrogates}", new[] { 0xDB80, 0xDBFF } }; - yield return new object[] { engine, @"\p{IsLowSurrogates}", new[] { 0xDC00, 0xDFFF } }; - yield return new object[] { engine, @"\p{IsPrivateUse}", new[] { 0xE000, 0xF8FF } }; + yield return new object[] { engine, @"\p{IsHangulJamoExtended-B}", new[] { 0xD7B0, 0xD7FF } }; yield return new object[] { engine, @"\p{IsCJKCompatibilityIdeographs}", new[] { 0xF900, 0xFAFF } }; yield return new object[] { engine, @"\p{IsAlphabeticPresentationForms}", new[] { 0xFB00, 0xFB4F } }; yield return new object[] { engine, @"\p{IsArabicPresentationForms-A}", new[] { 0xFB50, 0xFDFF } }; yield return new object[] { engine, @"\p{IsVariationSelectors}", new[] { 0xFE00, 0xFE0F } }; + yield return new object[] { engine, @"\p{IsVerticalForms}", new[] { 0xFE10, 0xFE1F } }; yield return new object[] { engine, @"\p{IsCombiningHalfMarks}", new[] { 0xFE20, 0xFE2F } }; yield return new object[] { engine, @"\p{IsCJKCompatibilityForms}", new[] { 0xFE30, 0xFE4F } }; yield return new object[] { engine, @"\p{IsSmallFormVariants}", new[] { 0xFE50, 0xFE6F } }; From 2e06313b4d77a7a01df313c49587cc3ecbd2903a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Oct 2025 16:58:40 +0000 Subject: [PATCH 04/13] Add README for GenRegexNamedBlocks tool Document how to use the tool for future Unicode updates Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../tools/GenRegexNamedBlocks/README.md | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md new file 
mode 100644 index 00000000000000..ccc7b5e2f2b282 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md @@ -0,0 +1,48 @@ +# GenRegexNamedBlocks Tool + +## Overview + +This tool generates the named Unicode blocks for `RegexCharClass.cs` based on the Unicode Character Database (UCD) `Blocks.txt` file. Named blocks allow regex patterns to match characters in specific Unicode blocks using syntax like `\p{IsBasicLatin}` or `\p{IsGreek}`. + +The current implementation is based on **Unicode 16.0**. + +## Usage + +To update the named blocks when a new Unicode version is released: + +1. Download the `Blocks.txt` file from the Unicode Consortium: + ``` + https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt + ``` + +2. Run the tool from this directory: + ```bash + dotnet run -- <path-to-Blocks.txt> + ``` + +3. The tool will output the named blocks in the format needed for `RegexCharClass.cs` + +4. Replace the named blocks section in `RegexCharClass.cs` (lines starting with `["Is...`) with the generated output + +5. Update tests in `RegexCharacterSetTests.cs` to include tests for new blocks + +6.
Build and test to ensure all tests pass + +## Notes + +- The tool automatically excludes: + - Blocks outside the Basic Multilingual Plane (BMP) (code points >= U+10000) + - Surrogate blocks (U+D800-U+DFFF) + - Private Use Area blocks + +- Block names are converted to "Is" + alphanumeric characters + hyphens (e.g., "Greek and Coptic" becomes "IsGreekandCoptic") + +- The tool sorts blocks alphabetically by name for consistent output + +- For backward compatibility, some aliases like "IsGreek" (alias for "IsGreekandCoptic") should be manually maintained + +## See Also + +- [Unicode Character Database](https://www.unicode.org/ucd/) +- [Unicode Block Names](https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt) +- Related tool: `src/libraries/System.Text.Encodings.Web/tools/GenUnicodeRanges/` From bf209866456687b26c0f9cf71513048e856237de Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Oct 2025 01:48:03 +0000 Subject: [PATCH 05/13] Restore removed blocks for backward compatibility Add back 6 blocks that were removed to avoid breaking changes: - IsCombiningMarksforSymbols (alias for IsCombiningDiacriticalMarksforSymbols) - IsHighPrivateUseSurrogates - IsHighSurrogates - IsLowSurrogates - IsPrivateUse (alias for IsPrivateUseArea) - IsPrivateUseArea Total blocks now: 167 (108 original + 59 new) Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../src/System/Text/RegularExpressions/RegexCharClass.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 2eca0af5633b93..2d962105524918 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ 
b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -221,6 +221,7 @@ internal sealed partial class RegexCharClass ["IsCombiningDiacriticalMarksSupplement", "\u1DC0\u1E00"], ["IsCombiningDiacriticalMarksforSymbols", "\u20D0\u2100"], ["IsCombiningHalfMarks", "\uFE20\uFE30"], + ["IsCombiningMarksforSymbols", "\u20D0\u2100"], ["IsCommonIndicNumberForms", "\uA830\uA840"], ["IsControlPictures", "\u2400\u2440"], ["IsCoptic", "\u2C80\u2D00"], @@ -257,6 +258,8 @@ internal sealed partial class RegexCharClass ["IsHangulJamoExtended-B", "\uD7B0\uD800"], ["IsHangulSyllables", "\uAC00\uD7B0"], ["IsHanunoo", "\u1720\u1740"], + ["IsHighPrivateUseSurrogates", "\uDB80\uDC00"], + ["IsHighSurrogates", "\uD800\uDB80"], ["IsHebrew", "\u0590\u0600"], ["IsHiragana", "\u3040\u30A0"], ["IsIPAExtensions", "\u0250\u02B0"], @@ -282,6 +285,7 @@ internal sealed partial class RegexCharClass ["IsLetterlikeSymbols", "\u2100\u2150"], ["IsLimbu", "\u1900\u1950"], ["IsLisu", "\uA4D0\uA500"], + ["IsLowSurrogates", "\uDC00\uE000"], ["IsMalayalam", "\u0D00\u0D80"], ["IsMandaic", "\u0840\u0860"], ["IsMathematicalOperators", "\u2200\u2300"], @@ -307,6 +311,8 @@ internal sealed partial class RegexCharClass ["IsPhags-pa", "\uA840\uA880"], ["IsPhoneticExtensions", "\u1D00\u1D80"], ["IsPhoneticExtensionsSupplement", "\u1D80\u1DC0"], + ["IsPrivateUse", "\uE000\uF900"], + ["IsPrivateUseArea", "\uE000\uF900"], ["IsRejang", "\uA930\uA960"], ["IsRunic", "\u16A0\u1700"], ["IsSamaritan", "\u0800\u0840"], From 8b06b5a98bd71a65a86619b28cb55c3a102cbcd7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Oct 2025 02:35:21 +0000 Subject: [PATCH 06/13] Restore tests for surrogate and private use blocks Add back test coverage for the 6 restored blocks: - IsHighSurrogates - IsHighPrivateUseSurrogates - IsLowSurrogates - IsPrivateUse Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- 
.../tests/FunctionalTests/RegexCharacterSetTests.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs index a6f341a6e9a15e..1be9ebe6f2a0b4 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs @@ -379,6 +379,10 @@ public static IEnumerable NamedBlocksInclusionsExpected_MemberData() yield return new object[] { engine, @"\p{IsMeeteiMayek}", new[] { 0xABC0, 0xABFF } }; yield return new object[] { engine, @"\p{IsHangulSyllables}", new[] { 0xAC00, 0xD7AF } }; yield return new object[] { engine, @"\p{IsHangulJamoExtended-B}", new[] { 0xD7B0, 0xD7FF } }; + yield return new object[] { engine, @"\p{IsHighSurrogates}", new[] { 0xD800, 0xDB7F } }; + yield return new object[] { engine, @"\p{IsHighPrivateUseSurrogates}", new[] { 0xDB80, 0xDBFF } }; + yield return new object[] { engine, @"\p{IsLowSurrogates}", new[] { 0xDC00, 0xDFFF } }; + yield return new object[] { engine, @"\p{IsPrivateUse}", new[] { 0xE000, 0xF8FF } }; yield return new object[] { engine, @"\p{IsCJKCompatibilityIdeographs}", new[] { 0xF900, 0xFAFF } }; yield return new object[] { engine, @"\p{IsAlphabeticPresentationForms}", new[] { 0xFB00, 0xFB4F } }; yield return new object[] { engine, @"\p{IsArabicPresentationForms-A}", new[] { 0xFB50, 0xFDFF } }; From 5d70ee60f88eb8f9cd12d3b90d51ead611b51f48 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Oct 2025 03:00:09 +0000 Subject: [PATCH 07/13] Fix alphabetical order and IsSpecials value - Move IsHebrew to correct alphabetical position (before IsHighPrivateUseSurrogates) - Restore IsSpecials to original value "\uFFF0" (was incorrectly changed to "\uFFF0\u10000") 
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../src/System/Text/RegularExpressions/RegexCharClass.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 2d962105524918..98e2de6cd068a5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -258,9 +258,9 @@ internal sealed partial class RegexCharClass ["IsHangulJamoExtended-B", "\uD7B0\uD800"], ["IsHangulSyllables", "\uAC00\uD7B0"], ["IsHanunoo", "\u1720\u1740"], + ["IsHebrew", "\u0590\u0600"], ["IsHighPrivateUseSurrogates", "\uDB80\uDC00"], ["IsHighSurrogates", "\uD800\uDB80"], - ["IsHebrew", "\u0590\u0600"], ["IsHiragana", "\u3040\u30A0"], ["IsIPAExtensions", "\u0250\u02B0"], ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"], @@ -320,7 +320,7 @@ internal sealed partial class RegexCharClass ["IsSinhala", "\u0D80\u0E00"], ["IsSmallFormVariants", "\uFE50\uFE70"], ["IsSpacingModifierLetters", "\u02B0\u0300"], - ["IsSpecials", "\uFFF0\u10000"], + ["IsSpecials", "\uFFF0"], ["IsSundanese", "\u1B80\u1BC0"], ["IsSundaneseSupplement", "\u1CC0\u1CD0"], ["IsSuperscriptsandSubscripts", "\u2070\u20A0"], From c86734402b12ae7482d6432875c43719f513608a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Oct 2025 15:08:08 +0000 Subject: [PATCH 08/13] Update to Unicode 17.0 Unicode 17.0 has the same blocks in the BMP as Unicode 16.0, so no code changes needed. Updated documentation to reflect Unicode 17.0. 
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../tools/GenRegexNamedBlocks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md index ccc7b5e2f2b282..361c18f150e1a3 100644 --- a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md @@ -4,7 +4,7 @@ This tool generates the named Unicode blocks for `RegexCharClass.cs` based on the Unicode Character Database (UCD) `Blocks.txt` file. Named blocks allow regex patterns to match characters in specific Unicode blocks using syntax like `\p{IsBasicLatin}` or `\p{IsGreek}`. -The current implementation is based on **Unicode 16.0**. +The current implementation is based on **Unicode 17.0**. ## Usage From b7d0b8fcbf8f48595d5f552455dfed9db92d2f7e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Oct 2025 02:02:40 +0000 Subject: [PATCH 09/13] Fix trailing spaces in README.md Remove trailing spaces on lines 17 and 37 to fix linting errors Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../tools/GenRegexNamedBlocks/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md index 361c18f150e1a3..e16e3b94fd5a48 100644 --- a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md @@ -14,7 +14,8 @@ To update the named blocks when a new Unicode version is released: ``` 
https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt ``` - + + 2. Run the tool from this directory: ```bash dotnet run -- <path-to-Blocks.txt> @@ -34,7 +35,8 @@ To update the named blocks when a new Unicode version is released: - Blocks outside the Basic Multilingual Plane (BMP) (code points >= U+10000) - Surrogate blocks (U+D800-U+DFFF) - Private Use Area blocks - + + - Block names are converted to "Is" + alphanumeric characters + hyphens (e.g., "Greek and Coptic" becomes "IsGreekandCoptic") - The tool sorts blocks alphabetically by name for consistent output From 41e21558245dfd4b182013066f0626a3f6363cc6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Oct 2025 02:29:28 +0000 Subject: [PATCH 10/13] Add GenRegexNamedBlocks tool to Unicode update instructions Added step to run GenRegexNamedBlocks tool when updating Unicode version Co-authored-by: tarekgh <10833894+tarekgh@users.noreply.github.com> --- .../Tools/GenUnicodeProp/Updating-Unicode-Versions.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md b/src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md index f545d810ccdfa5..681f3763fc5bf0 100644 --- a/src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md +++ b/src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md @@ -51,5 +51,6 @@ This should be done automatically by dependency-flow, so in theory there shouldn $ ./dotnet.sh run --project src/native/minipal/UnicodeDataGenerator /tmp/UnicodeData.txt > src/native/minipal/unicodedata.c ``` 5. Update the Regex casing equivalence table using the UnicodeData.txt file from the new Unicode version. You can find the instructions on how to do this [here](../../../System.Text.RegularExpressions/tools/Readme.md). -6.
Finally, last step is to update the license for the Unicode data into our [Third party notices](../../../../../THIRD-PARTY-NOTICES.TXT) by copying the contents located in `https://www.unicode.org/license.html` to the section that has the Unicode license in our notices. -7. That's it, now commit all of the changed files, and send a PR into dotnet/runtime with the updates. If there were any special things you had to do that are not noted on this document, PLEASE UPDATE THESE INSTRUCTIONS to facilitate future updates. +6. Update the Regex named blocks using the Blocks.txt file from the new Unicode version. Run the GenRegexNamedBlocks tool located at `src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks` following the instructions in its [README.md](../../../System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md). +7. Finally, the last step is to update the license for the Unicode data in our [Third party notices](../../../../../THIRD-PARTY-NOTICES.TXT) by copying the contents located in `https://www.unicode.org/license.html` to the section that has the Unicode license in our notices. +8. That's it, now commit all of the changed files, and send a PR into dotnet/runtime with the updates. If there were any special things you had to do that are not noted in this document, PLEASE UPDATE THESE INSTRUCTIONS to facilitate future updates.
From b0718d0df194f91c907f861bbb37028b46697cb0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Oct 2025 03:00:54 +0000 Subject: [PATCH 11/13] Move named blocks table to generated file - Made RegexCharClass partial (already was) - Created RegexCharClass.Tables.cs with s_propTable - Updated GenRegexNamedBlocks tool to generate complete .cs file - Tool now takes output file path parameter - Updated README with new tool usage Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../RegexCharClass.Tables.cs | 196 ++++++++++++++++++ .../Text/RegularExpressions/RegexCharClass.cs | 192 ----------------- .../tools/GenRegexNamedBlocks/Program.cs | 84 +++++++- .../tools/GenRegexNamedBlocks/README.md | 10 +- 4 files changed, 276 insertions(+), 206 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.Tables.cs diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.Tables.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.Tables.cs new file mode 100644 index 00000000000000..e507df08302d50 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.Tables.cs @@ -0,0 +1,196 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This is a generated file. Do not edit directly. +// Run the GenRegexNamedBlocks tool to regenerate. 
+ +namespace System.Text.RegularExpressions +{ + internal sealed partial class RegexCharClass + { + /* + * The property table contains all the block definitions defined in the + * XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 17.0 spec (www.unicode.org), + * and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may + * not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates. + * + **/ + // Has to be sorted by the first column + private static readonly string[][] s_propTable = + [ + ["IsAlphabeticPresentationForms", "\uFB00\uFB50"], + ["IsArabic", "\u0600\u0700"], + ["IsArabicExtended-A", "\u08A0\u0900"], + ["IsArabicExtended-B", "\u0870\u08A0"], + ["IsArabicPresentationForms-A", "\uFB50\uFE00"], + ["IsArabicPresentationForms-B", "\uFE70\uFF00"], + ["IsArabicSupplement", "\u0750\u0780"], + ["IsArmenian", "\u0530\u0590"], + ["IsArrows", "\u2190\u2200"], + ["IsBalinese", "\u1B00\u1B80"], + ["IsBamum", "\uA6A0\uA700"], + ["IsBasicLatin", "\u0000\u0080"], + ["IsBatak", "\u1BC0\u1C00"], + ["IsBengali", "\u0980\u0A00"], + ["IsBlockElements", "\u2580\u25A0"], + ["IsBopomofo", "\u3100\u3130"], + ["IsBopomofoExtended", "\u31A0\u31C0"], + ["IsBoxDrawing", "\u2500\u2580"], + ["IsBraillePatterns", "\u2800\u2900"], + ["IsBuginese", "\u1A00\u1A20"], + ["IsBuhid", "\u1740\u1760"], + ["IsCJKCompatibility", "\u3300\u3400"], + ["IsCJKCompatibilityForms", "\uFE30\uFE50"], + ["IsCJKCompatibilityIdeographs", "\uF900\uFB00"], + ["IsCJKRadicalsSupplement", "\u2E80\u2F00"], + ["IsCJKStrokes", "\u31C0\u31F0"], + ["IsCJKSymbolsandPunctuation", "\u3000\u3040"], + ["IsCJKUnifiedIdeographs", "\u4E00\uA000"], + ["IsCJKUnifiedIdeographsExtensionA", "\u3400\u4DC0"], + ["IsCham", "\uAA00\uAA60"], + ["IsCherokee", "\u13A0\u1400"], + ["IsCherokeeSupplement", "\uAB70\uABC0"], + ["IsCombiningDiacriticalMarks", "\u0300\u0370"], + ["IsCombiningDiacriticalMarksExtended", 
"\u1AB0\u1B00"], + ["IsCombiningDiacriticalMarksSupplement", "\u1DC0\u1E00"], + ["IsCombiningDiacriticalMarksforSymbols", "\u20D0\u2100"], + ["IsCombiningHalfMarks", "\uFE20\uFE30"], + ["IsCombiningMarksforSymbols", "\u20D0\u2100"], + ["IsCommonIndicNumberForms", "\uA830\uA840"], + ["IsControlPictures", "\u2400\u2440"], + ["IsCoptic", "\u2C80\u2D00"], + ["IsCurrencySymbols", "\u20A0\u20D0"], + ["IsCyrillic", "\u0400\u0500"], + ["IsCyrillicExtended-A", "\u2DE0\u2E00"], + ["IsCyrillicExtended-B", "\uA640\uA6A0"], + ["IsCyrillicExtended-C", "\u1C80\u1C90"], + ["IsCyrillicSupplement", "\u0500\u0530"], + ["IsDevanagari", "\u0900\u0980"], + ["IsDevanagariExtended", "\uA8E0\uA900"], + ["IsDingbats", "\u2700\u27C0"], + ["IsEnclosedAlphanumerics", "\u2460\u2500"], + ["IsEnclosedCJKLettersandMonths", "\u3200\u3300"], + ["IsEthiopic", "\u1200\u1380"], + ["IsEthiopicExtended", "\u2D80\u2DE0"], + ["IsEthiopicExtended-A", "\uAB00\uAB30"], + ["IsEthiopicSupplement", "\u1380\u13A0"], + ["IsGeneralPunctuation", "\u2000\u2070"], + ["IsGeometricShapes", "\u25A0\u2600"], + ["IsGeorgian", "\u10A0\u1100"], + ["IsGeorgianExtended", "\u1C90\u1CC0"], + ["IsGeorgianSupplement", "\u2D00\u2D30"], + ["IsGlagolitic", "\u2C00\u2C60"], + ["IsGreek", "\u0370\u0400"], + ["IsGreekExtended", "\u1F00\u2000"], + ["IsGreekandCoptic", "\u0370\u0400"], + ["IsGujarati", "\u0A80\u0B00"], + ["IsGurmukhi", "\u0A00\u0A80"], + ["IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"], + ["IsHangulCompatibilityJamo", "\u3130\u3190"], + ["IsHangulJamo", "\u1100\u1200"], + ["IsHangulJamoExtended-A", "\uA960\uA980"], + ["IsHangulJamoExtended-B", "\uD7B0\uD800"], + ["IsHangulSyllables", "\uAC00\uD7B0"], + ["IsHanunoo", "\u1720\u1740"], + ["IsHebrew", "\u0590\u0600"], + ["IsHighPrivateUseSurrogates", "\uDB80\uDC00"], + ["IsHighSurrogates", "\uD800\uDB80"], + ["IsHiragana", "\u3040\u30A0"], + ["IsIPAExtensions", "\u0250\u02B0"], + ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"], + ["IsJavanese", "\uA980\uA9E0"], + 
["IsKanbun", "\u3190\u31A0"], + ["IsKangxiRadicals", "\u2F00\u2FE0"], + ["IsKannada", "\u0C80\u0D00"], + ["IsKatakana", "\u30A0\u3100"], + ["IsKatakanaPhoneticExtensions", "\u31F0\u3200"], + ["IsKayahLi", "\uA900\uA930"], + ["IsKhmer", "\u1780\u1800"], + ["IsKhmerSymbols", "\u19E0\u1A00"], + ["IsLao", "\u0E80\u0F00"], + ["IsLatin-1Supplement", "\u0080\u0100"], + ["IsLatinExtended-A", "\u0100\u0180"], + ["IsLatinExtended-B", "\u0180\u0250"], + ["IsLatinExtended-C", "\u2C60\u2C80"], + ["IsLatinExtended-D", "\uA720\uA800"], + ["IsLatinExtended-E", "\uAB30\uAB70"], + ["IsLatinExtendedAdditional", "\u1E00\u1F00"], + ["IsLepcha", "\u1C00\u1C50"], + ["IsLetterlikeSymbols", "\u2100\u2150"], + ["IsLimbu", "\u1900\u1950"], + ["IsLisu", "\uA4D0\uA500"], + ["IsLowSurrogates", "\uDC00\uE000"], + ["IsMalayalam", "\u0D00\u0D80"], + ["IsMandaic", "\u0840\u0860"], + ["IsMathematicalOperators", "\u2200\u2300"], + ["IsMeeteiMayek", "\uABC0\uAC00"], + ["IsMeeteiMayekExtensions", "\uAAE0\uAB00"], + ["IsMiscellaneousMathematicalSymbols-A", "\u27C0\u27F0"], + ["IsMiscellaneousMathematicalSymbols-B", "\u2980\u2A00"], + ["IsMiscellaneousSymbols", "\u2600\u2700"], + ["IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"], + ["IsMiscellaneousTechnical", "\u2300\u2400"], + ["IsModifierToneLetters", "\uA700\uA720"], + ["IsMongolian", "\u1800\u18B0"], + ["IsMyanmar", "\u1000\u10A0"], + ["IsMyanmarExtended-A", "\uAA60\uAA80"], + ["IsMyanmarExtended-B", "\uA9E0\uAA00"], + ["IsNKo", "\u07C0\u0800"], + ["IsNewTaiLue", "\u1980\u19E0"], + ["IsNumberForms", "\u2150\u2190"], + ["IsOgham", "\u1680\u16A0"], + ["IsOlChiki", "\u1C50\u1C80"], + ["IsOpticalCharacterRecognition", "\u2440\u2460"], + ["IsOriya", "\u0B00\u0B80"], + ["IsPhags-pa", "\uA840\uA880"], + ["IsPhoneticExtensions", "\u1D00\u1D80"], + ["IsPhoneticExtensionsSupplement", "\u1D80\u1DC0"], + ["IsPrivateUse", "\uE000\uF900"], + ["IsPrivateUseArea", "\uE000\uF900"], + ["IsRejang", "\uA930\uA960"], + ["IsRunic", "\u16A0\u1700"], + ["IsSamaritan", 
"\u0800\u0840"], + ["IsSaurashtra", "\uA880\uA8E0"], + ["IsSinhala", "\u0D80\u0E00"], + ["IsSmallFormVariants", "\uFE50\uFE70"], + ["IsSpacingModifierLetters", "\u02B0\u0300"], + ["IsSpecials", "\uFFF0"], + ["IsSundanese", "\u1B80\u1BC0"], + ["IsSundaneseSupplement", "\u1CC0\u1CD0"], + ["IsSuperscriptsandSubscripts", "\u2070\u20A0"], + ["IsSupplementalArrows-A", "\u27F0\u2800"], + ["IsSupplementalArrows-B", "\u2900\u2980"], + ["IsSupplementalMathematicalOperators", "\u2A00\u2B00"], + ["IsSupplementalPunctuation", "\u2E00\u2E80"], + ["IsSylotiNagri", "\uA800\uA830"], + ["IsSyriac", "\u0700\u0750"], + ["IsSyriacSupplement", "\u0860\u0870"], + ["IsTagalog", "\u1700\u1720"], + ["IsTagbanwa", "\u1760\u1780"], + ["IsTaiLe", "\u1950\u1980"], + ["IsTaiTham", "\u1A20\u1AB0"], + ["IsTaiViet", "\uAA80\uAAE0"], + ["IsTamil", "\u0B80\u0C00"], + ["IsTelugu", "\u0C00\u0C80"], + ["IsThaana", "\u0780\u07C0"], + ["IsThai", "\u0E00\u0E80"], + ["IsTibetan", "\u0F00\u1000"], + ["IsTifinagh", "\u2D30\u2D80"], + ["IsUnifiedCanadianAboriginalSyllabics", "\u1400\u1680"], + ["IsUnifiedCanadianAboriginalSyllabicsExtended", "\u18B0\u1900"], + ["IsVai", "\uA500\uA640"], + ["IsVariationSelectors", "\uFE00\uFE10"], + ["IsVedicExtensions", "\u1CD0\u1D00"], + ["IsVerticalForms", "\uFE10\uFE20"], + ["IsYiRadicals", "\uA490\uA4D0"], + ["IsYiSyllables", "\uA000\uA490"], + ["IsYijingHexagramSymbols", "\u4DC0\u4E00"], + ["_xmlC", /* Name Char */ 
"\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00B7\u00B8\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u02D0\u02D2\u0300\u0346\u0360\u0362\u0386\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0483\u0487\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0653\u0660\u066A\u0670\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06E9\u06EA\u06EE\u06F0\u06FA\u0901\u0904\u0905\u093A\u093C\u094E\u0951\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC" + +"\u09DE\u09DF\u09E4\u09E6\u09F2\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B70\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF0\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62" 
+"\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0E01\u0E2F\u0E30\u0E3B\u0E40\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0F18\u0F1A\u0F20\u0F2A\u0F35\u0F36\u0F37\u0F38\u0F39\u0F3A\u0F3E\u0F48\u0F49\u0F6A\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F96\u0F97\u0F98\u0F99\u0FAE\u0FB1\u0FB8\u0FB9\u0FBA\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00" +"\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u20D0\u20DD\u20E1\u20E2\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3005\u3006\u3007\u3008\u3021\u3030\u3031\u3036\u3041\u3095\u3099\u309B\u309D\u309F\u30A1\u30FB\u30FC\u30FF\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"], + ["_xmlD", "\u0030\u003A\u0660\u066A\u06F0\u06FA\u0966\u0970\u09E6\u09F0\u0A66\u0A70\u0AE6\u0AF0\u0B66\u0B70\u0BE7\u0BF0\u0C66\u0C70\u0CE6\u0CF0\u0D66\u0D70\u0E50\u0E5A\u0ED0\u0EDA\u0F20\u0F2A\u1040\u104A\u1369\u1372\u17E0\u17EA\u1810\u181A\uFF10\uFF1A"], + ["_xmlI", /* Start Name Char */ 
"\u003A\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u0386\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0641\u064B\u0671\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06D6\u06E5\u06E7\u0905\u093A\u093D\u093E\u0958\u0962\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09DC\u09DE\u09DF\u09E2\u09F0\u09F2\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A59\u0A5D\u0A5E\u0A5F\u0A72\u0A75\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABD\u0ABE\u0AE0\u0AE1\u0B05\u0B0D\u0B0F" + +"\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3D\u0B3E\u0B5C\u0B5E\u0B5F\u0B62\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C60\u0C62\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CDE\u0CDF\u0CE0\u0CE2\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D60\u0D62\u0E01\u0E2F\u0E30\u0E31\u0E32\u0E34\u0E40\u0E46\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EB1\u0EB2\u0EB4\u0EBD\u0EBE\u0EC0\u0EC5\u0F40\u0F48\u0F49\u0F6A\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\
u11AB\u11AC" +"\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3007\u3008\u3021\u302A\u3041\u3095\u30A1\u30FB\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"], + ["_xmlW", "\u0024\u0025\u002B\u002C\u0030\u003A\u003C\u003F\u0041\u005B\u005E\u005F\u0060\u007B\u007C\u007D\u007E\u007F\u00A2\u00AB\u00AC\u00AD\u00AE\u00B7\u00B8\u00BB\u00BC\u00BF\u00C0\u0221\u0222\u0234\u0250\u02AE\u02B0\u02EF\u0300\u0350\u0360\u0370\u0374\u0376\u037A\u037B\u0384\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03F7\u0400\u0487\u0488\u04CF\u04D0\u04F6\u04F8\u04FA\u0500\u0510\u0531\u0557\u0559\u055A\u0561\u0588\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0656\u0660\u066A\u066E\u06D4\u06D5\u06DD\u06DE\u06EE\u06F0\u06FF\u0710\u072D\u0730\u074B\u0780\u07B2\u0901\u0904\u0905\u093A\u093C\u094E\u0950\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC\u09DE\u09DF\u09E4\u09E6\u09FB\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35" 
+"\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AD0\u0AD1\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B71\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF3\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49" 
+"\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0D82\u0D84\u0D85\u0D97\u0D9A\u0DB2\u0DB3\u0DBC\u0DBD\u0DBE\u0DC0\u0DC7\u0DCA\u0DCB\u0DCF\u0DD5\u0DD6\u0DD7\u0DD8\u0DE0\u0DF2\u0DF4\u0E01\u0E3B\u0E3F\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0EDC\u0EDE\u0F00\u0F04\u0F13\u0F3A\u0F3E\u0F48\u0F49\u0F6B\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F98\u0F99\u0FBD\u0FBE\u0FCD\u0FCF\u0FD0\u1000\u1022\u1023\u1028\u1029\u102B\u102C\u1033\u1036\u103A\u1040\u104A\u1050\u105A\u10A0\u10C6\u10D0\u10F9\u1100\u115A\u115F\u11A3\u11A8\u11FA\u1200\u1207\u1208\u1247\u1248\u1249\u124A\u124E\u1250\u1257\u1258\u1259\u125A\u125E\u1260\u1287\u1288\u1289\u128A\u128E\u1290\u12AF\u12B0\u12B1\u12B2\u12B6\u12B8\u12BF\u12C0\u12C1\u12C2\u12C6\u12C8\u12CF\u12D0\u12D7\u12D8\u12EF\u12F0\u130F\u1310\u1311\u1312\u1316\u1318\u131F\u1320\u1347\u1348\u135B\u1369\u137D\u13A0" 
+"\u13F5\u1401\u166D\u166F\u1677\u1681\u169B\u16A0\u16EB\u16EE\u16F1\u1700\u170D\u170E\u1715\u1720\u1735\u1740\u1754\u1760\u176D\u176E\u1771\u1772\u1774\u1780\u17D4\u17D7\u17D8\u17DB\u17DD\u17E0\u17EA\u180B\u180E\u1810\u181A\u1820\u1878\u1880\u18AA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FC5\u1FC6\u1FD4\u1FD6\u1FDC\u1FDD\u1FF0\u1FF2\u1FF5\u1FF6\u1FFF\u2044\u2045\u2052\u2053\u2070\u2072\u2074\u207D\u207F\u208D\u20A0\u20B2\u20D0\u20EB\u2100\u213B\u213D\u214C\u2153\u2184\u2190\u2329\u232B\u23B4\u23B7\u23CF\u2400\u2427\u2440\u244B\u2460\u24FF\u2500\u2614\u2616\u2618\u2619\u267E\u2680\u268A\u2701\u2705\u2706\u270A\u270C\u2728\u2729\u274C\u274D\u274E\u274F\u2753\u2756\u2757\u2758\u275F\u2761\u2768\u2776\u2795\u2798\u27B0\u27B1\u27BF\u27D0\u27E6\u27F0\u2983\u2999\u29D8\u29DC\u29FC\u29FE\u2B00\u2E80\u2E9A\u2E9B\u2EF4\u2F00\u2FD6\u2FF0\u2FFC\u3004\u3008\u3012\u3014\u3020\u3030\u3031\u303D\u303E\u3040" +"\u3041\u3097\u3099\u30A0\u30A1\u30FB\u30FC\u3100\u3105\u312D\u3131\u318F\u3190\u31B8\u31F0\u321D\u3220\u3244\u3251\u327C\u327F\u32CC\u32D0\u32FF\u3300\u3377\u337B\u33DE\u33E0\u33FF\u3400\u4DB6\u4E00\u9FA6\uA000\uA48D\uA490\uA4C7\uAC00\uD7A4\uF900\uFA2E\uFA30\uFA6B\uFB00\uFB07\uFB13\uFB18\uFB1D\uFB37\uFB38\uFB3D\uFB3E\uFB3F\uFB40\uFB42\uFB43\uFB45\uFB46\uFBB2\uFBD3\uFD3E\uFD50\uFD90\uFD92\uFDC8\uFDF0\uFDFD\uFE00\uFE10\uFE20\uFE24\uFE62\uFE63\uFE64\uFE67\uFE69\uFE6A\uFE70\uFE75\uFE76\uFEFD\uFF04\uFF05\uFF0B\uFF0C\uFF10\uFF1A\uFF1C\uFF1F\uFF21\uFF3B\uFF3E\uFF3F\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF66\uFFBF\uFFC2\uFFC8\uFFCA\uFFD0\uFFD2\uFFD8\uFFDA\uFFDD\uFFE0\uFFE7\uFFE8\uFFEF\uFFFC\uFFFE"], + ]; + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 98e2de6cd068a5..d22036e6da2844 100644 --- 
a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -174,198 +174,6 @@ internal sealed partial class RegexCharClass { "Z", "\u0000\u000D\u000E\u000C\u0000" }, }; - /* - * The property table contains all the block definitions defined in the - * XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 4.0 spec (www.unicode.org), - * and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may - * not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates. - * - **/ - // Has to be sorted by the first column - private static readonly string[][] s_propTable = - [ - ["IsAlphabeticPresentationForms", "\uFB00\uFB50"], - ["IsArabic", "\u0600\u0700"], - ["IsArabicExtended-A", "\u08A0\u0900"], - ["IsArabicExtended-B", "\u0870\u08A0"], - ["IsArabicPresentationForms-A", "\uFB50\uFE00"], - ["IsArabicPresentationForms-B", "\uFE70\uFF00"], - ["IsArabicSupplement", "\u0750\u0780"], - ["IsArmenian", "\u0530\u0590"], - ["IsArrows", "\u2190\u2200"], - ["IsBalinese", "\u1B00\u1B80"], - ["IsBamum", "\uA6A0\uA700"], - ["IsBasicLatin", "\u0000\u0080"], - ["IsBatak", "\u1BC0\u1C00"], - ["IsBengali", "\u0980\u0A00"], - ["IsBlockElements", "\u2580\u25A0"], - ["IsBopomofo", "\u3100\u3130"], - ["IsBopomofoExtended", "\u31A0\u31C0"], - ["IsBoxDrawing", "\u2500\u2580"], - ["IsBraillePatterns", "\u2800\u2900"], - ["IsBuginese", "\u1A00\u1A20"], - ["IsBuhid", "\u1740\u1760"], - ["IsCJKCompatibility", "\u3300\u3400"], - ["IsCJKCompatibilityForms", "\uFE30\uFE50"], - ["IsCJKCompatibilityIdeographs", "\uF900\uFB00"], - ["IsCJKRadicalsSupplement", "\u2E80\u2F00"], - ["IsCJKStrokes", "\u31C0\u31F0"], - ["IsCJKSymbolsandPunctuation", "\u3000\u3040"], - ["IsCJKUnifiedIdeographs", "\u4E00\uA000"], - ["IsCJKUnifiedIdeographsExtensionA", 
"\u3400\u4DC0"], - ["IsCham", "\uAA00\uAA60"], - ["IsCherokee", "\u13A0\u1400"], - ["IsCherokeeSupplement", "\uAB70\uABC0"], - ["IsCombiningDiacriticalMarks", "\u0300\u0370"], - ["IsCombiningDiacriticalMarksExtended", "\u1AB0\u1B00"], - ["IsCombiningDiacriticalMarksSupplement", "\u1DC0\u1E00"], - ["IsCombiningDiacriticalMarksforSymbols", "\u20D0\u2100"], - ["IsCombiningHalfMarks", "\uFE20\uFE30"], - ["IsCombiningMarksforSymbols", "\u20D0\u2100"], - ["IsCommonIndicNumberForms", "\uA830\uA840"], - ["IsControlPictures", "\u2400\u2440"], - ["IsCoptic", "\u2C80\u2D00"], - ["IsCurrencySymbols", "\u20A0\u20D0"], - ["IsCyrillic", "\u0400\u0500"], - ["IsCyrillicExtended-A", "\u2DE0\u2E00"], - ["IsCyrillicExtended-B", "\uA640\uA6A0"], - ["IsCyrillicExtended-C", "\u1C80\u1C90"], - ["IsCyrillicSupplement", "\u0500\u0530"], - ["IsDevanagari", "\u0900\u0980"], - ["IsDevanagariExtended", "\uA8E0\uA900"], - ["IsDingbats", "\u2700\u27C0"], - ["IsEnclosedAlphanumerics", "\u2460\u2500"], - ["IsEnclosedCJKLettersandMonths", "\u3200\u3300"], - ["IsEthiopic", "\u1200\u1380"], - ["IsEthiopicExtended", "\u2D80\u2DE0"], - ["IsEthiopicExtended-A", "\uAB00\uAB30"], - ["IsEthiopicSupplement", "\u1380\u13A0"], - ["IsGeneralPunctuation", "\u2000\u2070"], - ["IsGeometricShapes", "\u25A0\u2600"], - ["IsGeorgian", "\u10A0\u1100"], - ["IsGeorgianExtended", "\u1C90\u1CC0"], - ["IsGeorgianSupplement", "\u2D00\u2D30"], - ["IsGlagolitic", "\u2C00\u2C60"], - ["IsGreek", "\u0370\u0400"], - ["IsGreekExtended", "\u1F00\u2000"], - ["IsGreekandCoptic", "\u0370\u0400"], - ["IsGujarati", "\u0A80\u0B00"], - ["IsGurmukhi", "\u0A00\u0A80"], - ["IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"], - ["IsHangulCompatibilityJamo", "\u3130\u3190"], - ["IsHangulJamo", "\u1100\u1200"], - ["IsHangulJamoExtended-A", "\uA960\uA980"], - ["IsHangulJamoExtended-B", "\uD7B0\uD800"], - ["IsHangulSyllables", "\uAC00\uD7B0"], - ["IsHanunoo", "\u1720\u1740"], - ["IsHebrew", "\u0590\u0600"], - ["IsHighPrivateUseSurrogates", 
"\uDB80\uDC00"], - ["IsHighSurrogates", "\uD800\uDB80"], - ["IsHiragana", "\u3040\u30A0"], - ["IsIPAExtensions", "\u0250\u02B0"], - ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"], - ["IsJavanese", "\uA980\uA9E0"], - ["IsKanbun", "\u3190\u31A0"], - ["IsKangxiRadicals", "\u2F00\u2FE0"], - ["IsKannada", "\u0C80\u0D00"], - ["IsKatakana", "\u30A0\u3100"], - ["IsKatakanaPhoneticExtensions", "\u31F0\u3200"], - ["IsKayahLi", "\uA900\uA930"], - ["IsKhmer", "\u1780\u1800"], - ["IsKhmerSymbols", "\u19E0\u1A00"], - ["IsLao", "\u0E80\u0F00"], - ["IsLatin-1Supplement", "\u0080\u0100"], - ["IsLatinExtended-A", "\u0100\u0180"], - ["IsLatinExtended-B", "\u0180\u0250"], - ["IsLatinExtended-C", "\u2C60\u2C80"], - ["IsLatinExtended-D", "\uA720\uA800"], - ["IsLatinExtended-E", "\uAB30\uAB70"], - ["IsLatinExtendedAdditional", "\u1E00\u1F00"], - ["IsLepcha", "\u1C00\u1C50"], - ["IsLetterlikeSymbols", "\u2100\u2150"], - ["IsLimbu", "\u1900\u1950"], - ["IsLisu", "\uA4D0\uA500"], - ["IsLowSurrogates", "\uDC00\uE000"], - ["IsMalayalam", "\u0D00\u0D80"], - ["IsMandaic", "\u0840\u0860"], - ["IsMathematicalOperators", "\u2200\u2300"], - ["IsMeeteiMayek", "\uABC0\uAC00"], - ["IsMeeteiMayekExtensions", "\uAAE0\uAB00"], - ["IsMiscellaneousMathematicalSymbols-A", "\u27C0\u27F0"], - ["IsMiscellaneousMathematicalSymbols-B", "\u2980\u2A00"], - ["IsMiscellaneousSymbols", "\u2600\u2700"], - ["IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"], - ["IsMiscellaneousTechnical", "\u2300\u2400"], - ["IsModifierToneLetters", "\uA700\uA720"], - ["IsMongolian", "\u1800\u18B0"], - ["IsMyanmar", "\u1000\u10A0"], - ["IsMyanmarExtended-A", "\uAA60\uAA80"], - ["IsMyanmarExtended-B", "\uA9E0\uAA00"], - ["IsNKo", "\u07C0\u0800"], - ["IsNewTaiLue", "\u1980\u19E0"], - ["IsNumberForms", "\u2150\u2190"], - ["IsOgham", "\u1680\u16A0"], - ["IsOlChiki", "\u1C50\u1C80"], - ["IsOpticalCharacterRecognition", "\u2440\u2460"], - ["IsOriya", "\u0B00\u0B80"], - ["IsPhags-pa", "\uA840\uA880"], - ["IsPhoneticExtensions", 
"\u1D00\u1D80"], - ["IsPhoneticExtensionsSupplement", "\u1D80\u1DC0"], - ["IsPrivateUse", "\uE000\uF900"], - ["IsPrivateUseArea", "\uE000\uF900"], - ["IsRejang", "\uA930\uA960"], - ["IsRunic", "\u16A0\u1700"], - ["IsSamaritan", "\u0800\u0840"], - ["IsSaurashtra", "\uA880\uA8E0"], - ["IsSinhala", "\u0D80\u0E00"], - ["IsSmallFormVariants", "\uFE50\uFE70"], - ["IsSpacingModifierLetters", "\u02B0\u0300"], - ["IsSpecials", "\uFFF0"], - ["IsSundanese", "\u1B80\u1BC0"], - ["IsSundaneseSupplement", "\u1CC0\u1CD0"], - ["IsSuperscriptsandSubscripts", "\u2070\u20A0"], - ["IsSupplementalArrows-A", "\u27F0\u2800"], - ["IsSupplementalArrows-B", "\u2900\u2980"], - ["IsSupplementalMathematicalOperators", "\u2A00\u2B00"], - ["IsSupplementalPunctuation", "\u2E00\u2E80"], - ["IsSylotiNagri", "\uA800\uA830"], - ["IsSyriac", "\u0700\u0750"], - ["IsSyriacSupplement", "\u0860\u0870"], - ["IsTagalog", "\u1700\u1720"], - ["IsTagbanwa", "\u1760\u1780"], - ["IsTaiLe", "\u1950\u1980"], - ["IsTaiTham", "\u1A20\u1AB0"], - ["IsTaiViet", "\uAA80\uAAE0"], - ["IsTamil", "\u0B80\u0C00"], - ["IsTelugu", "\u0C00\u0C80"], - ["IsThaana", "\u0780\u07C0"], - ["IsThai", "\u0E00\u0E80"], - ["IsTibetan", "\u0F00\u1000"], - ["IsTifinagh", "\u2D30\u2D80"], - ["IsUnifiedCanadianAboriginalSyllabics", "\u1400\u1680"], - ["IsUnifiedCanadianAboriginalSyllabicsExtended", "\u18B0\u1900"], - ["IsVai", "\uA500\uA640"], - ["IsVariationSelectors", "\uFE00\uFE10"], - ["IsVedicExtensions", "\u1CD0\u1D00"], - ["IsVerticalForms", "\uFE10\uFE20"], - ["IsYiRadicals", "\uA490\uA4D0"], - ["IsYiSyllables", "\uA000\uA490"], - ["IsYijingHexagramSymbols", "\u4DC0\u4E00"], - ["_xmlC", /* Name Char */ 
"\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00B7\u00B8\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u02D0\u02D2\u0300\u0346\u0360\u0362\u0386\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0483\u0487\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0653\u0660\u066A\u0670\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06E9\u06EA\u06EE\u06F0\u06FA\u0901\u0904\u0905\u093A\u093C\u094E\u0951\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC" - +"\u09DE\u09DF\u09E4\u09E6\u09F2\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B70\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF0\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2"
 - +"\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0E01\u0E2F\u0E30\u0E3B\u0E40\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0F18\u0F1A\u0F20\u0F2A\u0F35\u0F36\u0F37\u0F38\u0F39\u0F3A\u0F3E\u0F48\u0F49\u0F6A\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F96\u0F97\u0F98\u0F99\u0FAE\u0FB1\u0FB8\u0FB9\u0FBA\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00" - +"\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u20D0\u20DD\u20E1\u20E2\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3005\u3006\u3007\u3008\u3021\u3030\u3031\u3036\u3041\u3095\u3099\u309B\u309D\u309F\u30A1\u30FB\u30FC\u30FF\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"], - ["_xmlD", "\u0030\u003A\u0660\u066A\u06F0\u06FA\u0966\u0970\u09E6\u09F0\u0A66\u0A70\u0AE6\u0AF0\u0B66\u0B70\u0BE7\u0BF0\u0C66\u0C70\u0CE6\u0CF0\u0D66\u0D70\u0E50\u0E5A\u0ED0\u0EDA\u0F20\u0F2A\u1040\u104A\u1369\u1372\u17E0\u17EA\u1810\u181A\uFF10\uFF1A"], - ["_xmlI", /* Start Name Char */ 
"\u003A\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u0386\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0641\u064B\u0671\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06D6\u06E5\u06E7\u0905\u093A\u093D\u093E\u0958\u0962\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09DC\u09DE\u09DF\u09E2\u09F0\u09F2\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A59\u0A5D\u0A5E\u0A5F\u0A72\u0A75\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABD\u0ABE\u0AE0\u0AE1\u0B05\u0B0D\u0B0F" - +"\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3D\u0B3E\u0B5C\u0B5E\u0B5F\u0B62\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C60\u0C62\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CDE\u0CDF\u0CE0\u0CE2\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D60\u0D62\u0E01\u0E2F\u0E30\u0E31\u0E32\u0E34\u0E40\u0E46\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EB1\u0EB2\u0EB4\u0EBD\u0EBE\u0EC0\u0EC5\u0F40\u0F48\u0F49\u0F6A\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\
u11AB\u11AC" - +"\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3007\u3008\u3021\u302A\u3041\u3095\u30A1\u30FB\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"], - ["_xmlW", "\u0024\u0025\u002B\u002C\u0030\u003A\u003C\u003F\u0041\u005B\u005E\u005F\u0060\u007B\u007C\u007D\u007E\u007F\u00A2\u00AB\u00AC\u00AD\u00AE\u00B7\u00B8\u00BB\u00BC\u00BF\u00C0\u0221\u0222\u0234\u0250\u02AE\u02B0\u02EF\u0300\u0350\u0360\u0370\u0374\u0376\u037A\u037B\u0384\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03F7\u0400\u0487\u0488\u04CF\u04D0\u04F6\u04F8\u04FA\u0500\u0510\u0531\u0557\u0559\u055A\u0561\u0588\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0656\u0660\u066A\u066E\u06D4\u06D5\u06DD\u06DE\u06EE\u06F0\u06FF\u0710\u072D\u0730\u074B\u0780\u07B2\u0901\u0904\u0905\u093A\u093C\u094E\u0950\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC\u09DE\u09DF\u09E4\u09E6\u09FB\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35" - 
+"\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AD0\u0AD1\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B71\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF3\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49" - 
+"\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0D82\u0D84\u0D85\u0D97\u0D9A\u0DB2\u0DB3\u0DBC\u0DBD\u0DBE\u0DC0\u0DC7\u0DCA\u0DCB\u0DCF\u0DD5\u0DD6\u0DD7\u0DD8\u0DE0\u0DF2\u0DF4\u0E01\u0E3B\u0E3F\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0EDC\u0EDE\u0F00\u0F04\u0F13\u0F3A\u0F3E\u0F48\u0F49\u0F6B\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F98\u0F99\u0FBD\u0FBE\u0FCD\u0FCF\u0FD0\u1000\u1022\u1023\u1028\u1029\u102B\u102C\u1033\u1036\u103A\u1040\u104A\u1050\u105A\u10A0\u10C6\u10D0\u10F9\u1100\u115A\u115F\u11A3\u11A8\u11FA\u1200\u1207\u1208\u1247\u1248\u1249\u124A\u124E\u1250\u1257\u1258\u1259\u125A\u125E\u1260\u1287\u1288\u1289\u128A\u128E\u1290\u12AF\u12B0\u12B1\u12B2\u12B6\u12B8\u12BF\u12C0\u12C1\u12C2\u12C6\u12C8\u12CF\u12D0\u12D7\u12D8\u12EF\u12F0\u130F\u1310\u1311\u1312\u1316\u1318\u131F\u1320\u1347\u1348\u135B\u1369\u137D\u13A0" - 
+"\u13F5\u1401\u166D\u166F\u1677\u1681\u169B\u16A0\u16EB\u16EE\u16F1\u1700\u170D\u170E\u1715\u1720\u1735\u1740\u1754\u1760\u176D\u176E\u1771\u1772\u1774\u1780\u17D4\u17D7\u17D8\u17DB\u17DD\u17E0\u17EA\u180B\u180E\u1810\u181A\u1820\u1878\u1880\u18AA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FC5\u1FC6\u1FD4\u1FD6\u1FDC\u1FDD\u1FF0\u1FF2\u1FF5\u1FF6\u1FFF\u2044\u2045\u2052\u2053\u2070\u2072\u2074\u207D\u207F\u208D\u20A0\u20B2\u20D0\u20EB\u2100\u213B\u213D\u214C\u2153\u2184\u2190\u2329\u232B\u23B4\u23B7\u23CF\u2400\u2427\u2440\u244B\u2460\u24FF\u2500\u2614\u2616\u2618\u2619\u267E\u2680\u268A\u2701\u2705\u2706\u270A\u270C\u2728\u2729\u274C\u274D\u274E\u274F\u2753\u2756\u2757\u2758\u275F\u2761\u2768\u2776\u2795\u2798\u27B0\u27B1\u27BF\u27D0\u27E6\u27F0\u2983\u2999\u29D8\u29DC\u29FC\u29FE\u2B00\u2E80\u2E9A\u2E9B\u2EF4\u2F00\u2FD6\u2FF0\u2FFC\u3004\u3008\u3012\u3014\u3020\u3030\u3031\u303D\u303E\u3040" - +"\u3041\u3097\u3099\u30A0\u30A1\u30FB\u30FC\u3100\u3105\u312D\u3131\u318F\u3190\u31B8\u31F0\u321D\u3220\u3244\u3251\u327C\u327F\u32CC\u32D0\u32FF\u3300\u3377\u337B\u33DE\u33E0\u33FF\u3400\u4DB6\u4E00\u9FA6\uA000\uA48D\uA490\uA4C7\uAC00\uD7A4\uF900\uFA2E\uFA30\uFA6B\uFB00\uFB07\uFB13\uFB18\uFB1D\uFB37\uFB38\uFB3D\uFB3E\uFB3F\uFB40\uFB42\uFB43\uFB45\uFB46\uFBB2\uFBD3\uFD3E\uFD50\uFD90\uFD92\uFDC8\uFDF0\uFDFD\uFE00\uFE10\uFE20\uFE24\uFE62\uFE63\uFE64\uFE67\uFE69\uFE6A\uFE70\uFE75\uFE76\uFEFD\uFF04\uFF05\uFF0B\uFF0C\uFF10\uFF1A\uFF1C\uFF1F\uFF21\uFF3B\uFF3E\uFF3F\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF66\uFFBF\uFFC2\uFFC8\uFFCA\uFFD0\uFFD2\uFFD8\uFFDA\uFFDD\uFFE0\uFFE7\uFFE8\uFFEF\uFFFC\uFFFE"], - ]; - private static readonly char[] s_whitespaceChars = ['\u0009', '\u000A', '\u000B', '\u000C', '\u000D', '\u0020', '\u0085', '\u00A0', '\u1680', '\u2000', diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs 
b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs index 0e1564d54c1326..9863b99140b634 100644 --- a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs @@ -14,25 +14,28 @@ namespace GenRegexNamedBlocks { /// - /// This program outputs the named blocks for RegexCharClass.cs + /// This program generates RegexCharClass.Tables.cs with Unicode named blocks /// class Program { static void Main(string[] args) { - if (args.Length < 1) + if (args.Length < 2) { - Console.WriteLine("Usage: dotnet run -- "); - Console.WriteLine("Example: dotnet run -- Blocks.txt"); + Console.WriteLine("Usage: dotnet run -- "); + Console.WriteLine("Example: dotnet run -- Blocks.txt ../../src/System/Text/RegularExpressions/RegexCharClass.Tables.cs"); return; } + string blocksFile = args[0]; + string outputFile = args[1]; + // The input file should be Blocks.txt from the UCD corresponding to the // version of the Unicode spec we're consuming. 
// More info: https://www.unicode.org/reports/tr44/ // Latest Blocks.txt: https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt - string[] allInputLines = File.ReadAllLines(args[0]); + string[] allInputLines = File.ReadAllLines(blocksFile); Regex inputLineRegex = new Regex(@"^(?[0-9A-F]{4})\.\.(?[0-9A-F]{4}); (?.+)$"); @@ -70,14 +73,79 @@ static void Main(string[] args) entries.Add((regexBlockName, startCode, endCode)); } - // Sort by start code for consistent output + // Sort alphabetically for consistent output entries.Sort((a, b) => string.Compare(a.name, b.name, StringComparison.Ordinal)); - // Generate the output + // Add special backward-compatibility aliases + entries.Add(("IsCombiningMarksforSymbols", "20D0", "20FF")); // Alias for IsCombiningDiacriticalMarksforSymbols + entries.Add(("IsGreek", "0370", "03FF")); // Alias for IsGreekandCoptic + entries.Add(("IsHighPrivateUseSurrogates", "DB80", "DBFF")); + entries.Add(("IsHighSurrogates", "D800", "DB7F")); + entries.Add(("IsLowSurrogates", "DC00", "DFFF")); + entries.Add(("IsPrivateUse", "E000", "F8FF")); // Alias for IsPrivateUseArea + entries.Add(("IsPrivateUseArea", "E000", "F8FF")); + + // Re-sort to include the new entries + entries.Sort((a, b) => string.Compare(a.name, b.name, StringComparison.Ordinal)); + + // Generate the output file + var output = new StringBuilder(); + output.AppendLine("// Licensed to the .NET Foundation under one or more agreements."); + output.AppendLine("// The .NET Foundation licenses this file to you under the MIT license."); + output.AppendLine(); + output.AppendLine("// This is a generated file. 
Do not edit directly."); + output.AppendLine("// Run the GenRegexNamedBlocks tool to regenerate."); + output.AppendLine(); + output.AppendLine("namespace System.Text.RegularExpressions"); + output.AppendLine("{"); + output.AppendLine(" internal sealed partial class RegexCharClass"); + output.AppendLine(" {"); + output.AppendLine(" /*"); + output.AppendLine(" * The property table contains all the block definitions defined in the"); + output.AppendLine(" * XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 17.0 spec (www.unicode.org),"); + output.AppendLine(" * and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may"); + output.AppendLine(" * not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates."); + output.AppendLine(" *"); + output.AppendLine(" **/"); + output.AppendLine(" // Has to be sorted by the first column"); + output.AppendLine(" private static readonly string[][] s_propTable ="); + output.AppendLine(" ["); + foreach (var entry in entries) { - Console.WriteLine($" [\"{entry.name}\", \"\\u{entry.startCode}\\u{GetNextCodePoint(entry.endCode)}\"],"); + // Special handling for IsSpecials - it goes to the end of BMP + if (entry.name == "IsSpecials") + { + output.AppendLine($" [\"{entry.name}\", \"\\u{entry.startCode}\"],"); + } + else + { + output.AppendLine($" [\"{entry.name}\", \"\\u{entry.startCode}\\u{GetNextCodePoint(entry.endCode)}\"],"); + } } + + output.AppendLine(" [\"_xmlC\", /* Name Char */ 
\"\\u002D\\u002F\\u0030\\u003B\\u0041\\u005B\\u005F\\u0060\\u0061\\u007B\\u00B7\\u00B8\\u00C0\\u00D7\\u00D8\\u00F7\\u00F8\\u0132\\u0134\\u013F\\u0141\\u0149\\u014A\\u017F\\u0180\\u01C4\\u01CD\\u01F1\\u01F4\\u01F6\\u01FA\\u0218\\u0250\\u02A9\\u02BB\\u02C2\\u02D0\\u02D2\\u0300\\u0346\\u0360\\u0362\\u0386\\u038B\\u038C\\u038D\\u038E\\u03A2\\u03A3\\u03CF\\u03D0\\u03D7\\u03DA\\u03DB\\u03DC\\u03DD\\u03DE\\u03DF\\u03E0\\u03E1\\u03E2\\u03F4\\u0401\\u040D\\u040E\\u0450\\u0451\\u045D\\u045E\\u0482\\u0483\\u0487\\u0490\\u04C5\\u04C7\\u04C9\\u04CB\\u04CD\\u04D0\\u04EC\\u04EE\\u04F6\\u04F8\\u04FA\\u0531\\u0557\\u0559\\u055A\\u0561\\u0587\\u0591\\u05A2\\u05A3\\u05BA\\u05BB\\u05BE\\u05BF\\u05C0\\u05C1\\u05C3\\u05C4\\u05C5\\u05D0\\u05EB\\u05F0\\u05F3\\u0621\\u063B\\u0640\\u0653\\u0660\\u066A\\u0670\\u06B8\\u06BA\\u06BF\\u06C0\\u06CF\\u06D0\\u06D4\\u06D5\\u06E9\\u06EA\\u06EE\\u06F0\\u06FA\\u0901\\u0904\\u0905\\u093A\\u093C\\u094E\\u0951\\u0955\\u0958\\u0964\\u0966\\u0970\\u0981\\u0984\\u0985\\u098D\\u098F\\u0991\\u0993\\u09A9\\u09AA\\u09B1\\u09B2\\u09B3\\u09B6\\u09BA\\u09BC\\u09BD\\u09BE\\u09C5\\u09C7\\u09C9\\u09CB\\u09CE\\u09D7\\u09D8\\u09DC\""); + output.Append(" 
+\"\\u09DE\\u09DF\\u09E4\\u09E6\\u09F2\\u0A02\\u0A03\\u0A05\\u0A0B\\u0A0F\\u0A11\\u0A13\\u0A29\\u0A2A\\u0A31\\u0A32\\u0A34\\u0A35\\u0A37\\u0A38\\u0A3A\\u0A3C\\u0A3D\\u0A3E\\u0A43\\u0A47\\u0A49\\u0A4B\\u0A4E\\u0A59\\u0A5D\\u0A5E\\u0A5F\\u0A66\\u0A75\\u0A81\\u0A84\\u0A85\\u0A8C\\u0A8D\\u0A8E\\u0A8F\\u0A92\\u0A93\\u0AA9\\u0AAA\\u0AB1\\u0AB2\\u0AB4\\u0AB5\\u0ABA\\u0ABC\\u0AC6\\u0AC7\\u0ACA\\u0ACB\\u0ACE\\u0AE0\\u0AE1\\u0AE6\\u0AF0\\u0B01\\u0B04\\u0B05\\u0B0D\\u0B0F\\u0B11\\u0B13\\u0B29\\u0B2A\\u0B31\\u0B32\\u0B34\\u0B36\\u0B3A\\u0B3C\\u0B44\\u0B47\\u0B49\\u0B4B\\u0B4E\\u0B56\\u0B58\\u0B5C\\u0B5E\\u0B5F\\u0B62\\u0B66\\u0B70\\u0B82\\u0B84\\u0B85\\u0B8B\\u0B8E\\u0B91\\u0B92\\u0B96\\u0B99\\u0B9B\\u0B9C\\u0B9D\\u0B9E\\u0BA0\\u0BA3\\u0BA5\\u0BA8\\u0BAB\\u0BAE\\u0BB6\\u0BB7\\u0BBA\\u0BBE\\u0BC3\\u0BC6\\u0BC9\\u0BCA\\u0BCE\\u0BD7\\u0BD8\\u0BE7\\u0BF0\\u0C01\\u0C04\\u0C05\\u0C0D\\u0C0E\\u0C11\\u0C12\\u0C29\\u0C2A\\u0C34\\u0C35\\u0C3A\\u0C3E\\u0C45\\u0C46\\u0C49\\u0C4A\\u0C4E\\u0C55\\u0C57\\u0C60\\u0C62\\u0C66\\u0C70\\u0C82\\u0C84\\u0C85\\u0C8D\\u0C8E\\u0C91\\u0C92\\u0CA9\\u0CAA\\u0CB4\\u0CB5\\u0CBA\\u0CBE\\u0CC5\\u0CC6\\u0CC9\\u0CCA\\u0CCE\\u0CD5\\u0CD7\\u0CDE\\u0CDF\\u0CE0\\u0CE2\""); /* This _xmlC segment must run through 0CE2 (the next segment starts at 0CE6), matching the _xmlC entry removed from RegexCharClass.cs; truncating it drops ranges from the emitted table. */ + output.Append(" 
+\"\\u0CE6\\u0CF0\\u0D02\\u0D04\\u0D05\\u0D0D\\u0D0E\\u0D11\\u0D12\\u0D29\\u0D2A\\u0D3A\\u0D3E\\u0D44\\u0D46\\u0D49\\u0D4A\\u0D4E\\u0D57\\u0D58\\u0D60\\u0D62\\u0D66\\u0D70\\u0E01\\u0E2F\\u0E30\\u0E3B\\u0E40\\u0E4F\\u0E50\\u0E5A\\u0E81\\u0E83\\u0E84\\u0E85\\u0E87\\u0E89\\u0E8A\\u0E8B\\u0E8D\\u0E8E\\u0E94\\u0E98\\u0E99\\u0EA0\\u0EA1\\u0EA4\\u0EA5\\u0EA6\\u0EA7\\u0EA8\\u0EAA\\u0EAC\\u0EAD\\u0EAF\\u0EB0\\u0EBA\\u0EBB\\u0EBE\\u0EC0\\u0EC5\\u0EC6\\u0EC7\\u0EC8\\u0ECE\\u0ED0\\u0EDA\\u0F18\\u0F1A\\u0F20\\u0F2A\\u0F35\\u0F36\\u0F37\\u0F38\\u0F39\\u0F3A\\u0F3E\\u0F48\\u0F49\\u0F6A\\u0F71\\u0F85\\u0F86\\u0F8C\\u0F90\\u0F96\\u0F97\\u0F98\\u0F99\\u0FAE\\u0FB1\\u0FB8\\u0FB9\\u0FBA\\u10A0\\u10C6\\u10D0\\u10F7\\u1100\\u1101\\u1102\\u1104\\u1105\\u1108\\u1109\\u110A\\u110B\\u110D\\u110E\\u1113\\u113C\\u113D\\u113E\\u113F\\u1140\\u1141\\u114C\\u114D\\u114E\\u114F\\u1150\\u1151\\u1154\\u1156\\u1159\\u115A\\u115F\\u1162\\u1163\\u1164\\u1165\\u1166\\u1167\\u1168\\u1169\\u116A\\u116D\\u116F\\u1172\\u1174\\u1175\\u1176\\u119E\\u119F\\u11A8\\u11A9\\u11AB\\u11AC\\u11AE\\u11B0\\u11B7\\u11B9\\u11BA\\u11BB\\u11BC\\u11C3\\u11EB\\u11EC\\u11F0\\u11F1\\u11F9\\u11FA\\u1E00\\u1E9C\\u1EA0\\u1EFA\\u1F00\""); + output.Append(" +\"\\u1F16\\u1F18\\u1F1E\\u1F20\\u1F46\\u1F48\\u1F4E\\u1F50\\u1F58\\u1F59\\u1F5A\\u1F5B\\u1F5C\\u1F5D\\u1F5E\\u1F5F\\u1F7E\\u1F80\\u1FB5\\u1FB6\\u1FBD\\u1FBE\\u1FBF\\u1FC2\\u1FC5\\u1FC6\\u1FCD\\u1FD0\\u1FD4\\u1FD6\\u1FDC\\u1FE0\\u1FED\\u1FF2\\u1FF5\\u1FF6\\u1FFD\\u20D0\\u20DD\\u20E1\\u20E2\\u2126\\u2127\\u212A\\u212C\\u212E\\u212F\\u2180\\u2183\\u3005\\u3006\\u3007\\u3008\\u3021\\u3030\\u3031\\u3036\\u3041\\u3095\\u3099\\u309B\\u309D\\u309F\\u30A1\\u30FB\\u30FC\\u30FF\\u3105\\u312D\\u4E00\\u9FA6\\uAC00\\uD7A4\"],"); + output.AppendLine(); + output.AppendLine(" [\"_xmlD\", 
\"\\u0030\\u003A\\u0660\\u066A\\u06F0\\u06FA\\u0966\\u0970\\u09E6\\u09F0\\u0A66\\u0A70\\u0AE6\\u0AF0\\u0B66\\u0B70\\u0BE7\\u0BF0\\u0C66\\u0C70\\u0CE6\\u0CF0\\u0D66\\u0D70\\u0E50\\u0E5A\\u0ED0\\u0EDA\\u0F20\\u0F2A\\u1040\\u104A\\u1369\\u1372\\u17E0\\u17EA\\u1810\\u181A\\uFF10\\uFF1A\"],"); + output.AppendLine(" [\"_xmlI\", /* Start Name Char */ \"\\u003A\\u003B\\u0041\\u005B\\u005F\\u0060\\u0061\\u007B\\u00C0\\u00D7\\u00D8\\u00F7\\u00F8\\u0132\\u0134\\u013F\\u0141\\u0149\\u014A\\u017F\\u0180\\u01C4\\u01CD\\u01F1\\u01F4\\u01F6\\u01FA\\u0218\\u0250\\u02A9\\u02BB\\u02C2\\u0386\\u0387\\u0388\\u038B\\u038C\\u038D\\u038E\\u03A2\\u03A3\\u03CF\\u03D0\\u03D7\\u03DA\\u03DB\\u03DC\\u03DD\\u03DE\\u03DF\\u03E0\\u03E1\\u03E2\\u03F4\\u0401\\u040D\\u040E\\u0450\\u0451\\u045D\\u045E\\u0482\\u0490\\u04C5\\u04C7\\u04C9\\u04CB\\u04CD\\u04D0\\u04EC\\u04EE\\u04F6\\u04F8\\u04FA\\u0531\\u0557\\u0559\\u055A\\u0561\\u0587\\u05D0\\u05EB\\u05F0\\u05F3\\u0621\\u063B\\u0641\\u064B\\u0671\\u06B8\\u06BA\\u06BF\\u06C0\\u06CF\\u06D0\\u06D4\\u06D5\\u06D6\\u06E5\\u06E7\\u0905\\u093A\\u093D\\u093E\\u0958\\u0962\\u0985\\u098D\\u098F\\u0991\\u0993\\u09A9\\u09AA\\u09B1\\u09B2\\u09B3\\u09B6\\u09BA\\u09DC\\u09DE\\u09DF\\u09E2\\u09F0\\u09F2\\u0A05\\u0A0B\\u0A0F\\u0A11\\u0A13\\u0A29\\u0A2A\\u0A31\\u0A32\\u0A34\\u0A35\\u0A37\\u0A38\\u0A3A\\u0A59\\u0A5D\\u0A5E\\u0A5F\\u0A72\\u0A75\\u0A85\\u0A8C\\u0A8D\\u0A8E\\u0A8F\\u0A92\\u0A93\\u0AA9\\u0AAA\\u0AB1\\u0AB2\\u0AB4\\u0AB5\\u0ABA\\u0ABD\\u0ABE\\u0AE0\\u0AE1\\u0B05\\u0B0D\\u0B0F\""); + output.Append(" 
+\"\\u0B11\\u0B13\\u0B29\\u0B2A\\u0B31\\u0B32\\u0B34\\u0B36\\u0B3A\\u0B3D\\u0B3E\\u0B5C\\u0B5E\\u0B5F\\u0B62\\u0B85\\u0B8B\\u0B8E\\u0B91\\u0B92\\u0B96\\u0B99\\u0B9B\\u0B9C\\u0B9D\\u0B9E\\u0BA0\\u0BA3\\u0BA5\\u0BA8\\u0BAB\\u0BAE\\u0BB6\\u0BB7\\u0BBA\\u0C05\\u0C0D\\u0C0E\\u0C11\\u0C12\\u0C29\\u0C2A\\u0C34\\u0C35\\u0C3A\\u0C60\\u0C62\\u0C85\\u0C8D\\u0C8E\\u0C91\\u0C92\\u0CA9\\u0CAA\\u0CB4\\u0CB5\\u0CBA\\u0CDE\\u0CDF\\u0CE0\\u0CE2\\u0D05\\u0D0D\\u0D0E\\u0D11\\u0D12\\u0D29\\u0D2A\\u0D3A\\u0D60\\u0D62\\u0E01\\u0E2F\\u0E30\\u0E31\\u0E32\\u0E34\\u0E40\\u0E46\\u0E81\\u0E83\\u0E84\\u0E85\\u0E87\\u0E89\\u0E8A\\u0E8B\\u0E8D\\u0E8E\\u0E94\\u0E98\\u0E99\\u0EA0\\u0EA1\\u0EA4\\u0EA5\\u0EA6\\u0EA7\\u0EA8\\u0EAA\\u0EAC\\u0EAD\\u0EAF\\u0EB0\\u0EB1\\u0EB2\\u0EB4\\u0EBD\\u0EBE\\u0EC0\\u0EC5\\u0F40\\u0F48\\u0F49\\u0F6A\\u10A0\\u10C6\\u10D0\\u10F7\\u1100\\u1101\\u1102\\u1104\\u1105\\u1108\\u1109\\u110A\\u110B\\u110D\\u110E\\u1113\\u113C\\u113D\\u113E\\u113F\\u1140\\u1141\\u114C\\u114D\\u114E\\u114F\\u1150\\u1151\\u1154\\u1156\\u1159\\u115A\\u115F\\u1162\\u1163\\u1164\\u1165\\u1166\\u1167\\u1168\\u1169\\u116A\\u116D\\u116F\\u1172\\u1174\\u1175\\u1176\\u119E\\u119F\\u11A8\\u11A9\\u11AB\\u11AC\""); + output.Append(" +\"\\u11AE\\u11B0\\u11B7\\u11B9\\u11BA\\u11BB\\u11BC\\u11C3\\u11EB\\u11EC\\u11F0\\u11F1\\u11F9\\u11FA\\u1E00\\u1E9C\\u1EA0\\u1EFA\\u1F00\\u1F16\\u1F18\\u1F1E\\u1F20\\u1F46\\u1F48\\u1F4E\\u1F50\\u1F58\\u1F59\\u1F5A\\u1F5B\\u1F5C\\u1F5D\\u1F5E\\u1F5F\\u1F7E\\u1F80\\u1FB5\\u1FB6\\u1FBD\\u1FBE\\u1FBF\\u1FC2\\u1FC5\\u1FC6\\u1FCD\\u1FD0\\u1FD4\\u1FD6\\u1FDC\\u1FE0\\u1FED\\u1FF2\\u1FF5\\u1FF6\\u1FFD\\u2126\\u2127\\u212A\\u212C\\u212E\\u212F\\u2180\\u2183\\u3007\\u3008\\u3021\\u302A\\u3041\\u3095\\u30A1\\u30FB\\u3105\\u312D\\u4E00\\u9FA6\\uAC00\\uD7A4\"],"); + output.AppendLine(); + output.Append(" [\"_xmlW\", 
\"\\u0024\\u0025\\u002B\\u002C\\u0030\\u003A\\u003C\\u003F\\u0041\\u005B\\u005E\\u005F\\u0060\\u007B\\u007C\\u007D\\u007E\\u007F\\u00A2\\u00AB\\u00AC\\u00AD\\u00AE\\u00B7\\u00B8\\u00BB\\u00BC\\u00BF\\u00C0\\u0221\\u0222\\u0234\\u0250\\u02AE\\u02B0\\u02EF\\u0300\\u0350\\u0360\\u0370\\u0374\\u0376\\u037A\\u037B\\u0384\\u0387\\u0388\\u038B\\u038C\\u038D\\u038E\\u03A2\\u03A3\\u03CF\\u03D0\\u03F7\\u0400\\u0487\\u0488\\u04CF\\u04D0\\u04F6\\u04F8\\u04FA\\u0500\\u0510\\u0531\\u0557\\u0559\\u055A\\u0561\\u0588\\u0591\\u05A2\\u05A3\\u05BA\\u05BB\\u05BE\\u05BF\\u05C0\\u05C1\\u05C3\\u05C4\\u05C5\\u05D0\\u05EB\\u05F0\\u05F3\\u0621\\u063B\\u0640\\u0656\\u0660\\u066A\\u066E\\u06D4\\u06D5\\u06DD\\u06DE\\u06EE\\u06F0\\u06FF\\u0710\\u072D\\u0730\\u074B\\u0780\\u07B2\\u0901\\u0904\\u0905\\u093A\\u093C\\u094E\\u0950\\u0955\\u0958\\u0964\\u0966\\u0970\\u0981\\u0984\\u0985\\u098D\\u098F\\u0991\\u0993\\u09A9\\u09AA\\u09B1\\u09B2\\u09B3\\u09B6\\u09BA\\u09BC\\u09BD\\u09BE\\u09C5\\u09C7\\u09C9\\u09CB\\u09CE\\u09D7\\u09D8\\u09DC\\u09DE\\u09DF\\u09E4\\u09E6\\u09FB\\u0A02\\u0A03\\u0A05\\u0A0B\\u0A0F\\u0A11\\u0A13\\u0A29\\u0A2A\\u0A31\\u0A32\\u0A34\\u0A35\""); + output.Append(" 
+\"\\u0A37\\u0A38\\u0A3A\\u0A3C\\u0A3D\\u0A3E\\u0A43\\u0A47\\u0A49\\u0A4B\\u0A4E\\u0A59\\u0A5D\\u0A5E\\u0A5F\\u0A66\\u0A75\\u0A81\\u0A84\\u0A85\\u0A8C\\u0A8D\\u0A8E\\u0A8F\\u0A92\\u0A93\\u0AA9\\u0AAA\\u0AB1\\u0AB2\\u0AB4\\u0AB5\\u0ABA\\u0ABC\\u0AC6\\u0AC7\\u0ACA\\u0ACB\\u0ACE\\u0AD0\\u0AD1\\u0AE0\\u0AE1\\u0AE6\\u0AF0\\u0B01\\u0B04\\u0B05\\u0B0D\\u0B0F\\u0B11\\u0B13\\u0B29\\u0B2A\\u0B31\\u0B32\\u0B34\\u0B36\\u0B3A\\u0B3C\\u0B44\\u0B47\\u0B49\\u0B4B\\u0B4E\\u0B56\\u0B58\\u0B5C\\u0B5E\\u0B5F\\u0B62\\u0B66\\u0B71\\u0B82\\u0B84\\u0B85\\u0B8B\\u0B8E\\u0B91\\u0B92\\u0B96\\u0B99\\u0B9B\\u0B9C\\u0B9D\\u0B9E\\u0BA0\\u0BA3\\u0BA5\\u0BA8\\u0BAB\\u0BAE\\u0BB6\\u0BB7\\u0BBA\\u0BBE\\u0BC3\\u0BC6\\u0BC9\\u0BCA\\u0BCE\\u0BD7\\u0BD8\\u0BE7\\u0BF3\\u0C01\\u0C04\\u0C05\\u0C0D\\u0C0E\\u0C11\\u0C12\\u0C29\\u0C2A\\u0C34\\u0C35\\u0C3A\\u0C3E\\u0C45\\u0C46\\u0C49\\u0C4A\\u0C4E\\u0C55\\u0C57\\u0C60\\u0C62\\u0C66\\u0C70\\u0C82\\u0C84\\u0C85\\u0C8D\\u0C8E\\u0C91\\u0C92\\u0CA9\\u0CAA\\u0CB4\\u0CB5\\u0CBA\\u0CBE\\u0CC5\\u0CC6\\u0CC9\\u0CCA\\u0CCE\\u0CD5\\u0CD7\\u0CDE\\u0CDF\\u0CE0\\u0CE2\\u0CE6\\u0CF0\\u0D02\\u0D04\\u0D05\\u0D0D\\u0D0E\\u0D11\\u0D12\\u0D29\\u0D2A\\u0D3A\\u0D3E\\u0D44\\u0D46\\u0D49\""); + output.Append(" 
+\"\\u0D4A\\u0D4E\\u0D57\\u0D58\\u0D60\\u0D62\\u0D66\\u0D70\\u0D82\\u0D84\\u0D85\\u0D97\\u0D9A\\u0DB2\\u0DB3\\u0DBC\\u0DBD\\u0DBE\\u0DC0\\u0DC7\\u0DCA\\u0DCB\\u0DCF\\u0DD5\\u0DD6\\u0DD7\\u0DD8\\u0DE0\\u0DF2\\u0DF4\\u0E01\\u0E3B\\u0E3F\\u0E4F\\u0E50\\u0E5A\\u0E81\\u0E83\\u0E84\\u0E85\\u0E87\\u0E89\\u0E8A\\u0E8B\\u0E8D\\u0E8E\\u0E94\\u0E98\\u0E99\\u0EA0\\u0EA1\\u0EA4\\u0EA5\\u0EA6\\u0EA7\\u0EA8\\u0EAA\\u0EAC\\u0EAD\\u0EBA\\u0EBB\\u0EBE\\u0EC0\\u0EC5\\u0EC6\\u0EC7\\u0EC8\\u0ECE\\u0ED0\\u0EDA\\u0EDC\\u0EDE\\u0F00\\u0F04\\u0F13\\u0F3A\\u0F3E\\u0F48\\u0F49\\u0F6B\\u0F71\\u0F85\\u0F86\\u0F8C\\u0F90\\u0F98\\u0F99\\u0FBD\\u0FBE\\u0FCD\\u0FCF\\u0FD0\\u1000\\u1022\\u1023\\u1028\\u1029\\u102B\\u102C\\u1033\\u1036\\u103A\\u1040\\u104A\\u1050\\u105A\\u10A0\\u10C6\\u10D0\\u10F9\\u1100\\u115A\\u115F\\u11A3\\u11A8\\u11FA\\u1200\\u1207\\u1208\\u1247\\u1248\\u1249\\u124A\\u124E\\u1250\\u1257\\u1258\\u1259\\u125A\\u125E\\u1260\\u1287\\u1288\\u1289\\u128A\\u128E\\u1290\\u12AF\\u12B0\\u12B1\\u12B2\\u12B6\\u12B8\\u12BF\\u12C0\\u12C1\\u12C2\\u12C6\\u12C8\\u12CF\\u12D0\\u12D7\\u12D8\\u12EF\\u12F0\\u130F\\u1310\\u1311\\u1312\\u1316\\u1318\\u131F\\u1320\\u1347\\u1348\\u135B\\u1369\\u137D\\u13A0\""); + output.Append(" 
+\"\\u13F5\\u1401\\u166D\\u166F\\u1677\\u1681\\u169B\\u16A0\\u16EB\\u16EE\\u16F1\\u1700\\u170D\\u170E\\u1715\\u1720\\u1735\\u1740\\u1754\\u1760\\u176D\\u176E\\u1771\\u1772\\u1774\\u1780\\u17D4\\u17D7\\u17D8\\u17DB\\u17DD\\u17E0\\u17EA\\u180B\\u180E\\u1810\\u181A\\u1820\\u1878\\u1880\\u18AA\\u1E00\\u1E9C\\u1EA0\\u1EFA\\u1F00\\u1F16\\u1F18\\u1F1E\\u1F20\\u1F46\\u1F48\\u1F4E\\u1F50\\u1F58\\u1F59\\u1F5A\\u1F5B\\u1F5C\\u1F5D\\u1F5E\\u1F5F\\u1F7E\\u1F80\\u1FB5\\u1FB6\\u1FC5\\u1FC6\\u1FD4\\u1FD6\\u1FDC\\u1FDD\\u1FF0\\u1FF2\\u1FF5\\u1FF6\\u1FFF\\u2044\\u2045\\u2052\\u2053\\u2070\\u2072\\u2074\\u207D\\u207F\\u208D\\u20A0\\u20B2\\u20D0\\u20EB\\u2100\\u213B\\u213D\\u214C\\u2153\\u2184\\u2190\\u2329\\u232B\\u23B4\\u23B7\\u23CF\\u2400\\u2427\\u2440\\u244B\\u2460\\u24FF\\u2500\\u2614\\u2616\\u2618\\u2619\\u267E\\u2680\\u268A\\u2701\\u2705\\u2706\\u270A\\u270C\\u2728\\u2729\\u274C\\u274D\\u274E\\u274F\\u2753\\u2756\\u2757\\u2758\\u275F\\u2761\\u2768\\u2776\\u2795\\u2798\\u27B0\\u27B1\\u27BF\\u27D0\\u27E6\\u27F0\\u2983\\u2999\\u29D8\\u29DC\\u29FC\\u29FE\\u2B00\\u2E80\\u2E9A\\u2E9B\\u2EF4\\u2F00\\u2FD6\\u2FF0\\u2FFC\\u3004\\u3008\\u3012\\u3014\\u3020\\u3030\\u3031\\u303D\\u303E\\u3040\""); + output.Append(" 
+\"\\u3041\\u3097\\u3099\\u30A0\\u30A1\\u30FB\\u30FC\\u3100\\u3105\\u312D\\u3131\\u318F\\u3190\\u31B8\\u31F0\\u321D\\u3220\\u3244\\u3251\\u327C\\u327F\\u32CC\\u32D0\\u32FF\\u3300\\u3377\\u337B\\u33DE\\u33E0\\u33FF\\u3400\\u4DB6\\u4E00\\u9FA6\\uA000\\uA48D\\uA490\\uA4C7\\uAC00\\uD7A4\\uF900\\uFA2E\\uFA30\\uFA6B\\uFB00\\uFB07\\uFB13\\uFB18\\uFB1D\\uFB37\\uFB38\\uFB3D\\uFB3E\\uFB3F\\uFB40\\uFB42\\uFB43\\uFB45\\uFB46\\uFBB2\\uFBD3\\uFD3E\\uFD50\\uFD90\\uFD92\\uFDC8\\uFDF0\\uFDFD\\uFE00\\uFE10\\uFE20\\uFE24\\uFE62\\uFE63\\uFE64\\uFE67\\uFE69\\uFE6A\\uFE70\\uFE75\\uFE76\\uFEFD\\uFF04\\uFF05\\uFF0B\\uFF0C\\uFF10\\uFF1A\\uFF1C\\uFF1F\\uFF21\\uFF3B\\uFF3E\\uFF3F\\uFF40\\uFF5B\\uFF5C\\uFF5D\\uFF5E\\uFF5F\\uFF66\\uFFBF\\uFFC2\\uFFC8\\uFFCA\\uFFD0\\uFFD2\\uFFD8\\uFFDA\\uFFDD\\uFFE0\\uFFE7\\uFFE8\\uFFEF\\uFFFC\\uFFFE\"],"); + output.AppendLine(); + output.AppendLine(" ];"); + output.AppendLine(" }"); + output.AppendLine("}"); + + File.WriteAllText(outputFile, output.ToString()); + Console.WriteLine($"Successfully generated {outputFile}"); } private static string RemoveAllNonAlphanumeric(string blockName) diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md index e16e3b94fd5a48..497af06eedae80 100644 --- a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md +++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/README.md @@ -18,16 +18,14 @@ To update the named blocks when a new Unicode version is released: 2. Run the tool from this directory: ```bash - dotnet run -- + dotnet run -- ../../src/System/Text/RegularExpressions/RegexCharClass.Tables.cs ``` -3. The tool will output the named blocks in the format needed for `RegexCharClass.cs` +3. The tool will generate the `RegexCharClass.Tables.cs` file with all named blocks -4. 
Replace the named blocks section in `RegexCharClass.cs` (lines starting with `["Is...`) with the generated output +4. Update tests in `RegexCharacterSetTests.cs` to include tests for new blocks if needed -5. Update tests in `RegexCharacterSetTests.cs` to include tests for new blocks - -6. Build and test to ensure all tests pass +5. Build and test to ensure all tests pass ## Notes From 73527e82b53c712dc6d7cb3c416aae0f440aba5b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Oct 2025 16:21:16 +0000 Subject: [PATCH 12/13] Add RegexCharClass.Tables.cs to project file Include the new generated file in the csproj to fix build failures Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../src/System.Text.RegularExpressions.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index c666e4b99edb8a..0654ef9c001736 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -34,6 +34,7 @@ + From c2b5eecaf41c8d6ef6c37d6cd4e1a20ce3c53fab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Oct 2025 22:07:59 +0000 Subject: [PATCH 13/13] Add RegexCharClass.Tables.cs to Generator project Include the generated file in the Generator csproj to fix build failures for the source generator project Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../gen/System.Text.RegularExpressions.Generator.csproj | 1 + .../UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj | 1 + 2 files changed, 2 insertions(+) diff --git 
a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj index 96c59b3bc1f3ec..0322546266e830 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj +++ b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj @@ -38,6 +38,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj index a3d8e3a172dc7a..2c70ec872f5d94 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj @@ -31,6 +31,7 @@ +