From 77ac6542a5f76338837088d0a0075fdaf2ed07a4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Oct 2025 16:08:26 +0000
Subject: [PATCH 01/13] Initial plan
From 9522c2b9b1ea6aa4816d3302a71f95774cba8881 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Oct 2025 16:38:57 +0000
Subject: [PATCH 02/13] Update Regex named blocks from Unicode 4.0 to Unicode
16.0
Added 52 new Unicode blocks and updated existing ones to match Unicode 16.0 specification. Total count increased from 108 to 160 named blocks.
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
.../Text/RegularExpressions/RegexCharClass.cs | 266 +++++++++++-------
.../GenRegexNamedBlocks.csproj | 9 +
.../tools/GenRegexNamedBlocks/Program.cs | 103 +++++++
3 files changed, 271 insertions(+), 107 deletions(-)
create mode 100644 src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj
create mode 100644 src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
index 5d859fbf6aa76f..37c9645dc14717 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
@@ -184,114 +184,166 @@ internal sealed partial class RegexCharClass
// Has to be sorted by the first column
private static readonly string[][] s_propTable =
[
- ["IsAlphabeticPresentationForms", "\uFB00\uFB50"],
- ["IsArabic", "\u0600\u0700"],
- ["IsArabicPresentationForms-A", "\uFB50\uFE00"],
- ["IsArabicPresentationForms-B", "\uFE70\uFF00"],
- ["IsArmenian", "\u0530\u0590"],
- ["IsArrows", "\u2190\u2200"],
- ["IsBasicLatin", "\u0000\u0080"],
- ["IsBengali", "\u0980\u0A00"],
- ["IsBlockElements", "\u2580\u25A0"],
- ["IsBopomofo", "\u3100\u3130"],
- ["IsBopomofoExtended", "\u31A0\u31C0"],
- ["IsBoxDrawing", "\u2500\u2580"],
- ["IsBraillePatterns", "\u2800\u2900"],
- ["IsBuhid", "\u1740\u1760"],
- ["IsCJKCompatibility", "\u3300\u3400"],
- ["IsCJKCompatibilityForms", "\uFE30\uFE50"],
- ["IsCJKCompatibilityIdeographs", "\uF900\uFB00"],
- ["IsCJKRadicalsSupplement", "\u2E80\u2F00"],
- ["IsCJKSymbolsandPunctuation", "\u3000\u3040"],
- ["IsCJKUnifiedIdeographs", "\u4E00\uA000"],
- ["IsCJKUnifiedIdeographsExtensionA", "\u3400\u4DC0"],
- ["IsCherokee", "\u13A0\u1400"],
- ["IsCombiningDiacriticalMarks", "\u0300\u0370"],
+ ["IsAlphabeticPresentationForms", "\uFB00\uFB50"],
+ ["IsArabic", "\u0600\u0700"],
+ ["IsArabicExtended-A", "\u08A0\u0900"],
+ ["IsArabicExtended-B", "\u0870\u08A0"],
+ ["IsArabicPresentationForms-A", "\uFB50\uFE00"],
+ ["IsArabicPresentationForms-B", "\uFE70\uFF00"],
+ ["IsArabicSupplement", "\u0750\u0780"],
+ ["IsArmenian", "\u0530\u0590"],
+ ["IsArrows", "\u2190\u2200"],
+ ["IsBalinese", "\u1B00\u1B80"],
+ ["IsBamum", "\uA6A0\uA700"],
+ ["IsBasicLatin", "\u0000\u0080"],
+ ["IsBatak", "\u1BC0\u1C00"],
+ ["IsBengali", "\u0980\u0A00"],
+ ["IsBlockElements", "\u2580\u25A0"],
+ ["IsBopomofo", "\u3100\u3130"],
+ ["IsBopomofoExtended", "\u31A0\u31C0"],
+ ["IsBoxDrawing", "\u2500\u2580"],
+ ["IsBraillePatterns", "\u2800\u2900"],
+ ["IsBuginese", "\u1A00\u1A20"],
+ ["IsBuhid", "\u1740\u1760"],
+ ["IsCJKCompatibility", "\u3300\u3400"],
+ ["IsCJKCompatibilityForms", "\uFE30\uFE50"],
+ ["IsCJKCompatibilityIdeographs", "\uF900\uFB00"],
+ ["IsCJKRadicalsSupplement", "\u2E80\u2F00"],
+ ["IsCJKStrokes", "\u31C0\u31F0"],
+ ["IsCJKSymbolsandPunctuation", "\u3000\u3040"],
+ ["IsCJKUnifiedIdeographs", "\u4E00\uA000"],
+ ["IsCJKUnifiedIdeographsExtensionA", "\u3400\u4DC0"],
+ ["IsCham", "\uAA00\uAA60"],
+ ["IsCherokee", "\u13A0\u1400"],
+ ["IsCherokeeSupplement", "\uAB70\uABC0"],
+ ["IsCombiningDiacriticalMarks", "\u0300\u0370"],
+ ["IsCombiningDiacriticalMarksExtended", "\u1AB0\u1B00"],
+ ["IsCombiningDiacriticalMarksSupplement", "\u1DC0\u1E00"],
["IsCombiningDiacriticalMarksforSymbols", "\u20D0\u2100"],
- ["IsCombiningHalfMarks", "\uFE20\uFE30"],
- ["IsCombiningMarksforSymbols", "\u20D0\u2100"],
- ["IsControlPictures", "\u2400\u2440"],
- ["IsCurrencySymbols", "\u20A0\u20D0"],
- ["IsCyrillic", "\u0400\u0500"],
- ["IsCyrillicSupplement", "\u0500\u0530"],
- ["IsDevanagari", "\u0900\u0980"],
- ["IsDingbats", "\u2700\u27C0"],
- ["IsEnclosedAlphanumerics", "\u2460\u2500"],
- ["IsEnclosedCJKLettersandMonths", "\u3200\u3300"],
- ["IsEthiopic", "\u1200\u1380"],
- ["IsGeneralPunctuation", "\u2000\u2070"],
- ["IsGeometricShapes", "\u25A0\u2600"],
- ["IsGeorgian", "\u10A0\u1100"],
- ["IsGreek", "\u0370\u0400"],
- ["IsGreekExtended", "\u1F00\u2000"],
- ["IsGreekandCoptic", "\u0370\u0400"],
- ["IsGujarati", "\u0A80\u0B00"],
- ["IsGurmukhi", "\u0A00\u0A80"],
- ["IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"],
- ["IsHangulCompatibilityJamo", "\u3130\u3190"],
- ["IsHangulJamo", "\u1100\u1200"],
- ["IsHangulSyllables", "\uAC00\uD7B0"],
- ["IsHanunoo", "\u1720\u1740"],
- ["IsHebrew", "\u0590\u0600"],
- ["IsHighPrivateUseSurrogates", "\uDB80\uDC00"],
- ["IsHighSurrogates", "\uD800\uDB80"],
- ["IsHiragana", "\u3040\u30A0"],
- ["IsIPAExtensions", "\u0250\u02B0"],
- ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"],
- ["IsKanbun", "\u3190\u31A0"],
- ["IsKangxiRadicals", "\u2F00\u2FE0"],
- ["IsKannada", "\u0C80\u0D00"],
- ["IsKatakana", "\u30A0\u3100"],
- ["IsKatakanaPhoneticExtensions", "\u31F0\u3200"],
- ["IsKhmer", "\u1780\u1800"],
- ["IsKhmerSymbols", "\u19E0\u1A00"],
- ["IsLao", "\u0E80\u0F00"],
- ["IsLatin-1Supplement", "\u0080\u0100"],
- ["IsLatinExtended-A", "\u0100\u0180"],
- ["IsLatinExtended-B", "\u0180\u0250"],
- ["IsLatinExtendedAdditional", "\u1E00\u1F00"],
- ["IsLetterlikeSymbols", "\u2100\u2150"],
- ["IsLimbu", "\u1900\u1950"],
- ["IsLowSurrogates", "\uDC00\uE000"],
- ["IsMalayalam", "\u0D00\u0D80"],
- ["IsMathematicalOperators", "\u2200\u2300"],
- ["IsMiscellaneousMathematicalSymbols-A", "\u27C0\u27F0"],
- ["IsMiscellaneousMathematicalSymbols-B", "\u2980\u2A00"],
- ["IsMiscellaneousSymbols", "\u2600\u2700"],
- ["IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"],
- ["IsMiscellaneousTechnical", "\u2300\u2400"],
- ["IsMongolian", "\u1800\u18B0"],
- ["IsMyanmar", "\u1000\u10A0"],
- ["IsNumberForms", "\u2150\u2190"],
- ["IsOgham", "\u1680\u16A0"],
- ["IsOpticalCharacterRecognition", "\u2440\u2460"],
- ["IsOriya", "\u0B00\u0B80"],
- ["IsPhoneticExtensions", "\u1D00\u1D80"],
- ["IsPrivateUse", "\uE000\uF900"],
- ["IsPrivateUseArea", "\uE000\uF900"],
- ["IsRunic", "\u16A0\u1700"],
- ["IsSinhala", "\u0D80\u0E00"],
- ["IsSmallFormVariants", "\uFE50\uFE70"],
- ["IsSpacingModifierLetters", "\u02B0\u0300"],
- ["IsSpecials", "\uFFF0"],
- ["IsSuperscriptsandSubscripts", "\u2070\u20A0"],
- ["IsSupplementalArrows-A", "\u27F0\u2800"],
- ["IsSupplementalArrows-B", "\u2900\u2980"],
- ["IsSupplementalMathematicalOperators", "\u2A00\u2B00"],
- ["IsSyriac", "\u0700\u0750"],
- ["IsTagalog", "\u1700\u1720"],
- ["IsTagbanwa", "\u1760\u1780"],
- ["IsTaiLe", "\u1950\u1980"],
- ["IsTamil", "\u0B80\u0C00"],
- ["IsTelugu", "\u0C00\u0C80"],
- ["IsThaana", "\u0780\u07C0"],
- ["IsThai", "\u0E00\u0E80"],
- ["IsTibetan", "\u0F00\u1000"],
- ["IsUnifiedCanadianAboriginalSyllabics", "\u1400\u1680"],
- ["IsVariationSelectors", "\uFE00\uFE10"],
- ["IsYiRadicals", "\uA490\uA4D0"],
- ["IsYiSyllables", "\uA000\uA490"],
- ["IsYijingHexagramSymbols", "\u4DC0\u4E00"],
+ ["IsCombiningHalfMarks", "\uFE20\uFE30"],
+ ["IsCommonIndicNumberForms", "\uA830\uA840"],
+ ["IsControlPictures", "\u2400\u2440"],
+ ["IsCoptic", "\u2C80\u2D00"],
+ ["IsCurrencySymbols", "\u20A0\u20D0"],
+ ["IsCyrillic", "\u0400\u0500"],
+ ["IsCyrillicExtended-A", "\u2DE0\u2E00"],
+ ["IsCyrillicExtended-B", "\uA640\uA6A0"],
+ ["IsCyrillicExtended-C", "\u1C80\u1C90"],
+ ["IsCyrillicSupplement", "\u0500\u0530"],
+ ["IsDevanagari", "\u0900\u0980"],
+ ["IsDevanagariExtended", "\uA8E0\uA900"],
+ ["IsDingbats", "\u2700\u27C0"],
+ ["IsEnclosedAlphanumerics", "\u2460\u2500"],
+ ["IsEnclosedCJKLettersandMonths", "\u3200\u3300"],
+ ["IsEthiopic", "\u1200\u1380"],
+ ["IsEthiopicExtended", "\u2D80\u2DE0"],
+ ["IsEthiopicExtended-A", "\uAB00\uAB30"],
+ ["IsEthiopicSupplement", "\u1380\u13A0"],
+ ["IsGeneralPunctuation", "\u2000\u2070"],
+ ["IsGeometricShapes", "\u25A0\u2600"],
+ ["IsGeorgian", "\u10A0\u1100"],
+ ["IsGeorgianExtended", "\u1C90\u1CC0"],
+ ["IsGeorgianSupplement", "\u2D00\u2D30"],
+ ["IsGlagolitic", "\u2C00\u2C60"],
+ ["IsGreekExtended", "\u1F00\u2000"],
+ ["IsGreekandCoptic", "\u0370\u0400"],
+ ["IsGujarati", "\u0A80\u0B00"],
+ ["IsGurmukhi", "\u0A00\u0A80"],
+ ["IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"],
+ ["IsHangulCompatibilityJamo", "\u3130\u3190"],
+ ["IsHangulJamo", "\u1100\u1200"],
+ ["IsHangulJamoExtended-A", "\uA960\uA980"],
+ ["IsHangulJamoExtended-B", "\uD7B0\uD800"],
+ ["IsHangulSyllables", "\uAC00\uD7B0"],
+ ["IsHanunoo", "\u1720\u1740"],
+ ["IsHebrew", "\u0590\u0600"],
+ ["IsHiragana", "\u3040\u30A0"],
+ ["IsIPAExtensions", "\u0250\u02B0"],
+ ["IsIdeographicDescriptionCharacters", "\u2FF0\u3000"],
+ ["IsJavanese", "\uA980\uA9E0"],
+ ["IsKanbun", "\u3190\u31A0"],
+ ["IsKangxiRadicals", "\u2F00\u2FE0"],
+ ["IsKannada", "\u0C80\u0D00"],
+ ["IsKatakana", "\u30A0\u3100"],
+ ["IsKatakanaPhoneticExtensions", "\u31F0\u3200"],
+ ["IsKayahLi", "\uA900\uA930"],
+ ["IsKhmer", "\u1780\u1800"],
+ ["IsKhmerSymbols", "\u19E0\u1A00"],
+ ["IsLao", "\u0E80\u0F00"],
+ ["IsLatin-1Supplement", "\u0080\u0100"],
+ ["IsLatinExtended-A", "\u0100\u0180"],
+ ["IsLatinExtended-B", "\u0180\u0250"],
+ ["IsLatinExtended-C", "\u2C60\u2C80"],
+ ["IsLatinExtended-D", "\uA720\uA800"],
+ ["IsLatinExtended-E", "\uAB30\uAB70"],
+ ["IsLatinExtendedAdditional", "\u1E00\u1F00"],
+ ["IsLepcha", "\u1C00\u1C50"],
+ ["IsLetterlikeSymbols", "\u2100\u2150"],
+ ["IsLimbu", "\u1900\u1950"],
+ ["IsLisu", "\uA4D0\uA500"],
+ ["IsMalayalam", "\u0D00\u0D80"],
+ ["IsMandaic", "\u0840\u0860"],
+ ["IsMathematicalOperators", "\u2200\u2300"],
+ ["IsMeeteiMayek", "\uABC0\uAC00"],
+ ["IsMeeteiMayekExtensions", "\uAAE0\uAB00"],
+ ["IsMiscellaneousMathematicalSymbols-A", "\u27C0\u27F0"],
+ ["IsMiscellaneousMathematicalSymbols-B", "\u2980\u2A00"],
+ ["IsMiscellaneousSymbols", "\u2600\u2700"],
+ ["IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"],
+ ["IsMiscellaneousTechnical", "\u2300\u2400"],
+ ["IsModifierToneLetters", "\uA700\uA720"],
+ ["IsMongolian", "\u1800\u18B0"],
+ ["IsMyanmar", "\u1000\u10A0"],
+ ["IsMyanmarExtended-A", "\uAA60\uAA80"],
+ ["IsMyanmarExtended-B", "\uA9E0\uAA00"],
+ ["IsNKo", "\u07C0\u0800"],
+ ["IsNewTaiLue", "\u1980\u19E0"],
+ ["IsNumberForms", "\u2150\u2190"],
+ ["IsOgham", "\u1680\u16A0"],
+ ["IsOlChiki", "\u1C50\u1C80"],
+ ["IsOpticalCharacterRecognition", "\u2440\u2460"],
+ ["IsOriya", "\u0B00\u0B80"],
+ ["IsPhags-pa", "\uA840\uA880"],
+ ["IsPhoneticExtensions", "\u1D00\u1D80"],
+ ["IsPhoneticExtensionsSupplement", "\u1D80\u1DC0"],
+ ["IsRejang", "\uA930\uA960"],
+ ["IsRunic", "\u16A0\u1700"],
+ ["IsSamaritan", "\u0800\u0840"],
+ ["IsSaurashtra", "\uA880\uA8E0"],
+ ["IsSinhala", "\u0D80\u0E00"],
+ ["IsSmallFormVariants", "\uFE50\uFE70"],
+ ["IsSpacingModifierLetters", "\u02B0\u0300"],
+ ["IsSpecials", "\uFFF0"], // start only, no end char: "\u10000" would compile as '\u1000' followed by '0'
+ ["IsSundanese", "\u1B80\u1BC0"],
+ ["IsSundaneseSupplement", "\u1CC0\u1CD0"],
+ ["IsSuperscriptsandSubscripts", "\u2070\u20A0"],
+ ["IsSupplementalArrows-A", "\u27F0\u2800"],
+ ["IsSupplementalArrows-B", "\u2900\u2980"],
+ ["IsSupplementalMathematicalOperators", "\u2A00\u2B00"],
+ ["IsSupplementalPunctuation", "\u2E00\u2E80"],
+ ["IsSylotiNagri", "\uA800\uA830"],
+ ["IsSyriac", "\u0700\u0750"],
+ ["IsSyriacSupplement", "\u0860\u0870"],
+ ["IsTagalog", "\u1700\u1720"],
+ ["IsTagbanwa", "\u1760\u1780"],
+ ["IsTaiLe", "\u1950\u1980"],
+ ["IsTaiTham", "\u1A20\u1AB0"],
+ ["IsTaiViet", "\uAA80\uAAE0"],
+ ["IsTamil", "\u0B80\u0C00"],
+ ["IsTelugu", "\u0C00\u0C80"],
+ ["IsThaana", "\u0780\u07C0"],
+ ["IsThai", "\u0E00\u0E80"],
+ ["IsTibetan", "\u0F00\u1000"],
+ ["IsTifinagh", "\u2D30\u2D80"],
+ ["IsUnifiedCanadianAboriginalSyllabics", "\u1400\u1680"],
+ ["IsUnifiedCanadianAboriginalSyllabicsExtended", "\u18B0\u1900"],
+ ["IsVai", "\uA500\uA640"],
+ ["IsVariationSelectors", "\uFE00\uFE10"],
+ ["IsVedicExtensions", "\u1CD0\u1D00"],
+ ["IsVerticalForms", "\uFE10\uFE20"],
+ ["IsYiRadicals", "\uA490\uA4D0"],
+ ["IsYiSyllables", "\uA000\uA490"],
+ ["IsYijingHexagramSymbols", "\u4DC0\u4E00"],
["_xmlC", /* Name Char */ "\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00B7\u00B8\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u02D0\u02D2\u0300\u0346\u0360\u0362\u0386\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0483\u0487\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0653\u0660\u066A\u0670\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06E9\u06EA\u06EE\u06F0\u06FA\u0901\u0904\u0905\u093A\u093C\u094E\u0951\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC"
+"\u09DE\u09DF\u09E4\u09E6\u09F2\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B70\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF0\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2"
+"\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0E01\u0E2F\u0E30\u0E3B\u0E40\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0F18\u0F1A\u0F20\u0F2A\u0F35\u0F36\u0F37\u0F38\u0F39\u0F3A\u0F3E\u0F48\u0F49\u0F6A\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F96\u0F97\u0F98\u0F99\u0FAE\u0FB1\u0FB8\u0FB9\u0FBA\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00"
diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj
new file mode 100644
index 00000000000000..f9929d13fb73d1
--- /dev/null
+++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/GenRegexNamedBlocks.csproj
@@ -0,0 +1,9 @@
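+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net10.0</TargetFramework>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+</Project>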
diff --git a/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs
new file mode 100644
index 00000000000000..0e1564d54c1326
--- /dev/null
+++ b/src/libraries/System.Text.RegularExpressions/tools/GenRegexNamedBlocks/Program.cs
@@ -0,0 +1,103 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+
+using static System.FormattableString;
+
+namespace GenRegexNamedBlocks
+{
+    /// <summary>
+    /// This program outputs the named blocks for RegexCharClass.cs
+    /// </summary>
+    internal static class Program
+    {
+        private static void Main(string[] args)
+        {
+            if (args.Length < 1)
+            {
+                Console.WriteLine("Usage: dotnet run -- <path-to-Blocks.txt>");
+                Console.WriteLine("Example: dotnet run -- Blocks.txt");
+                return;
+            }
+
+            // The input file should be Blocks.txt from the UCD corresponding to the
+            // version of the Unicode spec we're consuming.
+            // More info: https://www.unicode.org/reports/tr44/
+            // Latest Blocks.txt: https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt
+
+            string[] allInputLines = File.ReadAllLines(args[0]);
+
+            Regex inputLineRegex = new Regex(@"^(?<startCode>[0-9A-F]{4})\.\.(?<endCode>[0-9A-F]{4}); (?<blockName>.+)$");
+
+            var entries = new List<(string name, string startCode, string endCode)>();
+
+            foreach (string inputLine in allInputLines)
+            {
+                // We only care about lines of the form "XXXX..XXXX; Block name"
+                var match = inputLineRegex.Match(inputLine);
+                if (!match.Success)
+                {
+                    continue;
+                }
+
+                string startCode = match.Groups["startCode"].Value;
+                string endCode = match.Groups["endCode"].Value;
+                string blockName = match.Groups["blockName"].Value;
+
+                // Exclude the surrogate range and everything outside the BMP.
+                uint startCodeAsInt = uint.Parse(startCode, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
+                if (startCodeAsInt >= 0x10000 || (startCodeAsInt >= 0xD800 && startCodeAsInt <= 0xDFFF))
+                {
+                    continue;
+                }
+
+                // Exclude any private use areas
+                if (blockName.Contains("Private Use", StringComparison.OrdinalIgnoreCase))
+                {
+                    continue;
+                }
+
+                // Convert block name to Regex format (with "Is" prefix)
+                string regexBlockName = "Is" + RemoveAllNonAlphanumeric(blockName);
+
+                entries.Add((regexBlockName, startCode, endCode));
+            }
+
+            // Sort by name (ordinal): the table must be sorted by its first column
+            entries.Sort((a, b) => string.Compare(a.name, b.name, StringComparison.Ordinal));
+
+            // Generate the output
+            foreach (var entry in entries)
+            {
+                Console.WriteLine($" [\"{entry.name}\", \"\\u{entry.startCode}{FormatExclusiveEnd(entry.endCode)}\"],");
+            }
+        }
+
+        private static string RemoveAllNonAlphanumeric(string blockName)
+        {
+            // Allow only A-Z a-z 0-9 and hyphens
+            // Keep hyphens to preserve naming like "Latin-1" or "Extended-A"
+            return new string(blockName.ToCharArray().Where(c =>
+                ('A' <= c && c <= 'Z') ||
+                ('a' <= c && c <= 'z') ||
+                ('0' <= c && c <= '9') ||
+                c == '-').ToArray());
+        }
+
+        private static string FormatExclusiveEnd(string hexCode)
+        {
+            // Regex named blocks use the start of the next block as the end code, so add 1.
+            // A block ending at U+FFFF has no next BMP code point ("\u10000" is not one char),
+            // so emit no end at all, as the table already did for IsSpecials ("\uFFF0").
+            uint code = uint.Parse(hexCode, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
+            return code >= 0xFFFF ? "" : "\\u" + (code + 1).ToString("X4", CultureInfo.InvariantCulture);
+        }
+    }
+}
From 6d94ae7b051c8a3f4a76f9853fca6c892c2e1f01 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Oct 2025 16:58:09 +0000
Subject: [PATCH 03/13] Add IsGreek alias and update tests for new Unicode
blocks
- Add IsGreek as alias for IsGreekandCoptic for backward compatibility
- Add comprehensive tests for 52 new Unicode blocks
- Remove tests for deprecated surrogate and private use blocks
- All 29,287 tests now passing
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
.../Text/RegularExpressions/RegexCharClass.cs | 1 +
.../FunctionalTests/RegexCharacterSetTests.cs | 62 +++++++++++++++++--
2 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
index 37c9645dc14717..2eca0af5633b93 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
@@ -245,6 +245,7 @@ internal sealed partial class RegexCharClass
["IsGeorgianExtended", "\u1C90\u1CC0"],
["IsGeorgianSupplement", "\u2D00\u2D30"],
["IsGlagolitic", "\u2C00\u2C60"],
+ ["IsGreek", "\u0370\u0400"],
["IsGreekExtended", "\u1F00\u2000"],
["IsGreekandCoptic", "\u0370\u0400"],
["IsGujarati", "\u0A80\u0B00"],
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs
index 2eb52774c33842..a6f341a6e9a15e 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexCharacterSetTests.cs
@@ -245,7 +245,14 @@ public static IEnumerable