From 0a1400780addca9ea20b5915ff29989cc05391b0 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 18:47:42 +0200 Subject: [PATCH 1/9] UnicodeData.txt lines from the proposal --- unicodetools/data/ucd/dev/UnicodeData.txt | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a3739..0e29461ddc 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,32 @@ +1DF40;LATIN CAPITAL LETTER BARRED A;Lu;0;L;;;;;N;;;;1DF41; +1DF41;LATIN SMALL LETTER BARRED A;Ll;0;L;;;;;N;;;1DF40;;1DF40 +1DF42;LATIN SMALL LETTER BARRED OPEN O;Ll;0;L;;;;;N;;;;; +1DF43;LATIN SMALL CAPITAL BARRED E;Ll;0;L;;;;;N;;;;; +1DF44;LATIN SMALL LETTER BARRED OPEN E;Ll;0;L;;;;;N;;;;; +1DF45;LATIN SMALL LETTER G WITH STROKE AND PALATAL HOOK;Ll;0;L;;;;;N;;;;; +1DF46;LATIN SMALL LETTER BARRED H;Ll;0;L;;;;;N;;;;; +1DF47;LATIN SMALL LETTER BARRED H WITH HOOK;Ll;0;L;;;;;N;;;;; +1DF48;LATIN CAPITAL LETTER BARRED K;Lu;0;L;;;;;N;;;;1DF49; +1DF49;LATIN SMALL LETTER BARRED K;Ll;0;L;;;;;N;;;1DF48;;1DF48 +1DF4A;LATIN CAPITAL LETTER BARRED M;Ll;0;L;;;;;N;;;;1DF4B; +1DF4B;LATIN SMALL LETTER BARRED M;Ll;0;L;;;;;N;;;1DF4A;;1DF4A +1DF4C;LATIN SMALL LETTER BARRED M WITH HOOK;Ll;0;L;;;;;N;;;;; +1DF4D;LATIN CAPITAL LETTER BARRED N;Lu;0;L;;;;;N;;;;1DF4E; +1DF4E;LATIN SMALL LETTER BARRED N;Ll;0;L;;;;;N;;;1DF4D;;1DF4D +1DF4F;LATIN SMALL LETTER BARRED ENG;Ll;0;L;;;;;N;;;;; +1DF50;LATIN SMALL LETTER TURNED R WITH STROKE;Ll;0;L;;;;;N;;;;; +1DF51;LATIN CAPITAL LETTER BARRED V;Ll;0;L;;;;;N;;;;1DF52; +1DF52;LATIN SMALL LETTER BARRED V;Ll;0;L;;;;;N;;;1DF51;;1DF51 +1DF53;LATIN SMALL LETTER BARRED CLOSED OMEGA;Ll;0;L;;;;;N;;;;; +1DF54;LATIN SMALL LETTER BARRED LATIN CHI;Ll;0;L;;;;;N;;;;; +1DF55;LATIN SMALL LETTER Y WITH LOW STROKE;Ll;0;L;;;;;N;;;;; +1DF56;LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE;Lo;0;L;;;;;N;;;;; +1DFD2;MODIFIER LETTER SMALL B WITH 
STROKE;Lm;0;L; 0180;;;;N;;;;; +1DFD3;MODIFIER LETTER SMALL D WITH STROKE;Lm;0;L; 0111;;;;N;;;;; +1DFD4;MODIFIER LETTER SMALL G WITH STROKE;Lm;0;L; 01E5;;;;N;;;;; +1DFD5;MODIFIER LETTER SMALL L WITH STROKE;Lm;0;L; 0142;;;;N;;;;; +1DFD6;MODIFIER LETTER SMALL L WITH BAR;Lm;0;L; 019A;;;;N;;;;; +1DFD7;MODIFIER LETTER SMALL TURNED R WITH STROKE;Lm;0;L; 1DF50;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 049f75bac40b788da57a9fdc7ace68589494fc95 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 18:50:06 +0200 Subject: [PATCH 2/9] lb=AL --- unicodetools/data/ucd/dev/LineBreak.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3ce2582174..59af38e88c 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:55 GMT +# LineBreak-17.0.0.txt +# Date: 2024-10-21, 16:49:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3378,6 +3378,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; AL # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; AL # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; AL # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; AL # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; CM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI From d9fde28d117e9dc2aeb6ebbdd82bf22b2d58e7d9 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 18:52:34 +0200 Subject: [PATCH 3/9] Latin --- unicodetools/data/ucd/dev/Scripts.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd6..548e34ed2d 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,6 @@ +1DF40..1DF55 ; Latin +1DF56 ; Latin +1DFD2..1DFD7 ; Latin # Scripts-16.0.0.txt # Date: 2024-04-30, 21:48:40 GMT # © 2024 Unicode®, Inc. 
From 74c710f41a900a4828f746679a73bcc94113ff66 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 18:53:32 +0200 Subject: [PATCH 4/9] Diacritic and Other_Lowercase for the modifiers --- unicodetools/data/ucd/dev/PropList.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index fae2831e7a..d4e8ede45d 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,5 @@ +1DFD2..1DFD7; Diacritic +1DFD2..1DFD7; Other_Lowercase # PropList-16.0.0.txt # Date: 2024-05-31, 18:09:48 GMT # © 2024 Unicode®, Inc. From a2421520b5102f922c549de5480555d774dadb03 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 18:54:39 +0200 Subject: [PATCH 5/9] DoNotEmit.txt line from the proposal --- unicodetools/data/ucd/dev/DoNotEmit.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/DoNotEmit.txt b/unicodetools/data/ucd/dev/DoNotEmit.txt index 757a313483..3650742726 100644 --- a/unicodetools/data/ucd/dev/DoNotEmit.txt +++ b/unicodetools/data/ucd/dev/DoNotEmit.txt @@ -370,6 +370,7 @@ 0078 0321; 1D8D; Precomposed_Form # LATIN SMALL LETTER X, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER X WITH PALATAL HOOK 007A 0321; 1D8E; Precomposed_Form # LATIN SMALL LETTER Z, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER Z WITH PALATAL HOOK 014B 0321; 1DF14; Precomposed_Form # LATIN SMALL LETTER ENG, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER ENG WITH PALATAL HOOK +01E5 0321; 1DF45; Precomposed_Form # LATIN SMALL LETTER G WITH STROKE, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER G WITH STROKE AND PALATAL HOOK 0261 0321; 1D83; Precomposed_Form # LATIN SMALL LETTER SCRIPT G, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER G WITH PALATAL HOOK 026C 0321; 1DF13; Precomposed_Form # LATIN SMALL LETTER L WITH BELT, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER L WITH BELT AND PALATAL HOOK 
0279 0321; 1DF15; Precomposed_Form # LATIN SMALL LETTER TURNED R, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER TURNED R WITH PALATAL HOOK From e5181baba867d6b4a899225cb6107b58db572e7c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 18:56:38 +0200 Subject: [PATCH 6/9] Regenerate UCD --- unicodetools/data/ucd/dev/CaseFolding.txt | 9 +- unicodetools/data/ucd/dev/DerivedAge.txt | 15 +++- .../data/ucd/dev/DerivedCoreProperties.txt | 86 +++++++++++++++---- .../ucd/dev/DerivedNormalizationProps.txt | 44 ++++++++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 7 +- .../data/ucd/dev/NormalizationTest.txt | 10 ++- unicodetools/data/ucd/dev/PropList.txt | 12 +-- unicodetools/data/ucd/dev/Scripts.txt | 12 +-- unicodetools/data/ucd/dev/UnicodeData.txt | 58 ++++++------- .../data/ucd/dev/VerticalOrientation.txt | 7 +- .../data/ucd/dev/auxiliary/LineBreakTest.html | 4 +- .../dev/auxiliary/SentenceBreakProperty.txt | 18 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 9 +- .../ucd/dev/extracted/DerivedBidiClass.txt | 9 +- .../dev/extracted/DerivedCombiningClass.txt | 9 +- .../extracted/DerivedDecompositionType.txt | 7 +- .../dev/extracted/DerivedEastAsianWidth.txt | 9 +- .../dev/extracted/DerivedGeneralCategory.txt | 26 ++++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 13 +-- .../data/ucd/dev/extracted/DerivedName.txt | 35 +++++++- 20 files changed, 286 insertions(+), 113 deletions(-) diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 1b7a9c156c..1259a27d97 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ -# CaseFolding-16.0.0.txt -# Date: 2024-04-30, 21:48:11 GMT +# CaseFolding-17.0.0.txt +# Date: 2024-10-21, 16:55:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1616,6 +1616,11 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O 16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI 16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y +1DF40; C; 1DF41; # LATIN CAPITAL LETTER BARRED A +1DF48; C; 1DF49; # LATIN CAPITAL LETTER BARRED K +1DF4A; C; 1DF4B; # LATIN CAPITAL LETTER BARRED M +1DF4D; C; 1DF4E; # LATIN CAPITAL LETTER BARRED N +1DF51; C; 1DF52; # LATIN CAPITAL LETTER BARRED V 1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF 1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI 1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e487..5c38372402 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ -# DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# DerivedAge-17.0.0.txt +# Date: 2024-10-21, 16:55:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2059,4 +2059,15 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Total code points: 5185 +# ================================================ + +# Age=V17_0 + +# Newly assigned in Unicode 17.0.0 (September, 2025) + +1DF40..1DF56 ; 17.0 # [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; 17.0 # [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE + +# Total code points: 29 + # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1075638f1a..2fd5c61fe8 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ -# DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-31, 18:09:32 GMT +# DerivedCoreProperties-17.0.0.txt +# Date: 2024-10-21, 16:55:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1371,6 +1371,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Alphabetic # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Alphabetic # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; Alphabetic # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; Alphabetic # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; Alphabetic # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -1441,7 +1444,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142759 +# Total code points: 142788 # ================================================ @@ -2136,10 +2139,14 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lowercase # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Lowercase # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF41..1DF47 ; Lowercase # L& [7] LATIN SMALL 
LETTER BARRED A..LATIN SMALL LETTER BARRED H WITH HOOK +1DF49..1DF4C ; Lowercase # L& [4] LATIN SMALL LETTER BARRED K..LATIN SMALL LETTER BARRED M WITH HOOK +1DF4E..1DF55 ; Lowercase # L& [8] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER Y WITH LOW STROKE +1DFD2..1DFD7 ; Lowercase # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2569 +# Total code points: 2594 # ================================================ @@ -2798,12 +2805,15 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1D756..1D76E ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA 1D790..1D7A8 ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Uppercase # L& MATHEMATICAL BOLD CAPITAL DIGAMMA +1DF40 ; Uppercase # L& LATIN CAPITAL LETTER BARRED A +1DF48 ; Uppercase # L& LATIN CAPITAL LETTER BARRED K +1DF4D ; Uppercase # L& LATIN CAPITAL LETTER BARRED N 1E900..1E921 ; Uppercase # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA 1F130..1F149 ; Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1978 +# Total code points: 1981 # ================================================ @@ -2982,13 +2992,15 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND 
RETROFLEX HOOK 1DF0B..1DF1E ; Cased # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Cased # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; Cased # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DFD2..1DFD7 ; Cased # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4578 +# Total code points: 4606 # ================================================ @@ -3483,6 +3495,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 1DA84 ; Case_Ignorable # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; Case_Ignorable # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; Case_Ignorable # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DFD2..1DFD7 ; Case_Ignorable # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Case_Ignorable # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -3505,7 +3518,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL 
TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2755 # ================================================ @@ -4127,9 +4140,14 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10D50..10D65 ; Changes_When_Lowercased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Lowercased # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1DF40 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER BARRED A +1DF48 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER BARRED K +1DF4A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER BARRED M +1DF4D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER BARRED N +1DF51 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER BARRED V 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1460 +# Total code points: 1465 # ================================================ @@ -4767,9 +4785,14 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Uppercased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Uppercased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1DF41 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BARRED A +1DF49 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BARRED K +1DF4B ; Changes_When_Uppercased # L& LATIN SMALL LETTER BARRED M +1DF4E ; Changes_When_Uppercased # L& LATIN SMALL LETTER BARRED N +1DF52 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BARRED V 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER 
SHA -# Total code points: 1552 +# Total code points: 1557 # ================================================ @@ -5406,9 +5429,14 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Titlecased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Titlecased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1DF41 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BARRED A +1DF49 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BARRED K +1DF4B ; Changes_When_Titlecased # L& LATIN SMALL LETTER BARRED M +1DF4E ; Changes_When_Titlecased # L& LATIN SMALL LETTER BARRED N +1DF52 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BARRED V 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1479 +# Total code points: 1484 # ================================================ @@ -6042,9 +6070,14 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10D50..10D65 ; Changes_When_Casefolded # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Casefolded # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1DF40 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER BARRED A +1DF48 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER BARRED K +1DF4A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER BARRED M +1DF4D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER BARRED N +1DF51 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER BARRED V 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1533 +# Total code points: 1538 # 
================================================ @@ -6183,9 +6216,13 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Casemapped # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Changes_When_Casemapped # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1DF40..1DF41 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER BARRED A +1DF48..1DF4B ; Changes_When_Casemapped # L& [4] LATIN CAPITAL LETTER BARRED K..LATIN SMALL LETTER BARRED M +1DF4D..1DF4E ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER BARRED N..LATIN SMALL LETTER BARRED N +1DF51..1DF52 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER BARRED V..LATIN SMALL LETTER BARRED V 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2981 +# Total code points: 2991 # ================================================ @@ -6902,6 +6939,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; ID_Start # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; ID_Start # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; ID_Start # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG 
LETTER W 1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -6962,7 +7002,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141298 # ================================================ @@ -8290,6 +8330,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; ID_Continue # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; ID_Continue # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; ID_Continue # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -8370,7 +8413,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144570 # ================================================ @@ -9088,6 +9131,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL 
LETTER EU..HALFWIDTH HANGU 1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; XID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; XID_Start # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; XID_Start # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; XID_Start # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -9148,7 +9194,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141275 # ================================================ @@ -10477,6 +10523,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; XID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; XID_Continue # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; XID_Continue # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; XID_Continue # Lm [6] MODIFIER 
LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -10557,7 +10606,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144551 # ================================================ @@ -12693,6 +12742,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Grapheme_Base # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Grapheme_Base # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; Grapheme_Base # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; Grapheme_Base # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; Grapheme_Base # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -12812,7 +12864,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK 
UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152730 +# Total code points: 152759 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index ce636abb53..008c29db2b 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ -# DerivedNormalizationProps-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedNormalizationProps-17.0.0.txt +# Date: 2024-10-21, 16:55:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1707,6 +1707,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKD_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKD_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKD_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DFD2..1DFD7 ; NFKD_QC; N # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; NFKD_QC; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1EE00..1EE03 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; NFKD_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -1753,7 +1754,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK 
COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 17085 +# Total code points: 17091 # ================================================ @@ -2118,6 +2119,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKC_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKC_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKC_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DFD2..1DFD7 ; NFKC_QC; N # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; NFKC_QC; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1EE00..1EE03 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; NFKC_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -2164,7 +2166,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKC_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4964 +# Total code points: 4970 # ================================================ @@ -8225,6 +8227,17 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] ...... -# Total code points: 10554 +# Total code points: 10565 # ================================================ @@ -14359,6 +14372,17 @@ FFF0..FFF8 ; NFKC_SCF; # Cn [9] ...... -# Total code points: 10516 +# Total code points: 10527 # ================================================ @@ -16246,6 +16270,12 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] ...... 
-# Total code points: 10554 +# Total code points: 10565 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea5..a1da26fc59 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# EastAsianWidth-17.0.0.txt +# Date: 2024-10-21, 16:55:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2468,6 +2468,9 @@ FFFD ; A # So REPLACEMENT CHARACTER 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; N # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; N # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; N # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 3aae8f72e8..8713f73786 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ -# NormalizationTest-16.0.0.txt -# Date: 2024-04-30, 21:48:23 GMT +# NormalizationTest-17.0.0.txt +# Date: 2024-10-21, 16:56:01 GMT # © 2024 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -16284,6 +16284,12 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 1D7FD;1D7FD;1D7FD;0037;0037; # (𝟽; 𝟽; 𝟽; 7; 7; ) MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE;1D7FE;1D7FE;0038;0038; # (𝟾; 𝟾; 𝟾; 8; 8; ) MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF;1D7FF;1D7FF;0039;0039; # (𝟿; 𝟿; 𝟿; 9; 9; ) MATHEMATICAL MONOSPACE DIGIT NINE +1DFD2;1DFD2;1DFD2;0180;0180; # (𝿒; 𝿒; 𝿒; ƀ; ƀ; ) MODIFIER LETTER SMALL B WITH STROKE +1DFD3;1DFD3;1DFD3;0111;0111; # (𝿓; 𝿓; 𝿓; đ; đ; ) MODIFIER LETTER SMALL D WITH STROKE +1DFD4;1DFD4;1DFD4;01E5;01E5; # (𝿔; 𝿔; 𝿔; ǥ; ǥ; ) MODIFIER LETTER SMALL G WITH STROKE +1DFD5;1DFD5;1DFD5;0142;0142; # (𝿕; 𝿕; 𝿕; ł; ł; ) MODIFIER LETTER SMALL L WITH STROKE +1DFD6;1DFD6;1DFD6;019A;019A; # (𝿖; 𝿖; 𝿖; ƚ; ƚ; ) MODIFIER LETTER SMALL L WITH BAR +1DFD7;1DFD7;1DFD7;1DF50;1DF50; # (𝿗; 𝿗; 𝿗; 𝽐; 𝽐; ) MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030;1E030;1E030;0430;0430; # (𞀰; 𞀰; 𞀰; а; а; ) MODIFIER LETTER CYRILLIC SMALL A 1E031;1E031;1E031;0431;0431; # (𞀱; 𞀱; 𞀱; б; б; ) MODIFIER LETTER CYRILLIC SMALL BE 1E032;1E032;1E032;0432;0432; # (𞀲; 𞀲; 𞀲; в; в; ) MODIFIER LETTER CYRILLIC SMALL VE diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index d4e8ede45d..860ed4b574 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,7 +1,5 @@ -1DFD2..1DFD7; Diacritic -1DFD2..1DFD7; Other_Lowercase -# PropList-16.0.0.txt -# Date: 2024-05-31, 18:09:48 GMT +# PropList-17.0.0.txt +# Date: 2024-10-21, 16:56:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1143,6 +1141,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1DFD2..1DFD7 ; Diacritic # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Diacritic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE @@ -1152,7 +1151,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1178 +# Total code points: 1184 # ================================================ @@ -1230,9 +1229,10 @@ AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1DFD2..1DFD7 ; Other_Lowercase # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -# Total code points: 311 +# Total code points: 317 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 
548e34ed2d..c878fd4576 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,8 +1,5 @@ -1DF40..1DF55 ; Latin -1DF56 ; Latin -1DFD2..1DFD7 ; Latin -# Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Scripts-17.0.0.txt +# Date: 2024-10-21, 16:56:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -704,8 +701,11 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; Latin # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; Latin # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; Latin # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE -# Total code points: 1487 +# Total code points: 1516 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 0e29461ddc..218cc55cb4 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,32 +1,3 @@ -1DF40;LATIN CAPITAL LETTER BARRED A;Lu;0;L;;;;;N;;;;1DF41; -1DF41;LATIN SMALL LETTER BARRED A;Ll;0;L;;;;;N;;;1DF40;;1DF40 -1DF42;LATIN SMALL LETTER BARRED OPEN O;Ll;0;L;;;;;N;;;;; -1DF43;LATIN SMALL CAPITAL BARRED E;Ll;0;L;;;;;N;;;;; -1DF44;LATIN SMALL LETTER BARRED OPEN E;Ll;0;L;;;;;N;;;;; -1DF45;LATIN SMALL LETTER G WITH STROKE AND PALATAL HOOK;Ll;0;L;;;;;N;;;;; -1DF46;LATIN SMALL LETTER BARRED H;Ll;0;L;;;;;N;;;;; -1DF47;LATIN SMALL LETTER BARRED H WITH 
HOOK;Ll;0;L;;;;;N;;;;; -1DF48;LATIN CAPITAL LETTER BARRED K;Lu;0;L;;;;;N;;;;1DF49; -1DF49;LATIN SMALL LETTER BARRED K;Ll;0;L;;;;;N;;;1DF48;;1DF48 -1DF4A;LATIN CAPITAL LETTER BARRED M;Ll;0;L;;;;;N;;;;1DF4B; -1DF4B;LATIN SMALL LETTER BARRED M;Ll;0;L;;;;;N;;;1DF4A;;1DF4A -1DF4C;LATIN SMALL LETTER BARRED M WITH HOOK;Ll;0;L;;;;;N;;;;; -1DF4D;LATIN CAPITAL LETTER BARRED N;Lu;0;L;;;;;N;;;;1DF4E; -1DF4E;LATIN SMALL LETTER BARRED N;Ll;0;L;;;;;N;;;1DF4D;;1DF4D -1DF4F;LATIN SMALL LETTER BARRED ENG;Ll;0;L;;;;;N;;;;; -1DF50;LATIN SMALL LETTER TURNED R WITH STROKE;Ll;0;L;;;;;N;;;;; -1DF51;LATIN CAPITAL LETTER BARRED V;Ll;0;L;;;;;N;;;;1DF52; -1DF52;LATIN SMALL LETTER BARRED V;Ll;0;L;;;;;N;;;1DF51;;1DF51 -1DF53;LATIN SMALL LETTER BARRED CLOSED OMEGA;Ll;0;L;;;;;N;;;;; -1DF54;LATIN SMALL LETTER BARRED LATIN CHI;Ll;0;L;;;;;N;;;;; -1DF55;LATIN SMALL LETTER Y WITH LOW STROKE;Ll;0;L;;;;;N;;;;; -1DF56;LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE;Lo;0;L;;;;;N;;;;; -1DFD2;MODIFIER LETTER SMALL B WITH STROKE;Lm;0;L; 0180;;;;N;;;;; -1DFD3;MODIFIER LETTER SMALL D WITH STROKE;Lm;0;L; 0111;;;;N;;;;; -1DFD4;MODIFIER LETTER SMALL G WITH STROKE;Lm;0;L; 01E5;;;;N;;;;; -1DFD5;MODIFIER LETTER SMALL L WITH STROKE;Lm;0;L; 0142;;;;N;;;;; -1DFD6;MODIFIER LETTER SMALL L WITH BAR;Lm;0;L; 019A;;;;N;;;;; -1DFD7;MODIFIER LETTER SMALL TURNED R WITH STROKE;Lm;0;L; 1DF50;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -35685,6 +35656,35 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1DF28;LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF29;LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF2A;LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; +1DF40;LATIN CAPITAL LETTER BARRED A;Lu;0;L;;;;;N;;;;1DF41; +1DF41;LATIN SMALL LETTER BARRED A;Ll;0;L;;;;;N;;;1DF40;;1DF40 +1DF42;LATIN SMALL LETTER BARRED OPEN O;Ll;0;L;;;;;N;;;;; +1DF43;LATIN SMALL CAPITAL BARRED E;Ll;0;L;;;;;N;;;;; +1DF44;LATIN 
SMALL LETTER BARRED OPEN E;Ll;0;L;;;;;N;;;;; +1DF45;LATIN SMALL LETTER G WITH STROKE AND PALATAL HOOK;Ll;0;L;;;;;N;;;;; +1DF46;LATIN SMALL LETTER BARRED H;Ll;0;L;;;;;N;;;;; +1DF47;LATIN SMALL LETTER BARRED H WITH HOOK;Ll;0;L;;;;;N;;;;; +1DF48;LATIN CAPITAL LETTER BARRED K;Lu;0;L;;;;;N;;;;1DF49; +1DF49;LATIN SMALL LETTER BARRED K;Ll;0;L;;;;;N;;;1DF48;;1DF48 +1DF4A;LATIN CAPITAL LETTER BARRED M;Ll;0;L;;;;;N;;;;1DF4B; +1DF4B;LATIN SMALL LETTER BARRED M;Ll;0;L;;;;;N;;;1DF4A;;1DF4A +1DF4C;LATIN SMALL LETTER BARRED M WITH HOOK;Ll;0;L;;;;;N;;;;; +1DF4D;LATIN CAPITAL LETTER BARRED N;Lu;0;L;;;;;N;;;;1DF4E; +1DF4E;LATIN SMALL LETTER BARRED N;Ll;0;L;;;;;N;;;1DF4D;;1DF4D +1DF4F;LATIN SMALL LETTER BARRED ENG;Ll;0;L;;;;;N;;;;; +1DF50;LATIN SMALL LETTER TURNED R WITH STROKE;Ll;0;L;;;;;N;;;;; +1DF51;LATIN CAPITAL LETTER BARRED V;Ll;0;L;;;;;N;;;;1DF52; +1DF52;LATIN SMALL LETTER BARRED V;Ll;0;L;;;;;N;;;1DF51;;1DF51 +1DF53;LATIN SMALL LETTER BARRED CLOSED OMEGA;Ll;0;L;;;;;N;;;;; +1DF54;LATIN SMALL LETTER BARRED LATIN CHI;Ll;0;L;;;;;N;;;;; +1DF55;LATIN SMALL LETTER Y WITH LOW STROKE;Ll;0;L;;;;;N;;;;; +1DF56;LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE;Lo;0;L;;;;;N;;;;; +1DFD2;MODIFIER LETTER SMALL B WITH STROKE;Lm;0;L; 0180;;;;N;;;;; +1DFD3;MODIFIER LETTER SMALL D WITH STROKE;Lm;0;L; 0111;;;;N;;;;; +1DFD4;MODIFIER LETTER SMALL G WITH STROKE;Lm;0;L; 01E5;;;;N;;;;; +1DFD5;MODIFIER LETTER SMALL L WITH STROKE;Lm;0;L; 0142;;;;N;;;;; +1DFD6;MODIFIER LETTER SMALL L WITH BAR;Lm;0;L; 019A;;;;N;;;;; +1DFD7;MODIFIER LETTER SMALL TURNED R WITH STROKE;Lm;0;L; 1DF50;;;;N;;;;; 1E000;COMBINING GLAGOLITIC LETTER AZU;Mn;230;NSM;;;;;N;;;;; 1E001;COMBINING GLAGOLITIC LETTER BUKY;Mn;230;NSM;;;;;N;;;;; 1E002;COMBINING GLAGOLITIC LETTER VEDE;Mn;230;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd72285..903980e16d 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ 
b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# VerticalOrientation-17.0.0.txt +# Date: 2024-10-21, 16:56:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2320,6 +2320,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1DF0A ; R # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; R # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; R # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; R # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; R # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; R # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; R # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; R # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; R # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html index 3b2899e64a..15295006da 100644 --- a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html @@ -7,7 +7,7 @@

Line_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-10-14, 12:25:22 GMT

+

Date: 2024-10-21, 16:55:56 GMT

This page illustrates the application of the Line_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of H3 and JT shows ×, with the rule 26.03. Checking below the table, rule 26.03 is “JT | H3 × JT”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -93,7 +93,7 @@

Rules

7.02× ZW 8.0ZW SP* ÷ 8.1ZWJ_O × -9.0(?<X>[^SP BK CR LF NL ZW]) ( CM | ZWJ )* → {X} +9.0(?<X>[^BK CR LF NL SP ZW]) ( CM | ZWJ )* → {X} 10.0( CM | ZWJ ) → A 11.01× WJ 11.02WJ × diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index ca3689e6bc..0e859cebe3 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ -# SentenceBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:32 GMT +# SentenceBreakProperty-17.0.0.txt +# Date: 2024-10-21, 16:56:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1317,10 +1317,14 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Lower # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lower # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Lower # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF41..1DF47 ; Lower # L& [7] LATIN SMALL LETTER BARRED A..LATIN SMALL LETTER BARRED H WITH HOOK +1DF49..1DF4C ; Lower # L& [4] LATIN SMALL LETTER BARRED K..LATIN SMALL LETTER BARRED M WITH HOOK +1DF4E..1DF55 ; Lower # L& [8] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER Y WITH LOW STROKE +1DFD2..1DFD7 ; Lower # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2522 +# Total code points: 2547 # 
================================================ @@ -1977,12 +1981,15 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1D756..1D76E ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA 1D790..1D7A8 ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Upper # L& MATHEMATICAL BOLD CAPITAL DIGAMMA +1DF40 ; Upper # L& LATIN CAPITAL LETTER BARRED A +1DF48 ; Upper # L& LATIN CAPITAL LETTER BARRED K +1DF4D ; Upper # L& LATIN CAPITAL LETTER BARRED N 1E900..1E921 ; Upper # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA 1F130..1F149 ; Upper # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1963 +# Total code points: 1966 # ================================================ @@ -2527,6 +2534,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; OLetter # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1DF0A ; OLetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF56 ; OLetter # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1E100..1E12C ; OLetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; OLetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; OLetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ @@ -2585,7 +2593,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter 
# Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136908 +# Total code points: 136909 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e557c3d0de..fc0355b776 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ -# WordBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:36 GMT +# WordBreakProperty-17.0.0.txt +# Date: 2024-10-21, 16:56:33 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1301,6 +1301,9 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF0A ; ALetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ALetter # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ALetter # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; ALetter # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; ALetter # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; ALetter # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; ALetter # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ALetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ALetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1355,7 +1358,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN 
CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33820 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa0..48e3d90cc0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ -# DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# DerivedBidiClass-17.0.0.txt +# Date: 2024-10-21, 16:55:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1175,6 +1175,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; L # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; L # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; L # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; L # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1214,7 +1217,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; 
L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815351 code points not listed here. +# The above property value applies to 815322 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96e..49fafbac08 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ -# DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# DerivedCombiningClass-17.0.0.txt +# Date: 2024-10-21, 16:55:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1936,6 +1936,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1DF0A ; 0 # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; 0 # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; 0 # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; 0 # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; 0 # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; 0 # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; 0 # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; 0 # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; 0 # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -2060,7 +2063,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION 
SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. +# The above property value applies to 821552 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index a825479ac3..f7075114a0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ -# DerivedDecompositionType-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedDecompositionType-17.0.0.txt +# Date: 2024-10-21, 16:55:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -880,11 +880,12 @@ AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W 10781..10785 ; Super # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Super # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Super # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1DFD2..1DFD7 ; Super # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E050 ; Super # Lm [33] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL PALOCHKA 1E06B..1E06D ; Super # Lm [3] MODIFIER LETTER CYRILLIC SMALL ES WITH DESCENDER..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1F16A..1F16C ; Super # So [3] RAISED MC SIGN..RAISED MR SIGN -# Total code points: 249 +# Total code points: 255 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt 
b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaac..415dbc7231 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ -# DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedEastAsianWidth-17.0.0.txt +# Date: 2024-10-21, 16:55:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1965,6 +1965,9 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; N # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; N # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; N # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; N # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -2103,7 +2106,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. +# The above property value applies to 761070 code points not listed here. 
# Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca93..17939f5f5f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ -# DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedGeneralCategory-17.0.0.txt +# Date: 2024-10-21, 16:55:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -633,7 +633,9 @@ FFFE..FFFF ; Cn # [2] .. 1DAA0 ; Cn # 1DAB0..1DEFF ; Cn # [1104] .. 1DF1F..1DF24 ; Cn # [6] .. -1DF2B..1DFFF ; Cn # [213] .. +1DF2B..1DF3F ; Cn # [21] .. +1DF57..1DFD1 ; Cn # [123] .. +1DFD8..1DFFF ; Cn # [40] .. 1E007 ; Cn # 1E019..1E01A ; Cn # [2] .. 1E022 ; Cn # @@ -747,7 +749,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. 
-# Total code points: 819533 +# Total code points: 819504 # ================================================ @@ -1403,9 +1405,12 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D756..1D76E ; Lu # [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA 1D790..1D7A8 ; Lu # [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA +1DF40 ; Lu # LATIN CAPITAL LETTER BARRED A +1DF48 ; Lu # LATIN CAPITAL LETTER BARRED K +1DF4D ; Lu # LATIN CAPITAL LETTER BARRED N 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1858 +# Total code points: 1861 # ================================================ @@ -2072,9 +2077,12 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1DF00..1DF09 ; Ll # [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Ll # [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF41..1DF47 ; Ll # [7] LATIN SMALL LETTER BARRED A..LATIN SMALL LETTER BARRED H WITH HOOK +1DF49..1DF4C ; Ll # [4] LATIN SMALL LETTER BARRED K..LATIN SMALL LETTER BARRED M WITH HOOK +1DF4E..1DF55 ; Ll # [8] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER Y WITH LOW STROKE 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2258 +# Total code points: 2277 # ================================================ @@ -2168,12 +2176,13 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1AFF0..1AFF3 ; Lm # [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Lm # [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Lm # [2] 
KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1DFD2..1DFD7 ; Lm # [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Lm # [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E137..1E13D ; Lm # [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 404 +# Total code points: 410 # ================================================ @@ -2653,6 +2662,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1BC80..1BC88 ; Lo # [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; Lo # [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1DF0A ; Lo # LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF56 ; Lo # LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1E100..1E12C ; Lo # [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E14E ; Lo # NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ 1E290..1E2AD ; Lo # [30] TOTO LETTER PA..TOTO LETTER A @@ -2708,7 +2718,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136477 +# Total code points: 136478 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 31d143e928..9fe4599d2b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ -# DerivedLineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:50 GMT +# DerivedLineBreak-17.0.0.txt +# Date: 2024-10-21, 16:55:51 GMT # © 2024 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757624 code points not listed here. +# Total code points: 895092 # ================================================ @@ -1520,6 +1520,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; AL # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40..1DF55 ; AL # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; AL # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2..1DFD7 ; AL # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; AL # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; AL # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; AL # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1613,7 +1616,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26679 +# Total code points: 26708 # ================================================ diff --git 
a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b55..6fc4577f3f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ -# DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedName-17.0.0.txt +# Date: 2024-10-21, 16:55:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -41461,6 +41461,35 @@ FFFD ; REPLACEMENT CHARACTER 1DF28 ; LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK 1DF29 ; LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK 1DF2A ; LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF40 ; LATIN CAPITAL LETTER BARRED A +1DF41 ; LATIN SMALL LETTER BARRED A +1DF42 ; LATIN SMALL LETTER BARRED OPEN O +1DF43 ; LATIN SMALL CAPITAL BARRED E +1DF44 ; LATIN SMALL LETTER BARRED OPEN E +1DF45 ; LATIN SMALL LETTER G WITH STROKE AND PALATAL HOOK +1DF46 ; LATIN SMALL LETTER BARRED H +1DF47 ; LATIN SMALL LETTER BARRED H WITH HOOK +1DF48 ; LATIN CAPITAL LETTER BARRED K +1DF49 ; LATIN SMALL LETTER BARRED K +1DF4A ; LATIN CAPITAL LETTER BARRED M +1DF4B ; LATIN SMALL LETTER BARRED M +1DF4C ; LATIN SMALL LETTER BARRED M WITH HOOK +1DF4D ; LATIN CAPITAL LETTER BARRED N +1DF4E ; LATIN SMALL LETTER BARRED N +1DF4F ; LATIN SMALL LETTER BARRED ENG +1DF50 ; LATIN SMALL LETTER TURNED R WITH STROKE +1DF51 ; LATIN CAPITAL LETTER BARRED V +1DF52 ; LATIN SMALL LETTER BARRED V +1DF53 ; LATIN SMALL LETTER BARRED CLOSED OMEGA +1DF54 ; LATIN SMALL LETTER BARRED LATIN CHI +1DF55 ; LATIN SMALL LETTER Y WITH LOW STROKE +1DF56 ; LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DFD2 ; MODIFIER LETTER SMALL B WITH STROKE +1DFD3 ; MODIFIER LETTER SMALL D WITH STROKE +1DFD4 ; MODIFIER LETTER SMALL G WITH STROKE +1DFD5 ; MODIFIER LETTER SMALL L WITH STROKE +1DFD6 ; MODIFIER 
LETTER SMALL L WITH BAR +1DFD7 ; MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000 ; COMBINING GLAGOLITIC LETTER AZU 1E001 ; COMBINING GLAGOLITIC LETTER BUKY 1E002 ; COMBINING GLAGOLITIC LETTER VEDE @@ -45367,6 +45396,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 155027 # EOF From d0acbfe97175bff3aaebfb70544b5613854458c6 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 20:12:10 +0200 Subject: [PATCH 7/9] A failing test --- .../text/UCD/AdditionComparisons/155.txt | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/155.txt diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/155.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/155.txt new file mode 100644 index 0000000000..2a724e7f96 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/155.txt @@ -0,0 +1,77 @@ +# Phonetic characters: Barred letters +# https://github.com/unicode-org/utc-release-management/issues/155 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. 
+Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Propertywise [\x{1DF40} \N{LATIN CAPITAL LETTER BARRED A} + \x{1DF48} \N{LATIN CAPITAL LETTER BARRED K} + \x{1DF4A} \N{LATIN CAPITAL LETTER BARRED M} + \x{1DF4D} \N{LATIN CAPITAL LETTER BARRED N} + \x{1DF51} \N{LATIN CAPITAL LETTER BARRED V}] + : [\x{1DF41} \N{LATIN SMALL LETTER BARRED A} + \x{1DF49} \N{LATIN SMALL LETTER BARRED K} + \x{1DF4B} \N{LATIN SMALL LETTER BARRED M} + \x{1DF4E} \N{LATIN SMALL LETTER BARRED N} + \x{1DF52} \N{LATIN SMALL LETTER BARRED V}] +CorrespondTo [ \x{A740} Ꝁ \N{LATIN CAPITAL LETTER K WITH STROKE}] + : [ \x{A741} ꝁ \N{LATIN SMALL LETTER K WITH STROKE}] +UpTo: Block (Latin_Extended_G vs Latin_Extended_D) + +Propertywise [\x{1DF42} \N{LATIN SMALL LETTER BARRED OPEN O} + \x{1DF43} \N{LATIN SMALL CAPITAL BARRED E} + \x{1DF44} \N{LATIN SMALL LETTER BARRED OPEN E} + \x{1DF45} \N{LATIN SMALL LETTER G WITH STROKE AND PALATAL HOOK} + \x{1DF46} \N{LATIN SMALL LETTER BARRED H} + \x{1DF47} \N{LATIN SMALL LETTER BARRED H WITH HOOK} + \x{1DF4C} \N{LATIN SMALL LETTER BARRED M WITH HOOK} + \x{1DF4F} \N{LATIN SMALL LETTER BARRED ENG} + \x{1DF50} \N{LATIN SMALL LETTER TURNED R WITH STROKE} + \x{1DF53} \N{LATIN SMALL LETTER BARRED CLOSED OMEGA} + \x{1DF54} \N{LATIN SMALL LETTER BARRED LATIN CHI} + \x{1DF55} \N{LATIN SMALL LETTER Y WITH LOW STROKE} + \x{1DF01} 𝼁 \N{LATIN SMALL LETTER REVERSED SCRIPT G}] + AreAlike + +Propertywise [\x{1DF56} \N{LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE} + \x{1DF0E} 𝼎 \N{LATIN LETTER INVERTED GLOTTAL STOP WITH CURL}] + AreAlike + +Ignoring Block: + +Ignoring Unicode_1_Name: +Propertywise [\x{1DF56} \N{LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE} + \x{02A1} ʡ \N{LATIN LETTER GLOTTAL STOP WITH STROKE}] + AreAlike +end Ignoring; + +Propertywise [\x{1DFD3} \N{MODIFIER LETTER SMALL D WITH STROKE} + \x{1DFD5} \N{MODIFIER LETTER SMALL L WITH STROKE} + \x{1DFD6} \N{MODIFIER LETTER SMALL L WITH BAR}] + ⧴ [\x{0111} đ \N{LATIN SMALL 
LETTER D WITH STROKE} + \x{0142} ł \N{LATIN SMALL LETTER L WITH STROKE} + \x{019A} ƚ \N{LATIN SMALL LETTER L WITH BAR}] +CorrespondTo [\x{10795} 𐞕 \N{MODIFIER LETTER SMALL H WITH STROKE}] + ⧴ [ \x{0127} ħ \N{LATIN SMALL LETTER H WITH STROKE}] + +Propertywise [\x{1DFD2} \N{MODIFIER LETTER SMALL B WITH STROKE} + \x{1DFD4} \N{MODIFIER LETTER SMALL G WITH STROKE}] + ⧴ [\x{0180} ƀ \N{LATIN SMALL LETTER B WITH STROKE} + \x{01E5} ǥ \N{LATIN SMALL LETTER G WITH STROKE}] +CorrespondTo [\x{10795} 𐞕 \N{MODIFIER LETTER SMALL H WITH STROKE}] + ⧴ [ \x{0127} ħ \N{LATIN SMALL LETTER H WITH STROKE}] + +Propertywise [\x{1DFD7} \N{MODIFIER LETTER SMALL TURNED R WITH STROKE}] + : [\x{1DF50} \N{LATIN SMALL LETTER TURNED R WITH STROKE}] +CorrespondTo [\x{02B4} ʴ \N{MODIFIER LETTER SMALL TURNED R}] + : [\x{0279} ɹ \N{LATIN SMALL LETTER TURNED R}] +end Ignoring; + + +end Ignoring; + +end Ignoring; \ No newline at end of file From e0be7ae99c86f905b76e7cb0f0ee13a79822f7da Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 20:13:25 +0200 Subject: [PATCH 8/9] Fix three gc assignments --- unicodetools/data/ucd/dev/UnicodeData.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 218cc55cb4..5cb9e0fe1e 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -35666,19 +35666,19 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1DF47;LATIN SMALL LETTER BARRED H WITH HOOK;Ll;0;L;;;;;N;;;;; 1DF48;LATIN CAPITAL LETTER BARRED K;Lu;0;L;;;;;N;;;;1DF49; 1DF49;LATIN SMALL LETTER BARRED K;Ll;0;L;;;;;N;;;1DF48;;1DF48 -1DF4A;LATIN CAPITAL LETTER BARRED M;Ll;0;L;;;;;N;;;;1DF4B; +1DF4A;LATIN CAPITAL LETTER BARRED M;Lu;0;L;;;;;N;;;;1DF4B; 1DF4B;LATIN SMALL LETTER BARRED M;Ll;0;L;;;;;N;;;1DF4A;;1DF4A 1DF4C;LATIN SMALL LETTER BARRED M WITH HOOK;Ll;0;L;;;;;N;;;;; 1DF4D;LATIN CAPITAL LETTER BARRED N;Lu;0;L;;;;;N;;;;1DF4E; 1DF4E;LATIN SMALL 
LETTER BARRED N;Ll;0;L;;;;;N;;;1DF4D;;1DF4D 1DF4F;LATIN SMALL LETTER BARRED ENG;Ll;0;L;;;;;N;;;;; 1DF50;LATIN SMALL LETTER TURNED R WITH STROKE;Ll;0;L;;;;;N;;;;; -1DF51;LATIN CAPITAL LETTER BARRED V;Ll;0;L;;;;;N;;;;1DF52; +1DF51;LATIN CAPITAL LETTER BARRED V;Lu;0;L;;;;;N;;;;1DF52; 1DF52;LATIN SMALL LETTER BARRED V;Ll;0;L;;;;;N;;;1DF51;;1DF51 1DF53;LATIN SMALL LETTER BARRED CLOSED OMEGA;Ll;0;L;;;;;N;;;;; 1DF54;LATIN SMALL LETTER BARRED LATIN CHI;Ll;0;L;;;;;N;;;;; 1DF55;LATIN SMALL LETTER Y WITH LOW STROKE;Ll;0;L;;;;;N;;;;; -1DF56;LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE;Lo;0;L;;;;;N;;;;; +1DF56;LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE;Ll;0;L;;;;;N;;;;; 1DFD2;MODIFIER LETTER SMALL B WITH STROKE;Lm;0;L; 0180;;;;N;;;;; 1DFD3;MODIFIER LETTER SMALL D WITH STROKE;Lm;0;L; 0111;;;;N;;;;; 1DFD4;MODIFIER LETTER SMALL G WITH STROKE;Lm;0;L; 01E5;;;;N;;;;; From 4f8a581c77fdda2d572a16b28e74d865a689108e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 20:15:48 +0200 Subject: [PATCH 9/9] Regenerate UCD --- .../data/ucd/dev/DerivedCoreProperties.txt | 36 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++- unicodetools/data/ucd/dev/Scripts.txt | 5 ++- .../data/ucd/dev/VerticalOrientation.txt | 5 ++- .../dev/auxiliary/SentenceBreakProperty.txt | 17 +++++---- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 ++- .../ucd/dev/extracted/DerivedBidiClass.txt | 5 ++- .../dev/extracted/DerivedCombiningClass.txt | 5 ++- .../dev/extracted/DerivedEastAsianWidth.txt | 5 ++- .../dev/extracted/DerivedGeneralCategory.txt | 17 +++++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 5 ++- 12 files changed, 55 insertions(+), 60 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 2fd5c61fe8..073f7f8467 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ 
-1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-10-21, 16:55:47 GMT +# Date: 2024-10-21, 18:14:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1371,8 +1371,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Alphabetic # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Alphabetic # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; Alphabetic # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; Alphabetic # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; Alphabetic # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; Alphabetic # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU @@ -2140,13 +2139,15 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1DF0B..1DF1E ; Lowercase # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Lowercase # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1DF41..1DF47 ; Lowercase # L& [7] LATIN SMALL LETTER BARRED A..LATIN SMALL LETTER BARRED H WITH HOOK -1DF49..1DF4C ; Lowercase # L& [4] LATIN SMALL LETTER BARRED K..LATIN SMALL LETTER BARRED M WITH HOOK -1DF4E..1DF55 ; Lowercase # L& [8] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER Y WITH LOW STROKE +1DF49 ; 
Lowercase # L& LATIN SMALL LETTER BARRED K +1DF4B..1DF4C ; Lowercase # L& [2] LATIN SMALL LETTER BARRED M..LATIN SMALL LETTER BARRED M WITH HOOK +1DF4E..1DF50 ; Lowercase # L& [3] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER TURNED R WITH STROKE +1DF52..1DF56 ; Lowercase # L& [5] LATIN SMALL LETTER BARRED V..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; Lowercase # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2594 +# Total code points: 2593 # ================================================ @@ -2807,13 +2808,15 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1D7CA ; Uppercase # L& MATHEMATICAL BOLD CAPITAL DIGAMMA 1DF40 ; Uppercase # L& LATIN CAPITAL LETTER BARRED A 1DF48 ; Uppercase # L& LATIN CAPITAL LETTER BARRED K +1DF4A ; Uppercase # L& LATIN CAPITAL LETTER BARRED M 1DF4D ; Uppercase # L& LATIN CAPITAL LETTER BARRED N +1DF51 ; Uppercase # L& LATIN CAPITAL LETTER BARRED V 1E900..1E921 ; Uppercase # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA 1F130..1F149 ; Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1981 +# Total code points: 1983 # ================================================ @@ -2992,7 +2995,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Cased # L& [20] LATIN SMALL 
LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Cased # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; Cased # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE +1DF40..1DF56 ; Cased # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; Cased # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA @@ -3000,7 +3003,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4606 +# Total code points: 4607 # ================================================ @@ -6939,8 +6942,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; ID_Start # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; ID_Start # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; ID_Start # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; ID_Start # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL 
A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W @@ -8330,8 +8332,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; ID_Continue # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; ID_Continue # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; ID_Continue # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; ID_Continue # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU @@ -9131,8 +9132,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; XID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; XID_Start # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; XID_Start # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; XID_Start # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; XID_Start # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 
1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W @@ -10523,8 +10523,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; XID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; XID_Continue # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; XID_Continue # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; XID_Continue # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; XID_Continue # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU @@ -12742,8 +12741,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Grapheme_Base # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Grapheme_Base # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; Grapheme_Base # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; Grapheme_Base # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; Grapheme_Base # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; 
Grapheme_Base # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a1da26fc59..a9ba75704f 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-10-21, 16:55:54 GMT +# Date: 2024-10-21, 18:14:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2468,8 +2468,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; N # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; N # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; N # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; N # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 59af38e88c..96d9446036 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ 
b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-10-21, 16:49:03 GMT +# Date: 2024-10-21, 18:14:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3378,8 +3378,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; AL # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; AL # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; AL # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; AL # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; AL # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; CM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index c878fd4576..dc98f1f61d 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-10-21, 16:56:29 GMT +# Date: 2024-10-21, 18:15:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -701,8 +701,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; Latin # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; Latin # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; Latin # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; Latin # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE # Total code points: 1516 diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 903980e16d..758cb5dc65 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-10-21, 16:56:32 GMT +# Date: 2024-10-21, 18:15:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2320,8 +2320,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1DF0A ; R # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; R # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; R # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; R # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; R # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; R # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; R # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; R # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; R # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 0e859cebe3..b18409cde2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-10-21, 16:56:29 GMT +# Date: 2024-10-21, 18:15:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1318,13 +1318,15 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0B..1DF1E ; Lower # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Lower # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1DF41..1DF47 ; Lower # L& [7] LATIN SMALL LETTER BARRED A..LATIN SMALL LETTER BARRED H WITH HOOK -1DF49..1DF4C ; Lower # L& [4] LATIN SMALL LETTER BARRED K..LATIN SMALL LETTER BARRED M WITH HOOK -1DF4E..1DF55 ; Lower # L& [8] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER Y WITH LOW STROKE +1DF49 ; Lower # L& LATIN SMALL LETTER BARRED K +1DF4B..1DF4C ; Lower # L& [2] LATIN SMALL LETTER BARRED M..LATIN SMALL LETTER BARRED M WITH HOOK +1DF4E..1DF50 ; Lower # L& [3] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER TURNED R WITH STROKE +1DF52..1DF56 ; Lower # L& [5] LATIN SMALL LETTER BARRED V..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; Lower # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2547 +# Total code points: 2546 # ================================================ @@ -1983,13 +1985,15 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1D7CA ; Upper # L& MATHEMATICAL BOLD CAPITAL DIGAMMA 1DF40 ; Upper # L& LATIN CAPITAL LETTER BARRED A 1DF48 ; Upper # L& LATIN CAPITAL LETTER BARRED K +1DF4A ; Upper # L& LATIN CAPITAL LETTER BARRED M 1DF4D ; Upper # L& LATIN CAPITAL LETTER BARRED N +1DF51 ; Upper # L& LATIN CAPITAL LETTER BARRED V 1E900..1E921 ; Upper # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA 1F130..1F149 ; Upper # So [26] SQUARED LATIN CAPITAL 
LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1966 +# Total code points: 1968 # ================================================ @@ -2534,7 +2538,6 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; OLetter # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1DF0A ; OLetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK -1DF56 ; OLetter # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1E100..1E12C ; OLetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; OLetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; OLetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ @@ -2593,7 +2596,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136909 +# Total code points: 136908 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index fc0355b776..86bcde69ac 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-10-21, 16:56:33 GMT +# Date: 2024-10-21, 18:15:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1301,8 +1301,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF0A ; ALetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ALetter # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ALetter # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; ALetter # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; ALetter # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; ALetter # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; ALetter # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; ALetter # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ALetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 48e3d90cc0..81f2df7cbe 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-10-21, 16:55:44 GMT +# Date: 2024-10-21, 18:14:33 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1175,8 +1175,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; L # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; L # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; L # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; L # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; L # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 49fafbac08..f1bbe19f51 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-10-21, 16:55:46 GMT +# Date: 2024-10-21, 18:14:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1936,8 +1936,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1DF0A ; 0 # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; 0 # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; 0 # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; 0 # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; 0 # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; 0 # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; 0 # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; 0 # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; 0 # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 415dbc7231..d68f3c9a54 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-10-21, 16:55:49 GMT +# Date: 2024-10-21, 18:14:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1965,8 +1965,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; N # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; N # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; N # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; N # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; N # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 17939f5f5f..d2a8dd5a24 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-10-21, 16:55:49 GMT +# Date: 2024-10-21, 18:14:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1407,10 +1407,12 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1DF40 ; Lu # LATIN CAPITAL LETTER BARRED A 1DF48 ; Lu # LATIN CAPITAL LETTER BARRED K +1DF4A ; Lu # LATIN CAPITAL LETTER BARRED M 1DF4D ; Lu # LATIN CAPITAL LETTER BARRED N +1DF51 ; Lu # LATIN CAPITAL LETTER BARRED V 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1861 +# Total code points: 1863 # ================================================ @@ -2078,11 +2080,13 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1DF0B..1DF1E ; Ll # [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1DF41..1DF47 ; Ll # [7] LATIN SMALL LETTER BARRED A..LATIN SMALL LETTER BARRED H WITH HOOK -1DF49..1DF4C ; Ll # [4] LATIN SMALL LETTER BARRED K..LATIN SMALL LETTER BARRED M WITH HOOK -1DF4E..1DF55 ; Ll # [8] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER Y WITH LOW STROKE +1DF49 ; Ll # LATIN SMALL LETTER BARRED K +1DF4B..1DF4C ; Ll # [2] LATIN SMALL LETTER BARRED M..LATIN SMALL LETTER BARRED M WITH HOOK +1DF4E..1DF50 ; Ll # [3] LATIN SMALL LETTER BARRED N..LATIN SMALL LETTER TURNED R WITH STROKE +1DF52..1DF56 ; Ll # [5] LATIN SMALL LETTER BARRED V..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2277 +# Total code points: 2276 # ================================================ @@ -2662,7 +2666,6 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1BC80..1BC88 ; Lo # [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; Lo # [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1DF0A ; Lo # LATIN LETTER 
RETROFLEX CLICK WITH RETROFLEX HOOK -1DF56 ; Lo # LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1E100..1E12C ; Lo # [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E14E ; Lo # NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ 1E290..1E2AD ; Lo # [30] TOTO LETTER PA..TOTO LETTER A @@ -2718,7 +2721,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136478 +# Total code points: 136477 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 9fe4599d2b..c00bb1560b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-10-21, 16:55:51 GMT +# Date: 2024-10-21, 18:14:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1520,8 +1520,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; AL # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF40..1DF55 ; AL # L& [22] LATIN CAPITAL LETTER BARRED A..LATIN SMALL LETTER Y WITH LOW STROKE -1DF56 ; AL # Lo LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE +1DF40..1DF56 ; AL # L& [23] LATIN CAPITAL LETTER BARRED A..LATIN LETTER GLOTTAL STOP WITH DOUBLE STROKE 1DFD2..1DFD7 ; AL # Lm [6] MODIFIER LETTER SMALL B WITH STROKE..MODIFIER LETTER SMALL TURNED R WITH STROKE 1E030..1E06D ; AL # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; AL # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W