Skip to content

Commit da4c987

Browse files
committed
core: Move isAlphaNum to Compat module
1 parent 2089651 commit da4c987

File tree

6 files changed

+66
-17
lines changed

6 files changed

+66
-17
lines changed

unicode-data/Changelog.md

+5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
- Removed deprecated `Unicode.Char.Numeric.isNumber`.
1616
Use `Unicode.Char.Numeric.Compat.isNumber` instead.
1717

18+
### Deprecations
19+
20+
- `Unicode.Char.General.isAlphaNum`.
21+
Use `Unicode.Char.General.Compat.isAlphaNum` instead.
22+
1823
## 0.5.0 (July 2024)
1924

2025
- Fix the inlining of `Addr#` literals and reduce their size. This results in

unicode-data/bench/Unicode/Char/General/CompatBench.hs

+4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ benchmarks r = bgroupWithCharRange "Unicode.Char.General.Compat" r $ \chars ->
2020
[ Bench "base" Char.isAlpha
2121
, Bench "unicode-data" GC.isAlpha
2222
]
23+
, bgroupWithChars "isAlphaNum" chars
24+
[ Bench "base" Char.isAlphaNum
25+
, Bench "unicode-data" GC.isAlphaNum
26+
]
2327
, bgroupWithChars "isLetter" chars
2428
[ Bench "base" Char.isLetter
2529
, Bench "unicode-data" GC.isLetter

unicode-data/bench/Unicode/Char/GeneralBench.hs

-4
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@ benchmarks r = bgroupWithCharRange "Unicode.Char.General" r $ \chars ->
2525
, bgroupWithChars "isAlphabetic" chars
2626
[ Bench "unicode-data" G.isAlphabetic
2727
]
28-
, bgroupWithChars "isAlphaNum" chars
29-
[ Bench "base" Char.isAlphaNum
30-
, Bench "unicode-data" G.isAlphaNum
31-
]
3228
, bgroupWithChars "isControl" chars
3329
[ Bench "base" Char.isControl
3430
, Bench "unicode-data" G.isControl

unicode-data/lib/Unicode/Char.hs

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ where
4747
import Data.Char (chr, ord)
4848
import Unicode.Char.Case hiding (Unfold(..), Step(..))
4949
import Unicode.Char.Case.Compat
50-
import Unicode.Char.General
50+
import Unicode.Char.General hiding (isAlphaNum)
5151
import Unicode.Char.General.Compat
5252
import Unicode.Char.Identifiers
5353
import Unicode.Char.Numeric

unicode-data/lib/Unicode/Char/General.hs

+12-12
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,12 @@ import Control.Exception (assert)
100100
import Data.Bits ((.&.))
101101
import Data.Char (isAscii, isLatin1, isAsciiUpper, isAsciiLower, ord)
102102
import Data.Ix (Ix)
103-
import Unicode.Internal.Division (quotRem28)
104103

104+
import qualified Unicode.Char.General.Compat as Compat
105105
import qualified Unicode.Internal.Char.DerivedCoreProperties as P
106106
import qualified Unicode.Internal.Char.PropList as P
107107
import qualified Unicode.Internal.Char.UnicodeData.GeneralCategory as UC
108+
import Unicode.Internal.Division (quotRem28)
108109

109110
--------------------------------------------------------------------------------
110111
-- General Category
@@ -381,20 +382,19 @@ following 'GeneralCategory's, or 'False' otherwise:
381382
382383
prop> isAlphaNum c == Data.Char.isAlphaNum c
383384
385+
__Note:__ this function does not always agree with 'isAlphabetic'; for example:
386+
387+
>>> isAlphabetic '\x345'
388+
True
389+
>>> isAlphaNum '\x345'
390+
False
391+
384392
@since 0.3.0
385393
-}
394+
{-# INLINE isAlphaNum #-}
395+
{-# DEPRECATED isAlphaNum "Use Unicode.Char.General.Compat.isAlphaNum instead." #-}
386396
isAlphaNum :: Char -> Bool
387-
isAlphaNum c =
388-
let !cp = ord c
389-
-- NOTE: The guard constant is updated at each Unicode revision.
390-
-- It must be < 0x40000 to be accepted by generalCategoryPlanes0To3.
391-
in cp <= UC.MaxIsAlphaNum &&
392-
let !gc = UC.generalCategoryPlanes0To3 cp
393-
in gc <= UC.OtherLetter ||
394-
(UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
395-
-- Use the following in case the previous code is not valid anymore:
396-
-- gc <= UC.OtherLetter || (UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
397-
-- where !gc = UC.generalCategory c
397+
isAlphaNum = Compat.isAlphaNum
398398

399399
{-| Selects control characters, which are the non-printing characters
400400
of the Latin-1 subset of Unicode.

unicode-data/lib/Unicode/Char/General/Compat.hs

+44
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,64 @@
1313
--
1414
module Unicode.Char.General.Compat
1515
( isAlpha
16+
, isAlphaNum
1617
, isLetter
1718
, isSpace
1819
) where
1920

2021
import Data.Char (ord)
2122
import qualified Unicode.Internal.Char.UnicodeData.GeneralCategory as UC
2223

24+
-- $setup
25+
-- import qualified Unicode.Char.General
26+
2327
-- | Same as 'isLetter'.
2428
--
2529
-- @since 0.3.0
2630
{-# INLINE isAlpha #-}
2731
isAlpha :: Char -> Bool
2832
isAlpha = isLetter
2933

34+
{-| Selects alphabetic or numeric Unicode characters.
35+
36+
This function returns 'True' if its argument has one of the
37+
following 'GeneralCategory's, or 'False' otherwise:
38+
39+
* 'UppercaseLetter'
40+
* 'LowercaseLetter'
41+
* 'TitlecaseLetter'
42+
* 'ModifierLetter'
43+
* 'OtherLetter'
44+
* 'DecimalNumber'
45+
* 'LetterNumber'
46+
* 'OtherNumber'
47+
48+
prop> isAlphaNum c == Data.Char.isAlphaNum c
49+
50+
__Note:__ this function does not always agree with 'Unicode.Char.General.isAlphabetic'; for example:
51+
52+
>>> Unicode.Char.General.isAlphabetic '\x345'
53+
True
54+
>>> isAlphaNum '\x345'
55+
False
56+
57+
@since 0.6.0 moved to the Compat module
58+
59+
@since 0.3.0
60+
-}
61+
isAlphaNum :: Char -> Bool
62+
isAlphaNum c =
63+
let !cp = ord c
64+
-- NOTE: The guard constant is updated at each Unicode revision.
65+
-- It must be < 0x40000 to be accepted by generalCategoryPlanes0To3.
66+
in cp <= UC.MaxIsAlphaNum &&
67+
let !gc = UC.generalCategoryPlanes0To3 cp
68+
in gc <= UC.OtherLetter ||
69+
(UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
70+
-- Use the following in case the previous code is not valid anymore:
71+
-- gc <= UC.OtherLetter || (UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
72+
-- where !gc = UC.generalCategory c
73+
3074
{-| Selects alphabetic Unicode characters (lower-case, upper-case and title-case
3175
letters, plus letters of caseless scripts and modifiers letters).
3276

0 commit comments

Comments
 (0)