Skip to content

Commit 5961602

Browse files
miss-islington, tonghuaroot, StanFromIreland
authored
[3.15] gh-152248: Reject a POSIX TZ abbreviation with non-ASCII-letters in pure-Python zoneinfo (GH-152249) (#152650)
(cherry picked from commit 449122e) Co-authored-by: tonghuaroot (童话) <tonghuaroot@gmail.com> Co-authored-by: Stan Ulbrych <stan@python.org>
1 parent ad55e47 commit 5961602

4 files changed

Lines changed: 26 additions & 4 deletions

File tree

Lib/test/test_zoneinfo/test_zoneinfo.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1009,14 +1009,14 @@ def populate_tzstr_header(cls):
10091009

10101010
cls._tzif_header = bytes(out)
10111011

1012-
def zone_from_tzstr(self, tzstr):
1012+
def zone_from_tzstr(self, tzstr, encoding="ascii"):
10131013
"""Creates a zoneinfo file following a POSIX rule."""
10141014
zonefile = io.BytesIO(self._tzif_header)
10151015
zonefile.seek(0, 2)
10161016

10171017
# Write the footer
10181018
zonefile.write(b"\x0A")
1019-
zonefile.write(tzstr.encode("ascii"))
1019+
zonefile.write(tzstr.encode(encoding))
10201020
zonefile.write(b"\x0A")
10211021

10221022
zonefile.seek(0)
@@ -1150,6 +1150,13 @@ def test_invalid_tzstr(self):
11501150
"+11", # Unquoted alphanumeric
11511151
"GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST
11521152
"GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST
1153+
# Unquoted abbreviation with embedded or leading whitespace
1154+
"AB C3",
1155+
" A B 3",
1156+
"AAA4BB B,J60/2,J300/2", # Embedded whitespace in DST
1157+
# Empty quoted abbreviation
1158+
"<>5",
1159+
"AAA4<>,M3.2.0/2,M11.1.0/3",
11531160
"PST8PDT,M3.2.0/2", # Only one transition rule
11541161
# Invalid offset hours
11551162
"AAA168",
@@ -1232,6 +1239,15 @@ def test_invalid_tzstr(self):
12321239
with self.assertRaisesRegex(ValueError, tzstr_regex):
12331240
self.zone_from_tzstr(invalid_tzstr)
12341241

1242+
def test_invalid_tzstr_non_ascii_abbr(self):
1243+
tzstr = "ABÀC3"
1244+
if self.module is py_zoneinfo:
1245+
expected = re.escape(tzstr)
1246+
else:
1247+
expected = re.escape(repr(tzstr.encode("utf-8")))
1248+
with self.assertRaisesRegex(ValueError, expected):
1249+
self.zone_from_tzstr(tzstr, encoding="utf-8")
1250+
12351251
@classmethod
12361252
def _populate_test_cases(cls):
12371253
# This method uses a somewhat unusual style in that it populates the

Lib/zoneinfo/_zoneinfo.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -640,11 +640,11 @@ def _parse_tz_str(tz_str):
640640

641641
parser_re = re.compile(
642642
r"""
643-
(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
643+
(?P<std>[a-zA-Z]+|<[a-zA-Z0-9+-]+>)
644644
(?:
645645
(?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
646646
(?:
647-
(?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
647+
(?P<dst>[a-zA-Z]+|<[a-zA-Z0-9+-]+>)
648648
(?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
649649
)? # dst
650650
)? # stdoff
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Make the C and pure-Python :mod:`zoneinfo` parsers validate POSIX TZ
2+
abbreviations consistently, rejecting unquoted abbreviations with non-ASCII-letter
3+
characters and empty quoted abbreviations. Patch by tonghuaroot.

Modules/_zoneinfo.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1762,6 +1762,9 @@ parse_abbr(const char **p, PyObject **abbr)
17621762
ptr++;
17631763
}
17641764
str_end = ptr;
1765+
if (str_end == str_start) {
1766+
return -1;
1767+
}
17651768
ptr++;
17661769
}
17671770
else {

0 commit comments

Comments
 (0)