From 461b70a041baf9db94e489ed8d6814da41931cc0 Mon Sep 17 00:00:00 2001 From: anahta <55799596+anahta@users.noreply.github.com> Date: Tue, 19 Apr 2022 21:13:02 -0400 Subject: [PATCH 1/2] Update parser.py --- arrow/parser.py | 64 ++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index e95d78b0..2230e947 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -81,10 +81,15 @@ class ParserMatchError(ParserError): "d", "a", "A", + "*", + "**", + "***", + "****", ] class _Parts(TypedDict, total=False): + wildcard: str year: int month: int day_of_year: int @@ -103,28 +108,28 @@ class _Parts(TypedDict, total=False): class DateTimeParser: _FORMAT_RE: ClassVar[Pattern[str]] = re.compile( - r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)" + r"((\*)?(\*)?(\*)?(\*)|YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)" ) _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]") - _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}") - _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}") - _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+") - _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}") - _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}") - _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}") + _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}|\*{1,2}") + _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}|\*{1,3}") + _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+|\*{1,}") + _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}|\*{2}") + _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}|\*{3}") + _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}|\*{4}") _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z") _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z") _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+") # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will # break cases like "15 Jul 2000" and a format list (see issue #447) - _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$") - _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$") + _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d|\*+\.?\d|\*+$") + _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d|\*+$") _TIME_RE: ClassVar[Pattern[str]] = re.compile( r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$" ) _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile( - r"(?P\d{4})[\-]?W(?P\d{2})[\-]?(?P\d)?" + r"(?P\d{4}|\*{4})[\-]?W(?P\d{2}|\*{2})[\-]?(?P\d|\*)?" ) _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = { @@ -450,6 +455,11 @@ def _parse_token( "ss", "s", "x", + "*", + "**", + "***", + "****", + ], value: Union[str, bytes, SupportsInt, bytearray], parts: _Parts, @@ -624,23 +634,23 @@ def _build_datetime(parts: _Parts) -> datetime: if day_of_year is not None: _year = parts.get("year") month = parts.get("month") - if _year is None: - raise ParserError( - "Year component is required with the DDD and DDDD tokens." - ) - - if month is not None: - raise ParserError( - "Month component is not allowed with the DDD and DDDD tokens." - ) - - date_string = f"{_year}-{day_of_year}" - try: - dt = datetime.strptime(date_string, "%Y-%j") - except ValueError: - raise ParserError( - f"The provided day of year {day_of_year!r} is invalid." - ) + # if _year is None: + # raise ParserError( + # "Year component is required with the DDD and DDDD tokens." + # ) + + # if month is not None: + # raise ParserError( + # "Month component is not allowed with the DDD and DDDD tokens." + # ) + + # date_string = f"{_year}-{day_of_year}" + # try: + # dt = datetime.strptime(date_string, "%Y-%j") + # except ValueError: + # raise ParserError( + # f"The provided day of year {day_of_year!r} is invalid." + # ) parts["year"] = dt.year parts["month"] = dt.month From a7d20f577db9f163de1cfe34d68d49b2411c27f9 Mon Sep 17 00:00:00 2001 From: anahta Date: Thu, 21 Apr 2022 04:27:55 -0400 Subject: [PATCH 2/2] with tests --- arrow/parser.py | 64 +++++++++++++++++++------------------------- tests/test_parser.py | 8 ++++++ 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 2230e947..e95d78b0 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -81,15 +81,10 @@ class ParserMatchError(ParserError): "d", "a", "A", - "*", - "**", - "***", - "****", ] class _Parts(TypedDict, total=False): - wildcard: str year: int month: int day_of_year: int @@ -108,28 +103,28 @@ class _Parts(TypedDict, total=False): class DateTimeParser: _FORMAT_RE: ClassVar[Pattern[str]] = re.compile( - r"((\*)?(\*)?(\*)?(\*)|YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)" + r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)" ) _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]") - _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}|\*{1,2}") - _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}|\*{1,3}") - _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+|\*{1,}") - _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}|\*{2}") - _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}|\*{3}") - _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}|\*{4}") + _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}") + _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}") + _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+") + _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}") + _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}") + _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}") _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z") _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z") _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+") # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will # break cases like "15 Jul 2000" and a format list (see issue #447) - _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d|\*+\.?\d|\*+$") - _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d|\*+$") + _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$") + _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$") _TIME_RE: ClassVar[Pattern[str]] = re.compile( r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$" ) _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile( - r"(?P\d{4}|\*{4})[\-]?W(?P\d{2}|\*{2})[\-]?(?P\d|\*)?" + r"(?P\d{4})[\-]?W(?P\d{2})[\-]?(?P\d)?" ) _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = { @@ -455,11 +450,6 @@ def _parse_token( "ss", "s", "x", - "*", - "**", - "***", - "****", - ], value: Union[str, bytes, SupportsInt, bytearray], parts: _Parts, @@ -634,23 +624,23 @@ def _build_datetime(parts: _Parts) -> datetime: if day_of_year is not None: _year = parts.get("year") month = parts.get("month") - # if _year is None: - # raise ParserError( - # "Year component is required with the DDD and DDDD tokens." - # ) - - # if month is not None: - # raise ParserError( - # "Month component is not allowed with the DDD and DDDD tokens." - # ) - - # date_string = f"{_year}-{day_of_year}" - # try: - # dt = datetime.strptime(date_string, "%Y-%j") - # except ValueError: - # raise ParserError( - # f"The provided day of year {day_of_year!r} is invalid." - # ) + if _year is None: + raise ParserError( + "Year component is required with the DDD and DDDD tokens." + ) + + if month is not None: + raise ParserError( + "Month component is not allowed with the DDD and DDDD tokens." + ) + + date_string = f"{_year}-{day_of_year}" + try: + dt = datetime.strptime(date_string, "%Y-%j") + except ValueError: + raise ParserError( + f"The provided day of year {day_of_year!r} is invalid." + ) parts["year"] = dt.year parts["month"] = dt.month diff --git a/tests/test_parser.py b/tests/test_parser.py index 4a4cfe41..eebae384 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1320,6 +1320,13 @@ def test_iso8601_basic_format(self): 2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200) ) + #wildcard token tests + assert self.parser.parse_iso("2018**17") == datetime(2018, "**", 17) + assert self.parser.parse_iso("****0127") == datetime("****", 1, 27) + assert self.parser.parse_iso("102909**") == datetime(1029, 9,"*") + assert self.parser.parse_iso("2022****") == datetime(2022, "**","*") + + # ordinal in basic format: YYYYDDDD assert self.parser.parse_iso("1998136") == datetime(1998, 5, 16) @@ -1337,6 +1344,7 @@ def test_iso8601_basic_format(self): # too many digits in time with pytest.raises(ParserError): self.parser.parse_iso("20180517T1055213Z") + def test_midnight_end_day(self): assert self.parser.parse_iso("2019-10-30T24:00:00") == datetime(