diff options
| author | Serhiy Storchaka <storchaka@gmail.com> | 2023-10-14 20:24:33 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-10-14 20:24:33 (GMT) |
| commit | ab08ff7882b6181fb785eed7410dbf8030aded70 (patch) | |
| tree | a8012e6a72e4bcd41a07e591d41de37881cce93f /Lib | |
| parent | 12deda763359d46d4eccbb8991afed71fa31a68b (diff) | |
| download | cpython-ab08ff7882b6181fb785eed7410dbf8030aded70.zip cpython-ab08ff7882b6181fb785eed7410dbf8030aded70.tar.gz cpython-ab08ff7882b6181fb785eed7410dbf8030aded70.tar.bz2 | |
bpo-42663: Fix parsing TZ strings in zoneinfo module (GH-23825)
zoneinfo now supports the full range of values in the TZ string
determined by RFC 8536 and detects all invalid formats.
Both Python and C implementations now raise exceptions of the same
type on invalid data.
Diffstat (limited to 'Lib')
| -rw-r--r-- | Lib/test/test_zoneinfo/test_zoneinfo.py | 125 | ||||
| -rw-r--r-- | Lib/zoneinfo/_zoneinfo.py | 86 |
2 files changed, 171 insertions, 40 deletions
diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index ae921f7..3766cea 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1001,6 +1001,80 @@ class TZStrTest(ZoneInfoTestBase): self.assertEqual(dt_act, dt_utc) + def test_extreme_tzstr(self): + tzstrs = [ + # Extreme offset hour + "AAA24", + "AAA+24", + "AAA-24", + "AAA24BBB,J60/2,J300/2", + "AAA+24BBB,J60/2,J300/2", + "AAA-24BBB,J60/2,J300/2", + "AAA4BBB24,J60/2,J300/2", + "AAA4BBB+24,J60/2,J300/2", + "AAA4BBB-24,J60/2,J300/2", + # Extreme offset minutes + "AAA4:00BBB,J60/2,J300/2", + "AAA4:59BBB,J60/2,J300/2", + "AAA4BBB5:00,J60/2,J300/2", + "AAA4BBB5:59,J60/2,J300/2", + # Extreme offset seconds + "AAA4:00:00BBB,J60/2,J300/2", + "AAA4:00:59BBB,J60/2,J300/2", + "AAA4BBB5:00:00,J60/2,J300/2", + "AAA4BBB5:00:59,J60/2,J300/2", + # Extreme total offset + "AAA24:59:59BBB5,J60/2,J300/2", + "AAA-24:59:59BBB5,J60/2,J300/2", + "AAA4BBB24:59:59,J60/2,J300/2", + "AAA4BBB-24:59:59,J60/2,J300/2", + # Extreme months + "AAA4BBB,M12.1.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M12.1.1/2", + # Extreme weeks + "AAA4BBB,M1.5.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M1.5.1/2", + # Extreme weekday + "AAA4BBB,M1.1.6/2,M2.1.1/2", + "AAA4BBB,M1.1.1/2,M2.1.6/2", + # Extreme numeric offset + "AAA4BBB,0/2,20/2", + "AAA4BBB,0/2,0/14", + "AAA4BBB,20/2,365/2", + "AAA4BBB,365/2,365/14", + # Extreme julian offset + "AAA4BBB,J1/2,J20/2", + "AAA4BBB,J1/2,J1/14", + "AAA4BBB,J20/2,J365/2", + "AAA4BBB,J365/2,J365/14", + # Extreme transition hour + "AAA4BBB,J60/167,J300/2", + "AAA4BBB,J60/+167,J300/2", + "AAA4BBB,J60/-167,J300/2", + "AAA4BBB,J60/2,J300/167", + "AAA4BBB,J60/2,J300/+167", + "AAA4BBB,J60/2,J300/-167", + # Extreme transition minutes + "AAA4BBB,J60/2:00,J300/2", + "AAA4BBB,J60/2:59,J300/2", + "AAA4BBB,J60/2,J300/2:00", + "AAA4BBB,J60/2,J300/2:59", + # Extreme transition seconds + "AAA4BBB,J60/2:00:00,J300/2", + "AAA4BBB,J60/2:00:59,J300/2", + 
"AAA4BBB,J60/2,J300/2:00:00", + "AAA4BBB,J60/2,J300/2:00:59", + # Extreme total transition time + "AAA4BBB,J60/167:59:59,J300/2", + "AAA4BBB,J60/-167:59:59,J300/2", + "AAA4BBB,J60/2,J300/167:59:59", + "AAA4BBB,J60/2,J300/-167:59:59", + ] + + for tzstr in tzstrs: + with self.subTest(tzstr=tzstr): + self.zone_from_tzstr(tzstr) + def test_invalid_tzstr(self): invalid_tzstrs = [ "PST8PDT", # DST but no transition specified @@ -1008,16 +1082,33 @@ class TZStrTest(ZoneInfoTestBase): "GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST "GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST "PST8PDT,M3.2.0/2", # Only one transition rule - # Invalid offsets - "STD+25", - "STD-25", - "STD+374", - "STD+374DST,M3.2.0/2,M11.1.0/3", - "STD+23DST+25,M3.2.0/2,M11.1.0/3", - "STD-23DST-25,M3.2.0/2,M11.1.0/3", + # Invalid offset hours + "AAA168", + "AAA+168", + "AAA-168", + "AAA168BBB,J60/2,J300/2", + "AAA+168BBB,J60/2,J300/2", + "AAA-168BBB,J60/2,J300/2", + "AAA4BBB168,J60/2,J300/2", + "AAA4BBB+168,J60/2,J300/2", + "AAA4BBB-168,J60/2,J300/2", + # Invalid offset minutes + "AAA4:0BBB,J60/2,J300/2", + "AAA4:100BBB,J60/2,J300/2", + "AAA4BBB5:0,J60/2,J300/2", + "AAA4BBB5:100,J60/2,J300/2", + # Invalid offset seconds + "AAA4:00:0BBB,J60/2,J300/2", + "AAA4:00:100BBB,J60/2,J300/2", + "AAA4BBB5:00:0,J60/2,J300/2", + "AAA4BBB5:00:100,J60/2,J300/2", # Completely invalid dates "AAA4BBB,M1443339,M11.1.0/3", "AAA4BBB,M3.2.0/2,0349309483959c", + "AAA4BBB,,J300/2", + "AAA4BBB,z,J300/2", + "AAA4BBB,J60/2,", + "AAA4BBB,J60/2,z", # Invalid months "AAA4BBB,M13.1.1/2,M1.1.1/2", "AAA4BBB,M1.1.1/2,M13.1.1/2", @@ -1037,6 +1128,26 @@ class TZStrTest(ZoneInfoTestBase): # Invalid julian offset "AAA4BBB,J0/2,J20/2", "AAA4BBB,J20/2,J366/2", + # Invalid transition time + "AAA4BBB,J60/2/3,J300/2", + "AAA4BBB,J60/2,J300/2/3", + # Invalid transition hour + "AAA4BBB,J60/168,J300/2", + "AAA4BBB,J60/+168,J300/2", + "AAA4BBB,J60/-168,J300/2", + "AAA4BBB,J60/2,J300/168", + "AAA4BBB,J60/2,J300/+168", + 
"AAA4BBB,J60/2,J300/-168", + # Invalid transition minutes + "AAA4BBB,J60/2:0,J300/2", + "AAA4BBB,J60/2:100,J300/2", + "AAA4BBB,J60/2,J300/2:0", + "AAA4BBB,J60/2,J300/2:100", + # Invalid transition seconds + "AAA4BBB,J60/2:00:0,J300/2", + "AAA4BBB,J60/2:00:100,J300/2", + "AAA4BBB,J60/2,J300/2:00:0", + "AAA4BBB,J60/2,J300/2:00:100", ] for invalid_tzstr in invalid_tzstrs: diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py index eede15b..b77dc0e 100644 --- a/Lib/zoneinfo/_zoneinfo.py +++ b/Lib/zoneinfo/_zoneinfo.py @@ -517,8 +517,8 @@ class _DayOffset: __slots__ = ["d", "julian", "hour", "minute", "second"] def __init__(self, d, julian, hour=2, minute=0, second=0): - if not (0 + julian) <= d <= 365: - min_day = 0 + julian + min_day = 0 + julian # convert bool to int + if not min_day <= d <= 365: raise ValueError(f"d must be in [{min_day}, 365], not: {d}") self.d = d @@ -560,11 +560,11 @@ class _CalendarOffset: ) def __init__(self, m, w, d, hour=2, minute=0, second=0): - if not 0 < m <= 12: - raise ValueError("m must be in (0, 12]") + if not 1 <= m <= 12: + raise ValueError("m must be in [1, 12]") - if not 0 < w <= 5: - raise ValueError("w must be in (0, 5]") + if not 1 <= w <= 5: + raise ValueError("w must be in [1, 5]") if not 0 <= d <= 6: raise ValueError("d must be in [0, 6]") @@ -634,18 +634,21 @@ def _parse_tz_str(tz_str): offset_str, *start_end_str = tz_str.split(",", 1) - # fmt: off parser_re = re.compile( - r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" + - r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" + - r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" + - r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" + - r")?" + # dst - r")?$" # stdoff + r""" + (?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>) + (?: + (?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?) + (?: + (?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>) + (?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)? + )? # dst + )? 
# stdoff + """, + re.ASCII|re.VERBOSE ) - # fmt: on - m = parser_re.match(offset_str) + m = parser_re.fullmatch(offset_str) if m is None: raise ValueError(f"{tz_str} is not a valid TZ string") @@ -696,16 +699,17 @@ def _parse_tz_str(tz_str): def _parse_dst_start_end(dststr): - date, *time = dststr.split("/") - if date[0] == "M": + date, *time = dststr.split("/", 1) + type = date[:1] + if type == "M": n_is_julian = False - m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date) + m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII) if m is None: raise ValueError(f"Invalid dst start/end date: {dststr}") date_offset = tuple(map(int, m.groups())) offset = _CalendarOffset(*date_offset) else: - if date[0] == "J": + if type == "J": n_is_julian = True date = date[1:] else: @@ -715,38 +719,54 @@ def _parse_dst_start_end(dststr): offset = _DayOffset(doy, n_is_julian) if time: - time_components = list(map(int, time[0].split(":"))) - n_components = len(time_components) - if n_components < 3: - time_components.extend([0] * (3 - n_components)) - offset.hour, offset.minute, offset.second = time_components + offset.hour, offset.minute, offset.second = _parse_transition_time(time[0]) return offset +def _parse_transition_time(time_str): + match = re.fullmatch( + r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?", + time_str, + re.ASCII + ) + if match is None: + raise ValueError(f"Invalid time: {time_str}") + + h, m, s = (int(v or 0) for v in match.group("h", "m", "s")) + + if h > 167: + raise ValueError( + f"Hour must be in [0, 167]: {time_str}" + ) + + if match.group("sign") == "-": + h, m, s = -h, -m, -s + + return h, m, s + + def _parse_tz_delta(tz_delta): - match = re.match( - r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?", + match = re.fullmatch( + r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?", tz_delta, + re.ASCII ) # Anything passed to this function should already have hit an equivalent # regular expression to find the section to 
parse. assert match is not None, tz_delta - h, m, s = ( - int(v) if v is not None else 0 - for v in map(match.group, ("h", "m", "s")) - ) + h, m, s = (int(v or 0) for v in match.group("h", "m", "s")) total = h * 3600 + m * 60 + s - if not -86400 < total < 86400: + if h > 24: raise ValueError( - f"Offset must be strictly between -24h and +24h: {tz_delta}" + f"Offset hours must be in [0, 24]: {tz_delta}" ) # Yes, +5 maps to an offset of -5h if match.group("sign") != "-": - total *= -1 + total = -total return total |
