summaryrefslogtreecommitdiffstats
path: root/Lib/zoneinfo
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2023-10-14 20:24:33 (GMT)
committerGitHub <noreply@github.com>2023-10-14 20:24:33 (GMT)
commitab08ff7882b6181fb785eed7410dbf8030aded70 (patch)
treea8012e6a72e4bcd41a07e591d41de37881cce93f /Lib/zoneinfo
parent12deda763359d46d4eccbb8991afed71fa31a68b (diff)
downloadcpython-ab08ff7882b6181fb785eed7410dbf8030aded70.zip
cpython-ab08ff7882b6181fb785eed7410dbf8030aded70.tar.gz
cpython-ab08ff7882b6181fb785eed7410dbf8030aded70.tar.bz2
bpo-42663: Fix parsing TZ strings in zoneinfo module (GH-23825)
zoneinfo now supports the full range of values in the TZ string determined by RFC 8536 and detects all invalid formats. Both Python and C implementations now raise exceptions of the same type on invalid data.
Diffstat (limited to 'Lib/zoneinfo')
-rw-r--r--Lib/zoneinfo/_zoneinfo.py86
1 files changed, 53 insertions, 33 deletions
diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py
index eede15b..b77dc0e 100644
--- a/Lib/zoneinfo/_zoneinfo.py
+++ b/Lib/zoneinfo/_zoneinfo.py
@@ -517,8 +517,8 @@ class _DayOffset:
__slots__ = ["d", "julian", "hour", "minute", "second"]
def __init__(self, d, julian, hour=2, minute=0, second=0):
- if not (0 + julian) <= d <= 365:
- min_day = 0 + julian
+ min_day = 0 + julian # convert bool to int
+ if not min_day <= d <= 365:
raise ValueError(f"d must be in [{min_day}, 365], not: {d}")
self.d = d
@@ -560,11 +560,11 @@ class _CalendarOffset:
)
def __init__(self, m, w, d, hour=2, minute=0, second=0):
- if not 0 < m <= 12:
- raise ValueError("m must be in (0, 12]")
+ if not 1 <= m <= 12:
+ raise ValueError("m must be in [1, 12]")
- if not 0 < w <= 5:
- raise ValueError("w must be in (0, 5]")
+ if not 1 <= w <= 5:
+ raise ValueError("w must be in [1, 5]")
if not 0 <= d <= 6:
raise ValueError("d must be in [0, 6]")
@@ -634,18 +634,21 @@ def _parse_tz_str(tz_str):
offset_str, *start_end_str = tz_str.split(",", 1)
- # fmt: off
parser_re = re.compile(
- r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
- r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" +
- r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
- r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" +
- r")?" + # dst
- r")?$" # stdoff
+ r"""
+ (?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
+ (?:
+ (?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
+ (?:
+ (?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
+ (?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
+ )? # dst
+ )? # stdoff
+ """,
+ re.ASCII|re.VERBOSE
)
- # fmt: on
- m = parser_re.match(offset_str)
+ m = parser_re.fullmatch(offset_str)
if m is None:
raise ValueError(f"{tz_str} is not a valid TZ string")
@@ -696,16 +699,17 @@ def _parse_tz_str(tz_str):
def _parse_dst_start_end(dststr):
- date, *time = dststr.split("/")
- if date[0] == "M":
+ date, *time = dststr.split("/", 1)
+ type = date[:1]
+ if type == "M":
n_is_julian = False
- m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date)
+ m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII)
if m is None:
raise ValueError(f"Invalid dst start/end date: {dststr}")
date_offset = tuple(map(int, m.groups()))
offset = _CalendarOffset(*date_offset)
else:
- if date[0] == "J":
+ if type == "J":
n_is_julian = True
date = date[1:]
else:
@@ -715,38 +719,54 @@ def _parse_dst_start_end(dststr):
offset = _DayOffset(doy, n_is_julian)
if time:
- time_components = list(map(int, time[0].split(":")))
- n_components = len(time_components)
- if n_components < 3:
- time_components.extend([0] * (3 - n_components))
- offset.hour, offset.minute, offset.second = time_components
+ offset.hour, offset.minute, offset.second = _parse_transition_time(time[0])
return offset
+def _parse_transition_time(time_str):
+ match = re.fullmatch(
+ r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
+ time_str,
+ re.ASCII
+ )
+ if match is None:
+ raise ValueError(f"Invalid time: {time_str}")
+
+ h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))
+
+ if h > 167:
+ raise ValueError(
+ f"Hour must be in [0, 167]: {time_str}"
+ )
+
+ if match.group("sign") == "-":
+ h, m, s = -h, -m, -s
+
+ return h, m, s
+
+
def _parse_tz_delta(tz_delta):
- match = re.match(
- r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
+ match = re.fullmatch(
+ r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
tz_delta,
+ re.ASCII
)
# Anything passed to this function should already have hit an equivalent
# regular expression to find the section to parse.
assert match is not None, tz_delta
- h, m, s = (
- int(v) if v is not None else 0
- for v in map(match.group, ("h", "m", "s"))
- )
+ h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))
total = h * 3600 + m * 60 + s
- if not -86400 < total < 86400:
+ if h > 24:
raise ValueError(
- f"Offset must be strictly between -24h and +24h: {tz_delta}"
+ f"Offset hours must be in [0, 24]: {tz_delta}"
)
# Yes, +5 maps to an offset of -5h
if match.group("sign") != "-":
- total *= -1
+ total = -total
return total