diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2023-10-14 20:24:33 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-14 20:24:33 (GMT) |
commit | ab08ff7882b6181fb785eed7410dbf8030aded70 (patch) | |
tree | a8012e6a72e4bcd41a07e591d41de37881cce93f | |
parent | 12deda763359d46d4eccbb8991afed71fa31a68b (diff) | |
download | cpython-ab08ff7882b6181fb785eed7410dbf8030aded70.zip cpython-ab08ff7882b6181fb785eed7410dbf8030aded70.tar.gz cpython-ab08ff7882b6181fb785eed7410dbf8030aded70.tar.bz2 |
bpo-42663: Fix parsing TZ strings in zoneinfo module (GH-23825)
zipinfo now supports the full range of values in the TZ string
determined by RFC 8536 and detects all invalid formats.
Both Python and C implementations now raise exceptions of the same
type on invalid data.
-rw-r--r-- | Lib/test/test_zoneinfo/test_zoneinfo.py | 125 | ||||
-rw-r--r-- | Lib/zoneinfo/_zoneinfo.py | 86 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2022-05-06-15-49-57.gh-issue-86826.rf006W.rst | 4 | ||||
-rw-r--r-- | Modules/_zoneinfo.c | 369 |
4 files changed, 326 insertions, 258 deletions
diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index ae921f7..3766cea 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1001,6 +1001,80 @@ class TZStrTest(ZoneInfoTestBase): self.assertEqual(dt_act, dt_utc) + def test_extreme_tzstr(self): + tzstrs = [ + # Extreme offset hour + "AAA24", + "AAA+24", + "AAA-24", + "AAA24BBB,J60/2,J300/2", + "AAA+24BBB,J60/2,J300/2", + "AAA-24BBB,J60/2,J300/2", + "AAA4BBB24,J60/2,J300/2", + "AAA4BBB+24,J60/2,J300/2", + "AAA4BBB-24,J60/2,J300/2", + # Extreme offset minutes + "AAA4:00BBB,J60/2,J300/2", + "AAA4:59BBB,J60/2,J300/2", + "AAA4BBB5:00,J60/2,J300/2", + "AAA4BBB5:59,J60/2,J300/2", + # Extreme offset seconds + "AAA4:00:00BBB,J60/2,J300/2", + "AAA4:00:59BBB,J60/2,J300/2", + "AAA4BBB5:00:00,J60/2,J300/2", + "AAA4BBB5:00:59,J60/2,J300/2", + # Extreme total offset + "AAA24:59:59BBB5,J60/2,J300/2", + "AAA-24:59:59BBB5,J60/2,J300/2", + "AAA4BBB24:59:59,J60/2,J300/2", + "AAA4BBB-24:59:59,J60/2,J300/2", + # Extreme months + "AAA4BBB,M12.1.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M12.1.1/2", + # Extreme weeks + "AAA4BBB,M1.5.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M1.5.1/2", + # Extreme weekday + "AAA4BBB,M1.1.6/2,M2.1.1/2", + "AAA4BBB,M1.1.1/2,M2.1.6/2", + # Extreme numeric offset + "AAA4BBB,0/2,20/2", + "AAA4BBB,0/2,0/14", + "AAA4BBB,20/2,365/2", + "AAA4BBB,365/2,365/14", + # Extreme julian offset + "AAA4BBB,J1/2,J20/2", + "AAA4BBB,J1/2,J1/14", + "AAA4BBB,J20/2,J365/2", + "AAA4BBB,J365/2,J365/14", + # Extreme transition hour + "AAA4BBB,J60/167,J300/2", + "AAA4BBB,J60/+167,J300/2", + "AAA4BBB,J60/-167,J300/2", + "AAA4BBB,J60/2,J300/167", + "AAA4BBB,J60/2,J300/+167", + "AAA4BBB,J60/2,J300/-167", + # Extreme transition minutes + "AAA4BBB,J60/2:00,J300/2", + "AAA4BBB,J60/2:59,J300/2", + "AAA4BBB,J60/2,J300/2:00", + "AAA4BBB,J60/2,J300/2:59", + # Extreme transition seconds + "AAA4BBB,J60/2:00:00,J300/2", + "AAA4BBB,J60/2:00:59,J300/2", + "AAA4BBB,J60/2,J300/2:00:00", + "AAA4BBB,J60/2,J300/2:00:59", + # Extreme total transition time + "AAA4BBB,J60/167:59:59,J300/2", + "AAA4BBB,J60/-167:59:59,J300/2", + "AAA4BBB,J60/2,J300/167:59:59", + "AAA4BBB,J60/2,J300/-167:59:59", + ] + + for tzstr in tzstrs: + with self.subTest(tzstr=tzstr): + self.zone_from_tzstr(tzstr) + def test_invalid_tzstr(self): invalid_tzstrs = [ "PST8PDT", # DST but no transition specified @@ -1008,16 +1082,33 @@ class TZStrTest(ZoneInfoTestBase): "GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST "GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST "PST8PDT,M3.2.0/2", # Only one transition rule - # Invalid offsets - "STD+25", - "STD-25", - "STD+374", - "STD+374DST,M3.2.0/2,M11.1.0/3", - "STD+23DST+25,M3.2.0/2,M11.1.0/3", - "STD-23DST-25,M3.2.0/2,M11.1.0/3", + # Invalid offset hours + "AAA168", + "AAA+168", + "AAA-168", + "AAA168BBB,J60/2,J300/2", + "AAA+168BBB,J60/2,J300/2", + "AAA-168BBB,J60/2,J300/2", + "AAA4BBB168,J60/2,J300/2", + "AAA4BBB+168,J60/2,J300/2", + "AAA4BBB-168,J60/2,J300/2", + # Invalid offset minutes + "AAA4:0BBB,J60/2,J300/2", + "AAA4:100BBB,J60/2,J300/2", + "AAA4BBB5:0,J60/2,J300/2", + "AAA4BBB5:100,J60/2,J300/2", + # Invalid offset seconds + "AAA4:00:0BBB,J60/2,J300/2", + "AAA4:00:100BBB,J60/2,J300/2", + "AAA4BBB5:00:0,J60/2,J300/2", + "AAA4BBB5:00:100,J60/2,J300/2", # Completely invalid dates "AAA4BBB,M1443339,M11.1.0/3", "AAA4BBB,M3.2.0/2,0349309483959c", + "AAA4BBB,,J300/2", + "AAA4BBB,z,J300/2", + "AAA4BBB,J60/2,", + "AAA4BBB,J60/2,z", # Invalid months "AAA4BBB,M13.1.1/2,M1.1.1/2", "AAA4BBB,M1.1.1/2,M13.1.1/2", @@ -1037,6 +1128,26 @@ class TZStrTest(ZoneInfoTestBase): # Invalid julian offset "AAA4BBB,J0/2,J20/2", "AAA4BBB,J20/2,J366/2", + # Invalid transition time + "AAA4BBB,J60/2/3,J300/2", + "AAA4BBB,J60/2,J300/2/3", + # Invalid transition hour + "AAA4BBB,J60/168,J300/2", + "AAA4BBB,J60/+168,J300/2", + "AAA4BBB,J60/-168,J300/2", + "AAA4BBB,J60/2,J300/168", + "AAA4BBB,J60/2,J300/+168", + "AAA4BBB,J60/2,J300/-168", + # Invalid transition minutes + "AAA4BBB,J60/2:0,J300/2", + "AAA4BBB,J60/2:100,J300/2", + "AAA4BBB,J60/2,J300/2:0", + "AAA4BBB,J60/2,J300/2:100", + # Invalid transition seconds + "AAA4BBB,J60/2:00:0,J300/2", + "AAA4BBB,J60/2:00:100,J300/2", + "AAA4BBB,J60/2,J300/2:00:0", + "AAA4BBB,J60/2,J300/2:00:100", ] for invalid_tzstr in invalid_tzstrs: diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py index eede15b..b77dc0e 100644 --- a/Lib/zoneinfo/_zoneinfo.py +++ b/Lib/zoneinfo/_zoneinfo.py @@ -517,8 +517,8 @@ class _DayOffset: __slots__ = ["d", "julian", "hour", "minute", "second"] def __init__(self, d, julian, hour=2, minute=0, second=0): - if not (0 + julian) <= d <= 365: - min_day = 0 + julian + min_day = 0 + julian # convert bool to int + if not min_day <= d <= 365: raise ValueError(f"d must be in [{min_day}, 365], not: {d}") self.d = d @@ -560,11 +560,11 @@ class _CalendarOffset: ) def __init__(self, m, w, d, hour=2, minute=0, second=0): - if not 0 < m <= 12: - raise ValueError("m must be in (0, 12]") + if not 1 <= m <= 12: + raise ValueError("m must be in [1, 12]") - if not 0 < w <= 5: - raise ValueError("w must be in (0, 5]") + if not 1 <= w <= 5: + raise ValueError("w must be in [1, 5]") if not 0 <= d <= 6: raise ValueError("d must be in [0, 6]") @@ -634,18 +634,21 @@ def _parse_tz_str(tz_str): offset_str, *start_end_str = tz_str.split(",", 1) - # fmt: off parser_re = re.compile( - r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" + - r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" + - r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" + - r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" + - r")?" + # dst - r")?$" # stdoff + r""" + (?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>) + (?: + (?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?) + (?: + (?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>) + (?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)? + )? # dst + )? # stdoff + """, + re.ASCII|re.VERBOSE ) - # fmt: on - m = parser_re.match(offset_str) + m = parser_re.fullmatch(offset_str) if m is None: raise ValueError(f"{tz_str} is not a valid TZ string") @@ -696,16 +699,17 @@ def _parse_tz_str(tz_str): def _parse_dst_start_end(dststr): - date, *time = dststr.split("/") - if date[0] == "M": + date, *time = dststr.split("/", 1) + type = date[:1] + if type == "M": n_is_julian = False - m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date) + m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII) if m is None: raise ValueError(f"Invalid dst start/end date: {dststr}") date_offset = tuple(map(int, m.groups())) offset = _CalendarOffset(*date_offset) else: - if date[0] == "J": + if type == "J": n_is_julian = True date = date[1:] else: @@ -715,38 +719,54 @@ def _parse_dst_start_end(dststr): offset = _DayOffset(doy, n_is_julian) if time: - time_components = list(map(int, time[0].split(":"))) - n_components = len(time_components) - if n_components < 3: - time_components.extend([0] * (3 - n_components)) - offset.hour, offset.minute, offset.second = time_components + offset.hour, offset.minute, offset.second = _parse_transition_time(time[0]) return offset +def _parse_transition_time(time_str): + match = re.fullmatch( + r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?", + time_str, + re.ASCII + ) + if match is None: + raise ValueError(f"Invalid time: {time_str}") + + h, m, s = (int(v or 0) for v in match.group("h", "m", "s")) + + if h > 167: + raise ValueError( + f"Hour must be in [0, 167]: {time_str}" + ) + + if match.group("sign") == "-": + h, m, s = -h, -m, -s + + return h, m, s + + def _parse_tz_delta(tz_delta): - match = re.match( - r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?", + match = re.fullmatch( + r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?", tz_delta, + re.ASCII ) # Anything passed to this function should already have hit an equivalent # regular expression to find the section to parse. assert match is not None, tz_delta - h, m, s = ( - int(v) if v is not None else 0 - for v in map(match.group, ("h", "m", "s")) - ) + h, m, s = (int(v or 0) for v in match.group("h", "m", "s")) total = h * 3600 + m * 60 + s - if not -86400 < total < 86400: + if h > 24: raise ValueError( - f"Offset must be strictly between -24h and +24h: {tz_delta}" + f"Offset hours must be in [0, 24]: {tz_delta}" ) # Yes, +5 maps to an offset of -5h if match.group("sign") != "-": - total *= -1 + total = -total return total diff --git a/Misc/NEWS.d/next/Library/2022-05-06-15-49-57.gh-issue-86826.rf006W.rst b/Misc/NEWS.d/next/Library/2022-05-06-15-49-57.gh-issue-86826.rf006W.rst new file mode 100644 index 0000000..02cd75e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-06-15-49-57.gh-issue-86826.rf006W.rst @@ -0,0 +1,4 @@ +:mod:`zipinfo` now supports the full range of values in the TZ string +determined by RFC 8536 and detects all invalid formats. +Both Python and C implementations now raise exceptions of the same +type on invalid data. diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index eb4e522..3f14dd1 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -61,21 +61,21 @@ struct TransitionRuleType { typedef struct { TransitionRuleType base; - uint8_t month; - uint8_t week; - uint8_t day; - int8_t hour; - int8_t minute; - int8_t second; + uint8_t month; /* 1 - 12 */ + uint8_t week; /* 1 - 5 */ + uint8_t day; /* 0 - 6 */ + int16_t hour; /* -167 - 167, RFC 8536 §3.3.1 */ + int8_t minute; /* signed 2 digits */ + int8_t second; /* signed 2 digits */ } CalendarRule; typedef struct { TransitionRuleType base; - uint8_t julian; - unsigned int day; - int8_t hour; - int8_t minute; - int8_t second; + uint8_t julian; /* 0, 1 */ + uint16_t day; /* 0 - 365 */ + int16_t hour; /* -167 - 167, RFC 8536 §3.3.1 */ + int8_t minute; /* signed 2 digits */ + int8_t second; /* signed 2 digits */ } DayRule; struct StrongCacheNode { @@ -133,15 +133,14 @@ ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, static int parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out); -static Py_ssize_t -parse_abbr(const char *const p, PyObject **abbr); -static Py_ssize_t -parse_tz_delta(const char *const p, long *total_seconds); -static Py_ssize_t -parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, - int8_t *second); -static Py_ssize_t -parse_transition_rule(const char *const p, TransitionRuleType **out); +static int +parse_abbr(const char **p, PyObject **abbr); +static int +parse_tz_delta(const char **p, long *total_seconds); +static int +parse_transition_time(const char **p, int *hour, int *minute, int *second); +static int +parse_transition_rule(const char **p, TransitionRuleType **out); static _ttinfo * find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year); @@ -1327,14 +1326,14 @@ calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year) } int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL; - return ((ordinal * 86400) + (int64_t)(self->hour * 3600) + - (int64_t)(self->minute * 60) + (int64_t)(self->second)); + return ordinal * 86400 + (int64_t)self->hour * 3600 + + (int64_t)self->minute * 60 + self->second; } /* Constructor for CalendarRule. */ int -calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, - int8_t minute, int8_t second, CalendarRule *out) +calendarrule_new(int month, int week, int day, int hour, + int minute, int second, CalendarRule *out) { // These bounds come from the POSIX standard, which describes an Mm.n.d // rule as: @@ -1343,33 +1342,36 @@ calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which // may occur in either the fourth or the fifth week). Week 1 is the first // week in which the d'th day occurs. Day zero is Sunday. - if (month <= 0 || month > 12) { - PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]"); + if (month < 1 || month > 12) { + PyErr_Format(PyExc_ValueError, "Month must be in [1, 12]"); return -1; } - if (week <= 0 || week > 5) { - PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]"); + if (week < 1 || week > 5) { + PyErr_Format(PyExc_ValueError, "Week must be in [1, 5]"); return -1; } - // If the 'day' parameter type is changed to a signed type, - // "day < 0" check must be added. - if (/* day < 0 || */ day > 6) { + if (day < 0 || day > 6) { PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]"); return -1; } + if (hour < -167 || hour > 167) { + PyErr_Format(PyExc_ValueError, "Hour must be in [0, 167]"); + return -1; + } + TransitionRuleType base = {&calendarrule_year_to_timestamp}; CalendarRule new_offset = { .base = base, - .month = month, - .week = week, - .day = day, - .hour = hour, - .minute = minute, - .second = second, + .month = (uint8_t)month, + .week = (uint8_t)week, + .day = (uint8_t)day, + .hour = (int16_t)hour, + .minute = (int8_t)minute, + .second = (int8_t)second, }; *out = new_offset; @@ -1409,40 +1411,45 @@ dayrule_year_to_timestamp(TransitionRuleType *base_self, int year) // always transitions on a given calendar day (other than February 29th), // you would use a Julian day, e.g. J91 always refers to April 1st and J365 // always refers to December 31st. - unsigned int day = self->day; + uint16_t day = self->day; if (self->julian && day >= 59 && is_leap_year(year)) { day += 1; } - return ((days_before_year + day) * 86400) + (self->hour * 3600) + - (self->minute * 60) + self->second; + return (days_before_year + day) * 86400 + (int64_t)self->hour * 3600 + + (int64_t)self->minute * 60 + self->second; } /* Constructor for DayRule. */ static int -dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute, - int8_t second, DayRule *out) +dayrule_new(int julian, int day, int hour, int minute, + int second, DayRule *out) { // The POSIX standard specifies that Julian days must be in the range (1 <= // n <= 365) and that non-Julian (they call it "0-based Julian") days must // be in the range (0 <= n <= 365). if (day < julian || day > 365) { - PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u", + PyErr_Format(PyExc_ValueError, "day must be in [%d, 365], not: %d", julian, day); return -1; } + if (hour < -167 || hour > 167) { + PyErr_Format(PyExc_ValueError, "Hour must be in [0, 167]"); + return -1; + } + TransitionRuleType base = { &dayrule_year_to_timestamp, }; DayRule tmp = { .base = base, - .julian = julian, - .day = day, - .hour = hour, - .minute = minute, - .second = second, + .julian = (uint8_t)julian, + .day = (int16_t)day, + .hour = (int16_t)hour, + .minute = (int8_t)minute, + .second = (int8_t)second, }; *out = tmp; @@ -1599,21 +1606,18 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out) const char *p = tz_str; // Read the `std` abbreviation, which must be at least 3 characters long. - Py_ssize_t num_chars = parse_abbr(p, &std_abbr); - if (num_chars < 1) { - PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); + if (parse_abbr(&p, &std_abbr)) { + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); + } goto error; } - p += num_chars; - // Now read the STD offset, which is required - num_chars = parse_tz_delta(p, &std_offset); - if (num_chars < 0) { + if (parse_tz_delta(&p, &std_offset)) { PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj); goto error; } - p += num_chars; // If the string ends here, there is no DST, otherwise we must parse the // DST abbreviation and start and end dates and times. @@ -1621,12 +1625,12 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out) goto complete; } - num_chars = parse_abbr(p, &dst_abbr); - if (num_chars < 1) { - PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); + if (parse_abbr(&p, &dst_abbr)) { + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); + } goto error; } - p += num_chars; if (*p == ',') { // From the POSIX standard: @@ -1636,14 +1640,11 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out) dst_offset = std_offset + 3600; } else { - num_chars = parse_tz_delta(p, &dst_offset); - if (num_chars < 0) { + if (parse_tz_delta(&p, &dst_offset)) { PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R", tz_str_obj); goto error; } - - p += num_chars; } TransitionRuleType **transitions[2] = {&start, &end}; @@ -1656,14 +1657,12 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out) } p++; - num_chars = parse_transition_rule(p, transitions[i]); - if (num_chars < 0) { + if (parse_transition_rule(&p, transitions[i])) { PyErr_Format(PyExc_ValueError, "Malformed transition rule in TZ string: %R", tz_str_obj); goto error; } - p += num_chars; } if (*p != '\0') { @@ -1698,21 +1697,25 @@ error: } static int -parse_uint(const char *const p, uint8_t *value) +parse_digits(const char **p, int min, int max, int *value) { - if (!Py_ISDIGIT(*p)) { - return -1; + assert(max <= 3); + *value = 0; + for (int i = 0; i < max; i++, (*p)++) { + if (!Py_ISDIGIT(**p)) { + return (i < min) ? -1 : 0; + } + *value *= 10; + *value += (**p) - '0'; } - - *value = (*p) - '0'; return 0; } /* Parse the STD and DST abbreviations from a TZ string. */ -static Py_ssize_t -parse_abbr(const char *const p, PyObject **abbr) +static int +parse_abbr(const char **p, PyObject **abbr) { - const char *ptr = p; + const char *ptr = *p; const char *str_start; const char *str_end; @@ -1741,7 +1744,7 @@ parse_abbr(const char *const p, PyObject **abbr) ptr++; } else { - str_start = p; + str_start = ptr; // From the POSIX standard: // // In the unquoted form, all characters in these fields shall be @@ -1751,6 +1754,9 @@ parse_abbr(const char *const p, PyObject **abbr) ptr++; } str_end = ptr; + if (str_end == str_start) { + return -1; + } } *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start); @@ -1758,12 +1764,13 @@ parse_abbr(const char *const p, PyObject **abbr) return -1; } - return ptr - p; + *p = ptr; + return 0; } /* Parse a UTC offset from a TZ str. */ -static Py_ssize_t -parse_tz_delta(const char *const p, long *total_seconds) +static int +parse_tz_delta(const char **p, long *total_seconds) { // From the POSIX spec: // @@ -1778,75 +1785,30 @@ parse_tz_delta(const char *const p, long *total_seconds) // The POSIX spec says that the values for `hour` must be between 0 and 24 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the // transition times may be signed and range from -167 to 167. - long sign = -1; - long hours = 0; - long minutes = 0; - long seconds = 0; - - const char *ptr = p; - char buff = *ptr; - if (buff == '-' || buff == '+') { - // Negative numbers correspond to *positive* offsets, from the spec: - // - // If preceded by a '-', the timezone shall be east of the Prime - // Meridian; otherwise, it shall be west (which may be indicated by - // an optional preceding '+' ). - if (buff == '-') { - sign = 1; - } - - ptr++; - } - - // The hour can be 1 or 2 numeric characters - for (size_t i = 0; i < 2; ++i) { - buff = *ptr; - if (!Py_ISDIGIT(buff)) { - if (i == 0) { - return -1; - } - else { - break; - } - } + int hours = 0; + int minutes = 0; + int seconds = 0; - hours *= 10; - hours += buff - '0'; - ptr++; - } - - if (hours > 24 || hours < 0) { + if (parse_transition_time(p, &hours, &minutes, &seconds)) { return -1; } - // Minutes and seconds always of the format ":dd" - long *outputs[2] = {&minutes, &seconds}; - for (size_t i = 0; i < 2; ++i) { - if (*ptr != ':') { - goto complete; - } - ptr++; - - for (size_t j = 0; j < 2; ++j) { - buff = *ptr; - if (!Py_ISDIGIT(buff)) { - return -1; - } - *(outputs[i]) *= 10; - *(outputs[i]) += buff - '0'; - ptr++; - } + if (hours > 24 || hours < -24) { + return -1; } -complete: - *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds); - - return ptr - p; + // Negative numbers correspond to *positive* offsets, from the spec: + // + // If preceded by a '-', the timezone shall be east of the Prime + // Meridian; otherwise, it shall be west (which may be indicated by + // an optional preceding '+' ). + *total_seconds = -((hours * 3600L) + (minutes * 60) + seconds); + return 0; } /* Parse the date portion of a transition rule. */ -static Py_ssize_t -parse_transition_rule(const char *const p, TransitionRuleType **out) +static int +parse_transition_rule(const char **p, TransitionRuleType **out) { // The full transition rule indicates when to change back and forth between // STD and DST, and has the form: @@ -1858,10 +1820,10 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) // does not include the ',' at the end of the first rule. // // The POSIX spec states that if *time* is not given, the default is 02:00. - const char *ptr = p; - int8_t hour = 2; - int8_t minute = 0; - int8_t second = 0; + const char *ptr = *p; + int hour = 2; + int minute = 0; + int second = 0; // Rules come in one of three flavors: // @@ -1870,44 +1832,30 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) // 3. Mm.n.d: Specifying by month, week and day-of-week. if (*ptr == 'M') { - uint8_t month, week, day; + int month, week, day; ptr++; - if (parse_uint(ptr, &month)) { + + if (parse_digits(&ptr, 1, 2, &month)) { return -1; } - ptr++; - if (*ptr != '.') { - uint8_t tmp; - if (parse_uint(ptr, &tmp)) { - return -1; - } - - month *= 10; - month += tmp; - ptr++; + if (*ptr++ != '.') { + return -1; } - - uint8_t *values[2] = {&week, &day}; - for (size_t i = 0; i < 2; ++i) { - if (*ptr != '.') { - return -1; - } - ptr++; - - if (parse_uint(ptr, values[i])) { - return -1; - } - ptr++; + if (parse_digits(&ptr, 1, 1, &week)) { + return -1; + } + if (*ptr++ != '.') { + return -1; + } + if (parse_digits(&ptr, 1, 1, &day)) { + return -1; } if (*ptr == '/') { ptr++; - Py_ssize_t num_chars = - parse_transition_time(ptr, &hour, &minute, &second); - if (num_chars < 0) { + if (parse_transition_time(&ptr, &hour, &minute, &second)) { return -1; } - ptr += num_chars; } CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule)); @@ -1923,33 +1871,22 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) *out = (TransitionRuleType *)rv; } else { - uint8_t julian = 0; - unsigned int day = 0; + int julian = 0; + int day = 0; if (*ptr == 'J') { julian = 1; ptr++; } - for (size_t i = 0; i < 3; ++i) { - if (!Py_ISDIGIT(*ptr)) { - if (i == 0) { - return -1; - } - break; - } - day *= 10; - day += (*ptr) - '0'; - ptr++; + if (parse_digits(&ptr, 1, 3, &day)) { + return -1; } if (*ptr == '/') { ptr++; - Py_ssize_t num_chars = - parse_transition_time(ptr, &hour, &minute, &second); - if (num_chars < 0) { + if (parse_transition_time(&ptr, &hour, &minute, &second)) { return -1; } - ptr += num_chars; } DayRule *rv = PyMem_Calloc(1, sizeof(DayRule)); @@ -1964,13 +1901,13 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) *out = (TransitionRuleType *)rv; } - return ptr - p; + *p = ptr; + return 0; } /* Parse the time portion of a transition rule (e.g. following an /) */ -static Py_ssize_t -parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, - int8_t *second) +static int +parse_transition_time(const char **p, int *hour, int *minute, int *second) { // From the spec: // @@ -1982,12 +1919,9 @@ parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, // h[h][:mm[:ss]] // // RFC 8536 also allows transition times to be signed and to range from - // -167 to +167, but the current version only supports [0, 99]. - // - // TODO: Support the full range of transition hours. - int8_t *components[3] = {hour, minute, second}; - const char *ptr = p; - int8_t sign = 1; + // -167 to +167. + const char *ptr = *p; + int sign = 1; if (*ptr == '-' || *ptr == '+') { if (*ptr == '-') { @@ -1996,32 +1930,31 @@ parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, ptr++; } - for (size_t i = 0; i < 3; ++i) { - if (i > 0) { - if (*ptr != ':') { - break; - } - ptr++; + // The hour can be 1 to 3 numeric characters + if (parse_digits(&ptr, 1, 3, hour)) { + return -1; + } + *hour *= sign; + + // Minutes and seconds always of the format ":dd" + if (*ptr == ':') { + ptr++; + if (parse_digits(&ptr, 2, 2, minute)) { + return -1; } + *minute *= sign; - uint8_t buff = 0; - for (size_t j = 0; j < 2; j++) { - if (!Py_ISDIGIT(*ptr)) { - if (i == 0 && j > 0) { - break; - } + if (*ptr == ':') { + ptr++; + if (parse_digits(&ptr, 2, 2, second)) { return -1; } - - buff *= 10; - buff += (*ptr) - '0'; - ptr++; + *second *= sign; } - - *(components[i]) = sign * buff; } - return ptr - p; + *p = ptr; + return 0; } /* Constructor for a _tzrule. @@ -2376,8 +2309,8 @@ get_local_timestamp(PyObject *dt, int64_t *local_ts) } } - *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 + - (int64_t)(hour * 3600 + minute * 60 + second); + *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400L + + (int64_t)(hour * 3600L + minute * 60 + second); return 0; } |