From 09dc2f508c8513e0466a759cc27a09108c1e55c2 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 21 Dec 2017 00:33:49 -0500 Subject: bpo-15873: Implement [date][time].fromisoformat (#4699) Closes bpo-15873. --- Doc/library/datetime.rst | 48 ++- Lib/datetime.py | 205 ++++++++-- Lib/test/datetimetester.py | 412 ++++++++++++++++++++- .../2017-12-04-17-41-40.bpo-15873.-T4TRK.rst | 3 + Modules/_datetimemodule.c | 353 ++++++++++++++++++ 5 files changed, 989 insertions(+), 32 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-12-04-17-41-40.bpo-15873.-T4TRK.rst diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index dce51a1..c1b164e 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -436,6 +436,21 @@ Other constructors, all class methods: d``. +.. classmethod:: date.fromisoformat(date_string) + + Return a :class:`date` corresponding to a *date_string* in the format emitted + by :meth:`date.isoformat`. Specifically, this function supports strings in + the format(s) ``YYYY-MM-DD``. + + .. caution:: + + This does not support parsing arbitrary ISO 8601 strings - it is only intended + as the inverse operation of :meth:`date.isoformat`. + + .. versionadded:: 3.7 + + + Class attributes: .. attribute:: date.min @@ -819,6 +834,21 @@ Other constructors, all class methods: Added the *tzinfo* argument. +.. classmethod:: datetime.fromisoformat(date_string) + + Return a :class:`datetime` corresponding to a *date_string* in one of the + formats emitted by :meth:`date.isoformat` and :meth:`datetime.isoformat`. + Specifically, this function supports strings in the format(s) + ``YYYY-MM-DD[*HH[:MM[:SS[.mmm[mmm]]]][+HH:MM[:SS[.ffffff]]]]``, + where ``*`` can match any single character. + + .. caution:: + + This does not support parsing arbitrary ISO 8601 strings - it is only intended + as the inverse operation of :meth:`datetime.isoformat`. + + .. versionadded:: 3.7 + .. 
classmethod:: datetime.strptime(date_string, format) Return a :class:`.datetime` corresponding to *date_string*, parsed according to @@ -1486,6 +1516,23 @@ In boolean contexts, a :class:`.time` object is always considered to be true. error-prone and has been removed in Python 3.5. See :issue:`13936` for full details. + +Other constructor: + +.. classmethod:: time.fromisoformat(time_string) + + Return a :class:`time` corresponding to a *time_string* in one of the + formats emitted by :meth:`time.isoformat`. Specifically, this function supports + strings in the format(s) ``HH[:MM[:SS[.mmm[mmm]]]][+HH:MM[:SS[.ffffff]]]``. + + .. caution:: + + This does not support parsing arbitrary ISO 8601 strings - it is only intended + as the inverse operation of :meth:`time.isoformat`. + + .. versionadded:: 3.7 + + Instance methods: .. method:: time.replace(hour=self.hour, minute=self.minute, second=self.second, \ @@ -1587,7 +1634,6 @@ Instance methods: ``self.tzinfo.tzname(None)``, or raises an exception if the latter doesn't return ``None`` or a string object. - Example: >>> from datetime import time, tzinfo, timedelta diff --git a/Lib/datetime.py b/Lib/datetime.py index 67d8600..8fa18a7 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -173,6 +173,24 @@ def _format_time(hh, mm, ss, us, timespec='auto'): else: return fmt.format(hh, mm, ss, us) +def _format_offset(off): + s = '' + if off is not None: + if off.days < 0: + sign = "-" + off = -off + else: + sign = "+" + hh, mm = divmod(off, timedelta(hours=1)) + mm, ss = divmod(mm, timedelta(minutes=1)) + s += "%s%02d:%02d" % (sign, hh, mm) + if ss or ss.microseconds: + s += ":%02d" % ss.seconds + + if ss.microseconds: + s += '.%06d' % ss.microseconds + return s + # Correctly substitute for %z and %Z escapes in strftime formats. def _wrap_strftime(object, format, timetuple): # Don't call utcoffset() or tzname() unless actually needed. 
@@ -237,6 +255,102 @@ def _wrap_strftime(object, format, timetuple): newformat = "".join(newformat) return _time.strftime(newformat, timetuple) +# Helpers for parsing the result of isoformat() +def _parse_isoformat_date(dtstr): + # It is assumed that this function will only be called with a + # string of length exactly 10, and (though this is not used) ASCII-only + year = int(dtstr[0:4]) + if dtstr[4] != '-': + raise ValueError('Invalid date separator: %s' % dtstr[4]) + + month = int(dtstr[5:7]) + + if dtstr[7] != '-': + raise ValueError('Invalid date separator') + + day = int(dtstr[8:10]) + + return [year, month, day] + +def _parse_hh_mm_ss_ff(tstr): + # Parses things of the form HH[:MM[:SS[.fff[fff]]]] + len_str = len(tstr) + + time_comps = [0, 0, 0, 0] + pos = 0 + for comp in range(0, 3): + if (len_str - pos) < 2: + raise ValueError('Incomplete time component') + + time_comps[comp] = int(tstr[pos:pos+2]) + + pos += 2 + next_char = tstr[pos:pos+1] + + if not next_char or comp >= 2: + break + + if next_char != ':': + raise ValueError('Invalid time separator: %c' % next_char) + + pos += 1 + + if pos < len_str: + if tstr[pos] != '.': + raise ValueError('Invalid microsecond component') + else: + pos += 1 + + len_remainder = len_str - pos + if len_remainder not in (3, 6): + raise ValueError('Invalid microsecond component') + + time_comps[3] = int(tstr[pos:]) + if len_remainder == 3: + time_comps[3] *= 1000 + + return time_comps + +def _parse_isoformat_time(tstr): + # Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]] + len_str = len(tstr) + if len_str < 2: + raise ValueError('Isoformat time too short') + + # This is equivalent to re.search('[+-]', tstr), but faster + tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1) + timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr + + time_comps = _parse_hh_mm_ss_ff(timestr) + + tzi = None + if tz_pos > 0: + tzstr = tstr[tz_pos:] + + # Valid time zone strings are: + # HH:MM len: 5 + # HH:MM:SS len: 8 + # 
HH:MM:SS.ffffff len: 15 + + if len(tzstr) not in (5, 8, 15): + raise ValueError('Malformed time zone string') + + tz_comps = _parse_hh_mm_ss_ff(tzstr) + if all(x == 0 for x in tz_comps): + tzi = timezone.utc + else: + tzsign = -1 if tstr[tz_pos - 1] == '-' else 1 + + td = timedelta(hours=tz_comps[0], minutes=tz_comps[1], + seconds=tz_comps[2], microseconds=tz_comps[3]) + + tzi = timezone(tzsign * td) + + time_comps.append(tzi) + + return time_comps + + # Just raise TypeError if the arg isn't None or a string. def _check_tzname(name): if name is not None and not isinstance(name, str): @@ -732,6 +846,19 @@ class date: y, m, d = _ord2ymd(n) return cls(y, m, d) + @classmethod + def fromisoformat(cls, date_string): + """Construct a date from the output of date.isoformat().""" + if not isinstance(date_string, str): + raise TypeError('fromisoformat: argument must be str') + + try: + assert len(date_string) == 10 + return cls(*_parse_isoformat_date(date_string)) + except Exception: + raise ValueError('Invalid isoformat string: %s' % date_string) + + # Conversions to string def __repr__(self): @@ -1190,22 +1317,10 @@ class time: # Conversion to string - def _tzstr(self, sep=":"): - """Return formatted timezone offset (+xx:xx) or None.""" + def _tzstr(self): + """Return formatted timezone offset (+xx:xx) or an empty string.""" off = self.utcoffset() - if off is not None: - if off.days < 0: - sign = "-" - off = -off - else: - sign = "+" - hh, mm = divmod(off, timedelta(hours=1)) - mm, ss = divmod(mm, timedelta(minutes=1)) - assert 0 <= hh < 24 - off = "%s%02d%s%02d" % (sign, hh, sep, mm) - if ss: - off += ':%02d' % ss.seconds - return off + return _format_offset(off) def __repr__(self): """Convert to formal string, for repr().""" @@ -1244,6 +1359,18 @@ class time: __str__ = isoformat + @classmethod + def fromisoformat(cls, time_string): + """Construct a time from the output of isoformat().""" + if not isinstance(time_string, str): + raise TypeError('fromisoformat: argument 
must be str') + + try: + return cls(*_parse_isoformat_time(time_string)) + except Exception: + raise ValueError('Invalid isoformat string: %s' % time_string) + + def strftime(self, fmt): """Format using strftime(). The date part of the timestamp passed to underlying strftime should not be used. @@ -1497,6 +1624,31 @@ class datetime(date): time.hour, time.minute, time.second, time.microsecond, tzinfo, fold=time.fold) + @classmethod + def fromisoformat(cls, date_string): + """Construct a datetime from the output of datetime.isoformat().""" + if not isinstance(date_string, str): + raise TypeError('fromisoformat: argument must be str') + + # Split this at the separator + dstr = date_string[0:10] + tstr = date_string[11:] + + try: + date_components = _parse_isoformat_date(dstr) + except ValueError: + raise ValueError('Invalid isoformat string: %s' % date_string) + + if tstr: + try: + time_components = _parse_isoformat_time(tstr) + except ValueError: + raise ValueError('Invalid isoformat string: %s' % date_string) + else: + time_components = [0, 0, 0, 0, None] + + return cls(*(date_components + time_components)) + def timetuple(self): "Return local time tuple compatible with time.localtime()." 
dst = self.dst() @@ -1673,18 +1825,10 @@ class datetime(date): self._microsecond, timespec)) off = self.utcoffset() - if off is not None: - if off.days < 0: - sign = "-" - off = -off - else: - sign = "+" - hh, mm = divmod(off, timedelta(hours=1)) - mm, ss = divmod(mm, timedelta(minutes=1)) - s += "%s%02d:%02d" % (sign, hh, mm) - if ss: - assert not ss.microseconds - s += ":%02d" % ss.seconds + tz = _format_offset(off) + if tz: + s += tz + return s def __repr__(self): @@ -2275,9 +2419,10 @@ else: _check_date_fields, _check_int_field, _check_time_fields, _check_tzinfo_arg, _check_tzname, _check_utc_offset, _cmp, _cmperror, _date_class, _days_before_month, _days_before_year, _days_in_month, - _format_time, _is_leap, _isoweek1monday, _math, _ord2ymd, - _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, - _divide_and_round) + _format_time, _format_offset, _is_leap, _isoweek1monday, _math, + _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, + _divide_and_round, _parse_isoformat_date, _parse_isoformat_time, + _parse_hh_mm_ss_ff) # XXX Since import * above excludes names that start with _, # docstring does not get overwritten. In the future, it may be # appropriate to maintain a single module level docstring and diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index d0886c4..1d0c1c5 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -49,7 +49,6 @@ OTHERSTUFF = (10, 34.5, "abc", {}, [], ()) INF = float("inf") NAN = float("nan") - ############################################################################# # module tests @@ -1588,6 +1587,63 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase): # blow up because other fields are insane. 
self.theclass(base[:2] + bytes([ord_byte]) + base[3:]) + def test_fromisoformat(self): + # Test that isoformat() is reversible + base_dates = [ + (1, 1, 1), + (1000, 2, 14), + (1900, 1, 1), + (2000, 2, 29), + (2004, 11, 12), + (2004, 4, 3), + (2017, 5, 30) + ] + + for dt_tuple in base_dates: + dt = self.theclass(*dt_tuple) + dt_str = dt.isoformat() + with self.subTest(dt_str=dt_str): + dt_rt = self.theclass.fromisoformat(dt.isoformat()) + + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_subclass(self): + class DateSubclass(self.theclass): + pass + + dt = DateSubclass(2014, 12, 14) + + dt_rt = DateSubclass.fromisoformat(dt.isoformat()) + + self.assertIsInstance(dt_rt, DateSubclass) + + def test_fromisoformat_fails(self): + # Test that fromisoformat() fails on invalid values + bad_strs = [ + '', # Empty string + '009-03-04', # Not 10 characters + '123456789', # Not a date + '200a-12-04', # Invalid character in year + '2009-1a-04', # Invalid character in month + '2009-12-0a', # Invalid character in day + '2009-01-32', # Invalid day + '2009-02-29', # Invalid leap day + '20090228', # Valid ISO8601 output not from isoformat() + ] + + for bad_str in bad_strs: + with self.assertRaises(ValueError): + self.theclass.fromisoformat(bad_str) + + def test_fromisoformat_fails_typeerror(self): + # Test that fromisoformat fails when passed the wrong type + import io + + bad_types = [b'2009-03-01', None, io.StringIO('2009-03-01')] + for bad_type in bad_types: + with self.assertRaises(TypeError): + self.theclass.fromisoformat(bad_type) + ############################################################################# # datetime tests @@ -1675,6 +1731,36 @@ class TestDateTime(TestDate): t = self.theclass(2, 3, 2, tzinfo=tz) self.assertEqual(t.isoformat(), "0002-03-02T00:00:00+00:00:16") + def test_isoformat_timezone(self): + tzoffsets = [ + ('05:00', timedelta(hours=5)), + ('02:00', timedelta(hours=2)), + ('06:27', timedelta(hours=6, minutes=27)), + ('12:32:30', 
timedelta(hours=12, minutes=32, seconds=30)), + ('02:04:09.123456', timedelta(hours=2, minutes=4, seconds=9, microseconds=123456)) + ] + + tzinfos = [ + ('', None), + ('+00:00', timezone.utc), + ('+00:00', timezone(timedelta(0))), + ] + + tzinfos += [ + (prefix + expected, timezone(sign * td)) + for expected, td in tzoffsets + for prefix, sign in [('-', -1), ('+', 1)] + ] + + dt_base = self.theclass(2016, 4, 1, 12, 37, 9) + exp_base = '2016-04-01T12:37:09' + + for exp_tz, tzi in tzinfos: + dt = dt_base.replace(tzinfo=tzi) + exp = exp_base + exp_tz + with self.subTest(tzi=tzi): + assert dt.isoformat() == exp + def test_format(self): dt = self.theclass(2007, 9, 10, 4, 5, 1, 123) self.assertEqual(dt.__format__(''), str(dt)) @@ -2334,6 +2420,173 @@ class TestDateTime(TestDate): self.assertEqual(dt2.newmeth(-7), dt1.year + dt1.month + dt1.second - 7) + def test_fromisoformat_datetime(self): + # Test that isoformat() is reversible + base_dates = [ + (1, 1, 1), + (1900, 1, 1), + (2004, 11, 12), + (2017, 5, 30) + ] + + base_times = [ + (0, 0, 0, 0), + (0, 0, 0, 241000), + (0, 0, 0, 234567), + (12, 30, 45, 234567) + ] + + separators = [' ', 'T'] + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=-5)), + timezone(timedelta(hours=2))] + + dts = [self.theclass(*date_tuple, *time_tuple, tzinfo=tzi) + for date_tuple in base_dates + for time_tuple in base_times + for tzi in tzinfos] + + for dt in dts: + for sep in separators: + dtstr = dt.isoformat(sep=sep) + + with self.subTest(dtstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_timezone(self): + base_dt = self.theclass(2014, 12, 30, 12, 30, 45, 217456) + + tzoffsets = [ + timedelta(hours=5), timedelta(hours=2), + timedelta(hours=6, minutes=27), + timedelta(hours=12, minutes=32, seconds=30), + timedelta(hours=2, minutes=4, seconds=9, microseconds=123456) + ] + + tzoffsets += [-1 * td for td in tzoffsets] + + tzinfos = [None, timezone.utc, + 
timezone(timedelta(hours=0))] + + tzinfos += [timezone(td) for td in tzoffsets] + + for tzi in tzinfos: + dt = base_dt.replace(tzinfo=tzi) + dtstr = dt.isoformat() + + with self.subTest(tstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + assert dt == dt_rt, dt_rt + + def test_fromisoformat_separators(self): + separators = [ + ' ', 'T', '\u007f', # 1-bit widths + '\u0080', 'ʁ', # 2-bit widths + 'ᛇ', '時', # 3-bit widths + '🐍' # 4-bit widths + ] + + for sep in separators: + dt = self.theclass(2018, 1, 31, 23, 59, 47, 124789) + dtstr = dt.isoformat(sep=sep) + + with self.subTest(dtstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_ambiguous(self): + # Test strings like 2018-01-31+12:15 (where +12:15 is not a time zone) + separators = ['+', '-'] + for sep in separators: + dt = self.theclass(2018, 1, 31, 12, 15) + dtstr = dt.isoformat(sep=sep) + + with self.subTest(dtstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_timespecs(self): + datetime_bases = [ + (2009, 12, 4, 8, 17, 45, 123456), + (2009, 12, 4, 8, 17, 45, 0)] + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=-5)), + timezone(timedelta(hours=2)), + timezone(timedelta(hours=6, minutes=27))] + + timespecs = ['hours', 'minutes', 'seconds', + 'milliseconds', 'microseconds'] + + for ip, ts in enumerate(timespecs): + for tzi in tzinfos: + for dt_tuple in datetime_bases: + if ts == 'milliseconds': + new_microseconds = 1000 * (dt_tuple[6] // 1000) + dt_tuple = dt_tuple[0:6] + (new_microseconds,) + + dt = self.theclass(*(dt_tuple[0:(4 + ip)]), tzinfo=tzi) + dtstr = dt.isoformat(timespec=ts) + with self.subTest(dtstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_fails_datetime(self): + # Test that fromisoformat() fails on invalid values + bad_strs = [ + '', # Empty string + '2009.04-19T03', # Wrong first separator 
+ '2009-04.19T03', # Wrong second separator + '2009-04-19T0a', # Invalid hours + '2009-04-19T03:1a:45', # Invalid minutes + '2009-04-19T03:15:4a', # Invalid seconds + '2009-04-19T03;15:45', # Bad first time separator + '2009-04-19T03:15;45', # Bad second time separator + '2009-04-19T03:15:4500:00', # Bad time zone separator + '2009-04-19T03:15:45.2345', # Too many digits for milliseconds + '2009-04-19T03:15:45.1234567', # Too many digits for microseconds + '2009-04-19T03:15:45.123456+24:30', # Invalid time zone offset + '2009-04-19T03:15:45.123456-24:30', # Invalid negative offset + '2009-04-10ᛇᛇᛇᛇᛇ12:15', # Too many unicode separators + '2009-04-19T1', # Incomplete hours + '2009-04-19T12:3', # Incomplete minutes + '2009-04-19T12:30:4', # Incomplete seconds + '2009-04-19T12:', # Ends with time separator + '2009-04-19T12:30:', # Ends with time separator + '2009-04-19T12:30:45.', # Ends with time separator + '2009-04-19T12:30:45.123456+', # Ends with timezone separator + '2009-04-19T12:30:45.123456-', # Ends with timezone separator + '2009-04-19T12:30:45.123456-05:00a', # Extra text + '2009-04-19T12:30:45.123-05:00a', # Extra text + '2009-04-19T12:30:45-05:00a', # Extra text + ] + + for bad_str in bad_strs: + with self.subTest(bad_str=bad_str): + with self.assertRaises(ValueError): + self.theclass.fromisoformat(bad_str) + + def test_fromisoformat_utc(self): + dt_str = '2014-04-19T13:21:13+00:00' + dt = self.theclass.fromisoformat(dt_str) + + self.assertIs(dt.tzinfo, timezone.utc) + + def test_fromisoformat_subclass(self): + class DateTimeSubclass(self.theclass): + pass + + dt = DateTimeSubclass(2014, 12, 14, 9, 30, 45, 457390, + tzinfo=timezone(timedelta(hours=10, minutes=45))) + + dt_rt = DateTimeSubclass.fromisoformat(dt.isoformat()) + + self.assertEqual(dt, dt_rt) + self.assertIsInstance(dt_rt, DateTimeSubclass) + + class TestSubclassDateTime(TestDateTime): theclass = SubclassDatetime # Override tests not designed for subclass @@ -2517,6 +2770,36 @@ class
TestTime(HarmlessMixedComparison, unittest.TestCase): self.assertEqual(t.isoformat(timespec='microseconds'), "12:34:56.000000") self.assertEqual(t.isoformat(timespec='auto'), "12:34:56") + def test_isoformat_timezone(self): + tzoffsets = [ + ('05:00', timedelta(hours=5)), + ('02:00', timedelta(hours=2)), + ('06:27', timedelta(hours=6, minutes=27)), + ('12:32:30', timedelta(hours=12, minutes=32, seconds=30)), + ('02:04:09.123456', timedelta(hours=2, minutes=4, seconds=9, microseconds=123456)) + ] + + tzinfos = [ + ('', None), + ('+00:00', timezone.utc), + ('+00:00', timezone(timedelta(0))), + ] + + tzinfos += [ + (prefix + expected, timezone(sign * td)) + for expected, td in tzoffsets + for prefix, sign in [('-', -1), ('+', 1)] + ] + + t_base = self.theclass(12, 37, 9) + exp_base = '12:37:09' + + for exp_tz, tzi in tzinfos: + t = t_base.replace(tzinfo=tzi) + exp = exp_base + exp_tz + with self.subTest(tzi=tzi): + assert t.isoformat() == exp + def test_1653736(self): # verify it doesn't accept extra keyword arguments t = self.theclass(second=1) @@ -3055,6 +3338,133 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase): t2 = t2.replace(tzinfo=Varies()) self.assertTrue(t1 < t2) # t1's offset counter still going up + def test_fromisoformat(self): + time_examples = [ + (0, 0, 0, 0), + (23, 59, 59, 999999), + ] + + hh = (9, 12, 20) + mm = (5, 30) + ss = (4, 45) + usec = (0, 245000, 678901) + + time_examples += list(itertools.product(hh, mm, ss, usec)) + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=2)), + timezone(timedelta(hours=6, minutes=27))] + + for ttup in time_examples: + for tzi in tzinfos: + t = self.theclass(*ttup, tzinfo=tzi) + tstr = t.isoformat() + + with self.subTest(tstr=tstr): + t_rt = self.theclass.fromisoformat(tstr) + self.assertEqual(t, t_rt) + + def test_fromisoformat_timezone(self): + base_time = self.theclass(12, 30, 45, 217456) + + tzoffsets = [ + timedelta(hours=5), timedelta(hours=2), + timedelta(hours=6, minutes=27), + 
timedelta(hours=12, minutes=32, seconds=30), + timedelta(hours=2, minutes=4, seconds=9, microseconds=123456) + ] + + tzoffsets += [-1 * td for td in tzoffsets] + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=0))] + + tzinfos += [timezone(td) for td in tzoffsets] + + for tzi in tzinfos: + t = base_time.replace(tzinfo=tzi) + tstr = t.isoformat() + + with self.subTest(tstr=tstr): + t_rt = self.theclass.fromisoformat(tstr) + assert t == t_rt, t_rt + + def test_fromisoformat_timespecs(self): + time_bases = [ + (8, 17, 45, 123456), + (8, 17, 45, 0) + ] + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=-5)), + timezone(timedelta(hours=2)), + timezone(timedelta(hours=6, minutes=27))] + + timespecs = ['hours', 'minutes', 'seconds', + 'milliseconds', 'microseconds'] + + for ip, ts in enumerate(timespecs): + for tzi in tzinfos: + for t_tuple in time_bases: + if ts == 'milliseconds': + new_microseconds = 1000 * (t_tuple[-1] // 1000) + t_tuple = t_tuple[0:-1] + (new_microseconds,) + + t = self.theclass(*(t_tuple[0:(1 + ip)]), tzinfo=tzi) + tstr = t.isoformat(timespec=ts) + with self.subTest(tstr=tstr): + t_rt = self.theclass.fromisoformat(tstr) + self.assertEqual(t, t_rt) + + def test_fromisoformat_fails(self): + bad_strs = [ + '', # Empty string + '12:', # Ends on a separator + '12:30:', # Ends on a separator + '12:30:15.', # Ends on a separator + '1', # Incomplete hours + '12:3', # Incomplete minutes + '12:30:1', # Incomplete seconds + '1a:30:45.334034', # Invalid character in hours + '12:a0:45.334034', # Invalid character in minutes + '12:30:a5.334034', # Invalid character in seconds + '12:30:45.1234', # Too many digits for milliseconds + '12:30:45.1234567', # Too many digits for microseconds + '12:30:45.123456+24:30', # Invalid time zone offset + '12:30:45.123456-24:30', # Invalid negative offset + '12：30：45', # Uses full-width unicode colons + '12:30:45․123456', # Uses \u2024 in place of decimal point + '12:30:45a', # Extra at end of basic time +
'12:30:45.123a', # Extra at end of millisecond time + '12:30:45.123456a', # Extra at end of microsecond time + '12:30:45.123456+12:00:30a', # Extra at end of full time + ] + + for bad_str in bad_strs: + with self.subTest(bad_str=bad_str): + with self.assertRaises(ValueError): + self.theclass.fromisoformat(bad_str) + + def test_fromisoformat_fails_typeerror(self): + # Test the fromisoformat fails when passed the wrong type + import io + + bad_types = [b'12:30:45', None, io.StringIO('12:30:45')] + + for bad_type in bad_types: + with self.assertRaises(TypeError): + self.theclass.fromisoformat(bad_type) + + def test_fromisoformat_subclass(self): + class TimeSubclass(self.theclass): + pass + + tsc = TimeSubclass(12, 14, 45, 203745, tzinfo=timezone.utc) + tsc_rt = TimeSubclass.fromisoformat(tsc.isoformat()) + + self.assertEqual(tsc, tsc_rt) + self.assertIsInstance(tsc_rt, TimeSubclass) + def test_subclass_timetz(self): class C(self.theclass): diff --git a/Misc/NEWS.d/next/Library/2017-12-04-17-41-40.bpo-15873.-T4TRK.rst b/Misc/NEWS.d/next/Library/2017-12-04-17-41-40.bpo-15873.-T4TRK.rst new file mode 100644 index 0000000..98a8415 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-12-04-17-41-40.bpo-15873.-T4TRK.rst @@ -0,0 +1,3 @@ +Added new alternate constructors :meth:`datetime.datetime.fromisoformat`, +:meth:`datetime.time.fromisoformat` and :meth:`datetime.date.fromisoformat` +as the inverse operation of each class's respective ``isoformat`` methods.
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index b50cdda..6241967 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -664,6 +664,167 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d) } /* --------------------------------------------------------------------------- + * String parsing utilities and helper functions + */ + +static const char* +parse_digits(const char* ptr, int* var, size_t num_digits) +{ + for (size_t i = 0; i < num_digits; ++i) { + unsigned int tmp = (unsigned int)(*(ptr++) - '0'); + if (tmp > 9) { + return NULL; + } + *var *= 10; + *var += (signed int)tmp; + } + + return ptr; +} + +static int parse_isoformat_date(const char *dtstr, + int* year, int *month, int* day) { + /* Parse the date components of the result of date.isoformat() + * + * Return codes: + * 0: Success + * -1: Failed to parse date component + * -2: Failed to parse dateseparator + */ + const char *p = dtstr; + p = parse_digits(p, year, 4); + if (NULL == p) { + return -1; + } + + if (*(p++) != '-') { + return -2; + } + + p = parse_digits(p, month, 2); + if (NULL == p) { + return -1; + } + + if (*(p++) != '-') { + return -2; + } + + p = parse_digits(p, day, 2); + if (p == NULL) { + return -1; + } + + return 0; +} + +static int +parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, + int* hour, int* minute, int *second, int *microsecond) { + const char *p = tstr; + const char *p_end = tstr_end; + int *vals[3] = {hour, minute, second}; + + // Parse [HH[:MM[:SS]]] + for (size_t i = 0; i < 3; ++i) { + p = parse_digits(p, vals[i], 2); + if (NULL == p) { + return -3; + } + + char c = *(p++); + if (p >= p_end) { + return c != '\0'; + } else if (c == ':') { + continue; + } else if (c == '.') { + break; + } else { + return -4; // Malformed time separator + } + } + + // Parse .fff[fff] + size_t len_remains = p_end - p; + if (!(len_remains == 6 || len_remains == 3)) { + return -3; + } + + p = parse_digits(p, microsecond, len_remains); 
+ if (NULL == p) { + return -3; + } + + if (len_remains == 3) { + *microsecond *= 1000; + } + + // Return 1 if it's not the end of the string + return *p != '\0'; +} + +static int +parse_isoformat_time(const char *dtstr, size_t dtlen, + int* hour, int *minute, int *second, int *microsecond, + int* tzoffset, int *tzmicrosecond) { + // Parse the time portion of a datetime.isoformat() string + // + // Return codes: + // 0: Success (no tzoffset) + // 1: Success (with tzoffset) + // -3: Failed to parse time component + // -4: Failed to parse time separator + // -5: Malformed timezone string + + const char *p = dtstr; + const char *p_end = dtstr + dtlen; + + const char *tzinfo_pos = p; + do { + if (*tzinfo_pos == '+' || *tzinfo_pos == '-') { + break; + } + } while(++tzinfo_pos < p_end); + + int rv = parse_hh_mm_ss_ff(dtstr, tzinfo_pos, + hour, minute, second, microsecond); + + if (rv < 0) { + return rv; + } else if (tzinfo_pos == p_end) { + // We know that there's no time zone, so if there's stuff at the + // end of the string it's an error. + if (rv == 1) { + return -5; + } else { + return 0; + } + } + + // Parse time zone component + // Valid formats are: + // - +HH:MM (len 6) + // - +HH:MM:SS (len 9) + // - +HH:MM:SS.ffffff (len 16) + size_t tzlen = p_end - tzinfo_pos; + if (!(tzlen == 6 || tzlen == 9 || tzlen == 16)) { + return -5; + } + + int tzsign = (*tzinfo_pos == '-')?-1:1; + tzinfo_pos++; + int tzhour = 0, tzminute = 0, tzsecond = 0; + rv = parse_hh_mm_ss_ff(tzinfo_pos, p_end, + &tzhour, &tzminute, &tzsecond, tzmicrosecond); + + *tzoffset = tzsign * ((tzhour * 3600) + (tzminute * 60) + tzsecond); + *tzmicrosecond *= tzsign; + + return rv?-5:1; +} + + +/* --------------------------------------------------------------------------- * Create various objects, mostly without range checking. 
*/ @@ -1063,6 +1224,27 @@ append_keyword_fold(PyObject *repr, int fold) return repr; } +static inline PyObject * +tzinfo_from_isoformat_results(int rv, int tzoffset, int tz_useconds) { + PyObject *tzinfo; + if (rv == 1) { + // Create a timezone from offset in seconds (0 returns UTC) + if (tzoffset == 0) { + Py_INCREF(PyDateTime_TimeZone_UTC); + return PyDateTime_TimeZone_UTC; + } + + PyObject *delta = new_delta(0, tzoffset, tz_useconds, 1); + tzinfo = new_timezone(delta, NULL); + Py_XDECREF(delta); + } else { + tzinfo = Py_None; + Py_INCREF(Py_None); + } + + return tzinfo; +} + /* --------------------------------------------------------------------------- * String format helpers. */ @@ -2607,6 +2789,7 @@ date_fromtimestamp(PyObject *cls, PyObject *args) return result; } + /* Return new date from proleptic Gregorian ordinal. Raises ValueError if * the ordinal is out of range. */ @@ -2633,6 +2816,46 @@ date_fromordinal(PyObject *cls, PyObject *args) return result; } +/* Return the new date from a string as generated by date.isoformat() */ +static PyObject * +date_fromisoformat(PyObject *cls, PyObject *dtstr) { + assert(dtstr != NULL); + + if (!PyUnicode_Check(dtstr)) { + PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str"); + return NULL; + } + + Py_ssize_t len; + + const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); + + int year = 0, month = 0, day = 0; + + int rv; + if (len == 10) { + rv = parse_isoformat_date(dt_ptr, &year, &month, &day); + } else { + rv = -1; + } + + if (rv < 0) { + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", + dt_ptr); + return NULL; + } + + PyObject *result; + if ( (PyTypeObject*)cls == &PyDateTime_DateType ) { + result = new_date_ex(year, month, day, (PyTypeObject*)cls); + } else { + result = PyObject_CallFunction(cls, "iii", year, month, day); + } + + return result; +} + + /* * Date arithmetic. 
*/ @@ -2925,6 +3148,10 @@ static PyMethodDef date_methods[] = { PyDoc_STR("int -> date corresponding to a proleptic Gregorian " "ordinal.")}, + {"fromisoformat", (PyCFunction)date_fromisoformat, METH_O | + METH_CLASS, + PyDoc_STR("str -> Construct a date from the output of date.isoformat()")}, + {"today", (PyCFunction)date_today, METH_NOARGS | METH_CLASS, PyDoc_STR("Current date or datetime: same as " "self.__class__.fromtimestamp(time.time()).")}, @@ -3972,6 +4199,49 @@ time_replace(PyDateTime_Time *self, PyObject *args, PyObject *kw) return clone; } +static PyObject * +time_fromisoformat(PyObject *cls, PyObject *tstr) { + assert(tstr != NULL); + + if (!PyUnicode_Check(tstr)) { + PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str"); + return NULL; + } + + Py_ssize_t len; + const char *p = PyUnicode_AsUTF8AndSize(tstr, &len); + + int hour = 0, minute = 0, second = 0, microsecond = 0; + int tzoffset, tzimicrosecond = 0; + int rv = parse_isoformat_time(p, len, + &hour, &minute, &second, &microsecond, + &tzoffset, &tzimicrosecond); + + if (rv < 0) { + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", p); + return NULL; + } + + PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, + tzimicrosecond); + + if (tzinfo == NULL) { + return NULL; + } + + PyObject *t; + if ( (PyTypeObject *)cls == &PyDateTime_TimeType ) { + t = new_time(hour, minute, second, microsecond, tzinfo, 0); + } else { + t = PyObject_CallFunction(cls, "iiiiO", + hour, minute, second, microsecond, tzinfo); + } + + Py_DECREF(tzinfo); + return t; +} + + /* Pickle support, a simple use of __reduce__. */ /* Let basestate be the non-tzinfo data string.
@@ -4041,6 +4311,9 @@ static PyMethodDef time_methods[] = { {"replace", (PyCFunction)time_replace, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Return time with new specified fields.")}, + {"fromisoformat", (PyCFunction)time_fromisoformat, METH_O | METH_CLASS, + PyDoc_STR("string -> time from time.isoformat() output")}, + {"__reduce_ex__", (PyCFunction)time_reduce_ex, METH_VARARGS, PyDoc_STR("__reduce_ex__(proto) -> (cls, state)")}, @@ -4506,6 +4779,82 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) return result; } +static PyObject * +datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { + assert(dtstr != NULL); + + if (!PyUnicode_Check(dtstr)) { + PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str"); + return NULL; + } + + Py_ssize_t len; + const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); + const char * p = dt_ptr; + + int year = 0, month = 0, day = 0; + int hour = 0, minute = 0, second = 0, microsecond = 0; + int tzoffset = 0, tzusec = 0; + + // date has a fixed length of 10 + int rv = parse_isoformat_date(p, &year, &month, &day); + + if (!rv && len > 10) { + // In UTF-8, the length of multi-byte characters is encoded in the MSB + if ((p[10] & 0x80) == 0) { + p += 11; + } else { + switch(p[10] & 0xf0) { + case 0xe0: + p += 13; + break; + case 0xf0: + p += 14; + break; + default: + p += 12; + break; + } + } + + len -= (p - dt_ptr); + rv = parse_isoformat_time(p, len, + &hour, &minute, &second, &microsecond, + &tzoffset, &tzusec); + } + if (rv < 0) { + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr); + return NULL; + } + + PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec); + if (tzinfo == NULL) { + return NULL; + } + + PyObject* dt; + if ( (PyTypeObject*)cls == &PyDateTime_DateTimeType ) { + // Use the fast path constructor + dt = new_datetime(year, month, day, hour, minute, second, microsecond, + tzinfo, 0); + } else { + // Subclass + dt = PyObject_CallFunction(cls, "iiiiiiiO", +
year, + month, + day, + hour, + minute, + second, + microsecond, + tzinfo); + } + + Py_DECREF(tzinfo); + return dt; +} + + /* * Destructor. */ @@ -5519,6 +5868,10 @@ static PyMethodDef datetime_methods[] = { METH_VARARGS | METH_KEYWORDS | METH_CLASS, PyDoc_STR("date, time -> datetime with same date and time fields")}, + {"fromisoformat", (PyCFunction)datetime_fromisoformat, + METH_O | METH_CLASS, + PyDoc_STR("string -> datetime from datetime.isoformat() output")}, + /* Instance methods: */ {"date", (PyCFunction)datetime_getdate, METH_NOARGS, -- cgit v0.12