bpo-34454: Clean up datetime.fromisoformat surrogate handling (GH-8959)

* Use _PyUnicode_Copy in sanitize_isoformat_str

* Use repr in fromisoformat error message

  This reverses commit 67b74a98b2 per Serhiy Storchaka's suggestion:

      "I suggested to use %R in the error message because including the raw string can be confusing in the case of empty string, or string containing trailing whitespaces, invisible or unprintable characters."

  We agree that it is better to change both the C and pure Python versions to use repr.

* Retain non-sanitized dtstr for error printing

  This does not create an extra string; it just holds on to a reference to the original input string for the purpose of creating the error message.

* PEP 7 fixes to from_isoformat

* Separate handling of Unicode and other errors

  In the initial implementation, encoding errors and all other errors would both raise an error indicating an invalid format, which would not be accurate for errors like MemoryError.

* Drop needs_decref from _sanitize_isoformat_str

  Instead, _sanitize_isoformat_str returns a new reference, even when it returns the original string.
author: Paul Ganssle <pganssle@users.noreply.github.com> 2018-10-22 16:32:52 (GMT)
committer: Victor Stinner <vstinner@redhat.com> 2018-10-22 16:32:52 (GMT)
commit: 3df85404d4bf420db3362eeae1345f2cad948a71 (patch)
tree: a18de8801d592700411ed1c4a282c0180270ff05 /Lib
parent: 5a95ba29da7e55fe6a8777b6ea4c68f60cf0e407 (diff)
download: cpython-3df85404d4bf420db3362eeae1345f2cad948a71.zip
cpython-3df85404d4bf420db3362eeae1345f2cad948a71.tar.gz
cpython-3df85404d4bf420db3362eeae1345f2cad948a71.tar.bz2
2 files changed, 13 insertions, 4 deletions
diff --git a/Lib/datetime.py b/Lib/datetime.py
index cff9203..292919f 100644
--- a/Lib/datetime.py
+++ b/Lib/datetime.py
@@ -857,7 +857,7 @@ class date:
             assert len(date_string) == 10
             return cls(*_parse_isoformat_date(date_string))
         except Exception:
-            raise ValueError('Invalid isoformat string: %s' % date_string)
+            raise ValueError(f'Invalid isoformat string: {date_string!r}')
 
 
     # Conversions to string
@@ -1369,7 +1369,7 @@ class time:
         try:
             return cls(*_parse_isoformat_time(time_string))
         except Exception:
-            raise ValueError('Invalid isoformat string: %s' % time_string)
+            raise ValueError(f'Invalid isoformat string: {time_string!r}')
 
 
     def strftime(self, fmt):
@@ -1646,13 +1646,13 @@ class datetime(date):
         try:
             date_components = _parse_isoformat_date(dstr)
         except ValueError:
-            raise ValueError('Invalid isoformat string: %s' % date_string)
+            raise ValueError(f'Invalid isoformat string: {date_string!r}')
 
         if tstr:
             try:
                 time_components = _parse_isoformat_time(tstr)
             except ValueError:
-                raise ValueError('Invalid isoformat string: %s' % date_string)
+                raise ValueError(f'Invalid isoformat string: {date_string!r}')
         else:
             time_components = [0, 0, 0, 0, None]
 
diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index 9c6e71c..122f6b5 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -13,6 +13,7 @@ import sys
 import os
 import pickle
 import random
+import re
 import struct
 import unittest
 
@@ -2676,6 +2677,14 @@ class TestDateTime(TestDate):
                 with self.assertRaises(ValueError):
                     self.theclass.fromisoformat(bad_str)
 
+    def test_fromisoformat_fails_surrogate(self):
+        # Test that when fromisoformat() fails with a surrogate character as
+        # the separator, the error message contains the original string
+        dtstr = "2018-01-03\ud80001:0113"
+
+        with self.assertRaisesRegex(ValueError, re.escape(repr(dtstr))):
+            self.theclass.fromisoformat(dtstr)
+
     def test_fromisoformat_utc(self):
         dt_str = '2014-04-19T13:21:13+00:00'
         dt = self.theclass.fromisoformat(dt_str)
author	Paul Ganssle <pganssle@users.noreply.github.com>	2018-10-22 16:32:52 (GMT)
committer	Victor Stinner <vstinner@redhat.com>	2018-10-22 16:32:52 (GMT)
commit	3df85404d4bf420db3362eeae1345f2cad948a71 (patch)
tree	a18de8801d592700411ed1c4a282c0180270ff05 /Lib
parent	5a95ba29da7e55fe6a8777b6ea4c68f60cf0e407 (diff)
download	cpython-3df85404d4bf420db3362eeae1345f2cad948a71.zip cpython-3df85404d4bf420db3362eeae1345f2cad948a71.tar.gz cpython-3df85404d4bf420db3362eeae1345f2cad948a71.tar.bz2