summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@haypocalc.com> 2010-04-13 11:09:22 (GMT)
committer Victor Stinner <victor.stinner@haypocalc.com> 2010-04-13 11:09:22 (GMT)
commit f7351b40b54b3354c4a8b01d9072b888256fca87 (patch)
tree f52f592075724b0a8719ebbd9a00cd0b8bc3f0ea /Lib
parent 1bc6f6ea5e2e2f81922174a96ec7ce759ac8dc8c (diff)
download cpython-f7351b40b54b3354c4a8b01d9072b888256fca87.zip
cpython-f7351b40b54b3354c4a8b01d9072b888256fca87.tar.gz
cpython-f7351b40b54b3354c4a8b01d9072b888256fca87.tar.bz2
Merged revisions 80031 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r80031 | victor.stinner | 2010-04-13 13:07:24 +0200 (mar., 13 avril 2010) | 4 lines

  Issue #8383: pickle and pickletools use surrogatepass error handler when
  encoding unicode as utf8 to support lone surrogates and stay compatible with
  Python 2.x and 3.0
........
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/pickle.py 4
-rw-r--r-- Lib/pickletools.py 2
-rw-r--r-- Lib/test/pickletester.py 4
3 files changed, 6 insertions, 4 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 7af4ce9..7b48527 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -499,7 +499,7 @@ class _Pickler:
def save_str(self, obj, pack=struct.pack):
if self.bin:
- encoded = obj.encode('utf-8')
+ encoded = obj.encode('utf-8', 'surrogatepass')
n = len(encoded)
self.write(BINUNICODE + pack("<i", n) + encoded)
else:
@@ -966,7 +966,7 @@ class _Unpickler:
def load_binunicode(self):
len = mloads(b'i' + self.read(4))
- self.append(str(self.read(len), 'utf-8'))
+ self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
dispatch[BINUNICODE[0]] = load_binunicode
def load_short_binstring(self):
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index ca11aa3..6ab75c7 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -469,7 +469,7 @@ def read_unicodestring4(f):
raise ValueError("unicodestring4 byte count < 0: %d" % n)
data = f.read(n)
if len(data) == n:
- return str(data, 'utf-8')
+ return str(data, 'utf-8', 'surrogatepass')
raise ValueError("expected %d bytes in a unicodestring4, but only %d "
"remain" % (n, len(data)))
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 7ecc105..30ff4ef 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -515,7 +515,9 @@ class AbstractPickleTests(unittest.TestCase):
def test_unicode(self):
endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
- '<\\>', '<\\\U00012345>']
+ '<\\>', '<\\\U00012345>',
+ # surrogates
+ '<\udc80>']
for proto in protocols:
for u in endcases:
p = self.dumps(u, proto)