From 2b2ac3a1e80ef2139e0f5f3cf066705af5b557a2 Mon Sep 17 00:00:00 2001 From: Moshe Zadka Date: Fri, 30 Mar 2001 17:20:58 +0000 Subject: #126161 and 123634 -- Unicode strings could not be pickled correctly. This is *backwards incompatible* with the previous pickling scheme, which was not reversible --- Lib/pickle.py | 2 ++ Misc/NEWS | 5 +++++ Modules/cPickle.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/Lib/pickle.py b/Lib/pickle.py index 128a627..142cf8e 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -288,6 +288,8 @@ class Pickler: s = mdumps(l)[1:] self.write(BINUNICODE + s + encoding) else: + object = object.replace(u"\\", u"\\u005c") + object = object.replace(u"\n", u"\\u000a") self.write(UNICODE + object.encode('raw-unicode-escape') + '\n') memo_len = len(memo) diff --git a/Misc/NEWS b/Misc/NEWS index a4ed067..835183d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -33,6 +33,11 @@ http://sourceforge.net/tracker/index.php?func=detail&aid=&group_id=5470&atid - #121965 -- fixing containment in xrange() objects +- #126161 #123634 -- pickle.py, cPickle.c -- fix pickling unicode strings + this is *backwards incompatible* with older pickles containing unicode + strings -- but this has to be done, the previous pickling scheme broke + anyway. + What's New in Python 2.0? ========================= diff --git a/Modules/cPickle.c b/Modules/cPickle.c index 5b02c2a..aac2e61 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -1149,6 +1149,51 @@ err: } +/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates + backslash and newline characters to \uXXXX escapes.
*/ +static PyObject * +modified_EncodeRawUnicodeEscape(const Py_UNICODE *s, int size) +{ + PyObject *repr; + char *p; + char *q; + + static const char *hexdigit = "0123456789ABCDEF"; + + repr = PyString_FromStringAndSize(NULL, 6 * size); + if (repr == NULL) + return NULL; + if (size == 0) + return repr; + + p = q = PyString_AS_STRING(repr); + while (size-- > 0) { + Py_UNICODE ch = *s++; + /* Map 16-bit characters to '\uxxxx' */ + if (ch >= 256 || ch == '\\' || ch == '\n') { + *p++ = '\\'; + *p++ = 'u'; + *p++ = hexdigit[(ch >> 12) & 0xf]; + *p++ = hexdigit[(ch >> 8) & 0xf]; + *p++ = hexdigit[(ch >> 4) & 0xf]; + *p++ = hexdigit[ch & 15]; + } + /* Copy everything else as-is */ + else + *p++ = (char) ch; + } + *p = '\0'; + if (_PyString_Resize(&repr, p - q)) + goto onError; + + return repr; + + onError: + Py_DECREF(repr); + return NULL; +} + + static int save_unicode(Picklerobject *self, PyObject *args, int doput) { int size, len; @@ -1161,7 +1206,8 @@ save_unicode(Picklerobject *self, PyObject *args, int doput) { char *repr_str; static char string = UNICODE; - UNLESS (repr = PyUnicode_AsRawUnicodeEscapeString(args)) + UNLESS(repr = modified_EncodeRawUnicodeEscape( + PyUnicode_AS_UNICODE(args), PyUnicode_GET_SIZE(args))) return -1; if ((len = PyString_Size(repr)) < 0) @@ -2745,7 +2791,7 @@ load_unicode(Unpicklerobject *self) { char *s; if ((len = (*self->readline_func)(self, &s)) < 0) return -1; - if (len < 2) return bad_readline(); + if (len < 1) return bad_readline(); UNLESS (str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL)) goto finally; -- cgit v0.12