diff options
author | Anthony Baxter <anthonybaxter@gmail.com> | 2006-03-28 07:32:36 (GMT) |
---|---|---|
committer | Anthony Baxter <anthonybaxter@gmail.com> | 2006-03-28 07:32:36 (GMT) |
commit | cb9051a608525c1fd191701e169cd51910f9ceee (patch) | |
tree | 8387ee1e1cd7c68d92cc1edf0d832defe5f9caf7 | |
parent | 51487fe93375b861edc12b03a0789f833ad24392 (diff) | |
download | cpython-cb9051a608525c1fd191701e169cd51910f9ceee.zip cpython-cb9051a608525c1fd191701e169cd51910f9ceee.tar.gz cpython-cb9051a608525c1fd191701e169cd51910f9ceee.tar.bz2 |
After discussions with perky, reverted the fix for Bug #1379994 ("Builtin
unicode_escape and raw_unicode_escape codec now encodes backslash correctly").
That fix caused another issue: unicode repr strings were being double-escaped
(SF Bug #1459029). The correct fix will be in 2.5, but is too risky for 2.4.3.
Added a test case for #1459029.
-rw-r--r-- | Lib/test/test_unicode.py | 36 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 6 |
3 files changed, 29 insertions, 17 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 7f6a152..f70da9d 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -626,24 +626,20 @@ class UnicodeTest( self.assertEqual(u'hello'.encode('latin-1'), 'hello') # Roundtrip safety for BMP (just the first 1024 chars) - for c in xrange(1024): - u = unichr(c) - for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', - 'utf-16-be', 'raw_unicode_escape', - 'unicode_escape', 'unicode_internal'): - self.assertEqual(unicode(u.encode(encoding),encoding), u) + u = u''.join(map(unichr, xrange(1024))) + for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', + 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): + self.assertEqual(unicode(u.encode(encoding),encoding), u) # Roundtrip safety for BMP (just the first 256 chars) - for c in xrange(256): - u = unichr(c) - for encoding in ('latin-1',): - self.assertEqual(unicode(u.encode(encoding),encoding), u) + u = u''.join(map(unichr, xrange(256))) + for encoding in ('latin-1',): + self.assertEqual(unicode(u.encode(encoding),encoding), u) # Roundtrip safety for BMP (just the first 128 chars) - for c in xrange(128): - u = unichr(c) - for encoding in ('ascii',): - self.assertEqual(unicode(u.encode(encoding),encoding), u) + u = u''.join(map(unichr, xrange(128))) + for encoding in ('ascii',): + self.assertEqual(unicode(u.encode(encoding),encoding), u) # Roundtrip safety for non-BMP (just a few chars) u = u'\U00010001\U00020002\U00030003\U00040004\U00050005' @@ -744,6 +740,18 @@ class UnicodeTest( y = x.encode("raw-unicode-escape").decode("raw-unicode-escape") self.assertEqual(x, y) + def test_unicode_repr(self): + class s1: + def __repr__(self): + return '\\n' + + class s2: + def __repr__(self): + return u'\\n' + + self.assertEqual(repr(s1()), '\\n') + self.assertEqual(repr(s2()), '\\n') + def test_main(): test_support.run_unittest(UnicodeTest) @@ -17,6 +17,10 @@ Core and builtins - A threading issue that caused random 
segfaults on some platforms from the testsuite was fixed in test_capi. +- Reverted fix for Bug #1379994: Builtin unicode_escape and + raw_unicode_escape codec now encodes backslash correctly. + This caused another issue for unicode repr strings being double-escaped + (SF Bug #1459029). Correct fix will be in 2.5, but is too risky for 2.4.3. What's New in Python 2.4.3c1? ============================= diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 690f016..fc503c7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1982,9 +1982,9 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, while (size-- > 0) { Py_UNICODE ch = *s++; - /* Escape quotes and backslashes */ - if ((quotes && - ch == (Py_UNICODE) PyString_AS_STRING(repr)[1]) || ch == '\\') { + /* Escape quotes */ + if (quotes && + (ch == (Py_UNICODE) PyString_AS_STRING(repr)[1] || ch == '\\')) { *p++ = '\\'; *p++ = (char) ch; continue; |