summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Anthony Baxter <anthonybaxter@gmail.com>, 2006-03-28 07:32:36 (GMT)
committer: Anthony Baxter <anthonybaxter@gmail.com>, 2006-03-28 07:32:36 (GMT)
commit: cb9051a608525c1fd191701e169cd51910f9ceee (patch)
tree: 8387ee1e1cd7c68d92cc1edf0d832defe5f9caf7
parent: 51487fe93375b861edc12b03a0789f833ad24392 (diff)
download: cpython-cb9051a608525c1fd191701e169cd51910f9ceee.zip
cpython-cb9051a608525c1fd191701e169cd51910f9ceee.tar.gz
cpython-cb9051a608525c1fd191701e169cd51910f9ceee.tar.bz2
After discussions with perky, reverted the fix for Bug #1379994 ("Builtin
unicode_escape and raw_unicode_escape codec now encodes backslash correctly"). That fix caused another issue: unicode repr strings were being double-escaped (SF Bug #1459029). The correct fix will be in 2.5, but it is too risky for 2.4.3. Added a test case for #1459029.
-rw-r--r-- Lib/test/test_unicode.py | 36
-rw-r--r-- Misc/NEWS | 4
-rw-r--r-- Objects/unicodeobject.c | 6
3 files changed, 29 insertions, 17 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 7f6a152..f70da9d 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -626,24 +626,20 @@ class UnicodeTest(
self.assertEqual(u'hello'.encode('latin-1'), 'hello')
# Roundtrip safety for BMP (just the first 1024 chars)
- for c in xrange(1024):
- u = unichr(c)
- for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
- 'utf-16-be', 'raw_unicode_escape',
- 'unicode_escape', 'unicode_internal'):
- self.assertEqual(unicode(u.encode(encoding),encoding), u)
+ u = u''.join(map(unichr, xrange(1024)))
+ for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+ 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
+ self.assertEqual(unicode(u.encode(encoding),encoding), u)
# Roundtrip safety for BMP (just the first 256 chars)
- for c in xrange(256):
- u = unichr(c)
- for encoding in ('latin-1',):
- self.assertEqual(unicode(u.encode(encoding),encoding), u)
+ u = u''.join(map(unichr, xrange(256)))
+ for encoding in ('latin-1',):
+ self.assertEqual(unicode(u.encode(encoding),encoding), u)
# Roundtrip safety for BMP (just the first 128 chars)
- for c in xrange(128):
- u = unichr(c)
- for encoding in ('ascii',):
- self.assertEqual(unicode(u.encode(encoding),encoding), u)
+ u = u''.join(map(unichr, xrange(128)))
+ for encoding in ('ascii',):
+ self.assertEqual(unicode(u.encode(encoding),encoding), u)
# Roundtrip safety for non-BMP (just a few chars)
u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
@@ -744,6 +740,18 @@ class UnicodeTest(
y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
self.assertEqual(x, y)
+ def test_unicode_repr(self):
+ class s1:
+ def __repr__(self):
+ return '\\n'
+
+ class s2:
+ def __repr__(self):
+ return u'\\n'
+
+ self.assertEqual(repr(s1()), '\\n')
+ self.assertEqual(repr(s2()), '\\n')
+
def test_main():
test_support.run_unittest(UnicodeTest)
diff --git a/Misc/NEWS b/Misc/NEWS
index fe3884d..aa41dd7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -17,6 +17,10 @@ Core and builtins
- A threading issue that caused random segfaults on some platforms from
the testsuite was fixed in test_capi.
+- Reverted fix for Bug #1379994: Builtin unicode_escape and
+ raw_unicode_escape codec now encodes backslash correctly.
+ This caused another issue for unicode repr strings being double-escaped
+ (SF Bug #1459029). Correct fix will be in 2.5, but is too risky for 2.4.3.
What's New in Python 2.4.3c1?
=============================
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 690f016..fc503c7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1982,9 +1982,9 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
while (size-- > 0) {
Py_UNICODE ch = *s++;
- /* Escape quotes and backslashes */
- if ((quotes &&
- ch == (Py_UNICODE) PyString_AS_STRING(repr)[1]) || ch == '\\') {
+ /* Escape quotes */
+ if (quotes &&
+ (ch == (Py_UNICODE) PyString_AS_STRING(repr)[1] || ch == '\\')) {
*p++ = '\\';
*p++ = (char) ch;
continue;