Martin's fix for

[ 529104 ] broken error handling in unicode-escape. I presume this will need to be fixed on the trunk, too. Later.
author: Michael W. Hudson <mwh@python.net> 2002-03-18 12:47:52 (GMT)
committer: Michael W. Hudson <mwh@python.net> 2002-03-18 12:47:52 (GMT)
commit: c2a5e3c0f912fa282cfa80935ec53e5f0c23e849 (patch)
tree: 3b31e24f392592db2dfcf329e3d027a230bf8e3a
parent: 36de099f047a8a4c069bd06b1f5edd38d6e626ad (diff)
download: cpython-c2a5e3c0f912fa282cfa80935ec53e5f0c23e849.zip
cpython-c2a5e3c0f912fa282cfa80935ec53e5f0c23e849.tar.gz
cpython-c2a5e3c0f912fa282cfa80935ec53e5f0c23e849.tar.bz2
1 files changed, 24 insertions, 14 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 95ba361..5a8777b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1507,8 +1507,7 @@ PyObject *PyUnicode_AsUTF16String(PyObject *unicode)
 /* --- Unicode Escape Codec ----------------------------------------------- */
 
 static
-int unicodeescape_decoding_error(const char **source,
-                                 Py_UNICODE *x,
+int unicodeescape_decoding_error(Py_UNICODE **x,
                                  const char *errors,
                                  const char *details) 
 {
@@ -1523,7 +1522,8 @@ int unicodeescape_decoding_error(const char **source,
         return 0;
     }
     else if (strcmp(errors,"replace") == 0) {
-        *x = Py_UNICODE_REPLACEMENT_CHARACTER;
+        **x = Py_UNICODE_REPLACEMENT_CHARACTER;
+	(*x)++;
         return 0;
     }
     else {
@@ -1621,9 +1621,9 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
             for (i = 0; i < digits; i++) {
                 c = (unsigned char) s[i];
                 if (!isxdigit(c)) {
-                    if (unicodeescape_decoding_error(&s, &x, errors, message))
+                    if (unicodeescape_decoding_error(&p, errors, message))
                         goto onError;
-                    chr = x;
+                    chr = 0xffffffff;
                     i++;
                     break;
                 }
@@ -1636,6 +1636,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
                     chr += 10 + c - 'A';
             }
             s += i;
+	    if (chr == 0xffffffff)
+		    /* _decoding_error will have already written into the
+		       target buffer. */
+		    break;
         store:
             /* when we get here, chr is a 32-bit unicode character */
             if (chr <= 0xffff)
@@ -1653,11 +1657,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
 #endif
             } else {
                 if (unicodeescape_decoding_error(
-                    &s, &x, errors,
+                    &p, errors,
                     "illegal Unicode character")
                     )
                     goto onError;
-                *p++ = x; /* store replacement character */
             }
             break;
 
@@ -1692,14 +1695,19 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
                         goto store;
                 }
             }
-            if (unicodeescape_decoding_error(&s, &x, errors, message))
+            if (unicodeescape_decoding_error(&p, errors, message))
                 goto onError;
-            *p++ = x;
             break;
 
         default:
-            *p++ = '\\';
-            *p++ = (unsigned char)s[-1];
+	    if (s > end) {
+		if (unicodeescape_decoding_error(&p, errors, "\\ at end of string"))
+		    goto onError;
+	    }
+	    else {
+		*p++ = '\\';
+		*p++ = (unsigned char)s[-1];
+	    }
             break;
         }
     }
@@ -1902,7 +1910,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
     end = s + size;
     while (s < end) {
 	unsigned char c;
-	Py_UNICODE x;
+	Py_UCS4 x;
 	int i;
 
 	/* Non-escape characters are interpreted as Unicode ordinals */
@@ -1931,9 +1939,10 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
 	for (x = 0, i = 0; i < 4; i++) {
 	    c = (unsigned char)s[i];
 	    if (!isxdigit(c)) {
-		if (unicodeescape_decoding_error(&s, &x, errors,
+		if (unicodeescape_decoding_error(&p, errors,
 						 "truncated \\uXXXX"))
 		    goto onError;
+		x = 0xffffffff;
 		i++;
 		break;
 	    }
@@ -1946,7 +1955,8 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
 		x += 10 + c - 'A';
 	}
 	s += i;
-	*p++ = x;
+	if (x != 0xffffffff)
+		*p++ = x;
     }
     if (_PyUnicode_Resize(&v, (int)(p - buf)))
 	goto onError;
author	Michael W. Hudson <mwh@python.net>	2002-03-18 12:47:52 (GMT)
committer	Michael W. Hudson <mwh@python.net>	2002-03-18 12:47:52 (GMT)
commit	c2a5e3c0f912fa282cfa80935ec53e5f0c23e849 (patch)
tree	3b31e24f392592db2dfcf329e3d027a230bf8e3a
parent	36de099f047a8a4c069bd06b1f5edd38d6e626ad (diff)
download	cpython-c2a5e3c0f912fa282cfa80935ec53e5f0c23e849.zip cpython-c2a5e3c0f912fa282cfa80935ec53e5f0c23e849.tar.gz cpython-c2a5e3c0f912fa282cfa80935ec53e5f0c23e849.tar.bz2