From 80d1dd5f3b83c96c5c8e4a51417f1c748318de94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= <mal@egenix.com>
Date: Wed, 25 Jul 2001 16:05:59 +0000
Subject: Fix for bug #444493: u'\U00010001' segfaults with current CVS on wide
 builds.

---
 Lib/test/test_unicode.py |  3 ++-
 Objects/unicodeobject.c  | 27 +++++++++++++++++++++------
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index eb74854..dde16ef 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -455,7 +455,8 @@ for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
 u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
 for encoding in ('utf-8',
                  'utf-16', 'utf-16-le', 'utf-16-be',
-                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
+                 #'raw_unicode_escape',
+                 'unicode_escape', 'unicode_internal'):
     verify(unicode(u.encode(encoding),encoding) == u)
 
 u = u''.join(map(unichr, range(256)))
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 172c61c..08ba065 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1415,7 +1415,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
 {
     PyObject *repr;
     char *p;
-    char *q;
 
     static const char *hexdigit = "0123456789abcdef";
 
@@ -1423,7 +1422,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
     if (repr == NULL)
         return NULL;
 
-    p = q = PyString_AS_STRING(repr);
+    p = PyString_AS_STRING(repr);
 
     if (quotes) {
         *p++ = 'u';
@@ -1432,14 +1431,26 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
     }
     while (size-- > 0) {
         Py_UNICODE ch = *s++;
+
         /* Escape quotes */
-        if (quotes && (ch == (Py_UNICODE) q[1] || ch == '\\')) {
+        if (quotes && 
+	    (ch == (Py_UNICODE) PyString_AS_STRING(repr)[1] || ch == '\\')) {
             *p++ = '\\';
             *p++ = (char) ch;
         } 
+
 #ifdef Py_UNICODE_WIDE
         /* Map 21-bit characters to '\U00xxxxxx' */
         else if (ch >= 0x10000) {
+	    int offset = p - PyString_AS_STRING(repr);
+	    
+	    /* Resize the string if necessary */
+	    if (offset + 12 > PyString_GET_SIZE(repr)) {
+		if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100))
+		    goto onError;
+		p = PyString_AS_STRING(repr) + offset;
+	    }
+
             *p++ = '\\';
             *p++ = 'U';
             *p++ = hexdigit[(ch >> 28) & 0x0000000F];
@@ -1449,7 +1460,8 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
             *p++ = hexdigit[(ch >> 12) & 0x0000000F];
             *p++ = hexdigit[(ch >> 8) & 0x0000000F];
             *p++ = hexdigit[(ch >> 4) & 0x0000000F];
-            *p++ = hexdigit[ch & 15];
+            *p++ = hexdigit[ch & 0x0000000F];
+	    continue;
         }
 #endif
 	/* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
@@ -1487,6 +1499,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
             *p++ = hexdigit[(ch >> 4) & 0x000F];
             *p++ = hexdigit[ch & 0x000F];
         }
+
         /* Map special whitespace to '\t', \n', '\r' */
         else if (ch == '\t') {
             *p++ = '\\';
@@ -1500,6 +1513,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
             *p++ = '\\';
             *p++ = 'r';
         }
+
         /* Map non-printable US ASCII to '\xhh' */
         else if (ch < ' ' || ch >= 128) {
             *p++ = '\\';
@@ -1507,15 +1521,16 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
             *p++ = hexdigit[(ch >> 4) & 0x000F];
             *p++ = hexdigit[ch & 0x000F];
         } 
+
         /* Copy everything else as-is */
         else
             *p++ = (char) ch;
     }
     if (quotes)
-        *p++ = q[1];
+        *p++ = PyString_AS_STRING(repr)[1];
 
     *p = '\0';
-    if (_PyString_Resize(&repr, p - q))
+    if (_PyString_Resize(&repr, p - PyString_AS_STRING(repr)))
 	goto onError;
 
     return repr;
-- 
cgit v0.12