1 files changed, 466 insertions, 393 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7a70a5e..4361908 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -142,16 +142,18 @@ const unsigned char _Py_ascii_whitespace[] = {
     0, 0, 0, 0, 0, 0, 0, 0
 };
 
-static PyObject *unicode_encode_call_errorhandler(const char *errors,
+static PyObject *
+unicode_encode_call_errorhandler(const char *errors,
        PyObject **errorHandler,const char *encoding, const char *reason,
        const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
        Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
 
-static void raise_encode_exception(PyObject **exceptionObject,
-                                   const char *encoding,
-                                   const Py_UNICODE *unicode, Py_ssize_t size,
-                                   Py_ssize_t startpos, Py_ssize_t endpos,
-                                   const char *reason);
+static void
+raise_encode_exception(PyObject **exceptionObject,
+		       const char *encoding,
+		       const Py_UNICODE *unicode, Py_ssize_t size,
+		       Py_ssize_t startpos, Py_ssize_t endpos,
+		       const char *reason);
 
 /* Same for linebreaks */
 static unsigned char ascii_linebreak[] = {
@@ -223,7 +225,8 @@ static BLOOM_MASK bloom_linebreak;
     ((ch) < 128U ? ascii_linebreak[(ch)] :                              \
      (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch)))
 
-Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
+Py_LOCAL_INLINE(BLOOM_MASK)
+make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
 {
     /* calculate simple bloom-style bitmask for a given unicode string */
 
@@ -237,7 +240,8 @@ Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
     return mask;
 }
 
-Py_LOCAL_INLINE(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen)
+Py_LOCAL_INLINE(int)
+unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen)
 {
     Py_ssize_t i;
 
@@ -253,9 +257,9 @@ Py_LOCAL_INLINE(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t
 
 /* --- Unicode Object ----------------------------------------------------- */
 
-static
-int unicode_resize(register PyUnicodeObject *unicode,
-                   Py_ssize_t length)
+static int
+unicode_resize(register PyUnicodeObject *unicode,
+	       Py_ssize_t length)
 {
     void *oldstr;
 
@@ -311,8 +315,8 @@ int unicode_resize(register PyUnicodeObject *unicode,
 
 */
 
-static
-PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
+static PyUnicodeObject *
+_PyUnicode_New(Py_ssize_t length)
 {
     register PyUnicodeObject *unicode;
 
@@ -383,8 +387,8 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
     return NULL;
 }
 
-static
-void unicode_dealloc(register PyUnicodeObject *unicode)
+static void
+unicode_dealloc(register PyUnicodeObject *unicode)
 {
     switch (PyUnicode_CHECK_INTERNED(unicode)) {
     case SSTATE_NOT_INTERNED:
@@ -428,8 +432,8 @@ void unicode_dealloc(register PyUnicodeObject *unicode)
     }
 }
 
-static
-int _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length)
+static int
+_PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length)
 {
     register PyUnicodeObject *v;
 
@@ -464,13 +468,14 @@ int _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length)
     return unicode_resize(v, length);
 }
 
-int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length)
+int
+PyUnicode_Resize(PyObject **unicode, Py_ssize_t length)
 {
     return _PyUnicode_Resize((PyUnicodeObject **)unicode, length);
 }
 
-PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
-                                Py_ssize_t size)
+PyObject *
+PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
 {
     PyUnicodeObject *unicode;
 
@@ -511,7 +516,8 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
     return (PyObject *)unicode;
 }
 
-PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
+PyObject *
+PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
 {
     PyUnicodeObject *unicode;
 
@@ -558,7 +564,8 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
     return (PyObject *)unicode;
 }
 
-PyObject *PyUnicode_FromString(const char *u)
+PyObject *
+PyUnicode_FromString(const char *u)
 {
     size_t size = strlen(u);
     if (size > PY_SSIZE_T_MAX) {
@@ -580,8 +587,8 @@ PyObject *PyUnicode_FromString(const char *u)
 /* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need
    to convert from UTF32 to UTF16. */
 
-PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
-                                 Py_ssize_t size)
+PyObject *
+PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
 {
     PyUnicodeObject *unicode;
     register Py_ssize_t i;
@@ -631,8 +638,8 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
 
 #else
 
-PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
-                                 Py_ssize_t size)
+PyObject *
+PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
 {
     PyUnicodeObject *unicode;
 
@@ -707,6 +714,71 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
     *fmt = '\0';
 }
 
+/* helper for PyUnicode_FromFormatV() */
+
+static const char*
+parse_format_flags(const char *f,
+                   int *p_width, int *p_precision,
+                   int *p_longflag, int *p_longlongflag, int *p_size_tflag)
+{
+    int width, precision, longflag, longlongflag, size_tflag;
+
+    /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
+    f++;
+    width = 0;
+    while (Py_ISDIGIT((unsigned)*f))
+        width = (width*10) + *f++ - '0';
+    precision = 0;
+    if (*f == '.') {
+        f++;
+        while (Py_ISDIGIT((unsigned)*f))
+            precision = (precision*10) + *f++ - '0';
+        if (*f == '%') {
+            /* "%.3%s" => f points to "3" */
+            f--;
+        }
+    }
+    if (*f == '\0') {
+        /* bogus format "%.1" => go backward, f points to "1" */
+        f--;
+    }
+    if (p_width != NULL)
+        *p_width = width;
+    if (p_precision != NULL)
+        *p_precision = precision;
+
+    /* Handle %ld, %lu, %lld and %llu. */
+    longflag = 0;
+    longlongflag = 0;
+    size_tflag = 0;
+
+    if (*f == 'l') {
+        if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') {
+            longflag = 1;
+            ++f;
+        }
+#ifdef HAVE_LONG_LONG
+        else if (f[1] == 'l' &&
+                 (f[2] == 'd' || f[2] == 'u' || f[2] == 'i')) {
+            longlongflag = 1;
+            f += 2;
+        }
+#endif
+    }
+    /* handle the size_t flag. */
+    else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u' || f[1] == 'i')) {
+        size_tflag = 1;
+        ++f;
+    }
+    if (p_longflag != NULL)
+        *p_longflag = longflag;
+    if (p_longlongflag != NULL)
+        *p_longlongflag = longlongflag;
+    if (p_size_tflag != NULL)
+        *p_size_tflag = size_tflag;
+    return f;
+}
+
 #define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
 
 /* size of fixed-size buffer for formatting single arguments */
@@ -750,15 +822,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
      * result in an array) */
     for (f = format; *f; f++) {
          if (*f == '%') {
-             if (*(f+1)=='%')
-                 continue;
-             if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V')
-                 ++callcount;
-             while (Py_ISDIGIT((unsigned)*f))
-                 width = (width*10) + *f++ - '0';
-             while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
-                 ;
-             if (*f == 's')
+             /* skip width or width.precision (eg. "1.2" of "%1.2f") */
+             f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL);
+             if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
                  ++callcount;
          }
          else if (128 <= (unsigned char)*f) {
@@ -783,33 +849,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
     for (f = format; *f; f++) {
         if (*f == '%') {
 #ifdef HAVE_LONG_LONG
-            int longlongflag = 0;
+            int longlongflag;
 #endif
-            const char* p = f;
-            width = 0;
-            while (Py_ISDIGIT((unsigned)*f))
-                width = (width*10) + *f++ - '0';
-            while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
-                ;
-
-            /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
-             * they don't affect the amount of space we reserve.
-             */
-            if (*f == 'l') {
-                if (f[1] == 'd' || f[1] == 'u') {
-                    ++f;
-                }
-#ifdef HAVE_LONG_LONG
-                else if (f[1] == 'l' &&
-                         (f[2] == 'd' || f[2] == 'u')) {
-                    longlongflag = 1;
-                    f += 2;
-                }
-#endif
-            }
-            else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
-                ++f;
-            }
+            const char* p;
+
+            p = f;
+            f = parse_format_flags(f, &width, NULL,
+                                   NULL, &longlongflag, NULL);
 
             switch (*f) {
             case 'c':
@@ -974,40 +1020,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
 
     for (f = format; *f; f++) {
         if (*f == '%') {
-            const char* p = f++;
-            int longflag = 0;
-            int longlongflag = 0;
-            int size_tflag = 0;
-            zeropad = (*f == '0');
-            /* parse the width.precision part */
-            width = 0;
-            while (Py_ISDIGIT((unsigned)*f))
-                width = (width*10) + *f++ - '0';
-            precision = 0;
-            if (*f == '.') {
-                f++;
-                while (Py_ISDIGIT((unsigned)*f))
-                    precision = (precision*10) + *f++ - '0';
-            }
-            /* Handle %ld, %lu, %lld and %llu. */
-            if (*f == 'l') {
-                if (f[1] == 'd' || f[1] == 'u') {
-                    longflag = 1;
-                    ++f;
-                }
-#ifdef HAVE_LONG_LONG
-                else if (f[1] == 'l' &&
-                         (f[2] == 'd' || f[2] == 'u')) {
-                    longlongflag = 1;
-                    f += 2;
-                }
-#endif
-            }
-            /* handle the size_t flag. */
-            if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
-                size_tflag = 1;
-                ++f;
-            }
+            const char* p;
+            int longflag;
+            int longlongflag;
+            int size_tflag;
+
+            p = f;
+            zeropad = (f[1] == '0');
+            f = parse_format_flags(f, &width, &precision,
+                                   &longflag, &longlongflag, &size_tflag);
 
             switch (*f) {
             case 'c':
@@ -1023,9 +1044,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                 *s++ = ordinal;
                 break;
             }
+            case 'i':
             case 'd':
                 makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
-                        width, precision, 'd');
+                        width, precision, *f);
                 if (longflag)
                     sprintf(realbuffer, fmt, va_arg(vargs, long));
 #ifdef HAVE_LONG_LONG
@@ -1054,11 +1076,6 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                     sprintf(realbuffer, fmt, va_arg(vargs, unsigned int));
                 appendstring(realbuffer);
                 break;
-            case 'i':
-                makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'i');
-                sprintf(realbuffer, fmt, va_arg(vargs, int));
-                appendstring(realbuffer);
-                break;
             case 'x':
                 makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
                 sprintf(realbuffer, fmt, va_arg(vargs, int));
@@ -1336,7 +1353,8 @@ PyUnicode_AsWideCharString(PyObject *unicode,
 
 #endif
 
-PyObject *PyUnicode_FromOrdinal(int ordinal)
+PyObject *
+PyUnicode_FromOrdinal(int ordinal)
 {
     Py_UNICODE s[2];
 
@@ -1359,7 +1377,8 @@ PyObject *PyUnicode_FromOrdinal(int ordinal)
     return PyUnicode_FromUnicode(s, 1);
 }
 
-PyObject *PyUnicode_FromObject(register PyObject *obj)
+PyObject *
+PyUnicode_FromObject(register PyObject *obj)
 {
     /* XXX Perhaps we should make this API an alias of
        PyObject_Str() instead ?! */
@@ -1379,9 +1398,10 @@ PyObject *PyUnicode_FromObject(register PyObject *obj)
     return NULL;
 }
 
-PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
-                                      const char *encoding,
-                                      const char *errors)
+PyObject *
+PyUnicode_FromEncodedObject(register PyObject *obj,
+			    const char *encoding,
+			    const char *errors)
 {
     Py_buffer buffer;
     PyObject *v;
@@ -1464,23 +1484,26 @@ normalize_encoding(const char *encoding,
     return 1;
 }
 
-PyObject *PyUnicode_Decode(const char *s,
-                           Py_ssize_t size,
-                           const char *encoding,
-                           const char *errors)
+PyObject *
+PyUnicode_Decode(const char *s,
+		 Py_ssize_t size,
+		 const char *encoding,
+		 const char *errors)
 {
     PyObject *buffer = NULL, *unicode;
     Py_buffer info;
     char lower[11];  /* Enough for any encoding shortcut */
 
     if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
+        return PyUnicode_DecodeUTF8(s, size, errors);
 
     /* Shortcuts for common default encodings */
     if (normalize_encoding(encoding, lower, sizeof(lower))) {
-        if (strcmp(lower, "utf-8") == 0)
+        if ((strcmp(lower, "utf-8") == 0) ||
+            (strcmp(lower, "utf8") == 0))
             return PyUnicode_DecodeUTF8(s, size, errors);
         else if ((strcmp(lower, "latin-1") == 0) ||
+                 (strcmp(lower, "latin1") == 0) ||
                  (strcmp(lower, "iso-8859-1") == 0))
             return PyUnicode_DecodeLatin1(s, size, errors);
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
@@ -1520,9 +1543,10 @@ PyObject *PyUnicode_Decode(const char *s,
     return NULL;
 }
 
-PyObject *PyUnicode_AsDecodedObject(PyObject *unicode,
-                                    const char *encoding,
-                                    const char *errors)
+PyObject *
+PyUnicode_AsDecodedObject(PyObject *unicode,
+			  const char *encoding,
+			  const char *errors)
 {
     PyObject *v;
 
@@ -1544,9 +1568,10 @@ PyObject *PyUnicode_AsDecodedObject(PyObject *unicode,
     return NULL;
 }
 
-PyObject *PyUnicode_AsDecodedUnicode(PyObject *unicode,
-                                     const char *encoding,
-                                     const char *errors)
+PyObject *
+PyUnicode_AsDecodedUnicode(PyObject *unicode,
+			   const char *encoding,
+			   const char *errors)
 {
     PyObject *v;
 
@@ -1575,10 +1600,11 @@ PyObject *PyUnicode_AsDecodedUnicode(PyObject *unicode,
     return NULL;
 }
 
-PyObject *PyUnicode_Encode(const Py_UNICODE *s,
-                           Py_ssize_t size,
-                           const char *encoding,
-                           const char *errors)
+PyObject *
+PyUnicode_Encode(const Py_UNICODE *s,
+		 Py_ssize_t size,
+		 const char *encoding,
+		 const char *errors)
 {
     PyObject *v, *unicode;
 
@@ -1590,9 +1616,10 @@ PyObject *PyUnicode_Encode(const Py_UNICODE *s,
     return v;
 }
 
-PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
-                                    const char *encoding,
-                                    const char *errors)
+PyObject *
+PyUnicode_AsEncodedObject(PyObject *unicode,
+			  const char *encoding,
+			  const char *errors)
 {
     PyObject *v;
 
@@ -1679,9 +1706,10 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
 #endif
 }
 
-PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
-                                    const char *encoding,
-                                    const char *errors)
+PyObject *
+PyUnicode_AsEncodedString(PyObject *unicode,
+			  const char *encoding,
+			  const char *errors)
 {
     PyObject *v;
     char lower[11];  /* Enough for any encoding shortcut */
@@ -1691,16 +1719,29 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
         return NULL;
     }
 
-    if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
-
-    /* Shortcuts for common default encodings */
-    if (normalize_encoding(encoding, lower, sizeof(lower))) {
-        if (strcmp(lower, "utf-8") == 0)
+    if (encoding == NULL) {
+        if (errors == NULL || strcmp(errors, "strict") == 0)
+            return PyUnicode_AsUTF8String(unicode);
+        else
             return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                         PyUnicode_GET_SIZE(unicode),
                                         errors);
+    }
+
+    /* Shortcuts for common default encodings */
+    if (normalize_encoding(encoding, lower, sizeof(lower))) {
+        if ((strcmp(lower, "utf-8") == 0) ||
+            (strcmp(lower, "utf8") == 0))
+        {
+            if (errors == NULL || strcmp(errors, "strict") == 0)
+                return PyUnicode_AsUTF8String(unicode);
+            else
+                return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                            PyUnicode_GET_SIZE(unicode),
+                                            errors);
+        }
         else if ((strcmp(lower, "latin-1") == 0) ||
+                 (strcmp(lower, "latin1") == 0) ||
                  (strcmp(lower, "iso-8859-1") == 0))
             return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
                                           PyUnicode_GET_SIZE(unicode),
@@ -1716,21 +1757,6 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
                                          PyUnicode_GET_SIZE(unicode),
                                          errors);
     }
-    /* During bootstrap, we may need to find the encodings
-       package, to load the file system encoding, and require the
-       file system encoding in order to load the encodings
-       package.
-
-       Break out of this dependency by assuming that the path to
-       the encodings module is ASCII-only.  XXX could try wcstombs
-       instead, if the file system encoding is the locale's
-       encoding. */
-    if (Py_FileSystemDefaultEncoding &&
-             strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 &&
-             !PyThreadState_GET()->interp->codecs_initialized)
-        return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
-                                     PyUnicode_GET_SIZE(unicode),
-                                     errors);
 
     /* Encode via the codec registry */
     v = PyCodec_Encode(unicode, encoding, errors);
@@ -1766,9 +1792,10 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
     return NULL;
 }
 
-PyObject *PyUnicode_AsEncodedUnicode(PyObject *unicode,
-                                     const char *encoding,
-                                     const char *errors)
+PyObject *
+PyUnicode_AsEncodedUnicode(PyObject *unicode,
+			   const char *encoding,
+			   const char *errors)
 {
     PyObject *v;
 
@@ -1797,14 +1824,12 @@ PyObject *PyUnicode_AsEncodedUnicode(PyObject *unicode,
     return NULL;
 }
 
-PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
-                                            const char *errors)
+PyObject *
+_PyUnicode_AsDefaultEncodedString(PyObject *unicode)
 {
     PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
     if (v)
         return v;
-    if (errors != NULL)
-        Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString");
     v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                              PyUnicode_GET_SIZE(unicode),
                              NULL);
@@ -1955,7 +1980,7 @@ _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
         PyErr_BadArgument();
         return NULL;
     }
-    bytes = _PyUnicode_AsDefaultEncodedString(unicode, NULL);
+    bytes = _PyUnicode_AsDefaultEncodedString(unicode);
     if (bytes == NULL)
         return NULL;
     if (psize != NULL)
@@ -1969,7 +1994,8 @@ _PyUnicode_AsString(PyObject *unicode)
     return _PyUnicode_AsStringAndSize(unicode, NULL);
 }
 
-Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode)
+Py_UNICODE *
+PyUnicode_AsUnicode(PyObject *unicode)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
@@ -1981,7 +2007,8 @@ Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode)
     return NULL;
 }
 
-Py_ssize_t PyUnicode_GetSize(PyObject *unicode)
+Py_ssize_t
+PyUnicode_GetSize(PyObject *unicode)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
@@ -1993,7 +2020,8 @@ Py_ssize_t PyUnicode_GetSize(PyObject *unicode)
     return -1;
 }
 
-const char *PyUnicode_GetDefaultEncoding(void)
+const char *
+PyUnicode_GetDefaultEncoding(void)
 {
     return "utf-8";
 }
@@ -2032,12 +2060,12 @@ onError:
    return 0 on success, -1 on error
 */
 
-static
-int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
-                                     const char *encoding, const char *reason,
-                                     const char **input, const char **inend, Py_ssize_t *startinpos,
-                                     Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
-                                     PyUnicodeObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr)
+static int
+unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
+				 const char *encoding, const char *reason,
+				 const char **input, const char **inend, Py_ssize_t *startinpos,
+				 Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
+				 PyUnicodeObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr)
 {
     static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
 
@@ -2207,9 +2235,10 @@ char utf7_category[128] = {
       (directWS && (utf7_category[(c)] == 2)) ||        \
       (directO && (utf7_category[(c)] == 1))))
 
-PyObject *PyUnicode_DecodeUTF7(const char *s,
-                               Py_ssize_t size,
-                               const char *errors)
+PyObject *
+PyUnicode_DecodeUTF7(const char *s,
+		     Py_ssize_t size,
+		     const char *errors)
 {
     return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
 }
@@ -2221,10 +2250,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
  * all the shift state (seen bits, number of bits seen, high
  * surrogate). */
 
-PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
-                                       Py_ssize_t size,
-                                       const char *errors,
-                                       Py_ssize_t *consumed)
+PyObject *
+PyUnicode_DecodeUTF7Stateful(const char *s,
+			     Py_ssize_t size,
+			     const char *errors,
+			     Py_ssize_t *consumed)
 {
     const char *starts = s;
     Py_ssize_t startinpos;
@@ -2411,11 +2441,12 @@ utf7Error:
 }
 
 
-PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
-                               Py_ssize_t size,
-                               int base64SetO,
-                               int base64WhiteSpace,
-                               const char *errors)
+PyObject *
+PyUnicode_EncodeUTF7(const Py_UNICODE *s,
+		     Py_ssize_t size,
+		     int base64SetO,
+		     int base64WhiteSpace,
+		     const char *errors)
 {
     PyObject *v;
     /* It might be possible to tighten this worst case */
@@ -2536,9 +2567,10 @@ char utf8_code_length[256] = {
     4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /* F0-F4 + F5-FF */
 };
 
-PyObject *PyUnicode_DecodeUTF8(const char *s,
-                               Py_ssize_t size,
-                               const char *errors)
+PyObject *
+PyUnicode_DecodeUTF8(const char *s,
+		     Py_ssize_t size,
+		     const char *errors)
 {
     return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
 }
@@ -2556,10 +2588,11 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
 # error C 'long' size should be either 4 or 8!
 #endif
 
-PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
-                                       Py_ssize_t size,
-                                       const char *errors,
-                                       Py_ssize_t *consumed)
+PyObject *
+PyUnicode_DecodeUTF8Stateful(const char *s,
+			     Py_ssize_t size,
+			     const char *errors,
+			     Py_ssize_t *consumed)
 {
     const char *starts = s;
     int n;
@@ -3065,15 +3098,19 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
 #undef MAX_SHORT_UNICHARS
 }
 
-PyObject *PyUnicode_AsUTF8String(PyObject *unicode)
+PyObject *
+PyUnicode_AsUTF8String(PyObject *unicode)
 {
+    PyObject *utf8;
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
         return NULL;
     }
-    return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                PyUnicode_GET_SIZE(unicode),
-                                NULL);
+    utf8 = _PyUnicode_AsDefaultEncodedString(unicode);
+    if (utf8 == NULL)
+        return NULL;
+    Py_INCREF(utf8);
+    return utf8;
 }
 
 /* --- UTF-32 Codec ------------------------------------------------------- */
@@ -3339,7 +3376,8 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
 #undef STORECHAR
 }
 
-PyObject *PyUnicode_AsUTF32String(PyObject *unicode)
+PyObject *
+PyUnicode_AsUTF32String(PyObject *unicode)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
@@ -3729,7 +3767,8 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
 #undef STORECHAR
 }
 
-PyObject *PyUnicode_AsUTF16String(PyObject *unicode)
+PyObject *
+PyUnicode_AsUTF16String(PyObject *unicode)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
@@ -3745,9 +3784,10 @@ PyObject *PyUnicode_AsUTF16String(PyObject *unicode)
 
 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
 
-PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
-                                        Py_ssize_t size,
-                                        const char *errors)
+PyObject *
+PyUnicode_DecodeUnicodeEscape(const char *s,
+			      Py_ssize_t size,
+			      const char *errors)
 {
     const char *starts = s;
     Py_ssize_t startinpos;
@@ -4003,8 +4043,9 @@ Py_LOCAL_INLINE(const Py_UNICODE *) findchar(const Py_UNICODE *s,
 
 static const char *hexdigits = "0123456789abcdef";
 
-PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
-                                        Py_ssize_t size)
+PyObject *
+PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
+			      Py_ssize_t size)
 {
     PyObject *repr;
     char *p;
@@ -4144,7 +4185,8 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
     return repr;
 }
 
-PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
+PyObject *
+PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
 {
     PyObject *s;
     if (!PyUnicode_Check(unicode)) {
@@ -4158,9 +4200,10 @@ PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
 
 /* --- Raw Unicode Escape Codec ------------------------------------------- */
 
-PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
-                                           Py_ssize_t size,
-                                           const char *errors)
+PyObject *
+PyUnicode_DecodeRawUnicodeEscape(const char *s,
+				 Py_ssize_t size,
+				 const char *errors)
 {
     const char *starts = s;
     Py_ssize_t startinpos;
@@ -4275,8 +4318,9 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
     return NULL;
 }
 
-PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
-                                           Py_ssize_t size)
+PyObject *
+PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
+				 Py_ssize_t size)
 {
     PyObject *repr;
     char *p;
@@ -4363,7 +4407,8 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
     return repr;
 }
 
-PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
+PyObject *
+PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
 {
     PyObject *s;
     if (!PyUnicode_Check(unicode)) {
@@ -4378,9 +4423,10 @@ PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
 
 /* --- Unicode Internal Codec ------------------------------------------- */
 
-PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
-                                           Py_ssize_t size,
-                                           const char *errors)
+PyObject *
+_PyUnicode_DecodeUnicodeInternal(const char *s,
+				 Py_ssize_t size,
+				 const char *errors)
 {
     const char *starts = s;
     Py_ssize_t startinpos;
@@ -4456,9 +4502,10 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
 
 /* --- Latin-1 Codec ------------------------------------------------------ */
 
-PyObject *PyUnicode_DecodeLatin1(const char *s,
-                                 Py_ssize_t size,
-                                 const char *errors)
+PyObject *
+PyUnicode_DecodeLatin1(const char *s,
+		       Py_ssize_t size,
+		       const char *errors)
 {
     PyUnicodeObject *v;
     Py_UNICODE *p;
@@ -4498,11 +4545,12 @@ PyObject *PyUnicode_DecodeLatin1(const char *s,
 }
 
 /* create or adjust a UnicodeEncodeError */
-static void make_encode_exception(PyObject **exceptionObject,
-                                  const char *encoding,
-                                  const Py_UNICODE *unicode, Py_ssize_t size,
-                                  Py_ssize_t startpos, Py_ssize_t endpos,
-                                  const char *reason)
+static void
+make_encode_exception(PyObject **exceptionObject,
+		      const char *encoding,
+		      const Py_UNICODE *unicode, Py_ssize_t size,
+		      Py_ssize_t startpos, Py_ssize_t endpos,
+		      const char *reason)
 {
     if (*exceptionObject == NULL) {
         *exceptionObject = PyUnicodeEncodeError_Create(
@@ -4523,11 +4571,12 @@ static void make_encode_exception(PyObject **exceptionObject,
 }
 
 /* raises a UnicodeEncodeError */
-static void raise_encode_exception(PyObject **exceptionObject,
-                                   const char *encoding,
-                                   const Py_UNICODE *unicode, Py_ssize_t size,
-                                   Py_ssize_t startpos, Py_ssize_t endpos,
-                                   const char *reason)
+static void
+raise_encode_exception(PyObject **exceptionObject,
+		       const char *encoding,
+		       const Py_UNICODE *unicode, Py_ssize_t size,
+		       Py_ssize_t startpos, Py_ssize_t endpos,
+		       const char *reason)
 {
     make_encode_exception(exceptionObject,
                           encoding, unicode, size, startpos, endpos, reason);
@@ -4539,12 +4588,13 @@ static void raise_encode_exception(PyObject **exceptionObject,
    build arguments, call the callback and check the arguments,
    put the result into newpos and return the replacement string, which
    has to be freed by the caller */
-static PyObject *unicode_encode_call_errorhandler(const char *errors,
-                                                  PyObject **errorHandler,
-                                                  const char *encoding, const char *reason,
-                                                  const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
-                                                  Py_ssize_t startpos, Py_ssize_t endpos,
-                                                  Py_ssize_t *newpos)
+static PyObject *
+unicode_encode_call_errorhandler(const char *errors,
+				 PyObject **errorHandler,
+				 const char *encoding, const char *reason,
+				 const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
+				 Py_ssize_t startpos, Py_ssize_t endpos,
+				 Py_ssize_t *newpos)
 {
     static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
 
@@ -4593,10 +4643,11 @@ static PyObject *unicode_encode_call_errorhandler(const char *errors,
     return resunicode;
 }
 
-static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
-                                     Py_ssize_t size,
-                                     const char *errors,
-                                     int limit)
+static PyObject *
+unicode_encode_ucs1(const Py_UNICODE *p,
+		    Py_ssize_t size,
+		    const char *errors,
+		    int limit)
 {
     /* output object */
     PyObject *res;
@@ -4789,14 +4840,16 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
     return NULL;
 }
 
-PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
-                                 Py_ssize_t size,
-                                 const char *errors)
+PyObject *
+PyUnicode_EncodeLatin1(const Py_UNICODE *p,
+		       Py_ssize_t size,
+		       const char *errors)
 {
     return unicode_encode_ucs1(p, size, errors, 256);
 }
 
-PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
+PyObject *
+PyUnicode_AsLatin1String(PyObject *unicode)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
@@ -4809,9 +4862,10 @@ PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
 
 /* --- 7-bit ASCII Codec -------------------------------------------------- */
 
-PyObject *PyUnicode_DecodeASCII(const char *s,
-                                Py_ssize_t size,
-                                const char *errors)
+PyObject *
+PyUnicode_DecodeASCII(const char *s,
+                      Py_ssize_t size,
+                      const char *errors)
 {
     const char *starts = s;
     PyUnicodeObject *v;
@@ -4868,14 +4922,16 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
     return NULL;
 }
 
-PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
-                                Py_ssize_t size,
-                                const char *errors)
+PyObject *
+PyUnicode_EncodeASCII(const Py_UNICODE *p,
+                      Py_ssize_t size,
+                      const char *errors)
 {
     return unicode_encode_ucs1(p, size, errors, 128);
 }
 
-PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
+PyObject *
+PyUnicode_AsASCIIString(PyObject *unicode)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
@@ -4899,7 +4955,8 @@ PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
    b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte
    encodings, see IsDBCSLeadByteEx documentation. */
 
-static int is_dbcs_lead_byte(const char *s, int offset)
+static int
+is_dbcs_lead_byte(const char *s, int offset)
 {
     const char *curr = s + offset;
 
@@ -4914,11 +4971,12 @@ static int is_dbcs_lead_byte(const char *s, int offset)
  * Decode MBCS string into unicode object. If 'final' is set, converts
  * trailing lead-byte too. Returns consumed size if succeed, -1 otherwise.
  */
-static int decode_mbcs(PyUnicodeObject **v,
-                       const char *s, /* MBCS string */
-                       int size, /* sizeof MBCS string */
-                       int final,
-                       const char *errors)
+static int
+decode_mbcs(PyUnicodeObject **v,
+            const char *s, /* MBCS string */
+            int size, /* sizeof MBCS string */
+            int final,
+            const char *errors)
 {
     Py_UNICODE *p;
     Py_ssize_t n;
@@ -4997,10 +5055,11 @@ mbcs_decode_error:
     return -1;
 }
 
-PyObject *PyUnicode_DecodeMBCSStateful(const char *s,
-                                       Py_ssize_t size,
-                                       const char *errors,
-                                       Py_ssize_t *consumed)
+PyObject *
+PyUnicode_DecodeMBCSStateful(const char *s,
+                             Py_ssize_t size,
+                             const char *errors,
+                             Py_ssize_t *consumed)
 {
     PyUnicodeObject *v = NULL;
     int done;
@@ -5035,9 +5094,10 @@ PyObject *PyUnicode_DecodeMBCSStateful(const char *s,
     return (PyObject *)v;
 }
 
-PyObject *PyUnicode_DecodeMBCS(const char *s,
-                               Py_ssize_t size,
-                               const char *errors)
+PyObject *
+PyUnicode_DecodeMBCS(const char *s,
+                     Py_ssize_t size,
+                     const char *errors)
 {
     return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL);
 }
@@ -5046,10 +5106,11 @@ PyObject *PyUnicode_DecodeMBCS(const char *s,
  * Convert unicode into string object (MBCS).
  * Returns 0 if succeed, -1 otherwise.
  */
-static int encode_mbcs(PyObject **repr,
-                       const Py_UNICODE *p, /* unicode */
-                       int size, /* size of unicode */
-                       const char* errors)
+static int
+encode_mbcs(PyObject **repr,
+            const Py_UNICODE *p, /* unicode */
+            int size, /* size of unicode */
+            const char* errors)
 {
     BOOL usedDefaultChar = FALSE;
     BOOL *pusedDefaultChar;
@@ -5122,9 +5183,10 @@ mbcs_encode_error:
     return -1;
 }
 
-PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,
-                               Py_ssize_t size,
-                               const char *errors)
+PyObject *
+PyUnicode_EncodeMBCS(const Py_UNICODE *p,
+                     Py_ssize_t size,
+                     const char *errors)
 {
     PyObject *repr = NULL;
     int ret;
@@ -5153,7 +5215,8 @@ PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,
     return repr;
 }
 
-PyObject *PyUnicode_AsMBCSString(PyObject *unicode)
+PyObject *
+PyUnicode_AsMBCSString(PyObject *unicode)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
@@ -5170,10 +5233,11 @@ PyObject *PyUnicode_AsMBCSString(PyObject *unicode)
 
 /* --- Character Mapping Codec -------------------------------------------- */
 
-PyObject *PyUnicode_DecodeCharmap(const char *s,
-                                  Py_ssize_t size,
-                                  PyObject *mapping,
-                                  const char *errors)
+PyObject *
+PyUnicode_DecodeCharmap(const char *s,
+                        Py_ssize_t size,
+                        PyObject *mapping,
+                        const char *errors)
 {
     const char *starts = s;
     Py_ssize_t startinpos;
@@ -5333,7 +5397,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
 
 /* Charmap encoding: the lookup table */
 
-struct encoding_map{
+struct encoding_map {
     PyObject_HEAD
     unsigned char level1[32];
     int count2, count3;
@@ -5460,7 +5524,6 @@ PyUnicode_BuildEncodingMap(PyObject* string)
         if (!result)
             return NULL;
         for (i = 0; i < 256; i++) {
-            key = value = NULL;
             key = PyLong_FromLong(decode[i]);
             value = PyLong_FromLong(i);
             if (!key || !value)
@@ -5548,7 +5611,8 @@ encoding_map_lookup(Py_UNICODE c, PyObject *mapping)
 /* Lookup the character ch in the mapping. If the character
    can't be found, Py_None is returned (or NULL, if another
    error occurred). */
-static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
+static PyObject *
+charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
 {
     PyObject *w = PyLong_FromLong((long)c);
     PyObject *x;
@@ -5605,16 +5669,16 @@ charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requireds
 
 typedef enum charmapencode_result {
     enc_SUCCESS, enc_FAILED, enc_EXCEPTION
-}charmapencode_result;
+} charmapencode_result;
 /* lookup the character, put the result in the output string and adjust
    various state variables. Resize the output bytes object if not enough
    space is available. Return a new reference to the object that
    was put in the output buffer, or Py_None, if the mapping was undefined
    (in which case no character was written) or NULL, if a
    reallocation error occurred. The caller must decref the result */
-static
-charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping,
-                                          PyObject **outobj, Py_ssize_t *outpos)
+static charmapencode_result
+charmapencode_output(Py_UNICODE c, PyObject *mapping,
+                     PyObject **outobj, Py_ssize_t *outpos)
 {
     PyObject *rep;
     char *outstart;
@@ -5670,8 +5734,8 @@ charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping,
 
 /* handle an error in PyUnicode_EncodeCharmap
    Return 0 on success, -1 on error */
-static
-int charmap_encoding_error(
+static int
+charmap_encoding_error(
     const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping,
     PyObject **exceptionObject,
     int *known_errorHandler, PyObject **errorHandler, const char *errors,
@@ -5805,10 +5869,11 @@ int charmap_encoding_error(
     return 0;
 }
 
-PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
-                                  Py_ssize_t size,
-                                  PyObject *mapping,
-                                  const char *errors)
+PyObject *
+PyUnicode_EncodeCharmap(const Py_UNICODE *p,
+                        Py_ssize_t size,
+                        PyObject *mapping,
+                        const char *errors)
 {
     /* output object */
     PyObject *res = NULL;
@@ -5869,8 +5934,9 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
     return NULL;
 }
 
-PyObject *PyUnicode_AsCharmapString(PyObject *unicode,
-                                    PyObject *mapping)
+PyObject *
+PyUnicode_AsCharmapString(PyObject *unicode,
+                          PyObject *mapping)
 {
     if (!PyUnicode_Check(unicode) || mapping == NULL) {
         PyErr_BadArgument();
@@ -5883,10 +5949,11 @@ PyObject *PyUnicode_AsCharmapString(PyObject *unicode,
 }
 
 /* create or adjust a UnicodeTranslateError */
-static void make_translate_exception(PyObject **exceptionObject,
-                                     const Py_UNICODE *unicode, Py_ssize_t size,
-                                     Py_ssize_t startpos, Py_ssize_t endpos,
-                                     const char *reason)
+static void
+make_translate_exception(PyObject **exceptionObject,
+                         const Py_UNICODE *unicode, Py_ssize_t size,
+                         Py_ssize_t startpos, Py_ssize_t endpos,
+                         const char *reason)
 {
     if (*exceptionObject == NULL) {
         *exceptionObject = PyUnicodeTranslateError_Create(
@@ -5907,10 +5974,11 @@ static void make_translate_exception(PyObject **exceptionObject,
 }
 
 /* raises a UnicodeTranslateError */
-static void raise_translate_exception(PyObject **exceptionObject,
-                                      const Py_UNICODE *unicode, Py_ssize_t size,
-                                      Py_ssize_t startpos, Py_ssize_t endpos,
-                                      const char *reason)
+static void
+raise_translate_exception(PyObject **exceptionObject,
+                          const Py_UNICODE *unicode, Py_ssize_t size,
+                          Py_ssize_t startpos, Py_ssize_t endpos,
+                          const char *reason)
 {
     make_translate_exception(exceptionObject,
                              unicode, size, startpos, endpos, reason);
@@ -5922,12 +5990,13 @@ static void raise_translate_exception(PyObject **exceptionObject,
    build arguments, call the callback and check the arguments,
    put the result into newpos and return the replacement string, which
    has to be freed by the caller */
-static PyObject *unicode_translate_call_errorhandler(const char *errors,
-                                                     PyObject **errorHandler,
-                                                     const char *reason,
-                                                     const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
-                                                     Py_ssize_t startpos, Py_ssize_t endpos,
-                                                     Py_ssize_t *newpos)
+static PyObject *
+unicode_translate_call_errorhandler(const char *errors,
+                                    PyObject **errorHandler,
+                                    const char *reason,
+                                    const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
+                                    Py_ssize_t startpos, Py_ssize_t endpos,
+                                    Py_ssize_t *newpos)
 {
     static char *argparse = "O!n;translating error handler must return (str, int) tuple";
 
@@ -5977,8 +6046,8 @@ static PyObject *unicode_translate_call_errorhandler(const char *errors,
 /* Lookup the character ch in the mapping and put the result in result,
    which must be decrefed by the caller.
    Return 0 on success, -1 on error */
-static
-int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
+static int
+charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
 {
     PyObject *w = PyLong_FromLong((long)c);
     PyObject *x;
@@ -6027,8 +6096,8 @@ int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
 /* ensure that *outobj is at least requiredsize characters long,
    if not reallocate and adjust various state variables.
    Return 0 on success, -1 on error */
-static
-int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
+static int
+charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
                                Py_ssize_t requiredsize)
 {
     Py_ssize_t oldsize = PyUnicode_GET_SIZE(*outobj);
@@ -6050,10 +6119,10 @@ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
    undefined (in which case no character was written).
    The called must decref result.
    Return 0 on success, -1 on error. */
-static
-int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp,
-                            Py_ssize_t insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp,
-                            PyObject **res)
+static int
+charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp,
+                        Py_ssize_t insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp,
+                        PyObject **res)
 {
     if (charmaptranslate_lookup(*curinp, mapping, res))
         return -1;
@@ -6089,10 +6158,11 @@ int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp
     return 0;
 }
 
-PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
-                                     Py_ssize_t size,
-                                     PyObject *mapping,
-                                     const char *errors)
+PyObject *
+PyUnicode_TranslateCharmap(const Py_UNICODE *p,
+                           Py_ssize_t size,
+                           PyObject *mapping,
+                           const char *errors)
 {
     /* output object */
     PyObject *res = NULL;
@@ -6231,9 +6301,10 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
     return NULL;
 }
 
-PyObject *PyUnicode_Translate(PyObject *str,
-                              PyObject *mapping,
-                              const char *errors)
+PyObject *
+PyUnicode_Translate(PyObject *str,
+                    PyObject *mapping,
+                    const char *errors)
 {
     PyObject *result;
 
@@ -6278,10 +6349,11 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
 }
 /* --- Decimal Encoder ---------------------------------------------------- */
 
-int PyUnicode_EncodeDecimal(Py_UNICODE *s,
-                            Py_ssize_t length,
-                            char *output,
-                            const char *errors)
+int
+PyUnicode_EncodeDecimal(Py_UNICODE *s,
+                        Py_ssize_t length,
+                        char *output,
+                        const char *errors)
 {
     Py_UNICODE *p, *end;
     PyObject *errorHandler = NULL;
@@ -6442,10 +6514,11 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
             start = 0;                          \
     }
 
-Py_ssize_t PyUnicode_Count(PyObject *str,
-                           PyObject *substr,
-                           Py_ssize_t start,
-                           Py_ssize_t end)
+Py_ssize_t
+PyUnicode_Count(PyObject *str,
+                PyObject *substr,
+                Py_ssize_t start,
+                Py_ssize_t end)
 {
     Py_ssize_t result;
     PyUnicodeObject* str_obj;
@@ -6472,11 +6545,12 @@ Py_ssize_t PyUnicode_Count(PyObject *str,
     return result;
 }
 
-Py_ssize_t PyUnicode_Find(PyObject *str,
-                          PyObject *sub,
-                          Py_ssize_t start,
-                          Py_ssize_t end,
-                          int direction)
+Py_ssize_t
+PyUnicode_Find(PyObject *str,
+               PyObject *sub,
+               Py_ssize_t start,
+               Py_ssize_t end,
+               int direction)
 {
     Py_ssize_t result;
 
@@ -6508,12 +6582,12 @@ Py_ssize_t PyUnicode_Find(PyObject *str,
     return result;
 }
 
-static
-int tailmatch(PyUnicodeObject *self,
-              PyUnicodeObject *substring,
-              Py_ssize_t start,
-              Py_ssize_t end,
-              int direction)
+static int
+tailmatch(PyUnicodeObject *self,
+          PyUnicodeObject *substring,
+          Py_ssize_t start,
+          Py_ssize_t end,
+          int direction)
 {
     if (substring->length == 0)
         return 1;
@@ -6534,11 +6608,12 @@ int tailmatch(PyUnicodeObject *self,
     return 0;
 }
 
-Py_ssize_t PyUnicode_Tailmatch(PyObject *str,
-                               PyObject *substr,
-                               Py_ssize_t start,
-                               Py_ssize_t end,
-                               int direction)
+Py_ssize_t
+PyUnicode_Tailmatch(PyObject *str,
+                    PyObject *substr,
+                    Py_ssize_t start,
+                    Py_ssize_t end,
+                    int direction)
 {
     Py_ssize_t result;
 
@@ -6562,9 +6637,9 @@ Py_ssize_t PyUnicode_Tailmatch(PyObject *str,
 /* Apply fixfct filter to the Unicode object self and return a
    reference to the modified object */
 
-static
-PyObject *fixup(PyUnicodeObject *self,
-                int (*fixfct)(PyUnicodeObject *s))
+static PyObject *
+fixup(PyUnicodeObject *self,
+      int (*fixfct)(PyUnicodeObject *s))
 {
 
     PyUnicodeObject *u;
@@ -6586,8 +6661,8 @@ PyObject *fixup(PyUnicodeObject *self,
     return (PyObject*) u;
 }
 
-static
-int fixupper(PyUnicodeObject *self)
+static int
+fixupper(PyUnicodeObject *self)
 {
     Py_ssize_t len = self->length;
     Py_UNICODE *s = self->str;
@@ -6607,8 +6682,8 @@ int fixupper(PyUnicodeObject *self)
     return status;
 }
 
-static
-int fixlower(PyUnicodeObject *self)
+static int
+fixlower(PyUnicodeObject *self)
 {
     Py_ssize_t len = self->length;
     Py_UNICODE *s = self->str;
@@ -6628,8 +6703,8 @@ int fixlower(PyUnicodeObject *self)
     return status;
 }
 
-static
-int fixswapcase(PyUnicodeObject *self)
+static int
+fixswapcase(PyUnicodeObject *self)
 {
     Py_ssize_t len = self->length;
     Py_UNICODE *s = self->str;
@@ -6649,8 +6724,8 @@ int fixswapcase(PyUnicodeObject *self)
     return status;
 }
 
-static
-int fixcapitalize(PyUnicodeObject *self)
+static int
+fixcapitalize(PyUnicodeObject *self)
 {
     Py_ssize_t len = self->length;
     Py_UNICODE *s = self->str;
@@ -6673,8 +6748,8 @@ int fixcapitalize(PyUnicodeObject *self)
     return status;
 }
 
-static
-int fixtitle(PyUnicodeObject *self)
+static int
+fixtitle(PyUnicodeObject *self)
 {
     register Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
     register Py_UNICODE *e;
@@ -6824,11 +6899,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
     return NULL;
 }
 
-static
-PyUnicodeObject *pad(PyUnicodeObject *self,
-                     Py_ssize_t left,
-                     Py_ssize_t right,
-                     Py_UNICODE fill)
+static PyUnicodeObject *
+pad(PyUnicodeObject *self,
+    Py_ssize_t left,
+    Py_ssize_t right,
+    Py_UNICODE fill)
 {
     PyUnicodeObject *u;
 
@@ -6859,7 +6934,8 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
     return u;
 }
 
-PyObject *PyUnicode_Splitlines(PyObject *string, int keepends)
+PyObject *
+PyUnicode_Splitlines(PyObject *string, int keepends)
 {
     PyObject *list;
 
@@ -6875,10 +6951,10 @@ PyObject *PyUnicode_Splitlines(PyObject *string, int keepends)
     return list;
 }
 
-static
-PyObject *split(PyUnicodeObject *self,
-                PyUnicodeObject *substring,
-                Py_ssize_t maxcount)
+static PyObject *
+split(PyUnicodeObject *self,
+      PyUnicodeObject *substring,
+      Py_ssize_t maxcount)
 {
     if (maxcount < 0)
         maxcount = PY_SSIZE_T_MAX;
@@ -6895,10 +6971,10 @@ PyObject *split(PyUnicodeObject *self,
         );
 }
 
-static
-PyObject *rsplit(PyUnicodeObject *self,
-                 PyUnicodeObject *substring,
-                 Py_ssize_t maxcount)
+static PyObject *
+rsplit(PyUnicodeObject *self,
+       PyUnicodeObject *substring,
+       Py_ssize_t maxcount)
 {
     if (maxcount < 0)
         maxcount = PY_SSIZE_T_MAX;
@@ -6915,11 +6991,11 @@ PyObject *rsplit(PyUnicodeObject *self,
         );
 }
 
-static
-PyObject *replace(PyUnicodeObject *self,
-                  PyUnicodeObject *str1,
-                  PyUnicodeObject *str2,
-                  Py_ssize_t maxcount)
+static PyObject *
+replace(PyUnicodeObject *self,
+        PyUnicodeObject *str1,
+        PyUnicodeObject *str2,
+        Py_ssize_t maxcount)
 {
     PyUnicodeObject *u;
 
@@ -6977,7 +7053,7 @@ PyObject *replace(PyUnicodeObject *self,
         }
     } else {
 
-        Py_ssize_t n, i, j, e;
+        Py_ssize_t n, i, j;
         Py_ssize_t product, new_size, delta;
         Py_UNICODE *p;
 
@@ -7009,7 +7085,6 @@ PyObject *replace(PyUnicodeObject *self,
             return NULL;
         i = 0;
         p = u->str;
-        e = self->length - str1->length;
         if (str1->length > 0) {
             while (n-- > 0) {
                 /* look for next match */
@@ -7254,8 +7329,8 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
 
 #endif
 
-int PyUnicode_Compare(PyObject *left,
-                      PyObject *right)
+int
+PyUnicode_Compare(PyObject *left, PyObject *right)
 {
     if (PyUnicode_Check(left) && PyUnicode_Check(right))
         return unicode_compare((PyUnicodeObject *)left,
@@ -7291,16 +7366,14 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
 #define TEST_COND(cond)                         \
     ((cond) ? Py_True : Py_False)
 
-PyObject *PyUnicode_RichCompare(PyObject *left,
-                                PyObject *right,
-                                int op)
+PyObject *
+PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
 {
     int result;
 
     if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
         PyObject *v;
-        if (((PyUnicodeObject *) left)->length !=
-            ((PyUnicodeObject *) right)->length) {
+        if (PyUnicode_GET_SIZE(left) != PyUnicode_GET_SIZE(right)) {
             if (op == Py_EQ) {
                 Py_INCREF(Py_False);
                 return Py_False;
@@ -7348,8 +7421,8 @@ PyObject *PyUnicode_RichCompare(PyObject *left,
     return Py_NotImplemented;
 }
 
-int PyUnicode_Contains(PyObject *container,
-                       PyObject *element)
+int
+PyUnicode_Contains(PyObject *container, PyObject *element)
 {
     PyObject *str, *sub;
     int result;
@@ -7379,8 +7452,8 @@ int PyUnicode_Contains(PyObject *container,
 
 /* Concat to string or Unicode object giving a new Unicode object. */
 
-PyObject *PyUnicode_Concat(PyObject *left,
-                           PyObject *right)
+PyObject *
+PyUnicode_Concat(PyObject *left, PyObject *right)
 {
     PyUnicodeObject *u = NULL, *v = NULL, *w;
 
@@ -8293,10 +8366,11 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
     return (PyObject*) u;
 }
 
-PyObject *PyUnicode_Replace(PyObject *obj,
-                            PyObject *subobj,
-                            PyObject *replobj,
-                            Py_ssize_t maxcount)
+PyObject *
+PyUnicode_Replace(PyObject *obj,
+                  PyObject *subobj,
+                  PyObject *replobj,
+                  Py_ssize_t maxcount)
 {
     PyObject *self;
     PyObject *str1;
@@ -8360,8 +8434,8 @@ unicode_replace(PyUnicodeObject *self, PyObject *args)
     return result;
 }
 
-static
-PyObject *unicode_repr(PyObject *unicode)
+static PyObject *
+unicode_repr(PyObject *unicode)
 {
     PyObject *repr;
     Py_UNICODE *p;
@@ -8596,9 +8670,8 @@ unicode_rjust(PyUnicodeObject *self, PyObject *args)
     return (PyObject*) pad(self, width - self->length, 0, fillchar);
 }
 
-PyObject *PyUnicode_Split(PyObject *s,
-                          PyObject *sep,
-                          Py_ssize_t maxsplit)
+PyObject *
+PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
 {
     PyObject *result;
 
@@ -8727,9 +8800,8 @@ unicode_rpartition(PyUnicodeObject *self, PyObject *separator)
     return PyUnicode_RPartition((PyObject *)self, separator);
 }
 
-PyObject *PyUnicode_RSplit(PyObject *s,
-                           PyObject *sep,
-                           Py_ssize_t maxsplit)
+PyObject *
+PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
 {
     PyObject *result;
 
@@ -9426,8 +9498,8 @@ formatchar(Py_UNICODE *buf,
 */
 #define FORMATBUFLEN (size_t)10
 
-PyObject *PyUnicode_Format(PyObject *format,
-                           PyObject *args)
+PyObject *
+PyUnicode_Format(PyObject *format, PyObject *args)
 {
     Py_UNICODE *fmt, *res;
     Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx;
@@ -10118,7 +10190,8 @@ PyUnicode_InternFromString(const char *cp)
     return s;
 }
 
-void _Py_ReleaseInternedUnicodeStrings(void)
+void
+_Py_ReleaseInternedUnicodeStrings(void)
 {
     PyObject *keys;
     PyUnicodeObject *s;