1 files changed, 276 insertions, 296 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3bf1932..5234901 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -727,7 +727,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
     _Py_DEC_REFTOTAL;
     _Py_ForgetReference(unicode);
 
-    new_unicode = (PyObject *)PyObject_REALLOC((char *)unicode, new_size);
+    new_unicode = (PyObject *)PyObject_REALLOC(unicode, new_size);
     if (new_unicode == NULL) {
         _Py_NewReference(unicode);
         PyErr_NoMemory();
@@ -816,7 +816,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
     assert(_PyUnicode_WSTR(unicode) != NULL);
 
     /* check for integer overflow */
-    if (length > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
+    if (length > PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1) {
         PyErr_NoMemory();
         return -1;
     }
@@ -888,7 +888,7 @@ _PyUnicode_New(Py_ssize_t length)
     }
 
     /* Ensure we won't overflow the size. */
-    if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
+    if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) {
         return (PyUnicodeObject *)PyErr_NoMemory();
     }
     if (length < 0) {
@@ -2239,7 +2239,7 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
     if (copy_null)
         targetlen++;
     if (!target) {
-        if (PY_SSIZE_T_MAX / sizeof(Py_UCS4) < targetlen) {
+        if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UCS4) < targetlen) {
             PyErr_NoMemory();
             return NULL;
         }
@@ -2313,35 +2313,6 @@ PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size)
 
 #endif /* HAVE_WCHAR_H */
 
-static void
-makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
-        char c)
-{
-    *fmt++ = '%';
-    if (longflag)
-        *fmt++ = 'l';
-    else if (longlongflag) {
-        /* longlongflag should only ever be nonzero on machines with
-           HAVE_LONG_LONG defined */
-#ifdef HAVE_LONG_LONG
-        char *f = PY_FORMAT_LONG_LONG;
-        while (*f)
-            *fmt++ = *f++;
-#else
-        /* we shouldn't ever get here */
-        assert(0);
-        *fmt++ = 'l';
-#endif
-    }
-    else if (size_tflag) {
-        char *f = PY_FORMAT_SIZE_T;
-        while (*f)
-            *fmt++ = *f++;
-    }
-    *fmt++ = c;
-    *fmt = '\0';
-}
-
 /* maximum number of characters required for output of %lld or %p.
    We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
    plus 1 for the sign.  53/22 is an upper bound for log10(256). */
@@ -2517,48 +2488,42 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
     case 'x':
     {
         /* used by sprintf */
-        char fmt[10]; /* should be enough for "%0lld\0" */
         char buffer[MAX_LONG_LONG_CHARS];
         Py_ssize_t arglen;
 
         if (*f == 'u') {
-            makefmt(fmt, longflag, longlongflag, size_tflag, *f);
-
             if (longflag)
-                len = sprintf(buffer, fmt,
+                len = sprintf(buffer, "%lu",
                         va_arg(*vargs, unsigned long));
 #ifdef HAVE_LONG_LONG
             else if (longlongflag)
-                len = sprintf(buffer, fmt,
+                len = sprintf(buffer, "%" PY_FORMAT_LONG_LONG "u",
                         va_arg(*vargs, unsigned PY_LONG_LONG));
 #endif
             else if (size_tflag)
-                len = sprintf(buffer, fmt,
+                len = sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
                         va_arg(*vargs, size_t));
             else
-                len = sprintf(buffer, fmt,
+                len = sprintf(buffer, "%u",
                         va_arg(*vargs, unsigned int));
         }
         else if (*f == 'x') {
-            makefmt(fmt, 0, 0, 0, 'x');
-            len = sprintf(buffer, fmt, va_arg(*vargs, int));
+            len = sprintf(buffer, "%x", va_arg(*vargs, int));
         }
         else {
-            makefmt(fmt, longflag, longlongflag, size_tflag, *f);
-
             if (longflag)
-                len = sprintf(buffer, fmt,
+                len = sprintf(buffer, "%li",
                         va_arg(*vargs, long));
 #ifdef HAVE_LONG_LONG
             else if (longlongflag)
-                len = sprintf(buffer, fmt,
+                len = sprintf(buffer, "%" PY_FORMAT_LONG_LONG "i",
                         va_arg(*vargs, PY_LONG_LONG));
 #endif
             else if (size_tflag)
-                len = sprintf(buffer, fmt,
+                len = sprintf(buffer, "%" PY_FORMAT_SIZE_T "i",
                         va_arg(*vargs, Py_ssize_t));
             else
-                len = sprintf(buffer, fmt,
+                len = sprintf(buffer, "%i",
                         va_arg(*vargs, int));
         }
         assert(len >= 0);
@@ -2852,7 +2817,7 @@ PyUnicode_AsWideCharString(PyObject *unicode,
     buflen = unicode_aswidechar(unicode, NULL, 0);
     if (buflen == -1)
         return NULL;
-    if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) {
+    if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < buflen) {
         PyErr_NoMemory();
         return NULL;
     }
@@ -3247,7 +3212,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
     wlen2 = wcslen(wstr);
     if (wlen2 != wlen) {
         PyMem_Free(wstr);
-        PyErr_SetString(PyExc_TypeError, "embedded null character");
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
         return NULL;
     }
 
@@ -3255,7 +3220,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
         /* "surrogateescape" error handler */
         char *str;
 
-        str = _Py_wchar2char(wstr, &error_pos);
+        str = Py_EncodeLocale(wstr, &error_pos);
         if (str == NULL) {
             if (error_pos == (size_t)-1) {
                 PyErr_NoMemory();
@@ -3308,7 +3273,7 @@ encode_error:
 
     if (errmsg != NULL) {
         size_t errlen;
-        wstr = _Py_char2wchar(errmsg, &errlen);
+        wstr = Py_DecodeLocale(errmsg, &errlen);
         if (wstr != NULL) {
             reason = PyUnicode_FromWideChar(wstr, errlen);
             PyMem_RawFree(wstr);
@@ -3483,7 +3448,7 @@ mbstowcs_errorpos(const char *str, size_t len)
     memset(&mbs, 0, sizeof mbs);
     while (len)
     {
-        converted = mbrtowc(&ch, (char*)str, len, &mbs);
+        converted = mbrtowc(&ch, str, len, &mbs);
         if (converted == 0)
             /* Reached end of string */
             break;
@@ -3519,14 +3484,14 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
     if (locale_error_handler(errors, &surrogateescape) < 0)
         return NULL;
 
-    if (str[len] != '\0' || len != strlen(str)) {
-        PyErr_SetString(PyExc_TypeError, "embedded null character");
+    if (str[len] != '\0' || (size_t)len != strlen(str))  {
+        PyErr_SetString(PyExc_ValueError, "embedded null byte");
         return NULL;
     }
 
     if (surrogateescape) {
         /* "surrogateescape" error handler */
-        wstr = _Py_char2wchar(str, &wlen);
+        wstr = Py_DecodeLocale(str, &wlen);
         if (wstr == NULL) {
             if (wlen == (size_t)-1)
                 PyErr_NoMemory();
@@ -3581,7 +3546,7 @@ decode_error:
     error_pos = mbstowcs_errorpos(str, len);
     if (errmsg != NULL) {
         size_t errlen;
-        wstr = _Py_char2wchar(errmsg, &errlen);
+        wstr = Py_DecodeLocale(errmsg, &errlen);
         if (wstr != NULL) {
             reason = PyUnicode_FromWideChar(wstr, errlen);
             PyMem_RawFree(wstr);
@@ -3696,8 +3661,8 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
     }
     size = PyBytes_GET_SIZE(output);
     data = PyBytes_AS_STRING(output);
-    if (size != strlen(data)) {
-        PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+    if ((size_t)size != strlen(data)) {
+        PyErr_SetString(PyExc_ValueError, "embedded null byte");
         Py_DECREF(output);
         return 0;
     }
@@ -3741,7 +3706,7 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
     }
     if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output),
                  PyUnicode_GET_LENGTH(output), 0, 1) >= 0) {
-        PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
         Py_DECREF(output);
         return 0;
     }
@@ -6849,28 +6814,6 @@ code_page_name(UINT code_page, PyObject **obj)
     return PyBytes_AS_STRING(*obj);
 }
 
-static int
-is_dbcs_lead_byte(UINT code_page, const char *s, int offset)
-{
-    const char *curr = s + offset;
-    const char *prev;
-
-    if (!IsDBCSLeadByteEx(code_page, *curr))
-        return 0;
-
-    prev = CharPrevExA(code_page, s, curr, 0);
-    if (prev == curr)
-        return 1;
-    /* FIXME: This code is limited to "true" double-byte encodings,
-       as it assumes an incomplete character consists of a single
-       byte. */
-    if (curr - prev == 2)
-        return 1;
-    if (!IsDBCSLeadByteEx(code_page, *prev))
-        return 1;
-    return 0;
-}
-
 static DWORD
 decode_code_page_flags(UINT code_page)
 {
@@ -6945,7 +6888,7 @@ static int
 decode_code_page_errors(UINT code_page,
                         PyObject **v,
                         const char *in, const int size,
-                        const char *errors)
+                        const char *errors, int final)
 {
     const char *startin = in;
     const char *endin = in + size;
@@ -6972,7 +6915,7 @@ decode_code_page_errors(UINT code_page,
     if (encoding == NULL)
         return -1;
 
-    if (errors == NULL || strcmp(errors, "strict") == 0) {
+    if ((errors == NULL || strcmp(errors, "strict") == 0) && final) {
         /* The last error was ERROR_NO_UNICODE_TRANSLATION, then we raise a
            UnicodeDecodeError. */
         make_decode_exception(&exc, encoding, in, size, 0, 0, reason);
@@ -7035,6 +6978,10 @@ decode_code_page_errors(UINT code_page,
         if (outsize <= 0) {
             Py_ssize_t startinpos, endinpos, outpos;
 
+            /* last character in partial decode? */
+            if (in + insize >= endin && !final)
+                break;
+
             startinpos = in - startin;
             endinpos = startinpos + 1;
             outpos = out - PyUnicode_AS_UNICODE(*v);
@@ -7063,7 +7010,8 @@ decode_code_page_errors(UINT code_page,
     assert(outsize <= PyUnicode_WSTR_LENGTH(*v));
     if (unicode_resize(v, outsize) < 0)
         goto error;
-    ret = size;
+    /* (in - startin) <= size and size is an int */
+    ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int);
 
 error:
     Py_XDECREF(encoding_obj);
@@ -7104,24 +7052,19 @@ decode_code_page_stateful(int code_page,
             done = 1;
         }
 
-        /* Skip trailing lead-byte unless 'final' is set */
-        if (!final && is_dbcs_lead_byte(code_page, s, chunk_size - 1))
-            --chunk_size;
-
         if (chunk_size == 0 && done) {
             if (v != NULL)
                 break;
             _Py_RETURN_UNICODE_EMPTY();
         }
 
-
         converted = decode_code_page_strict(code_page, &v,
                                             s, chunk_size);
         if (converted == -2)
             converted = decode_code_page_errors(code_page, &v,
                                                 s, chunk_size,
-                                                errors);
-        assert(converted != 0);
+                                                errors, final);
+        assert(converted != 0 || done);
 
         if (converted < 0) {
             Py_XDECREF(v);
@@ -8528,10 +8471,10 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
     }
     else if (PyLong_Check(x)) {
         long value = PyLong_AS_LONG(x);
-        long max = PyUnicode_GetMax();
-        if (value < 0 || value > max) {
-            PyErr_Format(PyExc_TypeError,
-                         "character mapping must be in range(0x%x)", max+1);
+        if (value < 0 || value > MAX_UNICODE) {
+            PyErr_Format(PyExc_ValueError,
+                         "character mapping must be in range(0x%x)",
+                         MAX_UNICODE+1);
             Py_DECREF(x);
             return -1;
         }
@@ -8550,76 +8493,168 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
         return -1;
     }
 }
-/* ensure that *outobj is at least requiredsize characters long,
-   if not reallocate and adjust various state variables.
-   Return 0 on success, -1 on error */
+
+/* lookup the character, write the result into the writer.
+   Return 1 if the result was written into the writer, return 0 if the mapping
+   was undefined, raise an exception return -1 on error. */
 static int
-charmaptranslate_makespace(Py_UCS4 **outobj, Py_ssize_t *psize,
-                               Py_ssize_t requiredsize)
-{
-    Py_ssize_t oldsize = *psize;
-    Py_UCS4 *new_outobj;
-    if (requiredsize > oldsize) {
-        /* exponentially overallocate to minimize reallocations */
-        if (requiredsize < 2 * oldsize)
-            requiredsize = 2 * oldsize;
-        new_outobj = PyMem_Realloc(*outobj, requiredsize * sizeof(Py_UCS4));
-        if (new_outobj == 0)
+charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
+                        _PyUnicodeWriter *writer)
+{
+    PyObject *item;
+
+    if (charmaptranslate_lookup(ch, mapping, &item))
+        return -1;
+
+    if (item == NULL) {
+        /* not found => default to 1:1 mapping */
+        if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
             return -1;
-        *outobj = new_outobj;
-        *psize = requiredsize;
+        }
+        return 1;
     }
-    return 0;
+
+    if (item == Py_None) {
+        Py_DECREF(item);
+        return 0;
+    }
+
+    if (PyLong_Check(item)) {
+        long ch = (Py_UCS4)PyLong_AS_LONG(item);
+        /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
+           used it */
+        if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
+            Py_DECREF(item);
+            return -1;
+        }
+        Py_DECREF(item);
+        return 1;
+    }
+
+    if (!PyUnicode_Check(item)) {
+        Py_DECREF(item);
+        return -1;
+    }
+
+    if (_PyUnicodeWriter_WriteStr(writer, item) < 0) {
+        Py_DECREF(item);
+        return -1;
+    }
+
+    Py_DECREF(item);
+    return 1;
 }
-/* lookup the character, put the result in the output string and adjust
-   various state variables. Return a new reference to the object that
-   was put in the output buffer in *result, or Py_None, if the mapping was
-   undefined (in which case no character was written).
-   The called must decref result.
-   Return 0 on success, -1 on error. */
+
 static int
-charmaptranslate_output(PyObject *input, Py_ssize_t ipos,
-                        PyObject *mapping, Py_UCS4 **output,
-                        Py_ssize_t *osize, Py_ssize_t *opos,
-                        PyObject **res)
+unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
+                              Py_UCS1 *translate)
 {
-    Py_UCS4 curinp = PyUnicode_READ_CHAR(input, ipos);
-    if (charmaptranslate_lookup(curinp, mapping, res))
+    PyObject *item = NULL;
+    int ret = 0;
+
+    if (charmaptranslate_lookup(ch, mapping, &item)) {
         return -1;
-    if (*res==NULL) {
+    }
+
+    if (item == Py_None) {
+        /* deletion */
+        translate[ch] = 0xfe;
+    }
+    else if (item == NULL) {
         /* not found => default to 1:1 mapping */
-        (*output)[(*opos)++] = curinp;
+        translate[ch] = ch;
+        return 1;
     }
-    else if (*res==Py_None)
-        ;
-    else if (PyLong_Check(*res)) {
-        /* no overflow check, because we know that the space is enough */
-        (*output)[(*opos)++] = (Py_UCS4)PyLong_AS_LONG(*res);
+    else if (PyLong_Check(item)) {
+        long replace = PyLong_AS_LONG(item);
+        /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
+           used it */
+        if (127 < replace) {
+            /* invalid character or character outside ASCII:
+               skip the fast translate */
+            goto exit;
+        }
+        translate[ch] = (Py_UCS1)replace;
     }
-    else if (PyUnicode_Check(*res)) {
-        Py_ssize_t repsize;
-        if (PyUnicode_READY(*res) == -1)
+    else if (PyUnicode_Check(item)) {
+        Py_UCS4 replace;
+
+        if (PyUnicode_READY(item) == -1) {
+            Py_DECREF(item);
             return -1;
-        repsize = PyUnicode_GET_LENGTH(*res);
-        if (repsize==1) {
-            /* no overflow check, because we know that the space is enough */
-            (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, 0);
         }
-        else if (repsize!=0) {
-            /* more than one character */
-            Py_ssize_t requiredsize = *opos +
-                (PyUnicode_GET_LENGTH(input) - ipos) +
-                repsize - 1;
-            Py_ssize_t i;
-            if (charmaptranslate_makespace(output, osize, requiredsize))
+        if (PyUnicode_GET_LENGTH(item) != 1)
+            goto exit;
+
+        replace = PyUnicode_READ_CHAR(item, 0);
+        if (replace > 127)
+            goto exit;
+        translate[ch] = (Py_UCS1)replace;
+    }
+    else {
+        /* not None, NULL, long or unicode */
+        goto exit;
+    }
+    ret = 1;
+
+  exit:
+    Py_DECREF(item);
+    return ret;
+}
+
+/* Fast path for ascii => ascii translation. Return 1 if the whole string
+   was translated into writer, return 0 if the input string was partially
+   translated into writer, raise an exception and return -1 on error. */
+static int
+unicode_fast_translate(PyObject *input, PyObject *mapping,
+                       _PyUnicodeWriter *writer, int ignore)
+{
+    Py_UCS1 ascii_table[128], ch, ch2;
+    Py_ssize_t len;
+    Py_UCS1 *in, *end, *out;
+    int res = 0;
+
+    if (PyUnicode_READY(input) == -1)
+        return -1;
+    if (!PyUnicode_IS_ASCII(input))
+        return 0;
+    len = PyUnicode_GET_LENGTH(input);
+
+    memset(ascii_table, 0xff, 128);
+
+    in = PyUnicode_1BYTE_DATA(input);
+    end = in + len;
+
+    assert(PyUnicode_IS_ASCII(writer->buffer));
+    assert(PyUnicode_GET_LENGTH(writer->buffer) == len);
+    out = PyUnicode_1BYTE_DATA(writer->buffer);
+
+    for (; in < end; in++) {
+        ch = *in;
+        ch2 = ascii_table[ch];
+        if (ch2 == 0xff) {
+            int translate = unicode_fast_translate_lookup(mapping, ch,
+                                                          ascii_table);
+            if (translate < 0)
                 return -1;
-            for(i = 0; i < repsize; i++)
-                (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, i);
+            if (translate == 0)
+                goto exit;
+            ch2 = ascii_table[ch];
         }
+        if (ch2 == 0xfe) {
+            if (ignore)
+                continue;
+            goto exit;
+        }
+        assert(ch2 < 128);
+        *out = ch2;
+        out++;
     }
-    else
-        return -1;
-    return 0;
+    res = 1;
+
+exit:
+    writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer);
+    return res;
 }
 
 PyObject *
@@ -8628,22 +8663,17 @@ _PyUnicode_TranslateCharmap(PyObject *input,
                             const char *errors)
 {
     /* input object */
-    char *idata;
+    char *data;
     Py_ssize_t size, i;
     int kind;
     /* output buffer */
-    Py_UCS4 *output = NULL;
-    Py_ssize_t osize;
-    PyObject *res;
-    /* current output position */
-    Py_ssize_t opos;
+    _PyUnicodeWriter writer;
+    /* error handler */
     char *reason = "character maps to <undefined>";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
-    /* the following variable is used for caching string comparisons
-     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
-     * 3=ignore, 4=xmlcharrefreplace */
-    int known_errorHandler = -1;
+    int ignore;
+    int res;
 
     if (mapping == NULL) {
         PyErr_BadArgument();
@@ -8652,10 +8682,9 @@ _PyUnicode_TranslateCharmap(PyObject *input,
 
     if (PyUnicode_READY(input) == -1)
         return NULL;
-    idata = (char*)PyUnicode_DATA(input);
+    data = (char*)PyUnicode_DATA(input);
     kind = PyUnicode_KIND(input);
     size = PyUnicode_GET_LENGTH(input);
-    i = 0;
 
     if (size == 0) {
         Py_INCREF(input);
@@ -8664,121 +8693,81 @@ _PyUnicode_TranslateCharmap(PyObject *input,
 
     /* allocate enough for a simple 1:1 translation without
        replacements, if we need more, we'll resize */
-    osize = size;
-    output = PyMem_Malloc(osize * sizeof(Py_UCS4));
-    opos = 0;
-    if (output == NULL) {
-        PyErr_NoMemory();
+    _PyUnicodeWriter_Init(&writer);
+    if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1)
         goto onError;
+
+    ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
+
+    res = unicode_fast_translate(input, mapping, &writer, ignore);
+    if (res < 0) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
     }
+    if (res == 1)
+        return _PyUnicodeWriter_Finish(&writer);
 
+    i = writer.pos;
     while (i<size) {
         /* try to encode it */
-        PyObject *x = NULL;
-        if (charmaptranslate_output(input, i, mapping,
-                                    &output, &osize, &opos, &x)) {
-            Py_XDECREF(x);
+        int translate;
+        PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
+        Py_ssize_t newpos;
+        /* startpos for collecting untranslatable chars */
+        Py_ssize_t collstart;
+        Py_ssize_t collend;
+        Py_UCS4 ch;
+
+        ch = PyUnicode_READ(kind, data, i);
+        translate = charmaptranslate_output(ch, mapping, &writer);
+        if (translate < 0)
             goto onError;
-        }
-        Py_XDECREF(x);
-        if (x!=Py_None) /* it worked => adjust input pointer */
+
+        if (translate != 0) {
+            /* it worked => adjust input pointer */
             ++i;
-        else { /* untranslatable character */
-            PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
-            Py_ssize_t repsize;
-            Py_ssize_t newpos;
-            Py_ssize_t uni2;
-            /* startpos for collecting untranslatable chars */
-            Py_ssize_t collstart = i;
-            Py_ssize_t collend = i+1;
-            Py_ssize_t coll;
-
-            /* find all untranslatable characters */
-            while (collend < size) {
-                if (charmaptranslate_lookup(PyUnicode_READ(kind,idata, collend), mapping, &x))
-                    goto onError;
-                Py_XDECREF(x);
-                if (x!=Py_None)
-                    break;
-                ++collend;
-            }
-            /* cache callback name lookup
-             * (if not done yet, i.e. it's the first error) */
-            if (known_errorHandler==-1) {
-                if ((errors==NULL) || (!strcmp(errors, "strict")))
-                    known_errorHandler = 1;
-                else if (!strcmp(errors, "replace"))
-                    known_errorHandler = 2;
-                else if (!strcmp(errors, "ignore"))
-                    known_errorHandler = 3;
-                else if (!strcmp(errors, "xmlcharrefreplace"))
-                    known_errorHandler = 4;
-                else
-                    known_errorHandler = 0;
-            }
-            switch (known_errorHandler) {
-            case 1: /* strict */
-                make_translate_exception(&exc,
-                                         input, collstart, collend, reason);
-                if (exc != NULL)
-                    PyCodec_StrictErrors(exc);
+            continue;
+        }
+
+        /* untranslatable character */
+        collstart = i;
+        collend = i+1;
+
+        /* find all untranslatable characters */
+        while (collend < size) {
+            PyObject *x;
+            ch = PyUnicode_READ(kind, data, collend);
+            if (charmaptranslate_lookup(ch, mapping, &x))
                 goto onError;
-            case 2: /* replace */
-                /* No need to check for space, this is a 1:1 replacement */
-                for (coll = collstart; coll<collend; coll++)
-                    output[opos++] = '?';
-                /* fall through */
-            case 3: /* ignore */
-                i = collend;
-                break;
-            case 4: /* xmlcharrefreplace */
-                /* generate replacement (temporarily (mis)uses i) */
-                for (i = collstart; i < collend; ++i) {
-                    char buffer[2+29+1+1];
-                    char *cp;
-                    sprintf(buffer, "&#%d;", PyUnicode_READ(kind, idata, i));
-                    if (charmaptranslate_makespace(&output, &osize,
-                                                   opos+strlen(buffer)+(size-collend)))
-                        goto onError;
-                    for (cp = buffer; *cp; ++cp)
-                        output[opos++] = *cp;
-                }
-                i = collend;
+            Py_XDECREF(x);
+            if (x != Py_None)
                 break;
-            default:
-                repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
-                                                                 reason, input, &exc,
-                                                                 collstart, collend, &newpos);
-                if (repunicode == NULL)
-                    goto onError;
-                if (PyUnicode_READY(repunicode) == -1) {
-                    Py_DECREF(repunicode);
-                    goto onError;
-                }
-                /* generate replacement  */
-                repsize = PyUnicode_GET_LENGTH(repunicode);
-                if (charmaptranslate_makespace(&output, &osize,
-                                               opos+repsize+(size-collend))) {
-                    Py_DECREF(repunicode);
-                    goto onError;
-                }
-                for (uni2 = 0; repsize-->0; ++uni2)
-                    output[opos++] = PyUnicode_READ_CHAR(repunicode, uni2);
-                i = newpos;
+            ++collend;
+        }
+
+        if (ignore) {
+            i = collend;
+        }
+        else {
+            repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
+                                                             reason, input, &exc,
+                                                             collstart, collend, &newpos);
+            if (repunicode == NULL)
+                goto onError;
+            if (_PyUnicodeWriter_WriteStr(&writer, repunicode) < 0) {
                 Py_DECREF(repunicode);
+                goto onError;
             }
+            Py_DECREF(repunicode);
+            i = newpos;
         }
     }
-    res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, opos);
-    if (!res)
-        goto onError;
-    PyMem_Free(output);
     Py_XDECREF(exc);
     Py_XDECREF(errorHandler);
-    return res;
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    PyMem_Free(output);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(exc);
     Py_XDECREF(errorHandler);
     return NULL;
@@ -8880,7 +8869,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
 
     maxchar = 127;
     for (i = 0; i < length; i++) {
-        Py_UNICODE ch = s[i];
+        Py_UCS4 ch = s[i];
         if (ch > 127) {
             int decimal = Py_UNICODE_TODECIMAL(ch);
             if (decimal >= 0)
@@ -8897,7 +8886,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
     data = PyUnicode_DATA(decimal);
     /* Iterate over code points */
     for (i = 0; i < length; i++) {
-        Py_UNICODE ch = s[i];
+        Py_UCS4 ch = s[i];
         if (ch > 127) {
             int decimal = Py_UNICODE_TODECIMAL(ch);
             if (decimal >= 0)
@@ -10843,7 +10832,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
         void *data = PyUnicode_DATA(uni);
         /* Compare Unicode string and source character set string */
         for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
-            if (chr != str[i])
+            if (chr != (unsigned char)str[i])
                 return (chr < (unsigned char)(str[i])) ? -1 : 1;
         /* This check keeps Python strings that end in '\0' from comparing equal
          to C strings identical up to that point. */
@@ -14051,24 +14040,14 @@ mainformatlong(PyObject *v,
     if (!PyNumber_Check(v))
         goto wrongtype;
 
-    /* make sure number is a type of integer */
-    /* if not, issue deprecation warning for now */
+    /* make sure number is a type of integer for o, x, and X */
     if (!PyLong_Check(v)) {
         if (type == 'o' || type == 'x' || type == 'X') {
             iobj = PyNumber_Index(v);
             if (iobj == NULL) {
-                PyErr_Clear();
-                if (PyErr_WarnEx(PyExc_DeprecationWarning,
-                                 "automatic int conversions have been deprecated",
-                                 1)) {
-                    return -1;
-                }
-                iobj = PyNumber_Long(v);
-                if (iobj == NULL ) {
-                    if (PyErr_ExceptionMatches(PyExc_TypeError))
-                        goto wrongtype;
-                    return -1;
-                }
+                if (PyErr_ExceptionMatches(PyExc_TypeError))
+                    goto wrongtype;
+                return -1;
             }
         }
         else {
@@ -14129,10 +14108,23 @@ mainformatlong(PyObject *v,
     return 0;
 
 wrongtype:
-    PyErr_Format(PyExc_TypeError,
-            "%%%c format: a number is required, "
-            "not %.200s",
-            type, Py_TYPE(v)->tp_name);
+    switch(type)
+    {
+        case 'o':
+        case 'x':
+        case 'X':
+            PyErr_Format(PyExc_TypeError,
+                    "%%%c format: an integer is required, "
+                    "not %.200s",
+                    type, Py_TYPE(v)->tp_name);
+            break;
+        default:
+            PyErr_Format(PyExc_TypeError,
+                    "%%%c format: a number is required, "
+                    "not %.200s",
+                    type, Py_TYPE(v)->tp_name);
+            break;
+    }
     return -1;
 }
 
@@ -14150,22 +14142,10 @@ formatchar(PyObject *v)
         PyObject *iobj;
         long x;
         /* make sure number is a type of integer */
-        /* if not, issue deprecation warning for now */
         if (!PyLong_Check(v)) {
             iobj = PyNumber_Index(v);
             if (iobj == NULL) {
-                PyErr_Clear();
-                if (PyErr_WarnEx(PyExc_DeprecationWarning,
-                                 "automatic int conversions have been deprecated",
-                                 1)) {
-                    return -1;
-                }
-                iobj = PyNumber_Long(v);
-                if (iobj == NULL ) {
-                    if (PyErr_ExceptionMatches(PyExc_TypeError))
-                        goto onError;
-                    return -1;
-                }
+                goto onError;
             }
             v = iobj;
             Py_DECREF(iobj);
@@ -15419,7 +15399,7 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
     if (u == NULL)
         return NULL;
     /* Ensure we won't overflow the size. */
-    if (len > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
+    if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) {
         PyErr_NoMemory();
         return NULL;
     }